-rw-r--r--  CREDITS.TXT | 4
-rw-r--r--  LICENSE.TXT | 2
-rw-r--r--  Makefile.config.in | 3
-rw-r--r--  Makefile.rules | 40
-rw-r--r--  autoconf/configure.ac | 28
-rw-r--r--  bindings/ocaml/llvm/llvm.ml | 16
-rw-r--r--  bindings/ocaml/llvm/llvm.mli | 48
-rw-r--r--  bindings/ocaml/llvm/llvm_ocaml.c | 62
-rwxr-xr-x  cmake/config-ix.cmake | 2
-rw-r--r--  cmake/modules/LLVMLibDeps.cmake | 50
-rwxr-xr-x  configure | 38
-rw-r--r--  docs/BranchWeightMetadata.html | 163
-rw-r--r--  docs/ExtendingLLVM.html | 4
-rw-r--r--  docs/LangRef.html | 228
-rw-r--r--  docs/ProgrammersManual.html | 243
-rw-r--r--  docs/ReleaseNotes.html | 52
-rw-r--r--  docs/WritingAnLLVMBackend.html | 82
-rw-r--r--  docs/index.html | 7
-rw-r--r--  docs/tutorial/OCamlLangImpl3.html | 13
-rw-r--r--  examples/BrainF/BrainF.cpp | 10
-rw-r--r--  examples/ExceptionDemo/ExceptionDemo.cpp | 37
-rw-r--r--  examples/Kaleidoscope/Chapter3/toy.cpp | 6
-rw-r--r--  examples/Kaleidoscope/Chapter4/toy.cpp | 6
-rw-r--r--  examples/Kaleidoscope/Chapter5/toy.cpp | 6
-rw-r--r--  examples/Kaleidoscope/Chapter6/toy.cpp | 8
-rw-r--r--  examples/Kaleidoscope/Chapter7/toy.cpp | 8
-rw-r--r--  include/llvm-c/Core.h | 34
-rw-r--r--  include/llvm-c/Target.h | 12
-rw-r--r--  include/llvm-c/Transforms/IPO.h | 3
-rw-r--r--  include/llvm/ADT/APFloat.h | 2
-rw-r--r--  include/llvm/ADT/APInt.h | 7
-rw-r--r--  include/llvm/ADT/ArrayRef.h | 38
-rw-r--r--  include/llvm/ADT/ImmutableList.h | 8
-rw-r--r--  include/llvm/ADT/PackedVector.h | 2
-rw-r--r--  include/llvm/ADT/SmallVector.h | 9
-rw-r--r--  include/llvm/ADT/StringMap.h | 21
-rw-r--r--  include/llvm/ADT/Triple.h | 20
-rw-r--r--  include/llvm/AbstractTypeUser.h | 205
-rw-r--r--  include/llvm/Analysis/BlockFrequency.h | 53
-rw-r--r--  include/llvm/Analysis/BlockFrequencyImpl.h | 349
-rw-r--r--  include/llvm/Analysis/BranchProbabilityInfo.h | 13
-rw-r--r--  include/llvm/Analysis/DIBuilder.h | 3
-rw-r--r--  include/llvm/Analysis/IVUsers.h | 16
-rw-r--r--  include/llvm/Analysis/MemoryDependenceAnalysis.h | 11
-rw-r--r--  include/llvm/Analysis/Passes.h | 7
-rw-r--r--  include/llvm/Analysis/ScalarEvolutionExpander.h | 10
-rw-r--r--  include/llvm/Analysis/ValueTracking.h | 16
-rw-r--r--  include/llvm/Assembly/Writer.h | 40
-rw-r--r--  include/llvm/Attributes.h | 5
-rw-r--r--  include/llvm/BasicBlock.h | 12
-rw-r--r--  include/llvm/Bitcode/BitstreamReader.h | 7
-rw-r--r--  include/llvm/Bitcode/LLVMBitCodes.h | 100
-rw-r--r--  include/llvm/CodeGen/Analysis.h | 7
-rw-r--r--  include/llvm/CodeGen/AsmPrinter.h | 4
-rw-r--r--  include/llvm/CodeGen/FunctionLoweringInfo.h | 3
-rw-r--r--  include/llvm/CodeGen/ISDOpcodes.h | 5
-rw-r--r--  include/llvm/CodeGen/LinkAllCodegenComponents.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineBasicBlock.h | 44
-rw-r--r--  include/llvm/CodeGen/MachineBlockFrequency.h | 53
-rw-r--r--  include/llvm/CodeGen/MachineBranchProbabilityInfo.h | 78
-rw-r--r--  include/llvm/CodeGen/MachineFunction.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineInstr.h | 42
-rw-r--r--  include/llvm/CodeGen/MachineInstrBuilder.h | 31
-rw-r--r--  include/llvm/CodeGen/MachineOperand.h | 16
-rw-r--r--  include/llvm/CodeGen/MachineRegisterInfo.h | 20
-rw-r--r--  include/llvm/CodeGen/Passes.h | 11
-rw-r--r--  include/llvm/CodeGen/RegAllocPBQP.h | 3
-rw-r--r--  include/llvm/CodeGen/RegisterCoalescer.h | 244
-rw-r--r--  include/llvm/CodeGen/RuntimeLibcalls.h | 9
-rw-r--r--  include/llvm/CodeGen/ScheduleDAG.h | 16
-rw-r--r--  include/llvm/CodeGen/ScoreboardHazardRecognizer.h | 1
-rw-r--r--  include/llvm/CodeGen/SelectionDAG.h | 10
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h | 27
-rw-r--r--  include/llvm/CodeGen/SlotIndexes.h | 3
-rw-r--r--  include/llvm/CodeGen/TargetLoweringObjectFileImpl.h | 6
-rw-r--r--  include/llvm/CodeGen/ValueTypes.h | 6
-rw-r--r--  include/llvm/CodeGen/ValueTypes.td | 5
-rw-r--r--  include/llvm/Config/config.h.cmake | 6
-rw-r--r--  include/llvm/Config/config.h.in | 3
-rw-r--r--  include/llvm/Config/llvm-config.h.cmake | 3
-rw-r--r--  include/llvm/Config/llvm-config.h.in | 3
-rw-r--r--  include/llvm/Constant.h | 4
-rw-r--r--  include/llvm/Constants.h | 160
-rw-r--r--  include/llvm/DefaultPasses.h | 1
-rw-r--r--  include/llvm/DerivedTypes.h | 325
-rw-r--r--  include/llvm/ExecutionEngine/RuntimeDyld.h | 1
-rw-r--r--  include/llvm/Function.h | 10
-rw-r--r--  include/llvm/GlobalAlias.h | 15
-rw-r--r--  include/llvm/GlobalValue.h | 12
-rw-r--r--  include/llvm/GlobalVariable.h | 7
-rw-r--r--  include/llvm/InitializePasses.h | 13
-rw-r--r--  include/llvm/InlineAsm.h | 39
-rw-r--r--  include/llvm/Instructions.h | 495
-rw-r--r--  include/llvm/Intrinsics.h | 9
-rw-r--r--  include/llvm/Intrinsics.td | 16
-rw-r--r--  include/llvm/LLVMContext.h | 3
-rw-r--r--  include/llvm/LinkAllPasses.h | 6
-rw-r--r--  include/llvm/MC/MCAsmInfo.h | 34
-rw-r--r--  include/llvm/MC/MCContext.h | 13
-rw-r--r--  include/llvm/MC/MCInstrDesc.h (renamed from include/llvm/Target/TargetInstrDesc.h) | 154
-rw-r--r--  include/llvm/MC/MCInstrInfo.h | 51
-rw-r--r--  include/llvm/MC/MCInstrItineraries.h (renamed from include/llvm/Target/TargetInstrItineraries.h) | 6
-rw-r--r--  include/llvm/MC/MCMachObjectWriter.h | 192
-rw-r--r--  include/llvm/MC/MCObjectStreamer.h | 3
-rw-r--r--  include/llvm/MC/MCParser/MCParsedAsmOperand.h | 14
-rw-r--r--  include/llvm/MC/MCRegisterInfo.h | 129
-rw-r--r--  include/llvm/MC/MCStreamer.h | 6
-rw-r--r--  include/llvm/MC/MCSubtargetInfo.h | 79
-rw-r--r--  include/llvm/MC/SubtargetFeature.h (renamed from include/llvm/Target/SubtargetFeature.h) | 46
-rw-r--r--  include/llvm/Module.h | 76
-rw-r--r--  include/llvm/Object/Binary.h | 67
-rw-r--r--  include/llvm/Object/COFF.h | 117
-rw-r--r--  include/llvm/Object/Error.h | 50
-rw-r--r--  include/llvm/Object/ObjectFile.h | 178
-rw-r--r--  include/llvm/Support/BranchProbability.h | 16
-rw-r--r--  include/llvm/Support/CFG.h | 16
-rw-r--r--  include/llvm/Support/ConstantFolder.h | 10
-rw-r--r--  include/llvm/Support/DebugLoc.h | 7
-rw-r--r--  include/llvm/Support/ELF.h | 8
-rw-r--r--  include/llvm/Support/Endian.h | 1
-rw-r--r--  include/llvm/Support/IRBuilder.h | 135
-rw-r--r--  include/llvm/Support/NoFolder.h | 12
-rw-r--r--  include/llvm/Support/PassManagerBuilder.h | 13
-rw-r--r--  include/llvm/Support/TargetFolder.h | 11
-rw-r--r--  include/llvm/Support/TypeBuilder.h | 75
-rw-r--r--  include/llvm/Support/system_error.h | 4
-rw-r--r--  include/llvm/Target/Target.td | 169
-rw-r--r--  include/llvm/Target/TargetAsmInfo.h | 31
-rw-r--r--  include/llvm/Target/TargetAsmParser.h | 13
-rw-r--r--  include/llvm/Target/TargetData.h | 8
-rw-r--r--  include/llvm/Target/TargetFrameLowering.h | 10
-rw-r--r--  include/llvm/Target/TargetInstrInfo.h | 51
-rw-r--r--  include/llvm/Target/TargetLowering.h | 13
-rw-r--r--  include/llvm/Target/TargetLoweringObjectFile.h | 9
-rw-r--r--  include/llvm/Target/TargetMachine.h | 50
-rw-r--r--  include/llvm/Target/TargetOpcodes.h | 4
-rw-r--r--  include/llvm/Target/TargetOptions.h | 4
-rw-r--r--  include/llvm/Target/TargetRegisterInfo.h | 224
-rw-r--r--  include/llvm/Target/TargetRegistry.h | 284
-rw-r--r--  include/llvm/Target/TargetSelect.h | 48
-rw-r--r--  include/llvm/Target/TargetSelectionDAG.td | 5
-rw-r--r--  include/llvm/Target/TargetSubtargetInfo.h (renamed from include/llvm/Target/TargetSubtarget.h) | 21
-rw-r--r--  include/llvm/Transforms/IPO.h | 7
-rw-r--r--  include/llvm/Transforms/Scalar.h | 26
-rw-r--r--  include/llvm/Transforms/Utils/Cloning.h | 6
-rw-r--r--  include/llvm/Transforms/Utils/SSAUpdater.h | 9
-rw-r--r--  include/llvm/Transforms/Utils/ValueMapper.h | 34
-rw-r--r--  include/llvm/Type.h | 382
-rw-r--r--  include/llvm/TypeSymbolTable.h | 152
-rw-r--r--  include/llvm/Use.h | 19
-rw-r--r--  include/llvm/Value.h | 27
-rw-r--r--  lib/Analysis/Analysis.cpp | 1
-rw-r--r--  lib/Analysis/BlockFrequency.cpp | 59
-rw-r--r--  lib/Analysis/BranchProbabilityInfo.cpp | 14
-rw-r--r--  lib/Analysis/CMakeLists.txt | 1
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 6
-rw-r--r--  lib/Analysis/DIBuilder.cpp | 14
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 36
-rw-r--r--  lib/Analysis/IPA/FindUsedTypes.cpp | 4
-rw-r--r--  lib/Analysis/IVUsers.cpp | 47
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 8
-rw-r--r--  lib/Analysis/Lint.cpp | 7
-rw-r--r--  lib/Analysis/MemDepPrinter.cpp | 29
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 11
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 63
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 37
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 75
-rw-r--r--  lib/AsmParser/LLLexer.cpp | 48
-rw-r--r--  lib/AsmParser/LLLexer.h | 4
-rw-r--r--  lib/AsmParser/LLParser.cpp | 891
-rw-r--r--  lib/AsmParser/LLParser.h | 97
-rw-r--r--  lib/AsmParser/LLToken.h | 6
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 571
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.h | 28
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 199
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 114
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.h | 3
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 31
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 17
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp | 18
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 44
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 12
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 303
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 48
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 111
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 66
-rw-r--r--  lib/CodeGen/BranchFolding.h | 4
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 4
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 6
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 4
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 18
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 5
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 11
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 7
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 4
-rw-r--r--  lib/CodeGen/ELFWriter.h | 2
-rw-r--r--  lib/CodeGen/EdgeBundles.cpp | 2
-rw-r--r--  lib/CodeGen/ExpandISelPseudos.cpp | 4
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 76
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 36
-rw-r--r--  lib/CodeGen/InterferenceCache.cpp | 17
-rw-r--r--  lib/CodeGen/InterferenceCache.h | 51
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 18
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 48
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 2
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 6
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.h | 3
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 6
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 94
-rw-r--r--  lib/CodeGen/MachineBlockFrequency.cpp | 59
-rw-r--r--  lib/CodeGen/MachineBranchProbabilityInfo.cpp | 113
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 6
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 4
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 168
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 8
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 14
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 41
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 12
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 36
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 1430
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 10
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 30
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 65
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 579
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 63
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 26
-rw-r--r--  lib/CodeGen/RegisterClassInfo.cpp | 10
-rw-r--r--  lib/CodeGen/RegisterClassInfo.h | 2
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 1658
-rw-r--r--  lib/CodeGen/RegisterCoalescer.h (renamed from lib/CodeGen/SimpleRegisterCoalescing.h) | 167
-rw-r--r--  lib/CodeGen/RenderMachineFunction.cpp | 6
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 14
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 1
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 37
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 495
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 35
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 67
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 234
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 28
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 164
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 25
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 165
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 32
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 8
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 74
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 254
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 5
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 27
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 78
-rw-r--r--  lib/CodeGen/ShadowStackGC.cpp | 62
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 1539
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 8
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 295
-rw-r--r--  lib/CodeGen/SplitKit.h | 46
-rw-r--r--  lib/CodeGen/Splitter.cpp | 2
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 2
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 8
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 427
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 24
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 62
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 40
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 5
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 20
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 5
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 3
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt | 1
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 615
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 152
-rw-r--r--  lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 524
-rw-r--r--  lib/ExecutionEngine/TargetSelect.cpp | 7
-rw-r--r--  lib/Linker/LinkModules.cpp | 1816
-rw-r--r--  lib/MC/CMakeLists.txt | 2
-rw-r--r--  lib/MC/MCAsmInfo.cpp | 4
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 31
-rw-r--r--  lib/MC/MCDisassembler/Disassembler.cpp | 9
-rw-r--r--  lib/MC/MCDisassembler/EDDisassembler.cpp | 22
-rw-r--r--  lib/MC/MCDisassembler/EDDisassembler.h | 1
-rw-r--r--  lib/MC/MCDisassembler/EDInfo.h | 19
-rw-r--r--  lib/MC/MCDisassembler/EDOperand.cpp | 21
-rw-r--r--  lib/MC/MCDwarf.cpp | 324
-rw-r--r--  lib/MC/MCELFStreamer.cpp | 1
-rw-r--r--  lib/MC/MCELFStreamer.h | 134
-rw-r--r--  lib/MC/MCLoggingStreamer.cpp | 6
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 1
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 3
-rw-r--r--  lib/MC/MCObjectStreamer.cpp | 5
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 49
-rw-r--r--  lib/MC/MCParser/COFFAsmParser.cpp | 6
-rw-r--r--  lib/MC/MCParser/MCAsmParser.cpp | 6
-rw-r--r--  lib/MC/MCParser/TargetAsmParser.cpp | 4
-rw-r--r--  lib/MC/MCStreamer.cpp | 3
-rw-r--r--  lib/MC/MCSubtargetInfo.cpp | 96
-rw-r--r--  lib/MC/MCWin64EH.cpp | 10
-rw-r--r--  lib/MC/MachObjectWriter.cpp | 2111
-rw-r--r--  lib/MC/SubtargetFeature.cpp (renamed from lib/Target/SubtargetFeature.cpp) | 169
-rw-r--r--  lib/Object/Binary.cpp | 96
-rw-r--r--  lib/Object/CMakeLists.txt | 6
-rw-r--r--  lib/Object/COFFObjectFile.cpp | 433
-rw-r--r--  lib/Object/ELFObjectFile.cpp | 214
-rw-r--r--  lib/Object/Error.cpp | 57
-rw-r--r--  lib/Object/MachOObjectFile.cpp | 278
-rw-r--r--  lib/Object/Object.cpp | 21
-rw-r--r--  lib/Object/ObjectFile.cpp | 14
-rw-r--r--  lib/Support/APFloat.cpp | 18
-rw-r--r--  lib/Support/APInt.cpp | 37
-rw-r--r--  lib/Support/CommandLine.cpp | 4
-rw-r--r--  lib/Support/ConstantRange.cpp | 4
-rw-r--r--  lib/Support/Host.cpp | 7
-rw-r--r--  lib/Support/Triple.cpp | 6
-rw-r--r--  lib/Support/Twine.cpp | 11
-rw-r--r--  lib/Support/Unix/Path.inc | 3
-rw-r--r--  lib/Support/Windows/DynamicLibrary.inc | 2
-rw-r--r--  lib/Support/Windows/explicit_symbols.inc | 4
-rw-r--r--  lib/Target/ARM/ARM.h | 25
-rw-r--r--  lib/Target/ARM/ARM.td | 154
-rw-r--r--  lib/Target/ARM/ARMAsmBackend.cpp | 34
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.cpp | 177
-rw-r--r--  lib/Target/ARM/ARMAsmPrinter.h | 12
-rw-r--r--  lib/Target/ARM/ARMBaseInfo.h | 10
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 183
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 47
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 88
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 11
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 150
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 17
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 28
-rw-r--r--  lib/Target/ARM/ARMFastISel.cpp | 34
-rw-r--r--  lib/Target/ARM/ARMFrameLowering.cpp | 80
-rw-r--r--  lib/Target/ARM/ARMGlobalMerge.cpp | 10
-rw-r--r--  lib/Target/ARM/ARMHazardRecognizer.cpp | 20
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 41
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 417
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 4
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 241
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 1
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 605
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 40
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 360
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 561
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 27
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 34
-rw-r--r--  lib/Target/ARM/ARMMCCodeEmitter.cpp | 75
-rw-r--r--  lib/Target/ARM/ARMMCInstLower.cpp | 118
-rw-r--r--  lib/Target/ARM/ARMMachObjectWriter.cpp | 389
-rw-r--r--  lib/Target/ARM/ARMRegisterInfo.td | 305
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 136
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 95
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 32
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.h | 6
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 6
-rw-r--r--  lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 340
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 13
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp | 179
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 78
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 34
-rw-r--r--  lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 5
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp (renamed from lib/Target/ARM/ARMMCAsmInfo.cpp) | 0
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h (renamed from lib/Target/ARM/ARMMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 144
-rw-r--r--  lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 52
-rw-r--r--  lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/ARM/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/ARM/MLxExpansionPass.cpp | 26
-rw-r--r--  lib/Target/ARM/Makefile | 12
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 5
-rw-r--r--  lib/Target/ARM/README.txt | 18
-rw-r--r--  lib/Target/ARM/Thumb1FrameLowering.cpp | 23
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 19
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 48
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 3
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 43
-rw-r--r--  lib/Target/ARM/Thumb2SizeReduction.cpp | 148
-rw-r--r--  lib/Target/Alpha/Alpha.h | 12
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 44
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.h | 6
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.cpp | 12
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.h | 5
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 4
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.td | 19
-rw-r--r--  lib/Target/Alpha/AlphaSubtarget.cpp | 23
-rw-r--r--  lib/Target/Alpha/AlphaSubtarget.h | 19
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 7
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.h | 2
-rw-r--r--  lib/Target/Alpha/CMakeLists.txt | 11
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp (renamed from lib/Target/Alpha/AlphaMCAsmInfo.cpp) | 0
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h (renamed from lib/Target/Alpha/AlphaMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp | 57
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h | 40
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/Alpha/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/Alpha/Makefile | 8
-rw-r--r--  lib/Target/Blackfin/Blackfin.h | 9
-rw-r--r--  lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 34
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.h | 3
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.cpp | 9
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.h | 5
-rw-r--r--  lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 11
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.td | 111
-rw-r--r--  lib/Target/Blackfin/BlackfinSubtarget.cpp | 18
-rw-r--r--  lib/Target/Blackfin/BlackfinSubtarget.h | 16
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.cpp | 8
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.h | 2
-rw-r--r--  lib/Target/Blackfin/CMakeLists.txt | 11
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp (renamed from lib/Target/Blackfin/BlackfinMCAsmInfo.cpp) | 0
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h (renamed from lib/Target/Blackfin/BlackfinMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp | 60
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h | 38
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/Blackfin/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/Blackfin/Makefile | 9
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 413
-rw-r--r--  lib/Target/CBackend/CTargetMachine.h | 5
-rw-r--r--  lib/Target/CMakeLists.txt | 3
-rw-r--r--  lib/Target/CellSPU/CMakeLists.txt | 11
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp (renamed from lib/Target/CellSPU/SPUMCAsmInfo.cpp) | 2
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h (renamed from lib/Target/CellSPU/SPUMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 56
-rw-r--r--  lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h | 40
-rw-r--r--  lib/Target/CellSPU/Makefile | 9
-rw-r--r--  lib/Target/CellSPU/SPU.h | 6
-rw-r--r--  lib/Target/CellSPU/SPUFrameLowering.cpp | 1
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 4
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.cpp | 10
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.h | 5
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 10
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 4
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.td | 140
-rw-r--r--  lib/Target/CellSPU/SPURegisterNames.h | 3
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.cpp | 23
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.h | 21
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.cpp | 9
-rw-r--r--  lib/Target/CellSPU/SPUTargetMachine.h | 2
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 249
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 4
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp | 3
-rw-r--r--  lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 9
-rw-r--r--  lib/Target/MBlaze/CMakeLists.txt | 11
-rw-r--r--  lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 5
-rw-r--r--  lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h | 3
-rw-r--r--  lib/Target/MBlaze/MBlaze.h | 17
-rw-r--r--  lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 3
-rw-r--r--  lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 6
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 37
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 7
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 5
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 2
-rw-r--r--  lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp | 17
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 9
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 4
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.td | 31
-rw-r--r--  lib/Target/MBlaze/MBlazeSubtarget.cpp | 32
-rw-r--r--  lib/Target/MBlaze/MBlazeSubtarget.h | 21
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 18
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 2
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp (renamed from lib/Target/MBlaze/MBlazeMCAsmInfo.cpp) | 2
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h (renamed from lib/Target/MBlaze/MBlazeMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp | 65
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h | 38
-rw-r--r--  lib/Target/MBlaze/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/MBlaze/Makefile | 15
-rw-r--r--  lib/Target/MSP430/CMakeLists.txt | 11
-rw-r--r--  lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h | 3
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp (renamed from lib/Target/MSP430/MSP430MCAsmInfo.cpp) | 2
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h (renamed from lib/Target/MSP430/MSP430MCAsmInfo.h) | 0
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 58
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h | 38
-rw-r--r--  lib/Target/MSP430/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/MSP430/MSP430.h | 10
-rw-r--r--  lib/Target/MSP430/MSP430AsmPrinter.cpp | 5
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 17
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.h | 5
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 14
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 4
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.td | 8
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.cpp | 17
-rw-r--r--  lib/Target/MSP430/MSP430Subtarget.h | 16
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 7
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.h | 2
-rw-r--r--  lib/Target/MSP430/Makefile | 9
-rw-r--r--  lib/Target/Mips/CMakeLists.txt | 14
-rw-r--r--  lib/Target/Mips/InstPrinter/CMakeLists.txt | 6
-rw-r--r--  lib/Target/Mips/InstPrinter/Makefile | 16
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 127
-rw-r--r--  lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 100
-rw-r--r--  lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/Mips/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp (renamed from lib/Target/Mips/MipsMCAsmInfo.cpp) | 9
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h (renamed from lib/Target/Mips/MipsMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 58
-rw-r--r--  lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 39
-rw-r--r--  lib/Target/Mips/Makefile | 9
-rw-r--r--  lib/Target/Mips/Mips.h | 11
-rw-r--r--  lib/Target/Mips/Mips.td | 8
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.cpp | 140
-rw-r--r--  lib/Target/Mips/MipsAsmPrinter.h | 71
-rw-r--r--  lib/Target/Mips/MipsCallingConv.td | 4
-rw-r--r--  lib/Target/Mips/MipsDelaySlotFiller.cpp | 6
-rw-r--r--  lib/Target/Mips/MipsEmitGPRestore.cpp | 8
-rw-r--r--  lib/Target/Mips/MipsExpandPseudo.cpp | 8
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 29
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 201
-rw-r--r--  lib/Target/Mips/MipsISelLowering.h | 8
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 62
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.h | 103
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 29
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.cpp | 118
-rw-r--r--  lib/Target/Mips/MipsMCInstLower.h | 43
-rw-r--r--  lib/Target/Mips/MipsMCSymbolRefExpr.cpp | 63
-rw-r--r--  lib/Target/Mips/MipsMCSymbolRefExpr.h | 62
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 18
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 89
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 4
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.td | 28
-rw-r--r--  lib/Target/Mips/MipsSubtarget.cpp | 23
-rw-r--r--  lib/Target/Mips/MipsSubtarget.h | 19
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 14
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.h | 5
-rw-r--r--  lib/Target/PTX/CMakeLists.txt | 12
-rw-r--r--  lib/Target/PTX/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/PTX/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp (renamed from lib/Target/PTX/PTXMCAsmInfo.cpp) | 5
-rw-r--r--  lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h (renamed from lib/Target/PTX/PTXMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp | 60
-rw-r--r--  lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h | 38
-rw-r--r--  lib/Target/PTX/Makefile | 8
-rw-r--r--  lib/Target/PTX/PTX.h | 9
-rw-r--r--  lib/Target/PTX/PTX.td | 89
-rw-r--r--  lib/Target/PTX/PTXAsmPrinter.cpp | 238
-rw-r--r--  lib/Target/PTX/PTXCallingConv.td | 29
-rw-r--r--  lib/Target/PTX/PTXISelDAGToDAG.cpp | 36
-rw-r--r--  lib/Target/PTX/PTXISelLowering.cpp | 291
-rw-r--r--  lib/Target/PTX/PTXISelLowering.h | 11
-rw-r--r--  lib/Target/PTX/PTXInstrFormats.td | 2
-rw-r--r--  lib/Target/PTX/PTXInstrInfo.cpp | 107
-rw-r--r--  lib/Target/PTX/PTXInstrInfo.h | 28
-rw-r--r--  lib/Target/PTX/PTXInstrInfo.td | 871
-rw-r--r--  lib/Target/PTX/PTXIntrinsicInstrInfo.td | 8
-rw-r--r--  lib/Target/PTX/PTXMCAsmStreamer.cpp | 9
-rw-r--r--  lib/Target/PTX/PTXMFInfoExtract.cpp | 6
-rw-r--r--  lib/Target/PTX/PTXMachineFunctionInfo.h | 21
-rw-r--r--  lib/Target/PTX/PTXRegisterInfo.cpp | 34
-rw-r--r--  lib/Target/PTX/PTXRegisterInfo.h | 11
-rw-r--r--  lib/Target/PTX/PTXRegisterInfo.td | 548
-rw-r--r--  lib/Target/PTX/PTXSubtarget.cpp | 39
-rw-r--r--  lib/Target/PTX/PTXSubtarget.h | 68
-rw-r--r--  lib/Target/PTX/PTXTargetMachine.cpp | 15
-rw-r--r--  lib/Target/PTX/PTXTargetMachine.h | 7
-rwxr-xr-x  lib/Target/PTX/generate-register-td.py | 163
-rw-r--r--  lib/Target/PowerPC/CMakeLists.txt | 11
-rw-r--r--  lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp (renamed from lib/Target/PowerPC/PPCMCAsmInfo.cpp) | 4
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h (renamed from lib/Target/PowerPC/PPCMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 70
-rw-r--r--  lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 41
-rw-r--r--  lib/Target/PowerPC/Makefile | 7
-rw-r--r--  lib/Target/PowerPC/PPC.h | 18
-rw-r--r--  lib/Target/PowerPC/PPCAsmBackend.cpp | 6
-rw-r--r--  lib/Target/PowerPC/PPCAsmPrinter.cpp | 3
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 8
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 37
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 154
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 24
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.h | 5
-rw-r--r--  lib/Target/PowerPC/PPCJITInfo.cpp | 90
-rw-r--r--  lib/Target/PowerPC/PPCMCCodeEmitter.cpp | 11
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 11
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 59
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.cpp | 28
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 22
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 24
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.h | 7
-rw-r--r--  lib/Target/README.txt | 18
-rw-r--r--  lib/Target/Sparc/CMakeLists.txt | 11
-rw-r--r--  lib/Target/Sparc/DelaySlotFiller.cpp | 2
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/CMakeLists.txt | 4
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp (renamed from lib/Target/Sparc/SparcMCAsmInfo.cpp) | 7
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h (renamed from lib/Target/Sparc/SparcMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp | 57
-rw-r--r--  lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h | 41
-rw-r--r--  lib/Target/Sparc/Makefile | 9
-rw-r--r--  lib/Target/Sparc/Sparc.h | 14
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 23
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.h | 3
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.cpp | 14
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.h | 5
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 10
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 4
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.td | 34
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.cpp | 28
-rw-r--r--  lib/Target/Sparc/SparcSubtarget.h | 16
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 16
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.h | 7
-rw-r--r--  lib/Target/SystemZ/CMakeLists.txt | 11
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp (renamed from lib/Target/SystemZ/SystemZMCAsmInfo.cpp) | 2
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h (renamed from lib/Target/SystemZ/SystemZMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 58
-rw-r--r--  lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 38
-rw-r--r--  lib/Target/SystemZ/Makefile | 9
-rw-r--r--  lib/Target/SystemZ/SystemZ.h | 11
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZInstrBuilder.h | 6
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 20
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.h | 11
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 22
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 4
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.td | 311
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.cpp | 17
-rw-r--r--  lib/Target/SystemZ/SystemZSubtarget.h | 17
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 7
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.h | 2
-rw-r--r--  lib/Target/Target.cpp | 4
-rw-r--r--  lib/Target/TargetAsmInfo.cpp | 8
-rw-r--r--  lib/Target/TargetData.cpp | 64
-rw-r--r--  lib/Target/TargetInstrInfo.cpp | 43
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 55
-rw-r--r--  lib/Target/TargetMachine.cpp | 9
-rw-r--r--  lib/Target/TargetRegisterInfo.cpp | 24
-rw-r--r--  lib/Target/TargetSubtargetInfo.cpp (renamed from lib/Target/TargetSubtarget.cpp) | 12
-rw-r--r--  lib/Target/X86/AsmParser/X86AsmParser.cpp | 72
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 11
-rw-r--r--  lib/Target/X86/Disassembler/X86Disassembler.cpp | 3
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 11
-rw-r--r--  lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 6
-rw-r--r--  lib/Target/X86/InstPrinter/X86InstComments.cpp | 2
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 3
-rw-r--r--  lib/Target/X86/InstPrinter/X86IntelInstPrinter.h | 3
-rw-r--r--  lib/Target/X86/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/X86/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp (renamed from lib/Target/X86/X86MCAsmInfo.cpp) | 35
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h (renamed from lib/Target/X86/X86MCAsmInfo.h) | 0
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 185
-rw-r--r--  lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 60
-rw-r--r--  lib/Target/X86/Makefile | 9
-rw-r--r--  lib/Target/X86/X86.h | 23
-rw-r--r--  lib/Target/X86/X86.td | 7
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 3
-rw-r--r--  lib/Target/X86/X86AsmPrinter.cpp | 5
-rw-r--r--  lib/Target/X86/X86CallingConv.td | 41
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 14
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 29
-rw-r--r--  lib/Target/X86/X86FloatingPoint.cpp | 498
-rw-r--r--  lib/Target/X86/X86FrameLowering.cpp | 201
-rw-r--r--  lib/Target/X86/X86FrameLowering.h | 4
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 114
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 380
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 15
-rw-r--r--  lib/Target/X86/X86InstrBuilder.h | 6
-rw-r--r--  lib/Target/X86/X86InstrCompiler.td | 5
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 40
-rw-r--r--  lib/Target/X86/X86InstrFormats.td | 5
-rw-r--r--  lib/Target/X86/X86InstrFragmentsSIMD.td | 7
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 99
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 5
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 20
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 248
-rw-r--r--  lib/Target/X86/X86InstrSystem.td | 2
-rw-r--r--  lib/Target/X86/X86MCCodeEmitter.cpp | 58
-rw-r--r--  lib/Target/X86/X86MCInstLower.cpp | 2
-rw-r--r--  lib/Target/X86/X86MachObjectWriter.cpp | 522
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 79
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 14
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 199
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 241
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 31
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 40
-rw-r--r--  lib/Target/X86/X86TargetMachine.h | 7
-rw-r--r--  lib/Target/XCore/CMakeLists.txt | 11
-rw-r--r--  lib/Target/XCore/MCTargetDesc/CMakeLists.txt | 7
-rw-r--r--  lib/Target/XCore/MCTargetDesc/Makefile | 16
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp (renamed from lib/Target/XCore/XCoreMCAsmInfo.cpp) | 0
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h (renamed from lib/Target/XCore/XCoreMCAsmInfo.h) | 0
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 56
-rw-r--r--  lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h | 40
-rw-r--r--  lib/Target/XCore/Makefile | 9
-rw-r--r--  lib/Target/XCore/XCore.h | 12
-rw-r--r--  lib/Target/XCore/XCoreAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 25
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 6
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 7
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.h | 5
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 35
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 4
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.td | 6
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.cpp | 12
-rw-r--r--  lib/Target/XCore/XCoreSubtarget.h | 17
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 7
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.h | 2
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 6
-rw-r--r--  lib/Transforms/IPO/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 16
-rw-r--r--  lib/Transforms/IPO/DeadTypeElimination.cpp | 112
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 78
-rw-r--r--  lib/Transforms/IPO/IPO.cpp | 5
-rw-r--r--  lib/Transforms/IPO/LowerSetJmp.cpp | 6
-rw-r--r--  lib/Transforms/IPO/MergeFunctions.cpp | 3
-rw-r--r--  lib/Transforms/IPO/PruneEH.cpp | 3
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 27
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 5
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 45
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 11
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 109
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 18
-rw-r--r--  lib/Transforms/Instrumentation/GCOVProfiling.cpp | 13
-rw-r--r--  lib/Transforms/Instrumentation/PathProfiling.cpp | 4
-rw-r--r--  lib/Transforms/Instrumentation/ProfilingUtils.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 25
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 109
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 1772
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 104
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 25
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/MemCpyOptimizer.cpp | 28
-rw-r--r--  lib/Transforms/Scalar/ObjCARC.cpp | 3595
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp | 5
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 240
-rw-r--r--  lib/Transforms/Scalar/SimplifyCFGPass.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 4
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 6
-rw-r--r--  lib/Transforms/Utils/BreakCriticalEdges.cpp | 54
-rw-r--r--  lib/Transforms/Utils/CMakeLists.txt | 2
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 60
-rw-r--r--  lib/Transforms/Utils/CloneLoop.cpp | 128
-rw-r--r--  lib/Transforms/Utils/CloneModule.cpp | 24
-rw-r--r--  lib/Transforms/Utils/CodeExtractor.cpp | 8
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 78
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 69
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 1
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 71
-rw-r--r--  lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 166
-rw-r--r--  lib/Transforms/Utils/LowerInvoke.cpp | 30
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 44
-rw-r--r--  lib/Transforms/Utils/SSAUpdater.cpp | 14
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 82
-rw-r--r--  lib/Transforms/Utils/ValueMapper.cpp | 144
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 535
-rw-r--r--  lib/VMCore/Attributes.cpp | 2
-rw-r--r--  lib/VMCore/AutoUpgrade.cpp | 1286
-rw-r--r--  lib/VMCore/BasicBlock.cpp | 40
-rw-r--r--  lib/VMCore/CMakeLists.txt | 1
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 91
-rw-r--r--  lib/VMCore/ConstantFold.h | 8
-rw-r--r--  lib/VMCore/Constants.cpp | 476
-rw-r--r--  lib/VMCore/ConstantsContext.h | 146
-rw-r--r--  lib/VMCore/Core.cpp | 113
-rw-r--r--  lib/VMCore/DebugLoc.cpp | 32
-rw-r--r--  lib/VMCore/Function.cpp | 27
-rw-r--r--  lib/VMCore/Globals.cpp | 58
-rw-r--r--  lib/VMCore/IRBuilder.cpp | 30
-rw-r--r--  lib/VMCore/InlineAsm.cpp | 4
-rw-r--r--  lib/VMCore/Instruction.cpp | 44
-rw-r--r--  lib/VMCore/Instructions.cpp | 305
-rw-r--r--  lib/VMCore/LLVMContext.cpp | 4
-rw-r--r--  lib/VMCore/LLVMContextImpl.cpp | 41
-rw-r--r--  lib/VMCore/LLVMContextImpl.h | 86
-rw-r--r--  lib/VMCore/Metadata.cpp | 1
-rw-r--r--  lib/VMCore/Module.cpp | 186
-rw-r--r--  lib/VMCore/Type.cpp | 1239
-rw-r--r--  lib/VMCore/TypeSymbolTable.cpp | 169
-rw-r--r--  lib/VMCore/TypesContext.h | 426
-rw-r--r--  lib/VMCore/Use.cpp | 8
-rw-r--r--  lib/VMCore/User.cpp | 12
-rw-r--r--  lib/VMCore/Value.cpp | 44
-rw-r--r--  lib/VMCore/ValueSymbolTable.cpp | 2
-rw-r--r--  lib/VMCore/ValueTypes.cpp | 1
-rw-r--r--  lib/VMCore/Verifier.cpp | 148
-rw-r--r--  runtime/libprofile/GCDAProfiling.c | 2
-rw-r--r--  runtime/libprofile/Makefile | 25
-rw-r--r--  test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll | 22
-rw-r--r--  test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll | 20
-rw-r--r--  test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll | 15
-rw-r--r--  test/Analysis/BasicAA/args-rets-allocas-loads.ll | 525
-rw-r--r--  test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll | 6
-rw-r--r--  test/Analysis/BasicAA/modref.ll | 24
-rw-r--r--  test/Analysis/CallGraph/no-intrinsics.ll | 13
-rw-r--r--  test/Analysis/GlobalsModRef/indirect-global.ll | 5
-rw-r--r--  test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll | 2
-rw-r--r--  test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll | 52
-rw-r--r--  test/Analysis/ScalarEvolution/avoid-smax-1.ll | 13
-rw-r--r--  test/Analysis/ScalarEvolution/pr3909.ll | 4
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count.ll | 1
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count2.ll | 1
-rw-r--r--  test/Analysis/ScalarEvolution/trip-count3.ll | 86
-rw-r--r--  test/Archive/extract.ll | 16
-rw-r--r--  test/Assembler/2002-01-24-BadSymbolTableAssert.ll | 11
-rw-r--r--  test/Assembler/2002-01-24-ValueRefineAbsType.ll | 23
-rw-r--r--  test/Assembler/2002-02-19-TypeParsing.ll | 3
-rw-r--r--  test/Assembler/2002-04-04-PureVirtMethCall.ll | 6
-rw-r--r--  test/Assembler/2002-04-04-PureVirtMethCall2.ll | 5
-rw-r--r--  test/Assembler/2002-04-05-TypeParsing.ll | 3
-rw-r--r--  test/Assembler/2002-05-02-ParseError.ll | 7
-rw-r--r--  test/Assembler/2002-07-08-HugePerformanceProblem.ll | 67
-rw-r--r--  test/Assembler/2002-07-14-InternalLossage.ll | 9
-rw-r--r--  test/Assembler/2002-07-25-ParserAssertionFailure.ll | 13
-rw-r--r--  test/Assembler/2002-07-25-ReturnPtrFunction.ll | 6
-rw-r--r--  test/Assembler/2002-10-15-NameClash.ll | 7
-rw-r--r--  test/Assembler/2002-12-15-GlobalResolve.ll | 2
-rw-r--r--  test/Assembler/2003-04-15-ConstantInitAssertion.ll | 2
-rw-r--r--  test/Assembler/2003-05-21-MalformedStructCrash.ll | 2
-rw-r--r--  test/Assembler/2003-06-30-RecursiveTypeProblem.ll | 3
-rw-r--r--  test/Assembler/2003-10-04-NotMergingGlobalConstants.ll | 6
-rw-r--r--  test/Assembler/2003-12-30-TypeMapInvalidMemory.ll | 55
-rw-r--r--  test/Assembler/2004-11-28-InvalidTypeCrash.ll | 2
-rw-r--r--  test/Assembler/2005-02-09-AsmWriterStoreBug.ll | 14
-rw-r--r--  test/Assembler/2006-05-26-VarargsCallEncode.ll | 8
-rw-r--r--  test/Assembler/2007-07-30-AutoUpgradeZextSext.ll | 12
-rw-r--r--  test/Assembler/2007-11-27-AutoUpgradeAttributes.ll | 3
-rw-r--r--  test/Assembler/2008-02-20-MultipleReturnValue.ll | 22
-rw-r--r--  test/Assembler/2008-10-14-NamedTypeOnInteger.ll | 6
-rw-r--r--  test/Assembler/2009-02-28-CastOpc.ll | 3
-rw-r--r--  test/Assembler/AutoUpgradeIntrinsics.ll | 93
-rw-r--r--  test/Assembler/AutoUpgradeMMXIntrinsics.ll | 223
-rw-r--r--  test/Assembler/getelementptr.ll | 6
-rw-r--r--  test/Assembler/named-metadata.ll | 24
-rw-r--r--  test/Assembler/private.ll | 9
-rw-r--r--  test/Bindings/Ocaml/bitreader.ml | 4
-rw-r--r--  test/Bindings/Ocaml/bitwriter.ml | 4
-rw-r--r--  test/Bindings/Ocaml/vmcore.ml | 125
-rw-r--r--  test/Bitcode/AutoUpgradeIntrinsics.ll | 8
-rw-r--r--  test/Bitcode/AutoUpgradeIntrinsics.ll.bc | bin 800 -> 0 bytes
-rw-r--r--  test/Bitcode/memcpy.ll | 23
-rw-r--r--  test/Bitcode/metadata-2.ll | 6
-rw-r--r--  test/Bitcode/neon-intrinsics.ll | 206
-rw-r--r--  test/Bitcode/neon-intrinsics.ll.bc | bin 5764 -> 0 bytes
-rw-r--r--  test/Bitcode/sse2_loadl_pd.ll | 3
-rw-r--r--  test/Bitcode/sse2_loadl_pd.ll.bc | bin 532 -> 0 bytes
-rw-r--r--  test/Bitcode/sse2_movl_dq.ll | 3
-rw-r--r--  test/Bitcode/sse2_movl_dq.ll.bc | bin 480 -> 0 bytes
-rw-r--r--  test/Bitcode/sse2_movs_d.ll | 3
-rw-r--r--  test/Bitcode/sse2_movs_d.ll.bc | bin 476 -> 0 bytes
-rw-r--r--  test/Bitcode/sse2_punpck_qdq.ll | 4
-rw-r--r--  test/Bitcode/sse2_punpck_qdq.ll.bc | bin 576 -> 0 bytes
-rw-r--r--  test/Bitcode/sse2_shuf_pd.ll | 3
-rw-r--r--  test/Bitcode/sse2_shuf_pd.ll.bc | bin 584 -> 0 bytes
-rw-r--r--  test/Bitcode/sse2_unpck_pd.ll | 4
-rw-r--r--  test/Bitcode/sse2_unpck_pd.ll.bc | bin 572 -> 0 bytes
-rw-r--r--  test/Bitcode/sse41_pmulld.ll | 3
-rw-r--r--  test/Bitcode/sse41_pmulld.ll.bc | bin 560 -> 0 bytes
-rw-r--r--  test/CMakeLists.txt | 6
-rw-r--r--  test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll | 947
-rw-r--r--  test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll | 2
-rw-r--r--  test/CodeGen/ARM/2007-05-07-jumptoentry.ll | 58
-rw-r--r--  test/CodeGen/ARM/2007-05-07-tailmerge-1.ll | 1
-rw-r--r--  test/CodeGen/ARM/2007-05-09-tailmerge-2.ll | 1
-rw-r--r--  test/CodeGen/ARM/2007-05-22-tailmerge-3.ll | 1
-rw-r--r--  test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll | 237
-rw-r--r--  test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll | 5
-rw-r--r--  test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll | 5
-rw-r--r--  test/CodeGen/ARM/2008-09-14-CoalescerBug.ll | 29
-rw-r--r--  test/CodeGen/ARM/2009-03-07-SpillerBug.ll | 121
-rw-r--r--  test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll | 77
-rw-r--r--  test/CodeGen/ARM/2009-08-21-PostRAKill3.ll | 4
-rw-r--r--  test/CodeGen/ARM/2009-08-31-LSDA-Name.ll | 74
-rw-r--r--  test/CodeGen/ARM/2009-10-30.ll | 2
-rw-r--r--  test/CodeGen/ARM/2009-11-01-NeonMoves.ll | 2
-rw-r--r--  test/CodeGen/ARM/2010-08-04-StackVariable.ll | 2
-rw-r--r--  test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll | 2
-rw-r--r--  test/CodeGen/ARM/2011-06-16-TailCallByVal.ll | 20
-rw-r--r--  test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll | 12
-rw-r--r--  test/CodeGen/ARM/2011-07-10-GlobalMergeBug.ll | 8
-rw-r--r--  test/CodeGen/ARM/arm-modifier.ll | 6
-rw-r--r--  test/CodeGen/ARM/armv4.ll | 8
-rw-r--r--  test/CodeGen/ARM/bfi.ll | 13
-rw-r--r--  test/CodeGen/ARM/bfx.ll | 2
-rw-r--r--  test/CodeGen/ARM/call-tc.ll | 22
-rw-r--r--  test/CodeGen/ARM/call.ll | 4
-rw-r--r--  test/CodeGen/ARM/carry.ll | 1
-rw-r--r--  test/CodeGen/ARM/constants.ll | 25
-rw-r--r--  test/CodeGen/ARM/debug-info-blocks.ll | 245
-rw-r--r--  test/CodeGen/ARM/dyn-stackalloc.ll | 72
-rw-r--r--  test/CodeGen/ARM/fast-isel-static.ll | 2
-rw-r--r--  test/CodeGen/ARM/fast-isel.ll | 2
-rw-r--r--  test/CodeGen/ARM/fold-const.ll | 14
-rw-r--r--  test/CodeGen/ARM/fp.ll | 4
-rw-r--r--  test/CodeGen/ARM/globals.ll | 8
-rw-r--r--  test/CodeGen/ARM/hello.ll | 6
-rw-r--r--  test/CodeGen/ARM/iabs.ll | 2
-rw-r--r--  test/CodeGen/ARM/ifcvt1.ll | 4
-rw-r--r--  test/CodeGen/ARM/ifcvt2.ll | 2
-rw-r--r--  test/CodeGen/ARM/ifcvt3.ll | 6
-rw-r--r--  test/CodeGen/ARM/indirectbr.ll | 4
-rw-r--r--  test/CodeGen/ARM/inlineasm3.ll | 42
-rw-r--r--  test/CodeGen/ARM/jumptable-label.ll | 33
-rw-r--r--  test/CodeGen/ARM/ldr_frame.ll | 2
-rw-r--r--  test/CodeGen/ARM/long.ll | 6
-rw-r--r--  test/CodeGen/ARM/lsr-unfolded-offset.ll | 11
-rw-r--r--  test/CodeGen/ARM/memcpy-inline.ll | 7
-rw-r--r--  test/CodeGen/ARM/memfunc.ll | 17
-rw-r--r--  test/CodeGen/ARM/phi.ll | 4
-rw-r--r--  test/CodeGen/ARM/prefetch.ll | 27
-rw-r--r--  test/CodeGen/ARM/private.ll | 2
-rw-r--r--  test/CodeGen/ARM/reg_sequence.ll | 10
-rw-r--r--  test/CodeGen/ARM/rev.ll | 70
-rw-r--r--  test/CodeGen/ARM/section.ll | 7
-rw-r--r--  test/CodeGen/ARM/select-imm.ll | 40
-rw-r--r--  test/CodeGen/ARM/select_xform.ll | 2
-rw-r--r--  test/CodeGen/ARM/sub.ll | 2
-rw-r--r--  test/CodeGen/ARM/sxt_rot.ll | 4
-rw-r--r--  test/CodeGen/ARM/truncstore-dag-combine.ll | 4
-rw-r--r--  test/CodeGen/ARM/uxt_rot.ll | 6
-rw-r--r--  test/CodeGen/ARM/vargs_align.ll | 1
-rw-r--r--  test/CodeGen/ARM/vcvt_combine.ll | 99
-rw-r--r--  test/CodeGen/ARM/vdiv_combine.ll | 102
-rw-r--r--  test/CodeGen/ARM/vmul.ll | 30
-rw-r--r--  test/CodeGen/ARM/vpadd.ll | 14
-rw-r--r--  test/CodeGen/ARM/vqdmul.ll | 1
-rw-r--r--  test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll | 17
-rw-r--r--  test/CodeGen/Alpha/add.ll | 26
-rw-r--r--  test/CodeGen/Alpha/i32_sub_1.ll | 2
-rw-r--r--  test/CodeGen/Alpha/private.ll | 2
-rw-r--r--  test/CodeGen/Alpha/zapnot.ll | 2
-rw-r--r--  test/CodeGen/Blackfin/add-overflow.ll | 2
-rw-r--r--  test/CodeGen/Blackfin/burg.ll | 19
-rw-r--r--  test/CodeGen/Blackfin/many-args.ll | 2
-rw-r--r--  test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll | 3
-rw-r--r--  test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll | 6
-rw-r--r--  test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll | 10
-rw-r--r--  test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll | 5
-rw-r--r--  test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll | 11
-rw-r--r--  test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll | 19
-rw-r--r--  test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll | 35
-rw-r--r--  test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll (renamed from test/CodeGen/CBackend/2008-06-04-IndirectMem.ll) | 0
-rw-r--r--  test/CodeGen/CBackend/X86/dg.exp | 5
-rw-r--r--  test/CodeGen/CPP/llvm2cpp.ll | 756
-rw-r--r--  test/CodeGen/CellSPU/and_ops.ll | 12
-rw-r--r--  test/CodeGen/CellSPU/eqv.ll | 18
-rw-r--r--  test/CodeGen/CellSPU/mul-with-overflow.ll | 4
-rw-r--r--  test/CodeGen/CellSPU/nand.ll | 16
-rw-r--r--  test/CodeGen/CellSPU/or_ops.ll | 12
-rw-r--r--  test/CodeGen/CellSPU/private.ll | 3
-rw-r--r--  test/CodeGen/CellSPU/shift_ops.ll | 48
-rw-r--r--  test/CodeGen/CellSPU/struct_1.ll | 12
-rw-r--r--  test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll | 11
-rw-r--r--  test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll | 12
-rw-r--r--  test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll | 157
-rw-r--r--  test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll | 16
-rw-r--r--  test/CodeGen/Generic/BurgBadRegAlloc.ll | 829
-rw-r--r--  test/CodeGen/Generic/badlive.ll | 28
-rw-r--r--  test/CodeGen/Generic/builtin-expect.ll | 223
-rw-r--r--  test/CodeGen/Generic/crash.ll | 2
-rw-r--r--  test/CodeGen/Generic/edge-bundles-blockIDs.ll | 81
-rw-r--r--  test/CodeGen/Generic/getresult-undef.ll | 6
-rw-r--r--  test/CodeGen/Generic/invalid-memcpy.ll | 14
-rw-r--r--  test/CodeGen/Generic/legalize-dbg-value.ll | 25
-rw-r--r--  test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll | 4
-rw-r--r--  test/CodeGen/Generic/spillccr.ll | 49
-rw-r--r--  test/CodeGen/Mips/2008-07-15-SmallSection.ll | 2
-rw-r--r--  test/CodeGen/Mips/2008-07-16-SignExtInReg.ll | 4
-rw-r--r--  test/CodeGen/Mips/alloca.ll | 85
-rw-r--r--  test/CodeGen/Mips/i64arg.ll | 4
-rw-r--r--  test/CodeGen/Mips/inlineasmmemop.ll | 23
-rw-r--r--  test/CodeGen/Mips/internalfunc.ll | 6
-rw-r--r--  test/CodeGen/Mips/largeimmprinting.ll | 2
-rw-r--r--  test/CodeGen/Mips/o32_cc_byval.ll | 2
-rw-r--r--  test/CodeGen/Mips/private.ll | 2
-rw-r--r--  test/CodeGen/PTX/add.ll | 20
-rw-r--r--  test/CodeGen/PTX/aggregates.ll | 23
-rw-r--r--  test/CodeGen/PTX/bitwise.ll | 6
-rw-r--r--  test/CodeGen/PTX/bra.ll | 6
-rw-r--r--  test/CodeGen/PTX/cvt.ll | 128
-rw-r--r--  test/CodeGen/PTX/fdiv-sm10.ll | 4
-rw-r--r--  test/CodeGen/PTX/fdiv-sm13.ll | 4
-rw-r--r--  test/CodeGen/PTX/fneg.ll | 4
-rw-r--r--  test/CodeGen/PTX/intrinsic.ll | 2
-rw-r--r--  test/CodeGen/PTX/ld.ll | 150
-rw-r--r--  test/CodeGen/PTX/llvm-intrinsic.ll | 14
-rw-r--r--  test/CodeGen/PTX/mad.ll | 4
-rw-r--r--  test/CodeGen/PTX/mov.ll | 20
-rw-r--r--  test/CodeGen/PTX/mul.ll | 8
-rw-r--r--  test/CodeGen/PTX/options.ll | 2
-rw-r--r--  test/CodeGen/PTX/parameter-order.ll | 4
-rw-r--r--  test/CodeGen/PTX/selp.ll | 8
-rw-r--r--  test/CodeGen/PTX/setp.ll | 132
-rw-r--r--  test/CodeGen/PTX/shl.ll | 6
-rw-r--r--  test/CodeGen/PTX/shr.ll | 12
-rw-r--r--  test/CodeGen/PTX/st.ll | 130
-rw-r--r--  test/CodeGen/PTX/sub.ll | 20
-rw-r--r--  test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll | 1
-rw-r--r--  test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll | 1
-rw-r--r--  test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll | 14
-rw-r--r--  test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll | 155
-rw-r--r--  test/CodeGen/PowerPC/and-elim.ll | 2
-rw-r--r--  test/CodeGen/PowerPC/and_sext.ll | 4
-rw-r--r--  test/CodeGen/PowerPC/atomic-1.ll | 26
-rw-r--r--  test/CodeGen/PowerPC/atomic-2.ll | 26
-rw-r--r--  test/CodeGen/PowerPC/calls.ll | 7
-rw-r--r--  test/CodeGen/PowerPC/invalid-memcpy.ll | 20
-rw-r--r--  test/CodeGen/PowerPC/mul-with-overflow.ll | 4
-rw-r--r--  test/CodeGen/PowerPC/multiple-return-values.ll | 17
-rw-r--r--  test/CodeGen/PowerPC/ppc32-vaarg.ll | 167
-rw-r--r--  test/CodeGen/PowerPC/ppc64-32bit-addic.ll | 29
-rw-r--r--  test/CodeGen/PowerPC/ppc64-crash.ll | 14
-rw-r--r--  test/CodeGen/PowerPC/small-arguments.ll | 8
-rw-r--r--  test/CodeGen/PowerPC/vector.ll | 2
-rw-r--r--  test/CodeGen/SPARC/private.ll | 2
-rw-r--r--  test/CodeGen/SystemZ/02-MemArith.ll | 24
-rw-r--r--  test/CodeGen/SystemZ/03-RetAddImmSubreg.ll | 8
-rw-r--r--  test/CodeGen/SystemZ/03-RetAddSubreg.ll | 4
-rw-r--r--  test/CodeGen/SystemZ/03-RetAndImmSubreg.ll | 8
-rw-r--r--  test/CodeGen/SystemZ/03-RetAndSubreg.ll | 4
-rw-r--r--  test/CodeGen/SystemZ/03-RetArgSubreg.ll | 4
-rw-r--r--  test/CodeGen/SystemZ/03-RetImmSubreg.ll | 4
-rw-r--r--  test/CodeGen/SystemZ/03-RetOrImmSubreg.ll | 12
-rw-r--r--  test/CodeGen/SystemZ/03-RetOrSubreg.ll | 4
-rw-r--r--  test/CodeGen/SystemZ/03-RetSubImmSubreg.ll | 8
-rw-r--r--  test/CodeGen/SystemZ/03-RetSubSubreg.ll | 4
-rw-r--r--  test/CodeGen/SystemZ/03-RetXorImmSubreg.ll | 12
-rw-r--r--  test/CodeGen/SystemZ/03-RetXorSubreg.ll | 4
-rw-r--r--  test/CodeGen/SystemZ/11-BSwap.ll | 14
-rw-r--r--  test/CodeGen/Thumb/2007-03-06-AddR7.ll | 117
-rw-r--r--  test/CodeGen/Thumb/2009-07-19-SPDecBug.ll | 33
-rw-r--r--  test/CodeGen/Thumb/2011-06-16-NoGPRs.ll | 24
-rw-r--r--  test/CodeGen/Thumb/barrier.ll | 2
-rw-r--r--  test/CodeGen/Thumb/dyn-stackalloc.ll | 4
-rw-r--r--  test/CodeGen/Thumb/inlineasm-thumb.ll | 7
-rw-r--r--  test/CodeGen/Thumb/select.ll | 41
-rw-r--r--  test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll | 10
-rw-r--r--  test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll | 10
-rw-r--r--  test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll | 12
-rw-r--r--  test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll | 5
-rw-r--r--  test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll | 2
-rw-r--r--  test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll | 4
-rw-r--r--  test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll | 10
-rw-r--r--  test/CodeGen/Thumb2/ldr-str-imm12.ll | 2
-rw-r--r--  test/CodeGen/Thumb2/lsr-deficiency.ll | 8
-rw-r--r--  test/CodeGen/Thumb2/machine-licm.ll | 13
-rw-r--r--  test/CodeGen/Thumb2/thumb2-add.ll | 49
-rw-r--r--  test/CodeGen/Thumb2/thumb2-bcc.ll | 7
-rw-r--r--  test/CodeGen/Thumb2/thumb2-branch.ll | 13
-rw-r--r--  test/CodeGen/Thumb2/thumb2-clz.ll | 2
-rw-r--r--  test/CodeGen/Thumb2/thumb2-ifcvt1.ll | 9
-rw-r--r--  test/CodeGen/Thumb2/thumb2-mulhi.ll | 7
-rw-r--r--  test/CodeGen/Thumb2/thumb2-rev.ll | 2
-rw-r--r--  test/CodeGen/Thumb2/thumb2-sbc.ll | 1
-rw-r--r--  test/CodeGen/Thumb2/thumb2-smla.ll | 2
-rw-r--r--  test/CodeGen/Thumb2/thumb2-smul.ll | 2
-rw-r--r--  test/CodeGen/Thumb2/thumb2-sxt_rot.ll | 4
-rw-r--r--  test/CodeGen/Thumb2/thumb2-teq.ll | 38
-rw-r--r--  test/CodeGen/Thumb2/thumb2-teq2.ll | 16
-rw-r--r--  test/CodeGen/Thumb2/thumb2-tst.ll | 45
-rw-r--r--  test/CodeGen/Thumb2/thumb2-tst2.ll | 16
-rw-r--r--  test/CodeGen/Thumb2/thumb2-uxt_rot.ll | 6
-rw-r--r--  test/CodeGen/X86/2004-02-12-Memcpy.ll | 25
-rw-r--r--  test/CodeGen/X86/2006-11-12-CSRetCC.ll | 7
-rw-r--r--  test/CodeGen/X86/2006-11-28-Memcpy.ll | 34
-rw-r--r--  test/CodeGen/X86/2007-02-04-OrAddrMode.ll | 14
-rw-r--r--  test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll | 14
-rw-r--r--  test/CodeGen/X86/2007-03-16-InlineAsm.ll | 1
-rw-r--r--  test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll | 11
-rw-r--r--  test/CodeGen/X86/2007-05-05-Personality.ll | 40
-rw-r--r--  test/CodeGen/X86/2007-05-07-InvokeSRet.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll | 4
-rw-r--r--  test/CodeGen/X86/2007-06-04-tailmerge4.ll | 454
-rw-r--r--  test/CodeGen/X86/2007-06-05-LSR-Dominator.ll | 129
-rw-r--r--  test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-08-10-SignExtSubreg.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll | 6
-rw-r--r--  test/CodeGen/X86/2007-09-27-LDIntrinsics.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-10-05-3AddrConvert.ll | 48
-rw-r--r--  test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-10-15-CoalescerCrash.ll | 6
-rw-r--r--  test/CodeGen/X86/2007-10-19-SpillerUnfold.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-10-29-ExtendSetCC.ll | 2
-rw-r--r--  test/CodeGen/X86/2007-11-02-BadAsm.ll | 144
-rw-r--r--  test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll | 680
-rw-r--r--  test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll | 2
-rw-r--r--  test/CodeGen/X86/2008-04-16-ReMatBug.ll | 6
-rw-r--r--  test/CodeGen/X86/2008-04-17-CoalescerBug.ll | 8
-rw-r--r--  test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll | 8
-rw-r--r--  test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll | 19
-rw-r--r--  test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll | 59
-rw-r--r--  test/CodeGen/X86/2008-09-25-sseregparm-1.ll | 4
-rw-r--r--  test/CodeGen/X86/2008-10-27-StackRealignment.ll | 8
-rw-r--r--  test/CodeGen/X86/2008-12-05-SpillerCrash.ll | 237
-rw-r--r--  test/CodeGen/X86/2009-01-25-NoSSE.ll | 16
-rw-r--r--  test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll | 38
-rw-r--r--  test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll | 18
-rw-r--r--  test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll | 71
-rw-r--r--  test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll | 165
-rw-r--r--  test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-04-20-LinearScanOpt.ll | 121
-rw-r--r--  test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll | 165
-rw-r--r--  test/CodeGen/X86/2009-04-29-LinearScanBug.ll | 54
-rw-r--r--  test/CodeGen/X86/2009-04-29-RegAllocAssert.ll | 6
-rw-r--r--  test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll | 8
-rw-r--r--  test/CodeGen/X86/2009-08-06-branchfolder-crash.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll | 4
-rw-r--r--  test/CodeGen/X86/2009-09-19-earlyclobber.ll | 2
-rw-r--r--  test/CodeGen/X86/2009-10-08-MachineLICMBug.ll | 264
-rw-r--r--  test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll | 4
-rw-r--r--  test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll | 7
-rw-r--r--  test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll | 4
-rw-r--r--  test/CodeGen/X86/2010-04-08-CoalescerBug.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll | 2
-rw-r--r--  test/CodeGen/X86/2010-11-09-MOVLPS.ll | 2
-rw-r--r--  test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll | 2
-rw-r--r--  test/CodeGen/X86/2011-06-12-FastAllocSpill.ll | 52
-rw-r--r--  test/CodeGen/X86/2011-06-14-PreschedRegalias.ll | 18
-rw-r--r--  test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll | 45
-rw-r--r--  test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll | 20
-rw-r--r--  test/CodeGen/X86/4char-promote.ll | 17
-rw-r--r--  test/CodeGen/X86/GC/simple_ocaml.ll | 42
-rw-r--r--  test/CodeGen/X86/adde-carry.ll | 6
-rw-r--r--  test/CodeGen/X86/allrem-moddi3.ll | 19
-rw-r--r--  test/CodeGen/X86/asm-global-imm.ll | 13
-rw-r--r--  test/CodeGen/X86/asm-label.ll | 40
-rw-r--r--  test/CodeGen/X86/asm-label2.ll | 22
-rw-r--r--  test/CodeGen/X86/atomic-or.ll | 36
-rw-r--r--  test/CodeGen/X86/avx-128.ll | 44
-rw-r--r--  test/CodeGen/X86/avx-256-arith.ll | 116
-rw-r--r--  test/CodeGen/X86/avx-256-arith.s | 0
-rw-r--r--  test/CodeGen/X86/avx-256-logic.ll | 161
-rw-r--r--  test/CodeGen/X86/avx-load-store.ll | 24
-rw-r--r--  test/CodeGen/X86/bswap.ll | 38
-rw-r--r--  test/CodeGen/X86/byval2.ll | 4
-rw-r--r--  test/CodeGen/X86/byval3.ll | 4
-rw-r--r--  test/CodeGen/X86/byval4.ll | 4
-rw-r--r--  test/CodeGen/X86/byval5.ll | 4
-rw-r--r--  test/CodeGen/X86/change-compare-stride-0.ll | 15
-rw-r--r--  test/CodeGen/X86/change-compare-stride-1.ll | 15
-rw-r--r--  test/CodeGen/X86/change-compare-stride-trickiness-1.ll | 9
-rw-r--r--  test/CodeGen/X86/coalescer-cross.ll | 4
-rw-r--r--  test/CodeGen/X86/crash.ll | 101
-rw-r--r--  test/CodeGen/X86/dag-rauw-cse.ll | 6
-rw-r--r--  test/CodeGen/X86/darwin-bzero.ll | 4
-rw-r--r--  test/CodeGen/X86/dbg-i128-const.ll | 26
-rw-r--r--  test/CodeGen/X86/divide-by-constant.ll | 2
-rw-r--r--  test/CodeGen/X86/fast-isel-bail.ll | 2
-rw-r--r--  test/CodeGen/X86/fast-isel-call.ll | 4
-rw-r--r--  test/CodeGen/X86/fast-isel-gep.ll | 2
-rw-r--r--  test/CodeGen/X86/fma.ll | 33
-rw-r--r--  test/CodeGen/X86/fold-add.ll | 7
-rw-r--r--  test/CodeGen/X86/fold-sext-trunc.ll | 2
-rw-r--r--  test/CodeGen/X86/fp-stack-2results.ll | 34
-rw-r--r--  test/CodeGen/X86/fp-stack-O0.ll | 24
-rw-r--r--  test/CodeGen/X86/fp-stack-ret.ll | 27
-rw-r--r--  test/CodeGen/X86/h-registers-2.ll | 13
-rw-r--r--  test/CodeGen/X86/inline-asm-error.ll | 6
-rw-r--r--  test/CodeGen/X86/inline-asm-fpstack.ll | 249
-rw-r--r--  test/CodeGen/X86/inline-asm-fpstack2.ll | 21
-rw-r--r--  test/CodeGen/X86/inline-asm-fpstack3.ll | 20
-rw-r--r--  test/CodeGen/X86/inline-asm-fpstack4.ll | 24
-rw-r--r--  test/CodeGen/X86/inline-asm-fpstack5.ll | 15
-rw-r--r--  test/CodeGen/X86/inline-asm-mrv.ll | 10
-rw-r--r--  test/CodeGen/X86/inline-asm-q-regs.ll | 16
-rw-r--r--  test/CodeGen/X86/inline-asm.ll | 20
-rw-r--r--  test/CodeGen/X86/isel-sink.ll | 14
-rw-r--r--  test/CodeGen/X86/longlong-deadload.ll | 7
-rw-r--r--  test/CodeGen/X86/loop-strength-reduce2.ll | 3
-rw-r--r--  test/CodeGen/X86/lsr-nonaffine.ll | 43
-rw-r--r--  test/CodeGen/X86/lsr-redundant-addressing.ll | 8
-rw-r--r--  test/CodeGen/X86/lsr-reuse-trunc.ll | 5
-rw-r--r--  test/CodeGen/X86/mem-promote-integers.ll | 391
-rw-r--r--  test/CodeGen/X86/membarrier.ll | 15
-rw-r--r--  test/CodeGen/X86/memcpy-2.ll | 10
-rw-r--r--  test/CodeGen/X86/memcpy.ll | 4
-rw-r--r--  test/CodeGen/X86/memmove-4.ll | 12
-rw-r--r--  test/CodeGen/X86/memset-2.ll | 4
-rw-r--r--  test/CodeGen/X86/memset-3.ll | 4
-rw-r--r--  test/CodeGen/X86/memset.ll | 4
-rw-r--r--  test/CodeGen/X86/memset64-on-x86-32.ll | 7
-rw-r--r--  test/CodeGen/X86/mmx-shuffle.ll | 8
-rw-r--r--  test/CodeGen/X86/muloti.ll | 81
-rw-r--r--  test/CodeGen/X86/multiple-return-values-cross-block.ll | 4
-rw-r--r--  test/CodeGen/X86/multiple-return-values.ll | 16
-rw-r--r--  test/CodeGen/X86/non-lazy-bind.ll | 27
-rw-r--r--  test/CodeGen/X86/opt-ext-uses.ll | 2
-rw-r--r--  test/CodeGen/X86/optimize-max-0.ll | 888
-rw-r--r--  test/CodeGen/X86/peep-test-3.ll | 2
-rw-r--r--  test/CodeGen/X86/personality.ll | 41
-rw-r--r--  test/CodeGen/X86/pic_jumptable.ll | 7
-rw-r--r--  test/CodeGen/X86/pr1505b.ll | 25
-rw-r--r--  test/CodeGen/X86/pr2182.ll | 37
-rw-r--r--test/CodeGen/X86/pr2623.ll44
-rw-r--r--test/CodeGen/X86/pr3216.ll22
-rw-r--r--test/CodeGen/X86/pr3317.ll2
-rw-r--r--test/CodeGen/X86/pre-split1.ll24
-rw-r--r--test/CodeGen/X86/pre-split10.ll51
-rw-r--r--test/CodeGen/X86/pre-split11.ll34
-rw-r--r--test/CodeGen/X86/pre-split2.ll26
-rw-r--r--test/CodeGen/X86/pre-split3.ll26
-rw-r--r--test/CodeGen/X86/pre-split4.ll26
-rw-r--r--test/CodeGen/X86/pre-split5.ll56
-rw-r--r--test/CodeGen/X86/pre-split6.ll36
-rw-r--r--test/CodeGen/X86/pre-split7.ll34
-rw-r--r--test/CodeGen/X86/pre-split8.ll35
-rw-r--r--test/CodeGen/X86/pre-split9.ll38
-rw-r--r--test/CodeGen/X86/prefetch.ll10
-rw-r--r--test/CodeGen/X86/private.ll2
-rw-r--r--test/CodeGen/X86/promote-i16.ll14
-rw-r--r--test/CodeGen/X86/promote-trunc.ll11
-rw-r--r--test/CodeGen/X86/reghinting.ll35
-rw-r--r--test/CodeGen/X86/sdiv-exact.ll18
-rw-r--r--test/CodeGen/X86/sext-trunc.ll2
-rw-r--r--test/CodeGen/X86/shift-codegen.ll37
-rw-r--r--test/CodeGen/X86/shl_undef.ll53
-rw-r--r--test/CodeGen/X86/sibcall-byval.ll31
-rw-r--r--test/CodeGen/X86/sibcall.ll2
-rw-r--r--test/CodeGen/X86/sse1.ll2
-rw-r--r--test/CodeGen/X86/sse3.ll10
-rw-r--r--test/CodeGen/X86/switch-bt.ll20
-rw-r--r--test/CodeGen/X86/tail-dup-addr.ll28
-rw-r--r--test/CodeGen/X86/tail-threshold.ll44
-rw-r--r--test/CodeGen/X86/tailcallbyval.ll2
-rw-r--r--test/CodeGen/X86/tailcallbyval64.ll2
-rw-r--r--test/CodeGen/X86/testl-commute.ll18
-rw-r--r--test/CodeGen/X86/tlv-1.ll2
-rw-r--r--test/CodeGen/X86/trunc-to-bool.ll2
-rw-r--r--test/CodeGen/X86/twoaddr-remat.ll67
-rw-r--r--test/CodeGen/X86/umul-with-overflow.ll2
-rw-r--r--test/CodeGen/X86/unaligned-load.ll19
-rw-r--r--test/CodeGen/X86/undef-label.ll19
-rw-r--r--test/CodeGen/X86/variable-sized-darwin-bzero.ll6
-rw-r--r--test/CodeGen/X86/vec_insert-2.ll43
-rw-r--r--test/CodeGen/X86/vec_set-A.ll3
-rw-r--r--test/CodeGen/X86/vector.ll2
-rw-r--r--test/CodeGen/X86/x86-64-malloc.ll12
-rw-r--r--test/CodeGen/X86/x86-64-shortint.ll2
-rw-r--r--test/CodeGen/X86/zext-fold.ll41
-rw-r--r--test/CodeGen/XCore/private.ll2
-rw-r--r--test/DebugInfo/X86/earlydup-crash.ll85
-rw-r--r--test/DebugInfo/X86/pr9951.ll (renamed from test/DebugInfo/pr9951.ll)0
-rw-r--r--test/ExecutionEngine/test-malloc.ll13
-rw-r--r--test/Feature/alignment.ll9
-rw-r--r--test/Feature/calltest.ll2
-rw-r--r--test/Feature/forwardreftest.ll6
-rw-r--r--test/Feature/globalredefinition.ll18
-rw-r--r--test/Feature/globalvars.ll4
-rw-r--r--test/Feature/noalias-ret.ll6
-rw-r--r--test/Feature/opaquetypes.ll55
-rw-r--r--test/Feature/paramattrs.ll14
-rw-r--r--test/Feature/testmemory.ll36
-rw-r--r--test/Feature/testtype.ll6
-rw-r--r--test/Feature/weak_constant.ll2
-rw-r--r--test/FrontendC/2008-07-29-EHLabel.ll282
-rw-r--r--test/FrontendC/2010-11-16-asmblock.c6
-rw-r--r--test/FrontendC/ARM/inline-asm-multichar.c4
-rw-r--r--test/FrontendC/asm-reg-var-local.c24
-rw-r--r--test/FrontendC/mmx-inline-asm.c2
-rw-r--r--test/Integer/BitArith.ll25
-rw-r--r--test/Integer/BitBit.ll24
-rw-r--r--test/Integer/BitCast.ll26
-rw-r--r--test/Integer/BitIcmp.ll43
-rw-r--r--test/Integer/BitMem.ll29
-rw-r--r--test/Integer/BitMisc.ll24
-rw-r--r--test/Integer/alignment_bt.ll21
-rw-r--r--test/Integer/cfgstructures_bt.ll56
-rw-r--r--test/Integer/forwardreftest_bt.ll33
-rw-r--r--test/Integer/globalredefinition_bt.ll18
-rw-r--r--test/Integer/globalvars_bt.ll26
-rw-r--r--test/Integer/indirectcall2_bt.ll24
-rw-r--r--test/Integer/indirectcall_bt.ll52
-rw-r--r--test/Integer/opaquetypes_bt.ll58
-rw-r--r--test/Integer/paramattrs_bt.ll19
-rw-r--r--test/Integer/prototype_bt.ll13
-rw-r--r--test/Integer/recursivetype_bt.ll108
-rw-r--r--test/Integer/simplecalltest_bt.ll28
-rw-r--r--test/Integer/small_bt.ll13
-rw-r--r--test/Integer/testalloca_bt.ll28
-rw-r--r--test/Integer/testarith_bt.ll21
-rw-r--r--test/Integer/testconstants_bt.ll32
-rw-r--r--test/Integer/testicmp_bt.ll23
-rw-r--r--test/Integer/testlogical_bt.ll13
-rw-r--r--test/Integer/testlogical_new_bt.ll15
-rw-r--r--test/Integer/testmemory_bt.ll45
-rw-r--r--test/Integer/testswitch_bt.ll24
-rw-r--r--test/Integer/testvarargs_bt.ll14
-rw-r--r--test/Linker/2003-01-30-LinkerRename.ll2
-rw-r--r--test/Linker/2003-01-30-LinkerTypeRename.ll3
-rw-r--r--test/Linker/2003-05-31-LinkerRename.ll2
-rw-r--r--test/Linker/2003-08-23-GlobalVarLinking.ll2
-rw-r--r--test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll3
-rw-r--r--test/Linker/2003-08-28-TypeResolvesGlobal.ll2
-rw-r--r--test/Linker/2003-10-21-ConflictingTypesTolerance.ll6
-rw-r--r--test/Linker/inlineasm.ll17
-rw-r--r--test/Linker/testlink1.ll99
-rw-r--r--test/Linker/testlink2.ll48
-rw-r--r--test/Linker/unnamed-addr1-a.ll6
-rw-r--r--test/MC/ARM/arm-arithmetic-aliases.s126
-rw-r--r--test/MC/ARM/arm_instructions.s126
-rw-r--r--test/MC/ARM/basic-arm-instructions.s698
-rw-r--r--test/MC/ARM/diagnostics.s90
-rw-r--r--test/MC/ARM/mode-switch.s17
-rw-r--r--test/MC/ARM/prefetch.ll4
-rw-r--r--test/MC/ARM/simple-encoding.ll7
-rw-r--r--test/MC/ARM/thumb.s15
-rw-r--r--test/MC/ARM/thumb2-movt-fixup.s17
-rw-r--r--test/MC/ARM/thumb2.s51
-rw-r--r--test/MC/ARM/vpush-vpop.s19
-rw-r--r--test/MC/AsmParser/directive_comm.s2
-rw-r--r--test/MC/AsmParser/exprs-invalid.s6
-rw-r--r--test/MC/Disassembler/ARM/arm-tests.txt8
-rw-r--r--test/MC/Disassembler/ARM/neon-tests.txt2
-rw-r--r--test/MC/Disassembler/X86/x86-32.txt26
-rw-r--r--test/MC/X86/padlock.s4
-rw-r--r--test/MC/X86/x86-32-coverage.s24
-rw-r--r--test/MC/X86/x86-64.s52
-rw-r--r--test/Makefile7
-rw-r--r--test/Other/constant-fold-gep.ll78
-rw-r--r--test/Transforms/ADCE/2002-01-31-UseStuckAround.ll6
-rw-r--r--test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll20
-rw-r--r--test/Transforms/ConstProp/extractvalue.ll7
-rw-r--r--test/Transforms/ConstProp/insertvalue.ll25
-rw-r--r--test/Transforms/ConstProp/overflow-ops.ll39
-rw-r--r--test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll4
-rw-r--r--test/Transforms/DeadArgElim/keepalive.ll2
-rw-r--r--test/Transforms/DeadStoreElimination/crash.ll4
-rw-r--r--test/Transforms/DeadStoreElimination/free.ll6
-rw-r--r--test/Transforms/DeadStoreElimination/lifetime.ll4
-rw-r--r--test/Transforms/DeadStoreElimination/memintrinsics.ll12
-rw-r--r--test/Transforms/DeadStoreElimination/simple.ll22
-rw-r--r--test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll64
-rw-r--r--test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll7
-rw-r--r--test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll25
-rw-r--r--test/Transforms/GVN/2008-02-26-MemCpySize.ll46
-rw-r--r--test/Transforms/GVN/2011-04-27-phioperands.ll106
-rw-r--r--test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll85
-rw-r--r--test/Transforms/GVN/nonescaping-malloc.ll98
-rw-r--r--test/Transforms/GVN/rle.ll21
-rw-r--r--test/Transforms/GlobalOpt/2005-09-27-Crash.ll2
-rw-r--r--test/Transforms/GlobalOpt/2007-04-05-Crash.ll2
-rw-r--r--test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll6
-rw-r--r--test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll5
-rw-r--r--test/Transforms/GlobalOpt/memcpy.ll12
-rw-r--r--test/Transforms/GlobalOpt/memset.ll19
-rw-r--r--test/Transforms/IPConstantProp/return-constants.ll57
-rw-r--r--test/Transforms/IndVarSimplify/2008-09-02-IVType.ll2
-rw-r--r--test/Transforms/IndVarSimplify/ada-loops.ll18
-rw-r--r--test/Transforms/IndVarSimplify/iv-zext.ll1
-rw-r--r--test/Transforms/IndVarSimplify/no-iv-rewrite.ll199
-rw-r--r--test/Transforms/IndVarSimplify/preserve-signed-wrap.ll10
-rw-r--r--test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll9
-rw-r--r--test/Transforms/Inline/2008-03-04-StructRet.ll28
-rw-r--r--test/Transforms/Inline/2008-03-07-Inline-2.ll53
-rw-r--r--test/Transforms/Inline/2008-03-07-Inline.ll57
-rw-r--r--test/Transforms/Inline/inline-invoke-tail.ll10
-rw-r--r--test/Transforms/InstCombine/2006-11-03-Memmove64.ll16
-rw-r--r--test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll19
-rw-r--r--test/Transforms/InstCombine/2007-05-04-Crash.ll30
-rw-r--r--test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll2
-rw-r--r--test/Transforms/InstCombine/2007-09-11-Trampoline.ll2
-rw-r--r--test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll19
-rw-r--r--test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll4
-rw-r--r--test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll2
-rw-r--r--test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll2
-rw-r--r--test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll2
-rw-r--r--test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll489
-rw-r--r--test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll11
-rw-r--r--test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll60
-rw-r--r--test/Transforms/InstCombine/bswap-fold.ll4
-rw-r--r--test/Transforms/InstCombine/call2.ll2
-rw-r--r--test/Transforms/InstCombine/cast.ll23
-rw-r--r--test/Transforms/InstCombine/getelementptr.ll43
-rw-r--r--test/Transforms/InstCombine/icmp.ll38
-rw-r--r--test/Transforms/InstCombine/intrinsics.ll1
-rw-r--r--test/Transforms/InstCombine/malloc-free-delete.ll12
-rw-r--r--test/Transforms/InstCombine/malloc.ll7
-rw-r--r--test/Transforms/InstCombine/malloc2.ll22
-rw-r--r--test/Transforms/InstCombine/malloc3.ll26
-rw-r--r--test/Transforms/InstCombine/memcpy-to-load.ll11
-rw-r--r--test/Transforms/InstCombine/memmove.ll21
-rw-r--r--test/Transforms/InstCombine/memset.ll19
-rw-r--r--test/Transforms/InstCombine/not.ll2
-rw-r--r--test/Transforms/InstCombine/objsize.ll2
-rw-r--r--test/Transforms/InstCombine/phi.ll2
-rw-r--r--test/Transforms/InstCombine/select-crash.ll12
-rw-r--r--test/Transforms/InstCombine/sqrt.ll26
-rw-r--r--test/Transforms/InstCombine/stack-overalign.ll14
-rw-r--r--test/Transforms/InstCombine/sub.ll2
-rw-r--r--test/Transforms/InstCombine/vec_narrow.ll12
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll29
-rw-r--r--test/Transforms/InstCombine/zext-or-icmp.ll2
-rw-r--r--test/Transforms/InstSimplify/undef.ll127
-rw-r--r--test/Transforms/JumpThreading/crash.ll2
-rw-r--r--test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll9
-rw-r--r--test/Transforms/LICM/2011-07-06-Alignment.ll26
-rw-r--r--test/Transforms/LoopDeletion/2011-06-21-phioperands.ll182
-rw-r--r--test/Transforms/LoopIdiom/memset_noidiom.ll30
-rw-r--r--test/Transforms/LoopRotate/PhiRename-1.ll1
-rw-r--r--test/Transforms/LoopRotate/crash.ll2
-rw-r--r--test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll10
-rw-r--r--test/Transforms/LowerExpectIntrinsic/basic.ll251
-rw-r--r--test/Transforms/LowerExpectIntrinsic/dg.exp3
-rw-r--r--test/Transforms/LowerSetJmp/simpletest.ll1
-rw-r--r--test/Transforms/Mem2Reg/ignore-lifetime.ll26
-rw-r--r--test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll44
-rw-r--r--test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll26
-rw-r--r--test/Transforms/MemCpyOpt/memcpy.ll63
-rw-r--r--test/Transforms/MemCpyOpt/memmove.ll19
-rw-r--r--test/Transforms/MemCpyOpt/sret.ll40
-rw-r--r--test/Transforms/ObjCARC/basic.ll1898
-rw-r--r--test/Transforms/ObjCARC/cfg-hazards.ll86
-rw-r--r--test/Transforms/ObjCARC/contract-marker.ll23
-rw-r--r--test/Transforms/ObjCARC/contract-storestrong-ivar.ll31
-rw-r--r--test/Transforms/ObjCARC/contract-storestrong.ll59
-rw-r--r--test/Transforms/ObjCARC/contract-testcases.ll63
-rw-r--r--test/Transforms/ObjCARC/contract.ll145
-rw-r--r--test/Transforms/ObjCARC/dg.exp3
-rw-r--r--test/Transforms/ObjCARC/expand.ll28
-rw-r--r--test/Transforms/ObjCARC/gvn.ll21
-rw-r--r--test/Transforms/ObjCARC/invoke.ll67
-rw-r--r--test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll221
-rw-r--r--test/Transforms/ObjCARC/move-and-merge-autorelease.ll108
-rw-r--r--test/Transforms/ObjCARC/post-inlining.ll48
-rw-r--r--test/Transforms/ObjCARC/retain-not-declared.ll25
-rw-r--r--test/Transforms/ObjCARC/rle-s2l.ll135
-rw-r--r--test/Transforms/ObjCARC/rv.ll331
-rw-r--r--test/Transforms/ObjCARC/weak-contract.ll14
-rw-r--r--test/Transforms/ObjCARC/weak-copies.ll87
-rw-r--r--test/Transforms/ObjCARC/weak.ll57
-rw-r--r--test/Transforms/PruneEH/2008-09-05-CGUpdate.ll1445
-rw-r--r--test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll2
-rw-r--r--test/Transforms/SCCP/2002-05-02-EdgeFailure.ll26
-rw-r--r--test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll2
-rw-r--r--test/Transforms/SCCP/ipsccp-basic.ll8
-rw-r--r--test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll6
-rw-r--r--test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll27
-rw-r--r--test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll27
-rw-r--r--test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll2
-rw-r--r--test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll16
-rw-r--r--test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll23
-rw-r--r--test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll4
-rw-r--r--test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll5
-rw-r--r--test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll10
-rw-r--r--test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll95
-rw-r--r--test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll6
-rw-r--r--test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll6
-rw-r--r--test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll37
-rw-r--r--test/Transforms/ScalarRepl/badarray.ll4
-rw-r--r--test/Transforms/ScalarRepl/copy-aggregate.ll11
-rw-r--r--test/Transforms/ScalarRepl/crash.ll14
-rw-r--r--test/Transforms/ScalarRepl/memcpy-from-global.ll14
-rw-r--r--test/Transforms/ScalarRepl/memset-aggregate.ll13
-rw-r--r--test/Transforms/ScalarRepl/phi-select.ll4
-rw-r--r--test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll75
-rw-r--r--test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll43
-rw-r--r--test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll30
-rw-r--r--test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll108
-rw-r--r--test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll36
-rw-r--r--test/Transforms/SimplifyCFG/lifetime.ll29
-rw-r--r--test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll2
-rw-r--r--test/Transforms/SimplifyLibCalls/MemCpy.ll21
-rw-r--r--test/Transforms/TailCallElim/inf-recursion.ll1
-rw-r--r--test/Unit/lit.cfg16
-rw-r--r--test/Verifier/2002-04-13-RetTypes.ll7
-rw-r--r--test/Verifier/2005-03-21-UndefinedTypeReference.ll7
-rw-r--r--test/Verifier/2008-11-15-RetVoid.ll2
-rw-r--r--test/Verifier/byval-2.ll4
-rw-r--r--test/lit.cfg3
-rw-r--r--test/lit.site.cfg.in1
-rw-r--r--tools/CMakeLists.txt5
-rw-r--r--tools/bugpoint/ExtractFunction.cpp17
-rw-r--r--tools/bugpoint/Miscompilation.cpp10
-rw-r--r--tools/llc/llc.cpp11
-rw-r--r--tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp68
-rw-r--r--tools/llvm-dis/llvm-dis.cpp2
-rw-r--r--tools/llvm-extract/llvm-extract.cpp1
-rw-r--r--tools/llvm-mc/Disassembler.cpp6
-rw-r--r--tools/llvm-mc/Disassembler.h2
-rw-r--r--tools/llvm-mc/llvm-mc.cpp61
-rw-r--r--tools/llvm-nm/llvm-nm.cpp40
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp126
-rw-r--r--tools/llvmc/src/Hooks.cpp1
-rw-r--r--tools/lto/LTOCodeGenerator.cpp8
-rw-r--r--tools/lto/LTOModule.cpp33
-rw-r--r--unittests/ADT/APFloatTest.cpp49
-rw-r--r--unittests/ADT/APIntTest.cpp46
-rw-r--r--unittests/ADT/PackedVectorTest.cpp115
-rw-r--r--unittests/ADT/SmallVectorTest.cpp48
-rw-r--r--unittests/ADT/StringMapTest.cpp6
-rw-r--r--unittests/Analysis/ScalarEvolutionTest.cpp2
-rw-r--r--unittests/CMakeLists.txt2
-rw-r--r--unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp3
-rw-r--r--unittests/ExecutionEngine/JIT/JITTest.cpp4
-rw-r--r--unittests/Support/ConstantRangeTest.cpp2
-rw-r--r--unittests/Support/TypeBuilderTest.cpp13
-rw-r--r--unittests/VMCore/DerivedTypesTest.cpp88
-rw-r--r--unittests/VMCore/PassManagerTest.cpp4
-rw-r--r--utils/TableGen/ARMDecoderEmitter.cpp25
-rw-r--r--utils/TableGen/AsmMatcherEmitter.cpp99
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp332
-rw-r--r--utils/TableGen/CMakeLists.txt2
-rw-r--r--utils/TableGen/ClangDiagnosticsEmitter.cpp12
-rw-r--r--utils/TableGen/CodeEmitterGen.cpp9
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.cpp30
-rw-r--r--utils/TableGen/CodeGenDAGPatterns.h2
-rw-r--r--utils/TableGen/CodeGenInstruction.cpp23
-rw-r--r--utils/TableGen/CodeGenInstruction.h13
-rw-r--r--utils/TableGen/CodeGenRegisters.cpp351
-rw-r--r--utils/TableGen/CodeGenRegisters.h115
-rw-r--r--utils/TableGen/CodeGenTarget.cpp33
-rw-r--r--utils/TableGen/CodeGenTarget.h77
-rw-r--r--utils/TableGen/DAGISelMatcherGen.cpp35
-rw-r--r--utils/TableGen/DisassemblerEmitter.cpp1
-rw-r--r--utils/TableGen/EDEmitter.cpp36
-rw-r--r--utils/TableGen/Error.cpp39
-rw-r--r--utils/TableGen/Error.h43
-rw-r--r--utils/TableGen/FastISelEmitter.cpp17
-rw-r--r--utils/TableGen/FixedLenDecoderEmitter.cpp13
-rw-r--r--utils/TableGen/InstrInfoEmitter.cpp246
-rw-r--r--utils/TableGen/InstrInfoEmitter.h10
-rw-r--r--utils/TableGen/IntrinsicEmitter.cpp4
-rw-r--r--utils/TableGen/LLVMCConfigurationEmitter.cpp4
-rw-r--r--utils/TableGen/NeonEmitter.cpp1
-rw-r--r--utils/TableGen/PseudoLoweringEmitter.cpp243
-rw-r--r--utils/TableGen/PseudoLoweringEmitter.h65
-rw-r--r--utils/TableGen/Record.cpp20
-rw-r--r--utils/TableGen/Record.h25
-rw-r--r--utils/TableGen/RegisterInfoEmitter.cpp650
-rw-r--r--utils/TableGen/RegisterInfoEmitter.h23
-rw-r--r--utils/TableGen/SetTheory.cpp8
-rw-r--r--utils/TableGen/SetTheory.h2
-rw-r--r--utils/TableGen/SubtargetEmitter.cpp242
-rw-r--r--utils/TableGen/SubtargetEmitter.h9
-rw-r--r--utils/TableGen/TGLexer.cpp12
-rw-r--r--utils/TableGen/TGLexer.h3
-rw-r--r--utils/TableGen/TGParser.h5
-rw-r--r--utils/TableGen/TableGen.cpp55
-rw-r--r--utils/TableGen/X86RecognizableInstr.cpp26
-rw-r--r--utils/TableGen/X86RecognizableInstr.h2
-rw-r--r--utils/lit/lit/LitConfig.py2
-rw-r--r--utils/lit/lit/Util.py2
-rw-r--r--utils/llvm.grm2
-rwxr-xr-xutils/llvmbuild22
1562 files changed, 47009 insertions, 42058 deletions
diff --git a/CREDITS.TXT b/CREDITS.TXT
index ab01dde..5ec33be 100644
--- a/CREDITS.TXT
+++ b/CREDITS.TXT
@@ -294,6 +294,10 @@ E: peckw@wesleypeck.com
W: http://wesleypeck.com/
D: MicroBlaze backend
+N: Francois Pichet
+E: pichet2000@gmail.com
+D: MSVC support
+
N: Vladimir Prus
W: http://vladimir_prus.blogspot.com
E: ghost@cs.msu.su
diff --git a/LICENSE.TXT b/LICENSE.TXT
index b8d2c74..1b1047c 100644
--- a/LICENSE.TXT
+++ b/LICENSE.TXT
@@ -4,7 +4,7 @@ LLVM Release License
University of Illinois/NCSA
Open Source License
-Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:
diff --git a/Makefile.config.in b/Makefile.config.in
index 5c73758..9bdb075 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -123,6 +123,9 @@ TARGET_TRIPLE=@target@
# Extra options to compile LLVM with
EXTRA_OPTIONS=@EXTRA_OPTIONS@
+# Extra options to link LLVM with
+EXTRA_LD_OPTIONS=@EXTRA_LD_OPTIONS@
+
# Endian-ness of the target
ENDIAN=@ENDIAN@
diff --git a/Makefile.rules b/Makefile.rules
index 71d4307..228fd73 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -123,9 +123,6 @@ $(BUILT_SOURCES) : $(ObjMakefiles)
reconfigure:
$(Echo) Reconfiguring $(PROJ_OBJ_ROOT)
$(Verb) cd $(PROJ_OBJ_ROOT) && \
- if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \
- $(RM) $(PROJ_OBJ_ROOT)/config.cache ; \
- fi ; \
$(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
$(ConfigStatusScript)
@@ -133,9 +130,6 @@ reconfigure:
$(ConfigStatusScript): $(ConfigureScript)
$(Echo) Reconfiguring with $<
$(Verb) cd $(PROJ_OBJ_ROOT) && \
- if test -w $(PROJ_OBJ_ROOT)/config.cache ; then \
- $(RM) $(PROJ_OBJ_ROOT)/config.cache ; \
- fi ; \
$(ConfigStatusScript) --recheck $(ConfigureScriptFLAGS) && \
$(ConfigStatusScript)
@@ -582,6 +576,10 @@ endif
# Options To Invoke Tools
#----------------------------------------------------------
+ifdef EXTRA_LD_OPTIONS
+LD.Flags += $(EXTRA_LD_OPTIONS)
+endif
+
ifndef NO_PEDANTIC
CompileCommonOpts += -pedantic -Wno-long-long
endif
@@ -1379,7 +1377,7 @@ endif
#---------------------------------------------------------
ifeq ($(HOST_OS),Darwin)
-ifdef TOOL_ORDER_FINE
+ifdef TOOL_ORDER_FILE
LD.Flags += -Wl,-order_file,$(TOOL_ORDER_FILE)
@@ -1722,30 +1720,15 @@ TDFiles := $(strip $(wildcard $(PROJ_SRC_DIR)/*.td) \
# All of these files depend on tblgen and the .td files.
$(INCTMPFiles) : $(TBLGEN) $(TDFiles)
-$(TARGET:%=$(ObjDir)/%GenRegisterNames.inc.tmp): \
-$(ObjDir)/%GenRegisterNames.inc.tmp : %.td $(ObjDir)/.dir
- $(Echo) "Building $(<F) register names with tblgen"
- $(Verb) $(TableGen) -gen-register-enums -o $(call SYSPATH, $@) $<
-
-$(TARGET:%=$(ObjDir)/%GenRegisterInfo.h.inc.tmp): \
-$(ObjDir)/%GenRegisterInfo.h.inc.tmp : %.td $(ObjDir)/.dir
- $(Echo) "Building $(<F) register information header with tblgen"
- $(Verb) $(TableGen) -gen-register-desc-header -o $(call SYSPATH, $@) $<
-
$(TARGET:%=$(ObjDir)/%GenRegisterInfo.inc.tmp): \
$(ObjDir)/%GenRegisterInfo.inc.tmp : %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) register info implementation with tblgen"
- $(Verb) $(TableGen) -gen-register-desc -o $(call SYSPATH, $@) $<
-
-$(TARGET:%=$(ObjDir)/%GenInstrNames.inc.tmp): \
-$(ObjDir)/%GenInstrNames.inc.tmp : %.td $(ObjDir)/.dir
- $(Echo) "Building $(<F) instruction names with tblgen"
- $(Verb) $(TableGen) -gen-instr-enums -o $(call SYSPATH, $@) $<
+ $(Verb) $(TableGen) -gen-register-info -o $(call SYSPATH, $@) $<
$(TARGET:%=$(ObjDir)/%GenInstrInfo.inc.tmp): \
$(ObjDir)/%GenInstrInfo.inc.tmp : %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) instruction information with tblgen"
- $(Verb) $(TableGen) -gen-instr-desc -o $(call SYSPATH, $@) $<
+ $(Verb) $(TableGen) -gen-instr-info -o $(call SYSPATH, $@) $<
$(TARGET:%=$(ObjDir)/%GenAsmWriter.inc.tmp): \
$(ObjDir)/%GenAsmWriter.inc.tmp : %.td $(ObjDir)/.dir
@@ -1767,6 +1750,11 @@ $(ObjDir)/%GenMCCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) MC code emitter with tblgen"
$(Verb) $(TableGen) -gen-emitter -mc-emitter -o $(call SYSPATH, $@) $<
+$(TARGET:%=$(ObjDir)/%GenMCPseudoLowering.inc.tmp): \
+$(ObjDir)/%GenMCPseudoLowering.inc.tmp: %.td $(ObjDir)/.dir
+ $(Echo) "Building $(<F) MC Pseudo instruction expander with tblgen"
+ $(Verb) $(TableGen) -gen-pseudo-lowering -o $(call SYSPATH, $@) $<
+
$(TARGET:%=$(ObjDir)/%GenCodeEmitter.inc.tmp): \
$(ObjDir)/%GenCodeEmitter.inc.tmp: %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) code emitter with tblgen"
@@ -1792,8 +1780,8 @@ $(ObjDir)/%GenFastISel.inc.tmp : %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) \"fast\" instruction selector implementation with tblgen"
$(Verb) $(TableGen) -gen-fast-isel -o $(call SYSPATH, $@) $<
-$(TARGET:%=$(ObjDir)/%GenSubtarget.inc.tmp): \
-$(ObjDir)/%GenSubtarget.inc.tmp : %.td $(ObjDir)/.dir
+$(TARGET:%=$(ObjDir)/%GenSubtargetInfo.inc.tmp): \
+$(ObjDir)/%GenSubtargetInfo.inc.tmp : %.td $(ObjDir)/.dir
$(Echo) "Building $(<F) subtarget information with tblgen"
$(Verb) $(TableGen) -gen-subtarget -o $(call SYSPATH, $@) $<
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index f0e7c42..3c3ddaa 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -35,8 +35,8 @@ AC_INIT([[llvm]],[[3.0svn]],[llvmbugs@cs.uiuc.edu])
dnl Provide a copyright substitution and ensure the copyright notice is included
dnl in the output of --version option of the generated configure script.
-AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign."])
-AC_COPYRIGHT([Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.])
+AC_SUBST(LLVM_COPYRIGHT,["Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign."])
+AC_COPYRIGHT([Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.])
dnl Indicate that we require autoconf 2.59 or later. Ths is needed because we
dnl use some autoconf macros only available in 2.59.
@@ -297,6 +297,8 @@ AC_CACHE_CHECK([type of operating system we're going to target],
llvm_cv_target_os_type="MingW" ;;
*-*-haiku*)
llvm_cv_target_os_type="Haiku" ;;
+ *-*-rtems*)
+ llvm_cv_target_os_type="RTEMS" ;;
*-unknown-eabi*)
llvm_cv_target_os_type="Freestanding" ;;
*)
@@ -656,6 +658,7 @@ for a_target in $TARGETS_TO_BUILD; do
[LLVM architecture name for the native architecture, if available])
LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target"
LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo"
+ LLVM_NATIVE_MCASMINFO="LLVMInitialize${LLVM_NATIVE_ARCH}MCAsmInfo"
LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter"
if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser"
@@ -664,6 +667,8 @@ for a_target in $TARGETS_TO_BUILD; do
[LLVM name for the native Target init function, if available])
AC_DEFINE_UNQUOTED(LLVM_NATIVE_TARGETINFO, $LLVM_NATIVE_TARGETINFO,
[LLVM name for the native TargetInfo init function, if available])
+ AC_DEFINE_UNQUOTED(LLVM_NATIVE_MCASMINFO, $LLVM_NATIVE_MCASMINFO,
+ [LLVM name for the native MCAsmInfo init function, if available])
AC_DEFINE_UNQUOTED(LLVM_NATIVE_ASMPRINTER, $LLVM_NATIVE_ASMPRINTER,
[LLVM name for the native AsmPrinter init function, if available])
if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
@@ -831,6 +836,17 @@ case "$withval" in
esac
AC_SUBST(EXTRA_OPTIONS,$EXTRA_OPTIONS)
+dnl Specify extra linker build options
+AC_ARG_WITH(extra-ld-options,
+ AS_HELP_STRING([--with-extra-ld-options],
+ [Specify additional options to link LLVM with]),,
+ withval=default)
+case "$withval" in
+ default) EXTRA_LD_OPTIONS= ;;
+ *) EXTRA_LD_OPTIONS=$withval ;;
+esac
+AC_SUBST(EXTRA_LD_OPTIONS,$EXTRA_LD_OPTIONS)
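+dnl A hypothetical invocation (the flag value below is illustrative only, not
+dnl part of this patch):
+dnl   ./configure --with-extra-ld-options="-Wl,--as-needed"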
+
dnl Allow specific bindings to be specified for building (or not)
AC_ARG_ENABLE([bindings],AS_HELP_STRING([--enable-bindings],
[Build specific language bindings: all,auto,none,{binding-name} (default=auto)]),,
@@ -923,6 +939,14 @@ if test "x$WITH_BINUTILS_INCDIR" != xdefault ; then
fi
fi
+dnl Specify the URL where bug reports should be submitted.
+AC_ARG_WITH(bug-report-url,
+ AS_HELP_STRING([--with-bug-report-url],
+ [Specify the URL where bug reports should be submitted (default=http://llvm.org)]),,
+ withval="http://llvm.org")
+AC_DEFINE_UNQUOTED(BUG_REPORT_URL,"$withval",
+ [Bug report URL.])
+
dnl --enable-libffi : check whether the user wants to turn off libffi:
AC_ARG_ENABLE(libffi,AS_HELP_STRING(
--enable-libffi,[Check for the presence of libffi (default is NO)]),
diff --git a/bindings/ocaml/llvm/llvm.ml b/bindings/ocaml/llvm/llvm.ml
index 462eb20..a62ba37 100644
--- a/bindings/ocaml/llvm/llvm.ml
+++ b/bindings/ocaml/llvm/llvm.ml
@@ -11,7 +11,6 @@
type llcontext
type llmodule
type lltype
-type lltypehandle
type llvalue
type lluse
type llbasicblock
@@ -32,7 +31,6 @@ module TypeKind = struct
| Struct
| Array
| Pointer
- | Opaque
| Vector
| Metadata
end
@@ -162,12 +160,6 @@ external data_layout: llmodule -> string
= "llvm_data_layout"
external set_data_layout: string -> llmodule -> unit
= "llvm_set_data_layout"
-external define_type_name : string -> lltype -> llmodule -> bool
- = "llvm_add_type_name"
-external delete_type_name : string -> llmodule -> unit
- = "llvm_delete_type_name"
-external type_by_name : llmodule -> string -> lltype option
- = "llvm_type_by_name"
external dump_module : llmodule -> unit = "llvm_dump_module"
external set_module_inline_asm : llmodule -> string -> unit
= "llvm_set_module_inline_asm"
@@ -222,16 +214,9 @@ external address_space : lltype -> int = "llvm_address_space"
external vector_size : lltype -> int = "llvm_vector_size"
(*--... Operations on other types ..........................................--*)
-external opaque_type : llcontext -> lltype = "llvm_opaque_type"
external void_type : llcontext -> lltype = "llvm_void_type"
external label_type : llcontext -> lltype = "llvm_label_type"
-(*--... Operations on type handles .........................................--*)
-external handle_to_type : lltype -> lltypehandle = "llvm_handle_to_type"
-external type_of_handle : lltypehandle -> lltype = "llvm_type_of_handle"
-external refine_type : lltype -> lltype -> unit = "llvm_refine_type"
-
-
(*===-- Values ------------------------------------------------------------===*)
external type_of : llvalue -> lltype = "llvm_type_of"
external value_name : llvalue -> string = "llvm_value_name"
@@ -1049,7 +1034,6 @@ let rec string_of_lltype ty =
" x " ^ (string_of_lltype (element_type ty)) ^ "]"
| TypeKind.Vector -> "<" ^ (string_of_int (vector_size ty)) ^
" x " ^ (string_of_lltype (element_type ty)) ^ ">"
- | TypeKind.Opaque -> "opaque"
| TypeKind.Function -> string_of_lltype (return_type ty) ^
" (" ^ (concat2 ", " (
Array.map string_of_lltype (param_types ty)
diff --git a/bindings/ocaml/llvm/llvm.mli b/bindings/ocaml/llvm/llvm.mli
index 9b037aa..44f345f 100644
--- a/bindings/ocaml/llvm/llvm.mli
+++ b/bindings/ocaml/llvm/llvm.mli
@@ -29,11 +29,6 @@ type llmodule
[llvm::Type] class. *)
type lltype
-(** When building recursive types using {!refine_type}, [lltype] values may
- become invalid; use [lltypehandle] to resolve this problem. See the
- [llvm::AbstractTypeHolder] class. *)
-type lltypehandle
-
(** Any value in the LLVM IR. Functions, instructions, global variables,
constants, and much more are all [llvalues]. See the [llvm::Value] class.
This type covers a wide range of subclasses. *)
@@ -69,7 +64,6 @@ module TypeKind : sig
| Struct
| Array
| Pointer
- | Opaque
| Vector
| Metadata
end
@@ -261,24 +255,6 @@ val data_layout: llmodule -> string
to the string [s]. See the method [llvm::Module::setDataLayout]. *)
val set_data_layout: string -> llmodule -> unit
-
-(** [define_type_name name ty m] adds a named type to the module's symbol table.
- Returns [true] if successful. If such a name already exists, then no entry
- is added and [false] is returned. See the [llvm::Module::addTypeName]
- method. *)
-val define_type_name : string -> lltype -> llmodule -> bool
-
-
-(** [delete_type_name name] removes a type name from the module's symbol
- table. *)
-val delete_type_name : string -> llmodule -> unit
-
-
-(** [type_by_name m n] returns the type in the module [m] named [n], or [None]
- if it does not exist. See the method [llvm::Module::getTypeByName]. *)
-val type_by_name : llmodule -> string -> lltype option
-
-
(** [dump_module m] prints the .ll representation of the module [m] to standard
error. See the method [llvm::Module::dump]. *)
val dump_module : llmodule -> unit
@@ -447,11 +423,6 @@ val vector_size : lltype -> int
(** {7 Operations on other types} *)
-(** [opaque_type c] creates a new opaque type distinct from any other in the
- context [c]. Opaque types are useful for building recursive types in
- combination with {!refine_type}. See [llvm::OpaqueType::get]. *)
-val opaque_type : llcontext -> lltype
-
(** [void_type c] creates a type of a function which does not return any
value in the context [c]. See [llvm::Type::VoidTy]. *)
val void_type : llcontext -> lltype
@@ -460,25 +431,6 @@ val void_type : llcontext -> lltype
[llvm::Type::LabelTy]. *)
val label_type : llcontext -> lltype
-(** {7 Operations on type handles} *)
-
-(** [handle_to_type ty] creates a handle to the type [ty]. If [ty] is later
- refined as a result of a call to {!refine_type}, the handle will be updated;
- any bare [lltype] references will become invalid.
- See the class [llvm::PATypeHolder]. *)
-val handle_to_type : lltype -> lltypehandle
-
-(** [type_of_handle tyh] resolves the type handle [tyh].
- See the method [llvm::PATypeHolder::get()]. *)
-val type_of_handle : lltypehandle -> lltype
-
-(** [refine_type opaque_ty ty] replaces the abstract type [opaque_ty] with the
- concrete type [ty] in all users. Warning: This may invalidate {!lltype}
- values! Use {!lltypehandle} to manipulate potentially abstract types. See
- the method [llvm::Type::refineAbstractType]. *)
-val refine_type : lltype -> lltype -> unit
-
-
(* {6 Values} *)
(** [type_of v] returns the type of the value [v].
diff --git a/bindings/ocaml/llvm/llvm_ocaml.c b/bindings/ocaml/llvm/llvm_ocaml.c
index ce6cf8e..455e191 100644
--- a/bindings/ocaml/llvm/llvm_ocaml.c
+++ b/bindings/ocaml/llvm/llvm_ocaml.c
@@ -152,30 +152,6 @@ CAMLprim value llvm_set_data_layout(value Layout, LLVMModuleRef M) {
return Val_unit;
}
-/* string -> lltype -> llmodule -> bool */
-CAMLprim value llvm_add_type_name(value Name, LLVMTypeRef Ty, LLVMModuleRef M) {
- int res = LLVMAddTypeName(M, String_val(Name), Ty);
- return Val_bool(res == 0);
-}
-
-/* string -> llmodule -> unit */
-CAMLprim value llvm_delete_type_name(value Name, LLVMModuleRef M) {
- LLVMDeleteTypeName(M, String_val(Name));
- return Val_unit;
-}
-
-/* llmodule -> string -> lltype option */
-CAMLprim value llvm_type_by_name(LLVMModuleRef M, value Name) {
- CAMLparam1(Name);
- LLVMTypeRef T;
- if ((T = LLVMGetTypeByName(M, String_val(Name)))) {
- value Option = alloc(1, 0);
- Field(Option, 0) = (value) T;
- CAMLreturn(Option);
- }
- CAMLreturn(Val_int(0));
-}
-
/* llmodule -> unit */
CAMLprim value llvm_dump_module(LLVMModuleRef M) {
LLVMDumpModule(M);
@@ -373,44 +349,6 @@ CAMLprim LLVMTypeRef llvm_label_type(LLVMContextRef Context) {
return LLVMLabelTypeInContext(Context);
}
-/* llcontext -> lltype */
-CAMLprim LLVMTypeRef llvm_opaque_type(LLVMContextRef Context) {
- return LLVMOpaqueTypeInContext(Context);
-}
-
-/*--... Operations on type handles .........................................--*/
-
-#define Typehandle_val(v) (*(LLVMTypeHandleRef *)(Data_custom_val(v)))
-
-static void llvm_finalize_handle(value TH) {
- LLVMDisposeTypeHandle(Typehandle_val(TH));
-}
-
-static struct custom_operations typehandle_ops = {
- (char *) "LLVMTypeHandle",
- llvm_finalize_handle,
- custom_compare_default,
- custom_hash_default,
- custom_serialize_default,
- custom_deserialize_default
-};
-
-CAMLprim value llvm_handle_to_type(LLVMTypeRef PATy) {
- value TH = alloc_custom(&typehandle_ops, sizeof(LLVMBuilderRef), 0, 1);
- Typehandle_val(TH) = LLVMCreateTypeHandle(PATy);
- return TH;
-}
-
-CAMLprim LLVMTypeRef llvm_type_of_handle(value TH) {
- return LLVMResolveTypeHandle(Typehandle_val(TH));
-}
-
-CAMLprim value llvm_refine_type(LLVMTypeRef AbstractTy, LLVMTypeRef ConcreteTy){
- LLVMRefineType(AbstractTy, ConcreteTy);
- return Val_unit;
-}
-
-
/*===-- VALUES ------------------------------------------------------------===*/
/* llvalue -> lltype */
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index c1b22d4..46f33de 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -336,6 +336,7 @@ else ()
message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}")
set(LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target)
set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo)
+ set(LLVM_NATIVE_MCASMINFO LLVMInitialize${LLVM_NATIVE_ARCH}MCAsmInfo)
set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter)
endif ()
@@ -349,7 +350,6 @@ endif( MINGW )
if( MSVC )
set(error_t int)
- set(mode_t "unsigned short")
set(LTDL_SHLIBPATH_VAR "PATH")
set(LTDL_SYSSEARCHPATH "")
set(LTDL_DLOPEN_DEPLIBS 1)
diff --git a/cmake/modules/LLVMLibDeps.cmake b/cmake/modules/LLVMLibDeps.cmake
index 509ac52..14575e5 100644
--- a/cmake/modules/LLVMLibDeps.cmake
+++ b/cmake/modules/LLVMLibDeps.cmake
@@ -1,9 +1,11 @@
set(MSVC_LIB_DEPS_LLVMARMAsmParser LLVMARMCodeGen LLVMARMInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMARMAsmPrinter LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMAsmPrinter LLVMARMInfo LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMARMDisassembler LLVMARMCodeGen LLVMARMInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMARMCodeGen LLVMARMAsmPrinter LLVMARMDesc LLVMARMInfo LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMARMDesc LLVMARMInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMARMDisassembler LLVMARMCodeGen LLVMARMDesc LLVMARMInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMARMInfo LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAlphaCodeGen LLVMAlphaDesc LLVMAlphaInfo LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMAlphaDesc LLVMAlphaInfo LLVMMC)
set(MSVC_LIB_DEPS_LLVMAlphaInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMAnalysis LLVMCore LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMArchive LLVMBitReader LLVMCore LLVMSupport)
@@ -11,17 +13,19 @@ set(MSVC_LIB_DEPS_LLVMAsmParser LLVMCore LLVMSupport)
set(MSVC_LIB_DEPS_LLVMAsmPrinter LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMBitReader LLVMCore LLVMSupport)
set(MSVC_LIB_DEPS_LLVMBitWriter LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMAsmPrinter LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBlackfinCodeGen LLVMAsmPrinter LLVMBlackfinDesc LLVMBlackfinInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMBlackfinDesc LLVMBlackfinInfo LLVMMC)
set(MSVC_LIB_DEPS_LLVMBlackfinInfo LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
+set(MSVC_LIB_DEPS_LLVMCBackend LLVMAnalysis LLVMCBackendInfo LLVMCodeGen LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMCBackendInfo LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMAsmPrinter LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCellSPUCodeGen LLVMAsmPrinter LLVMCellSPUDesc LLVMCellSPUInfo LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMCellSPUDesc LLVMCellSPUInfo LLVMMC)
set(MSVC_LIB_DEPS_LLVMCellSPUInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMCodeGen LLVMAnalysis LLVMCore LLVMMC LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMCore LLVMSupport)
set(MSVC_LIB_DEPS_LLVMCppBackend LLVMCore LLVMCppBackendInfo LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMCppBackendInfo LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMExecutionEngine LLVMCore LLVMMC LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMInstCombine LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMInstrumentation LLVMAnalysis LLVMCore LLVMSupport LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMInterpreter LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMSupport LLVMTarget)
@@ -29,41 +33,51 @@ set(MSVC_LIB_DEPS_LLVMJIT LLVMCodeGen LLVMCore LLVMExecutionEngine LLVMMC LLVMSu
set(MSVC_LIB_DEPS_LLVMLinker LLVMArchive LLVMBitReader LLVMCore LLVMSupport LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMBlazeAsmPrinter LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMBlazeCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMBlazeAsmPrinter LLVMMBlazeInfo LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
-set(MSVC_LIB_DEPS_LLVMMBlazeDisassembler LLVMMBlazeCodeGen LLVMMBlazeInfo LLVMMC)
+set(MSVC_LIB_DEPS_LLVMMBlazeCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMBlazeAsmPrinter LLVMMBlazeDesc LLVMMBlazeInfo LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMBlazeDesc LLVMMBlazeInfo LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMBlazeDisassembler LLVMMBlazeCodeGen LLVMMBlazeDesc LLVMMBlazeInfo LLVMMC)
set(MSVC_LIB_DEPS_LLVMMBlazeInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMDisassembler LLVMARMInfo LLVMAlphaCodeGen LLVMAlphaInfo LLVMBlackfinCodeGen LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUCodeGen LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeDisassembler LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430CodeGen LLVMMSP430Info LLVMMipsCodeGen LLVMMipsInfo LLVMPTXCodeGen LLVMPTXInfo LLVMPowerPCCodeGen LLVMPowerPCInfo LLVMSparcCodeGen LLVMSparcInfo LLVMSupport LLVMSystemZCodeGen LLVMSystemZInfo LLVMTarget LLVMX86AsmParser LLVMX86CodeGen LLVMX86Disassembler LLVMX86Info LLVMXCoreCodeGen LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMMCDisassembler LLVMARMAsmParser LLVMARMCodeGen LLVMARMDesc LLVMARMDisassembler LLVMARMInfo LLVMAlphaCodeGen LLVMAlphaDesc LLVMAlphaInfo LLVMBlackfinCodeGen LLVMBlackfinDesc LLVMBlackfinInfo LLVMCBackend LLVMCBackendInfo LLVMCellSPUCodeGen LLVMCellSPUDesc LLVMCellSPUInfo LLVMCppBackend LLVMCppBackendInfo LLVMMBlazeAsmParser LLVMMBlazeCodeGen LLVMMBlazeDesc LLVMMBlazeDisassembler LLVMMBlazeInfo LLVMMC LLVMMCParser LLVMMSP430CodeGen LLVMMSP430Desc LLVMMSP430Info LLVMMipsCodeGen LLVMMipsDesc LLVMMipsInfo LLVMPTXCodeGen LLVMPTXDesc LLVMPTXInfo LLVMPowerPCCodeGen LLVMPowerPCDesc LLVMPowerPCInfo LLVMSparcCodeGen LLVMSparcDesc LLVMSparcInfo LLVMSupport LLVMSystemZCodeGen LLVMSystemZDesc LLVMSystemZInfo LLVMTarget LLVMX86AsmParser LLVMX86CodeGen LLVMX86Desc LLVMX86Disassembler LLVMX86Info LLVMXCoreCodeGen LLVMXCoreDesc LLVMXCoreInfo)
set(MSVC_LIB_DEPS_LLVMMCJIT LLVMCore LLVMExecutionEngine LLVMRuntimeDyld LLVMSupport LLVMTarget)
set(MSVC_LIB_DEPS_LLVMMCParser LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMSP430AsmPrinter LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430AsmPrinter LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMSP430CodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMSP430AsmPrinter LLVMMSP430Desc LLVMMSP430Info LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMSP430Desc LLVMMC LLVMMSP430Info)
set(MSVC_LIB_DEPS_LLVMMSP430Info LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMipsAsmPrinter LLVMMC LLVMSupport)
+set(MSVC_LIB_DEPS_LLVMMipsCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMMipsAsmPrinter LLVMMipsDesc LLVMMipsInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMMipsDesc LLVMMC LLVMMipsInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMMipsInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMObject LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMPTXCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPTXInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPTXCodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPTXDesc LLVMPTXInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPTXDesc LLVMMC LLVMPTXInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMPTXInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMPowerPCAsmPrinter LLVMMC LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCAsmPrinter LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPowerPCCodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMPowerPCAsmPrinter LLVMPowerPCDesc LLVMPowerPCInfo LLVMSelectionDAG LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMPowerPCDesc LLVMMC LLVMPowerPCInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMPowerPCInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMRuntimeDyld LLVMObject LLVMSupport)
set(MSVC_LIB_DEPS_LLVMScalarOpts LLVMAnalysis LLVMCore LLVMInstCombine LLVMSupport LLVMTarget LLVMTransformUtils)
set(MSVC_LIB_DEPS_LLVMSelectionDAG LLVMAnalysis LLVMCodeGen LLVMCore LLVMMC LLVMSupport LLVMTarget LLVMTransformUtils)
-set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcInfo LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSparcCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSparcDesc LLVMSparcInfo LLVMSupport LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSparcDesc LLVMMC LLVMSparcInfo LLVMSupport)
set(MSVC_LIB_DEPS_LLVMSparcInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMSupport )
-set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZInfo LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSystemZCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMSystemZDesc LLVMSystemZInfo LLVMTarget)
+set(MSVC_LIB_DEPS_LLVMSystemZDesc LLVMMC LLVMSystemZInfo)
set(MSVC_LIB_DEPS_LLVMSystemZInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMTarget LLVMCore LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMTransformUtils LLVMAnalysis LLVMCore LLVMSupport LLVMTarget LLVMipa)
set(MSVC_LIB_DEPS_LLVMX86AsmParser LLVMMC LLVMMCParser LLVMSupport LLVMTarget LLVMX86Info)
set(MSVC_LIB_DEPS_LLVMX86AsmPrinter LLVMMC LLVMSupport LLVMX86Utils)
-set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMX86AsmPrinter LLVMX86Info LLVMX86Utils)
+set(MSVC_LIB_DEPS_LLVMX86CodeGen LLVMAnalysis LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMX86AsmPrinter LLVMX86Desc LLVMX86Info LLVMX86Utils)
+set(MSVC_LIB_DEPS_LLVMX86Desc LLVMMC LLVMSupport LLVMX86Info)
set(MSVC_LIB_DEPS_LLVMX86Disassembler LLVMMC LLVMSupport LLVMX86Info)
set(MSVC_LIB_DEPS_LLVMX86Info LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMX86Utils LLVMCore LLVMSupport)
-set(MSVC_LIB_DEPS_LLVMXCoreCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMXCoreCodeGen LLVMAsmPrinter LLVMCodeGen LLVMCore LLVMMC LLVMSelectionDAG LLVMSupport LLVMTarget LLVMXCoreDesc LLVMXCoreInfo)
+set(MSVC_LIB_DEPS_LLVMXCoreDesc LLVMMC LLVMXCoreInfo)
set(MSVC_LIB_DEPS_LLVMXCoreInfo LLVMMC LLVMSupport)
set(MSVC_LIB_DEPS_LLVMipa LLVMAnalysis LLVMCore LLVMSupport)
set(MSVC_LIB_DEPS_LLVMipo LLVMAnalysis LLVMCore LLVMScalarOpts LLVMSupport LLVMTarget LLVMTransformUtils LLVMipa)
diff --git a/configure b/configure
index 0b10a90..ae7fcd7 100755
--- a/configure
+++ b/configure
@@ -9,7 +9,7 @@
# This configure script is free software; the Free Software Foundation
# gives unlimited permission to copy, distribute and modify it.
#
-# Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+# Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.
## --------------------- ##
## M4sh Initialization. ##
## --------------------- ##
@@ -707,6 +707,7 @@ CLANGXXPATH
ENABLE_BUILT_CLANG
OPTIMIZE_OPTION
EXTRA_OPTIONS
+EXTRA_LD_OPTIONS
BINUTILS_INCDIR
CXX
CXXFLAGS
@@ -1454,6 +1455,7 @@ Optional Packages:
--with-optimize-option Select the compiler options to use for optimized
builds
--with-extra-options Specify additional options to compile LLVM with
+ --with-extra-ld-options Specify additional options to link LLVM with
--with-ocaml-libdir Specify install location for ocaml bindings (default
is stdlib)
--with-clang-resource-dir
@@ -1559,7 +1561,7 @@ Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
-Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign.
_ACEOF
exit
fi
@@ -1921,7 +1923,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
-LLVM_COPYRIGHT="Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign."
+LLVM_COPYRIGHT="Copyright (c) 2003-2011 University of Illinois at Urbana-Champaign."
@@ -2335,6 +2337,8 @@ else
llvm_cv_target_os_type="MingW" ;;
*-*-haiku*)
llvm_cv_target_os_type="Haiku" ;;
+ *-*-rtems*)
+ llvm_cv_target_os_type="RTEMS" ;;
*-unknown-eabi*)
llvm_cv_target_os_type="Freestanding" ;;
*)
@@ -5116,6 +5120,7 @@ _ACEOF
LLVM_NATIVE_TARGET="LLVMInitialize${LLVM_NATIVE_ARCH}Target"
LLVM_NATIVE_TARGETINFO="LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo"
+ LLVM_NATIVE_MCASMINFO="LLVMInitialize${LLVM_NATIVE_ARCH}MCAsmInfo"
LLVM_NATIVE_ASMPRINTER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter"
if test -f ${srcdir}/lib/Target/${LLVM_NATIVE_ARCH}/AsmParser/Makefile ; then
LLVM_NATIVE_ASMPARSER="LLVMInitialize${LLVM_NATIVE_ARCH}AsmParser"
@@ -5132,6 +5137,11 @@ _ACEOF
cat >>confdefs.h <<_ACEOF
+#define LLVM_NATIVE_MCASMINFO $LLVM_NATIVE_MCASMINFO
+_ACEOF
+
+
+cat >>confdefs.h <<_ACEOF
#define LLVM_NATIVE_ASMPRINTER $LLVM_NATIVE_ASMPRINTER
_ACEOF
@@ -5346,6 +5356,21 @@ esac
EXTRA_OPTIONS=$EXTRA_OPTIONS
+
+# Check whether --with-extra-ld-options was given.
+if test "${with_extra_ld_options+set}" = set; then
+ withval=$with_extra_ld_options;
+else
+ withval=default
+fi
+
+case "$withval" in
+ default) EXTRA_LD_OPTIONS= ;;
+ *) EXTRA_LD_OPTIONS=$withval ;;
+esac
+EXTRA_LD_OPTIONS=$EXTRA_LD_OPTIONS
+
+
# Check whether --enable-bindings was given.
if test "${enable_bindings+set}" = set; then
enableval=$enable_bindings;
@@ -11571,7 +11596,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
-#line 11574 "configure"
+#line 11591 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H
@@ -23427,6 +23452,7 @@ CLANGXXPATH!$CLANGXXPATH$ac_delim
ENABLE_BUILT_CLANG!$ENABLE_BUILT_CLANG$ac_delim
OPTIMIZE_OPTION!$OPTIMIZE_OPTION$ac_delim
EXTRA_OPTIONS!$EXTRA_OPTIONS$ac_delim
+EXTRA_LD_OPTIONS!$EXTRA_LD_OPTIONS$ac_delim
BINUTILS_INCDIR!$BINUTILS_INCDIR$ac_delim
CXX!$CXX$ac_delim
CXXFLAGS!$CXXFLAGS$ac_delim
@@ -23515,7 +23541,6 @@ LLVM_MANDIR!$LLVM_MANDIR$ac_delim
LLVM_CONFIGTIME!$LLVM_CONFIGTIME$ac_delim
BINDINGS_TO_BUILD!$BINDINGS_TO_BUILD$ac_delim
ALL_BINDINGS!$ALL_BINDINGS$ac_delim
-OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
_ACEOF
if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 97; then
@@ -23557,6 +23582,7 @@ _ACEOF
ac_delim='%!_!# '
for ac_last_try in false false false false false :; do
cat >conf$$subs.sed <<_ACEOF
+OCAML_LIBDIR!$OCAML_LIBDIR$ac_delim
ENABLE_VISIBILITY_INLINES_HIDDEN!$ENABLE_VISIBILITY_INLINES_HIDDEN$ac_delim
RPATH!$RPATH$ac_delim
RDYNAMIC!$RDYNAMIC$ac_delim
@@ -23564,7 +23590,7 @@ LIBOBJS!$LIBOBJS$ac_delim
LTLIBOBJS!$LTLIBOBJS$ac_delim
_ACEOF
- if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 5; then
+ if test `sed -n "s/.*$ac_delim\$/X/p" conf$$subs.sed | grep -c X` = 6; then
break
elif $ac_last_try; then
{ { echo "$as_me:$LINENO: error: could not make $CONFIG_STATUS" >&5
diff --git a/docs/BranchWeightMetadata.html b/docs/BranchWeightMetadata.html
new file mode 100644
index 0000000..6106e6f
--- /dev/null
+++ b/docs/BranchWeightMetadata.html
@@ -0,0 +1,163 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+ "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <title>LLVM Branch Weight Metadata</title>
+ <link rel="stylesheet" href="llvm.css" type="text/css">
+</head>
+<body>
+
+<h1>
+ LLVM Branch Weight Metadata
+</h1>
+
+<ol>
+ <li><a href="#introduction">Introduction</a></li>
+ <li><a href="#supported_instructions">Supported Instructions</a></li>
+ <li><a href="#builtin_expect">Built-in "expect" Instruction </a></li>
+ <li><a href="#cfg_modifications">CFG Modifications</a></li>
+</ol>
+
+<div class="doc_author">
+ <p>Written by <a href="mailto:jstaszak@apple.com">Jakub Staszak</a></p>
+</div>
+
+<h2>
+ <a name="introduction">Introduction</a>
+</h2>
+<div>
+<p>Branch Weight Metadata represents the likelihood of a branch being taken.
+The metadata is assigned to the <tt>TerminatorInst</tt> as an
+<tt>MDNode</tt> of the <tt>MD_prof</tt> kind. The first operand is always an
+<tt>MDString</tt> node with the string "branch_weights". The number of
+operands depends on the terminator type.</p>
+
+<p>Branch weights might be fetched from the profiling file, or generated based
+on the <a href="#builtin_expect"><tt>__builtin_expect</tt></a> built-in
+described below.</p>
+
+<p>All weights are represented as unsigned 32-bit values, where a higher value
+indicates a greater chance of the branch being taken.</p>
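+
+<p>As an illustrative sketch (the branch, labels, and weight values below are
+invented for the example), the metadata is attached to the terminator through
+the <tt>!prof</tt> tag:</p>
+
+<div class="doc_code">
+<pre>
+  %cmp = icmp sgt i32 %x, 0
+  br i1 %cmp, label %then, label %else, !prof !0
+  ...
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+</pre>
+</div>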
+</div>
+
+<h2>
+ <a name="supported_instructions">Supported Instructions</a>
+</h2>
+
+<div>
+ <h4>BranchInst</h4>
+ <div>
+ <p>Metadata is only assigned to conditional branches. There are two extra
+ operands, one for the true and one for the false branch.</p>
+ </div>
+ <div class="doc_code">
+ <pre>
+!0 = metadata !{
+ metadata !"branch_weights",
+ i32 &lt;TRUE_BRANCH_WEIGHT&gt;,
+ i32 &lt;FALSE_BRANCH_WEIGHT&gt;
+}
+ </pre>
+ </div>
+
+ <h4>SwitchInst</h4>
+ <div>
+ <p>Branch weights are assigned to every case (including the <tt>default</tt>
+ case, which is always case #0).</p>
+ </div>
+ <div class="doc_code">
+ <pre>
+!0 = metadata !{
+ metadata !"branch_weights",
+ i32 &lt;DEFAULT_BRANCH_WEIGHT&gt;
+ [ , i32 &lt;CASE_BRANCH_WEIGHT&gt; ... ]
+}
+ </pre>
+ </div>
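+
+ <p>For example (a sketch with hypothetical weights), a switch whose
+ <tt>default</tt> case is rarely taken and whose two explicit cases are
+ equally hot might carry:</p>
+ <div class="doc_code">
+ <pre>
+switch i32 %x, label %sw.default [ i32 0, label %sw.bb0
+                                   i32 5, label %sw.bb5 ], !prof !1
+
+!1 = metadata !{
+  metadata !"branch_weights",
+  i32 4, i32 48, i32 48
+}
+ </pre>
+ </div>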
+
+ <h4>IndirectBrInst</h4>
+ <div>
+ <p>Branch weights are assigned to every destination.</p>
+ </div>
+ <div class="doc_code">
+ <pre>
+!0 = metadata !{
+ metadata !"branch_weights",
+ i32 &lt;LABEL_BRANCH_WEIGHT&gt;
+ [ , i32 &lt;LABEL_BRANCH_WEIGHT&gt; ... ]
+}
+ </pre>
+ </div>
+
+ <h4>Other</h4>
+ <div>
+ <p>Other terminator instructions are not allowed to contain Branch Weight
+ Metadata.</p>
+ </div>
+</div>
+
+<h2>
+ <a name="builtin_expect">Built-in "expect" Instruction</a>
+</h2>
+<div>
+ <p>The <tt>__builtin_expect(long exp, long c)</tt> built-in provides branch
+ prediction information. The return value is the value of <tt>exp</tt>.</p>
+
+ <p>It is especially useful in conditional statements. Currently Clang
+ supports it in two kinds of conditional statement:
+ </p>
+ <h4><tt>if</tt> statement</h4>
+ <div>
+ <p>The <tt>exp</tt> parameter is the condition. The <tt>c</tt> parameter is
+ the expected comparison value. If it is equal to 1 (true), the condition is
+ likely to be true; otherwise the condition is likely to be false. For
+ example:
+ </p>
+ </div>
+ <div class="doc_code">
+ <pre>
+ if (__builtin_expect(x &gt; 0, 1)) {
+ // This block is likely to be taken.
+ }
+ </pre>
+ </div>
+
+ <h4><tt>switch</tt> statement</h4>
+ <div>
+  <p>The <tt>exp</tt> parameter is the value. The <tt>c</tt> parameter is the
+  expected value. If the expected value does not appear in the case list, the
+  <tt>default</tt> case is assumed to be likely taken.</p>
+ </div>
+ <div class="doc_code">
+ <pre>
+ switch (__builtin_expect(x, 5)) {
+ default: break;
+ case 0: // ...
+ case 3: // ...
+ case 5: // This case is likely to be taken.
+ }
+ </pre>
+ </div>
+</div>
+
+<h2>
+ <a name="cfg_modifications">CFG Modifications</a>
+</h2>
+<div>
+<p>Branch Weight Metadata is not proof against CFG changes. If a terminator's
+operands are changed, some action should be taken to update the weights.
+Otherwise, misoptimizations may occur due to incorrect branch prediction
+information.</p>
+</div>
+
+<hr>
+<address>
+ <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+ src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
+ <a href="http://validator.w3.org/check/referer"><img
+ src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
+
+ <a href="mailto:jstaszak@apple.com">Jakub Staszak</a><br>
+ <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+</address>
+
+</body>
+</html>
diff --git a/docs/ExtendingLLVM.html b/docs/ExtendingLLVM.html
index 03cfa7e..cf57fab 100644
--- a/docs/ExtendingLLVM.html
+++ b/docs/ExtendingLLVM.html
@@ -146,7 +146,7 @@ cases, new nodes have been added to allow many targets to perform a common task
complicated behavior in a single node (rotate).</p>
<ol>
-<li><tt>include/llvm/CodeGen/SelectionDAGNodes.h</tt>:
+<li><tt>include/llvm/CodeGen/ISDOpcodes.h</tt>:
Add an enum value for the new SelectionDAG node.</li>
<li><tt>lib/CodeGen/SelectionDAG/SelectionDAG.cpp</tt>:
Add code to print the node to <tt>getOperationName</tt>. If your new node
@@ -384,7 +384,7 @@ void calcTypeName(const Type *Ty,
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
+ Last modified: $Date: 2011-06-30 08:37:07 +0200 (Thu, 30 Jun 2011) $
</address>
</body>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 48bddfd..8e17243 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -74,16 +74,14 @@
<ol>
<li><a href="#t_array">Array Type</a></li>
<li><a href="#t_struct">Structure Type</a></li>
- <li><a href="#t_pstruct">Packed Structure Type</a></li>
+ <li><a href="#t_opaque">Opaque Type</a></li>
<li><a href="#t_vector">Vector Type</a></li>
</ol>
</li>
<li><a href="#t_function">Function Type</a></li>
<li><a href="#t_pointer">Pointer Type</a></li>
- <li><a href="#t_opaque">Opaque Type</a></li>
</ol>
</li>
- <li><a href="#t_uprefs">Type Up-references</a></li>
</ol>
</li>
<li><a href="#constants">Constants</a>
@@ -241,6 +239,7 @@
<li><a href="#int_pow">'<tt>llvm.pow.*</tt>' Intrinsic</a></li>
<li><a href="#int_exp">'<tt>llvm.exp.*</tt>' Intrinsic</a></li>
<li><a href="#int_log">'<tt>llvm.log.*</tt>' Intrinsic</a></li>
+ <li><a href="#int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a></li>
</ol>
</li>
<li><a href="#int_manip">Bit Manipulation Intrinsics</a>
@@ -1164,6 +1163,11 @@ define void @f() optsize { ... }
Most of the functions in the Windows system DLLs in Windows XP SP2 or
higher were compiled in this fashion.</dd>
+ <dt><tt><b>nonlazybind</b></tt></dt>
+ <dd>This attribute suppresses lazy symbol binding for the function. This
+ may make calls to the function faster, at the cost of extra program
+ startup time if the function is not called during program startup.</dd>
+
<dt><tt><b>inlinehint</b></tt></dt>
<dd>This attribute indicates that the source code contained a hint that inlining
this function is desirable (such as the "inline" keyword in C/C++). It
@@ -1529,7 +1533,6 @@ synchronization behavior.</p>
<a href="#t_function">function</a>,
<a href="#t_pointer">pointer</a>,
<a href="#t_struct">structure</a>,
- <a href="#t_pstruct">packed structure</a>,
<a href="#t_vector">vector</a>,
<a href="#t_opaque">opaque</a>.
</td>
@@ -1697,7 +1700,9 @@ synchronization behavior.</p>
possible to have a two dimensional array, using an array as the element type
of another array.</p>
-
+</div>
+
+
<!-- _______________________________________________________________________ -->
<h4>
<a name="t_aggregate">Aggregate Types</a>
@@ -1836,9 +1841,7 @@ synchronization behavior.</p>
<h5>Overview:</h5>
<p>The structure type is used to represent a collection of data members together
- in memory. The packing of the field types is defined to match the ABI of the
- underlying processor. The elements of a structure may be any type that has a
- size.</p>
+ in memory. The elements of a structure may be any type that has a size.</p>
<p>Structures in memory are accessed using '<tt><a href="#i_load">load</a></tt>'
and '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field
@@ -1846,66 +1849,76 @@ synchronization behavior.</p>
Structures in registers are accessed using the
'<tt><a href="#i_extractvalue">extractvalue</a></tt>' and
'<tt><a href="#i_insertvalue">insertvalue</a></tt>' instructions.</p>
+
+<p>Structures may optionally be "packed" structures, which indicate that the
+ alignment of the struct is one byte, and that there is no padding between
+ the elements. In non-packed structs, padding between field types is defined
+ by the target data string to match the underlying processor.</p>
+
+<p>Structures can either be "anonymous" or "named". An anonymous structure is
+  defined inline with other types (e.g. <tt>{i32, i32}*</tt>), while named
+  types are always defined at the top level with a name. Anonymous types are
+  uniqued by their contents and can never be recursive, since there is no way
+  to write one. Named types can be recursive.
+</p>
+
<h5>Syntax:</h5>
<pre>
- { &lt;type list&gt; }
+ %T1 = type { &lt;type list&gt; } <i>; Named normal struct type</i>
+ %T2 = type &lt;{ &lt;type list&gt; }&gt; <i>; Named packed struct type</i>
</pre>
-
+
<h5>Examples:</h5>
<table class="layout">
<tr class="layout">
<td class="left"><tt>{ i32, i32, i32 }</tt></td>
<td class="left">A triple of three <tt>i32</tt> values</td>
- </tr><tr class="layout">
+ </tr>
+ <tr class="layout">
<td class="left"><tt>{&nbsp;float,&nbsp;i32&nbsp;(i32)&nbsp;*&nbsp;}</tt></td>
<td class="left">A pair, where the first element is a <tt>float</tt> and the
second element is a <a href="#t_pointer">pointer</a> to a
<a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
an <tt>i32</tt>.</td>
</tr>
+ <tr class="layout">
+ <td class="left"><tt>&lt;{ i8, i32 }&gt;</tt></td>
+ <td class="left">A packed struct known to be 5 bytes in size.</td>
+ </tr>
</table>
</div>
-
+
<!-- _______________________________________________________________________ -->
<h4>
- <a name="t_pstruct">Packed Structure Type</a>
+ <a name="t_opaque">Opaque Type</a>
</h4>
<div>
<h5>Overview:</h5>
-<p>The packed structure type is used to represent a collection of data members
- together in memory. There is no padding between fields. Further, the
- alignment of a packed structure is 1 byte. The elements of a packed
- structure may be any type that has a size.</p>
-
-<p>Structures are accessed using '<tt><a href="#i_load">load</a></tt> and
- '<tt><a href="#i_store">store</a></tt>' by getting a pointer to a field with
- the '<tt><a href="#i_getelementptr">getelementptr</a></tt>' instruction.</p>
+<p>Opaque types are used to represent named structure types that do not have a
+ body specified. This corresponds (for example) to the C notion of a forward
+ declared structure.</p>
<h5>Syntax:</h5>
<pre>
- &lt; { &lt;type list&gt; } &gt;
+ %X = type opaque
+ %52 = type opaque
</pre>
<h5>Examples:</h5>
<table class="layout">
<tr class="layout">
- <td class="left"><tt>&lt; { i32, i32, i32 } &gt;</tt></td>
- <td class="left">A triple of three <tt>i32</tt> values</td>
- </tr><tr class="layout">
- <td class="left">
-<tt>&lt;&nbsp;{&nbsp;float,&nbsp;i32&nbsp;(i32)*&nbsp;}&nbsp;&gt;</tt></td>
- <td class="left">A pair, where the first element is a <tt>float</tt> and the
- second element is a <a href="#t_pointer">pointer</a> to a
- <a href="#t_function">function</a> that takes an <tt>i32</tt>, returning
- an <tt>i32</tt>.</td>
+ <td class="left"><tt>opaque</tt></td>
+ <td class="left">An opaque type.</td>
</tr>
</table>
</div>
+
+
<!-- _______________________________________________________________________ -->
<h4>
<a name="t_pointer">Pointer Type</a>
@@ -1993,86 +2006,6 @@ synchronization behavior.</p>
</div>
-<!-- _______________________________________________________________________ -->
-<h4>
- <a name="t_opaque">Opaque Type</a>
-</h4>
-
-<div>
-
-<h5>Overview:</h5>
-<p>Opaque types are used to represent unknown types in the system. This
- corresponds (for example) to the C notion of a forward declared structure
- type. In LLVM, opaque types can eventually be resolved to any type (not just
- a structure type).</p>
-
-<h5>Syntax:</h5>
-<pre>
- opaque
-</pre>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>opaque</tt></td>
- <td class="left">An opaque type.</td>
- </tr>
-</table>
-
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="t_uprefs">Type Up-references</a>
-</h3>
-
-<div>
-
-<h5>Overview:</h5>
-<p>An "up reference" allows you to refer to a lexically enclosing type without
- requiring it to have a name. For instance, a structure declaration may
- contain a pointer to any of the types it is lexically a member of. Example
- of up references (with their equivalent as named type declarations)
- include:</p>
-
-<pre>
- { \2 * } %x = type { %x* }
- { \2 }* %y = type { %y }*
- \1* %z = type %z*
-</pre>
-
-<p>An up reference is needed by the asmprinter for printing out cyclic types
- when there is no declared name for a type in the cycle. Because the
- asmprinter does not want to print out an infinite type string, it needs a
- syntax to handle recursive types that have no names (all names are optional
- in llvm IR).</p>
-
-<h5>Syntax:</h5>
-<pre>
- \&lt;level&gt;
-</pre>
-
-<p>The level is the count of the lexical type that is being referred to.</p>
-
-<h5>Examples:</h5>
-<table class="layout">
- <tr class="layout">
- <td class="left"><tt>\1*</tt></td>
- <td class="left">Self-referential pointer.</td>
- </tr>
- <tr class="layout">
- <td class="left"><tt>{ { \3*, i8 }, i32 }</tt></td>
- <td class="left">Recursive structure where the upref refers to the out-most
- structure.</td>
- </tr>
-</table>
-
-</div>
-
-</div>
-
<!-- *********************************************************************** -->
<h2><a name="constants">Constants</a></h2>
<!-- *********************************************************************** -->
@@ -6064,7 +5997,7 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<pre>
- declare void @llvm.prefetch(i8* &lt;address&gt;, i32 &lt;rw&gt;, i32 &lt;locality&gt;)
+ declare void @llvm.prefetch(i8* &lt;address&gt;, i32 &lt;rw&gt;, i32 &lt;locality&gt;, i32 &lt;cache type&gt;)
</pre>
<h5>Overview:</h5>
@@ -6077,8 +6010,10 @@ LLVM</a>.</p>
<p><tt>address</tt> is the address to be prefetched, <tt>rw</tt> is the
specifier determining if the fetch should be for a read (0) or write (1),
and <tt>locality</tt> is a temporal locality specifier ranging from (0) - no
- locality, to (3) - extremely local keep in cache. The <tt>rw</tt>
- and <tt>locality</tt> arguments must be constant integers.</p>
+ locality, to (3) - extremely local keep in cache. The <tt>cache type</tt>
+ specifies whether the prefetch is performed on the data (1) or instruction (0)
+ cache. The <tt>rw</tt>, <tt>locality</tt> and <tt>cache type</tt> arguments
+ must be constant integers.</p>
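+
+<p>As an illustration, a minimal <tt>IRBuilder</tt> sketch (an editor's
+addition; the helper name is hypothetical) that emits the four-argument form
+for a data-cache read prefetch:</p>
+
+<div class="doc_code">
+<pre>
+// A sketch, assuming Ptr already has type i8*: prefetch for reading,
+// with maximal temporal locality, into the data cache.
+void emitDataPrefetch(Module *M, IRBuilder&lt;&gt; &amp;Builder, Value *Ptr) {
+  Function *PrefetchF = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
+  Value *Args[] = {
+    Ptr,                  // address
+    Builder.getInt32(0),  // rw: read
+    Builder.getInt32(3),  // locality: keep in cache
+    Builder.getInt32(1)   // cache type: data
+  };
+  Builder.CreateCall(PrefetchF, Args);
+}
+</pre>
+</div>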
<h5>Semantics:</h5>
<p>This intrinsic does not modify the behavior of the program. In particular,
@@ -6563,6 +6498,37 @@ LLVM</a>.</p>
<p>This function returns the same values as the libm <tt>log</tt> functions
would, and handles error conditions in the same way.</p>
+<h4>
+ <a name="int_fma">'<tt>llvm.fma.*</tt>' Intrinsic</a>
+</h4>
+
+<div>
+
+<h5>Syntax:</h5>
+<p>This is an overloaded intrinsic. You can use <tt>llvm.fma</tt> on any
+  floating point type or vector of floating point type. Not all targets
+  support all types, however.</p>
+
+<pre>
+ declare float @llvm.fma.f32(float %a, float %b, float %c)
+ declare double @llvm.fma.f64(double %a, double %b, double %c)
+ declare x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c)
+ declare fp128 @llvm.fma.f128(fp128 %a, fp128 %b, fp128 %c)
+ declare ppc_fp128 @llvm.fma.ppcf128(ppc_fp128 %a, ppc_fp128 %b, ppc_fp128 %c)
+</pre>
+
+<h5>Overview:</h5>
+<p>The '<tt>llvm.fma.*</tt>' intrinsics perform the fused multiply-add
+ operation.</p>
+
+<h5>Arguments:</h5>
+<p>The arguments and return value are floating point numbers of the same
+ type.</p>
+
+<h5>Semantics:</h5>
+<p>This function returns the same values as the libm <tt>fma</tt> functions
+ would.</p>
+
</div>
<!-- ======================================================================= -->
@@ -6619,7 +6585,8 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use llvm.ctpop on any integer bit
- width. Not all targets support all bit widths however.</p>
+ width, or on any vector with integer elements. Not all targets support all
+ bit widths or vector types, however.</p>
<pre>
declare i8 @llvm.ctpop.i8(i8 &lt;src&gt;)
@@ -6627,6 +6594,7 @@ LLVM</a>.</p>
declare i32 @llvm.ctpop.i32(i32 &lt;src&gt;)
declare i64 @llvm.ctpop.i64(i64 &lt;src&gt;)
declare i256 @llvm.ctpop.i256(i256 &lt;src&gt;)
+ declare &lt;2 x i32&gt; @llvm.ctpop.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
</pre>
<h5>Overview:</h5>
@@ -6635,10 +6603,12 @@ LLVM</a>.</p>
<h5>Arguments:</h5>
<p>The only argument is the value to be counted. The argument may be of any
- integer type. The return type must match the argument type.</p>
+ integer type, or a vector with integer elements.
+ The return type must match the argument type.</p>
<h5>Semantics:</h5>
-<p>The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable.</p>
+<p>The '<tt>llvm.ctpop</tt>' intrinsic counts the 1's in a variable, or within each
+ element of a vector.</p>
</div>
@@ -6651,7 +6621,8 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use <tt>llvm.ctlz</tt> on any
- integer bit width. Not all targets support all bit widths however.</p>
+ integer bit width, or any vector whose elements are integers. Not all
+ targets support all bit widths or vector types, however.</p>
<pre>
declare i8 @llvm.ctlz.i8 (i8 &lt;src&gt;)
@@ -6659,6 +6630,7 @@ LLVM</a>.</p>
declare i32 @llvm.ctlz.i32(i32 &lt;src&gt;)
declare i64 @llvm.ctlz.i64(i64 &lt;src&gt;)
declare i256 @llvm.ctlz.i256(i256 &lt;src&gt;)
+  declare &lt;2 x i32&gt; @llvm.ctlz.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
</pre>
<h5>Overview:</h5>
@@ -6667,11 +6639,13 @@ LLVM</a>.</p>
<h5>Arguments:</h5>
<p>The only argument is the value to be counted. The argument may be of any
- integer type. The return type must match the argument type.</p>
+ integer type, or any vector type with integer element type.
+ The return type must match the argument type.</p>
<h5>Semantics:</h5>
<p>The '<tt>llvm.ctlz</tt>' intrinsic counts the leading (most significant)
- zeros in a variable. If the src == 0 then the result is the size in bits of
+ zeros in a variable, or within each element of the vector if the operation
+ is of vector type. If the src == 0 then the result is the size in bits of
the type of src. For example, <tt>llvm.ctlz(i32 2) = 30</tt>.</p>
</div>
@@ -6685,7 +6659,8 @@ LLVM</a>.</p>
<h5>Syntax:</h5>
<p>This is an overloaded intrinsic. You can use <tt>llvm.cttz</tt> on any
- integer bit width. Not all targets support all bit widths however.</p>
+ integer bit width, or any vector of integer elements. Not all targets
+ support all bit widths or vector types, however.</p>
<pre>
declare i8 @llvm.cttz.i8 (i8 &lt;src&gt;)
@@ -6693,6 +6668,7 @@ LLVM</a>.</p>
declare i32 @llvm.cttz.i32(i32 &lt;src&gt;)
declare i64 @llvm.cttz.i64(i64 &lt;src&gt;)
declare i256 @llvm.cttz.i256(i256 &lt;src&gt;)
+  declare &lt;2 x i32&gt; @llvm.cttz.v2i32(&lt;2 x i32&gt; &lt;src&gt;)
</pre>
<h5>Overview:</h5>
@@ -6701,11 +6677,13 @@ LLVM</a>.</p>
<h5>Arguments:</h5>
<p>The only argument is the value to be counted. The argument may be of any
- integer type. The return type must match the argument type.</p>
+  integer type, or a vector with integer element type. The return type
+  must match the argument type.</p>
<h5>Semantics:</h5>
<p>The '<tt>llvm.cttz</tt>' intrinsic counts the trailing (least significant)
- zeros in a variable. If the src == 0 then the result is the size in bits of
+ zeros in a variable, or within each element of a vector.
+ If the src == 0 then the result is the size in bits of
the type of src. For example, <tt>llvm.cttz(2) = 1</tt>.</p>
</div>
@@ -7302,7 +7280,7 @@ LLVM</a>.</p>
store i32 4, %ptr
%result1 = load i32* %ptr <i>; yields {i32}:result1 = 4</i>
- call void @llvm.memory.barrier(i1 false, i1 true, i1 false, i1 false)
+ call void @llvm.memory.barrier(i1 false, i1 true, i1 false, i1 false, i1 true)
<i>; guarantee the above finishes</i>
store i32 8, %ptr <i>; before this begins</i>
</pre>
@@ -8017,7 +7995,7 @@ LLVM</a>.</p>
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-05-27 02:36:31 +0200 (Fri, 27 May 2011) $
+ Last modified: $Date: 2011-07-09 19:41:24 +0200 (Sat, 09 Jul 2011) $
</address>
</body>
diff --git a/docs/ProgrammersManual.html b/docs/ProgrammersManual.html
index 6af922b..ed43f1f 100644
--- a/docs/ProgrammersManual.html
+++ b/docs/ProgrammersManual.html
@@ -64,6 +64,7 @@ option</a></li>
<li><a href="#dss_deque">&lt;deque&gt;</a></li>
<li><a href="#dss_list">&lt;list&gt;</a></li>
<li><a href="#dss_ilist">llvm/ADT/ilist.h</a></li>
+ <li><a href="#dss_packedvector">llvm/ADT/PackedVector.h</a></li>
<li><a href="#dss_other">Other Sequential Container Options</a></li>
</ul></li>
<li><a href="#ds_set">Set-Like Containers (std::set, SmallSet, SetVector, etc)</a>
@@ -159,15 +160,8 @@ with another <tt>Value</tt></a> </li>
<li><a href="#advanced">Advanced Topics</a>
<ul>
- <li><a href="#TypeResolve">LLVM Type Resolution</a>
- <ul>
- <li><a href="#BuildRecType">Basic Recursive Type Construction</a></li>
- <li><a href="#refineAbstractTypeTo">The <tt>refineAbstractTypeTo</tt> method</a></li>
- <li><a href="#PATypeHolder">The PATypeHolder Class</a></li>
- <li><a href="#AbstractTypeUser">The AbstractTypeUser Class</a></li>
- </ul></li>
- <li><a href="#SymbolTable">The <tt>ValueSymbolTable</tt> and <tt>TypeSymbolTable</tt> classes</a></li>
+ <li><a href="#SymbolTable">The <tt>ValueSymbolTable</tt> class</a></li>
<li><a href="#UserLayout">The <tt>User</tt> and owned <tt>Use</tt> classes' memory layout</a></li>
</ul></li>
@@ -811,6 +805,10 @@ found at <a href="http://www.graphviz.org/doc/info/attrs.html">Graph
Attributes</a>.) If you want to restart and clear all the current graph
attributes, then you can <tt>call DAG.clearGraphAttrs()</tt>. </p>
+<p>Note that graph visualization features are compiled out of Release builds
+to reduce file size. This means that you need a Debug+Asserts or
+Release+Asserts build to use these features.</p>
+
</div>
</div>
@@ -1065,6 +1063,44 @@ Related classes of interest are explained in the following subsections:
<!-- _______________________________________________________________________ -->
<h4>
+ <a name="dss_packedvector">llvm/ADT/PackedVector.h</a>
+</h4>
+
+<div>
+<p>
+Useful for storing a vector of values using only a few bits for each
+value. Apart from the standard operations of a vector-like container, it can
+also perform an 'or' set operation.
+</p>
+
+<p>For example:</p>
+
+<div class="doc_code">
+<pre>
+enum State {
+ None = 0x0,
+ FirstCondition = 0x1,
+ SecondCondition = 0x2,
+ Both = 0x3
+};
+
+State get() {
+ PackedVector&lt;State, 2&gt; Vec1;
+ Vec1.push_back(FirstCondition);
+
+ PackedVector&lt;State, 2&gt; Vec2;
+ Vec2.push_back(SecondCondition);
+
+ Vec1 |= Vec2;
+ return Vec1[0]; // returns 'Both'.
+}
+</pre>
+</div>
+
+</div>
+
+<!-- _______________________________________________________________________ -->
+<h4>
<a name="dss_ilist_traits">ilist_traits</a>
</h4>
@@ -2602,173 +2638,10 @@ do not need to be aware of. These API's tend manage the inner workings of the
LLVM system, and only need to be accessed in unusual circumstances.
</p>
+
<!-- ======================================================================= -->
<h3>
- <a name="TypeResolve">LLVM Type Resolution</a>
-</h3>
-
-<div>
-
-<p>
-The LLVM type system has a very simple goal: allow clients to compare types for
-structural equality with a simple pointer comparison (aka a shallow compare).
-This goal makes clients much simpler and faster, and is used throughout the LLVM
-system.
-</p>
-
-<p>
-Unfortunately achieving this goal is not a simple matter. In particular,
-recursive types and late resolution of opaque types makes the situation very
-difficult to handle. Fortunately, for the most part, our implementation makes
-most clients able to be completely unaware of the nasty internal details. The
-primary case where clients are exposed to the inner workings of it are when
-building a recursive type. In addition to this case, the LLVM bitcode reader,
-assembly parser, and linker also have to be aware of the inner workings of this
-system.
-</p>
-
-<p>
-For our purposes below, we need three concepts. First, an "Opaque Type" is
-exactly as defined in the <a href="LangRef.html#t_opaque">language
-reference</a>. Second an "Abstract Type" is any type which includes an
-opaque type as part of its type graph (for example "<tt>{ opaque, i32 }</tt>").
-Third, a concrete type is a type that is not an abstract type (e.g. "<tt>{ i32,
-float }</tt>").
-</p>
-
-<!-- ______________________________________________________________________ -->
-<h4>
- <a name="BuildRecType">Basic Recursive Type Construction</a>
-</h4>
-
-<div>
-
-<p>
-Because the most common question is "how do I build a recursive type with LLVM",
-we answer it now and explain it as we go. Here we include enough to cause this
-to be emitted to an output .ll file:
-</p>
-
-<div class="doc_code">
-<pre>
-%mylist = type { %mylist*, i32 }
-</pre>
-</div>
-
-<p>
-To build this, use the following LLVM APIs:
-</p>
-
-<div class="doc_code">
-<pre>
-// <i>Create the initial outer struct</i>
-<a href="#PATypeHolder">PATypeHolder</a> StructTy = OpaqueType::get();
-std::vector&lt;const Type*&gt; Elts;
-Elts.push_back(PointerType::getUnqual(StructTy));
-Elts.push_back(Type::Int32Ty);
-StructType *NewSTy = StructType::get(Elts);
-
-// <i>At this point, NewSTy = "{ opaque*, i32 }". Tell VMCore that</i>
-// <i>the struct and the opaque type are actually the same.</i>
-cast&lt;OpaqueType&gt;(StructTy.get())-&gt;<a href="#refineAbstractTypeTo">refineAbstractTypeTo</a>(NewSTy);
-
-// <i>NewSTy is potentially invalidated, but StructTy (a <a href="#PATypeHolder">PATypeHolder</a>) is</i>
-// <i>kept up-to-date</i>
-NewSTy = cast&lt;StructType&gt;(StructTy.get());
-
-// <i>Add a name for the type to the module symbol table (optional)</i>
-MyModule-&gt;addTypeName("mylist", NewSTy);
-</pre>
-</div>
-
-<p>
-This code shows the basic approach used to build recursive types: build a
-non-recursive type using 'opaque', then use type unification to close the cycle.
-The type unification step is performed by the <tt><a
-href="#refineAbstractTypeTo">refineAbstractTypeTo</a></tt> method, which is
-described next. After that, we describe the <a
-href="#PATypeHolder">PATypeHolder class</a>.
-</p>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
- <a name="refineAbstractTypeTo">The <tt>refineAbstractTypeTo</tt> method</a>
-</h4>
-
-<div>
-<p>
-The <tt>refineAbstractTypeTo</tt> method starts the type unification process.
-While this method is actually a member of the DerivedType class, it is most
-often used on OpaqueType instances. Type unification is actually a recursive
-process. After unification, types can become structurally isomorphic to
-existing types, and all duplicates are deleted (to preserve pointer equality).
-</p>
-
-<p>
-In the example above, the OpaqueType object is definitely deleted.
-Additionally, if there is an "{ \2*, i32}" type already created in the system,
-the pointer and struct type created are <b>also</b> deleted. Obviously whenever
-a type is deleted, any "Type*" pointers in the program are invalidated. As
-such, it is safest to avoid having <i>any</i> "Type*" pointers to abstract types
-live across a call to <tt>refineAbstractTypeTo</tt> (note that non-abstract
-types can never move or be deleted). To deal with this, the <a
-href="#PATypeHolder">PATypeHolder</a> class is used to maintain a stable
-reference to a possibly refined type, and the <a
-href="#AbstractTypeUser">AbstractTypeUser</a> class is used to update more
-complex datastructures.
-</p>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
- <a name="PATypeHolder">The PATypeHolder Class</a>
-</h4>
-
-<div>
-<p>
-PATypeHolder is a form of a "smart pointer" for Type objects. When VMCore
-happily goes about nuking types that become isomorphic to existing types, it
-automatically updates all PATypeHolder objects to point to the new type. In the
-example above, this allows the code to maintain a pointer to the resultant
-resolved recursive type, even though the Type*'s are potentially invalidated.
-</p>
-
-<p>
-PATypeHolder is an extremely light-weight object that uses a lazy union-find
-implementation to update pointers. For example the pointer from a Value to its
-Type is maintained by PATypeHolder objects.
-</p>
-
-</div>
-
-<!-- ______________________________________________________________________ -->
-<h4>
- <a name="AbstractTypeUser">The AbstractTypeUser Class</a>
-</h4>
-
-<div>
-
-<p>
-Some data structures need more to perform more complex updates when types get
-resolved. To support this, a class can derive from the AbstractTypeUser class.
-This class
-allows it to get callbacks when certain types are resolved. To register to get
-callbacks for a particular type, the DerivedType::{add/remove}AbstractTypeUser
-methods can be called on a type. Note that these methods only work for <i>
- abstract</i> types. Concrete types (those that do not include any opaque
-objects) can never be refined.
-</p>
-</div>
-
-</div>
-
-<!-- ======================================================================= -->
-<h3>
- <a name="SymbolTable">The <tt>ValueSymbolTable</tt> and
- <tt>TypeSymbolTable</tt> classes</a>
+ <a name="SymbolTable">The <tt>ValueSymbolTable</tt> class</a>
</h3>
<div>
@@ -2777,9 +2650,7 @@ ValueSymbolTable</a></tt> class provides a symbol table that the <a
href="#Function"><tt>Function</tt></a> and <a href="#Module">
<tt>Module</tt></a> classes use for naming value definitions. The symbol table
can provide a name for any <a href="#Value"><tt>Value</tt></a>.
-The <tt><a href="http://llvm.org/doxygen/classllvm_1_1TypeSymbolTable.html">
-TypeSymbolTable</a></tt> class is used by the <tt>Module</tt> class to store
-names for types.</p>
+</p>
<p>Note that the <tt>SymbolTable</tt> class should not be directly accessed
by most clients. It should only be used when iteration over the symbol table
@@ -2789,13 +2660,12 @@ all LLVM
an empty name) do not exist in the symbol table.
</p>
-<p>These symbol tables support iteration over the values/types in the symbol
+<p>Symbol tables support iteration over the values in the symbol
table with <tt>begin/end/iterator</tt> and supports querying to see if a
specific name is in the symbol table (with <tt>lookup</tt>). The
<tt>ValueSymbolTable</tt> class exposes no public mutator methods, instead,
simply call <tt>setName</tt> on a value, which will autoinsert it into the
-appropriate symbol table. For types, use the Module::addTypeName method to
-insert entries into the symbol table.</p>
+appropriate symbol table.</p>
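+
+<p>For example, a small sketch (an editor's illustration; the helper name is
+hypothetical):</p>
+
+<div class="doc_code">
+<pre>
+// Naming a value automatically inserts and uniques it in the enclosing
+// function's ValueSymbolTable; the table is never touched directly.
+void nameInstructions(Function &amp;F) {
+  BasicBlock &amp;BB = F.getEntryBlock();
+  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
+    if (!I-&gt;hasName() &amp;&amp; !I-&gt;getType()-&gt;isVoidTy())
+      I-&gt;setName("tmp"); // becomes "tmp", "tmp1", "tmp2", ...
+}
+</pre>
+</div>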
</div>
@@ -3085,9 +2955,6 @@ the <tt>lib/VMCore</tt> directory.</p>
<li><tt>bool isFloatingPointTy()</tt>: Return true if this is one of the five
floating point types.</li>
- <li><tt>bool isAbstract()</tt>: Return true if the type is abstract (contains
- an OpaqueType anywhere in its definition).</li>
-
<li><tt>bool isSized()</tt>: Return true if the type has known size. Things
that don't have a size are abstract types, labels and void.</li>
@@ -3112,7 +2979,7 @@ the <tt>lib/VMCore</tt> directory.</p>
</ul>
</dd>
<dt><tt>SequentialType</tt></dt>
- <dd>This is subclassed by ArrayType and PointerType
+ <dd>This is subclassed by ArrayType, PointerType and VectorType.
<ul>
<li><tt>const Type * getElementType() const</tt>: Returns the type of each
of the elements in the sequential type. </li>
@@ -3149,14 +3016,6 @@ the <tt>lib/VMCore</tt> directory.</p>
number of formal parameters.</li>
</ul>
</dd>
- <dt><tt>OpaqueType</tt></dt>
- <dd>Sublcass of DerivedType for abstract types. This class
- defines no content and is used as a placeholder for some other type. Note
- that OpaqueType is used (temporarily) during type resolution for forward
- references of types. Once the referenced type is resolved, the OpaqueType
- is replaced with the actual type. OpaqueType can also be used for data
- abstraction. At link time opaque types can be resolved to actual types
- of the same name.</dd>
</dl>
</div>
@@ -4008,7 +3867,7 @@ arguments. An argument has a pointer to the parent Function.</p>
<a href="mailto:dhurjati@cs.uiuc.edu">Dinakar Dhurjati</a> and
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
+ Last modified: $Date: 2011-07-12 13:37:02 +0200 (Tue, 12 Jul 2011) $
</address>
</body>
diff --git a/docs/ReleaseNotes.html b/docs/ReleaseNotes.html
index 12546c8..d54698d 100644
--- a/docs/ReleaseNotes.html
+++ b/docs/ReleaseNotes.html
@@ -599,9 +599,53 @@ from the previous release.</p>
LLVM API changes are:</p>
<ul>
-<!--
-<li></ld>
--->
+
+<li><code>PHINode::reserveOperandSpace</code> has been removed. Instead, you
+ must specify how many operands to reserve space for when you create the
+ PHINode, by passing an extra argument into <code>PHINode::Create</code>.</li>
+
+<li>PHINodes no longer store their incoming BasicBlocks as operands. Instead,
+ the list of incoming BasicBlocks is stored separately, and can be accessed
+ with new functions <code>PHINode::block_begin</code>
+ and <code>PHINode::block_end</code>.</li>
+
+<li>Various functions now take an <code>ArrayRef</code> instead of either a pair
+ of pointers (or iterators) to the beginning and end of a range, or a pointer
+ and a length. Others now return an <code>ArrayRef</code> instead of a
+ reference to a <code>SmallVector</code> or <code>std::vector</code>. These
+  include (a short migration sketch follows this list):
+<ul>
+<!-- Please keep this list sorted. -->
+<li><code>CallInst::Create</code></li>
+<li><code>ComputeLinearIndex</code> (in <code>llvm/CodeGen/Analysis.h</code>)</li>
+<li><code>ConstantArray::get</code></li>
+<li><code>ConstantExpr::getExtractElement</code></li>
+<li><code>ConstantExpr::getIndices</code></li>
+<li><code>ConstantExpr::getInsertElement</code></li>
+<li><code>ConstantExpr::getWithOperands</code></li>
+<li><code>ConstantVector::get</code></li>
+<li><code>DIBuilder::createComplexVariable</code></li>
+<li><code>DIBuilder::getOrCreateArray</code></li>
+<li><code>ExtractValueInst::Create</code></li>
+<li><code>ExtractValueInst::getIndexedType</code></li>
+<li><code>ExtractValueInst::getIndices</code></li>
+<li><code>FindInsertedValue</code> (in <code>llvm/Analysis/ValueTracking.h</code>)</li>
+<li><code>IRBuilder::CreateCall</code></li>
+<li><code>IRBuilder::CreateExtractValue</code></li>
+<li><code>IRBuilder::CreateInsertValue</code></li>
+<li><code>IRBuilder::CreateInvoke</code></li>
+<li><code>InsertValueInst::Create</code></li>
+<li><code>InsertValueInst::getIndices</code></li>
+<li><code>InvokeInst::Create</code></li>
+<li><code>MDNode::get</code></li>
+<li><code>MDNode::getIfExists</code></li>
+<li><code>MDNode::getTemporary</code></li>
+<li><code>MDNode::getWhenValsUnresolved</code></li>
+</ul></li>
+
+<li>All forms of <code>StringMap::getOrCreateValue</code> have been removed
+  except for the one that takes a <code>StringRef</code>.</li>
+
</ul>
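+
+<p>A minimal before/after sketch of the <code>PHINode</code> and
+<code>ArrayRef</code> changes (an editor's illustration; the helper names are
+hypothetical):</p>
+
+<div class="doc_code">
+<pre>
+// New forms, side by side with the 2.9 forms they replace.
+CallInst *emitCall(IRBuilder&lt;&gt; &amp;Builder, Function *Callee,
+                   Value *A, Value *B) {
+  Value *Args[] = { A, B };  // converts implicitly to ArrayRef&lt;Value*&gt;
+  return Builder.CreateCall(Callee, Args, "call");
+  // Formerly: Builder.CreateCall(Callee, Args, Args + 2, "call");
+}
+
+PHINode *makePHI(Type *Ty, BasicBlock *BB) {
+  // The expected number of incoming values is now given at creation time:
+  return PHINode::Create(Ty, /*NumReservedValues=*/2, "phi", BB);
+  // Formerly: PHINode::Create(Ty, "phi", BB) followed by
+  //           PN-&gt;reserveOperandSpace(2);
+}
+</pre>
+</div>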
</div>
@@ -835,7 +879,7 @@ lists</a>.</p>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-05-28 00:50:46 +0200 (Sat, 28 May 2011) $
+ Last modified: $Date: 2011-07-15 10:37:34 +0200 (Fri, 15 Jul 2011) $
</address>
</body>
diff --git a/docs/WritingAnLLVMBackend.html b/docs/WritingAnLLVMBackend.html
index c4892cb..dd6f1ec 100644
--- a/docs/WritingAnLLVMBackend.html
+++ b/docs/WritingAnLLVMBackend.html
@@ -706,8 +706,7 @@ classes using the following class:
<div class="doc_code">
<pre>
class RegisterClass&lt;string namespace,
-list&lt;ValueType&gt; regTypes, int alignment,
- list&lt;Register&gt; regList&gt; {
+list&lt;ValueType&gt; regTypes, int alignment, dag regList&gt; {
string Namespace = namespace;
list&lt;ValueType&gt; RegTypes = regTypes;
int Size = 0; // spill size, in bits; zero lets tblgen pick the size
@@ -717,7 +716,7 @@ list&lt;ValueType&gt; regTypes, int alignment,
// default value 1 means a single instruction
// A negative value means copying is extremely expensive or impossible
int CopyCost = 1;
- list&lt;Register&gt; MemberList = regList;
+ dag MemberList = regList;
// for register classes that are subregisters of this class
list&lt;RegisterClass&gt; SubRegClassList = [];
@@ -749,9 +748,11 @@ list&lt;ValueType&gt; regTypes, int alignment,
memory.</li>
<li>The final argument, <tt>regList</tt>, specifies which registers are in this
- class. If an <tt>allocation_order_*</tt> method is not specified,
- then <tt>regList</tt> also defines the order of allocation used by the
- register allocator.</li>
+ class. If an alternative allocation order method is not specified, then
+ <tt>regList</tt> also defines the order of allocation used by the register
+ allocator. Besides simply listing registers with <tt>(add R0, R1, ...)</tt>,
+ more advanced set operators are available. See
+ <tt>include/llvm/Target/Target.td</tt> for more information.</li>
</ul>
<p>
@@ -761,44 +762,31 @@ classes, the first argument defines the namespace with the string
'<tt>SP</tt>'. <tt>FPRegs</tt> defines a group of 32 single-precision
floating-point registers (<tt>F0</tt> to <tt>F31</tt>); <tt>DFPRegs</tt> defines
a group of 16 double-precision registers
-(<tt>D0-D15</tt>). For <tt>IntRegs</tt>, the <tt>MethodProtos</tt>
-and <tt>MethodBodies</tt> methods are used by TableGen to insert the specified
-code into generated output.
+(<tt>D0-D15</tt>).
</p>
<div class="doc_code">
<pre>
-def FPRegs : RegisterClass&lt;"SP", [f32], 32,
- [F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15,
- F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31]&gt;;
+// F0, F1, F2, ..., F31
+def FPRegs : RegisterClass&lt;"SP", [f32], 32, (sequence "F%u", 0, 31)&gt;;
def DFPRegs : RegisterClass&lt;"SP", [f64], 64,
- [D0, D1, D2, D3, D4, D5, D6, D7, D8, D9, D10, D11, D12, D13, D14, D15]&gt;;
+ (add D0, D1, D2, D3, D4, D5, D6, D7, D8,
+ D9, D10, D11, D12, D13, D14, D15)&gt;;
&nbsp;
def IntRegs : RegisterClass&lt;"SP", [i32], 32,
- [L0, L1, L2, L3, L4, L5, L6, L7,
- I0, I1, I2, I3, I4, I5,
- O0, O1, O2, O3, O4, O5, O7,
- G1,
- // Non-allocatable regs:
- G2, G3, G4,
- O6, // stack ptr
- I6, // frame ptr
- I7, // return address
- G0, // constant zero
- G5, G6, G7 // reserved for kernel
- ]&gt; {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &amp;MF) const;
- }];
- let MethodBodies = [{
- IntRegsClass::iterator
- IntRegsClass::allocation_order_end(const MachineFunction &amp;MF) const {
- return end() - 10 // Don't allocate special registers
- -1;
- }
- }];
-}
+ (add L0, L1, L2, L3, L4, L5, L6, L7,
+ I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4,
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ )&gt;;
</pre>
</div>
@@ -820,10 +808,7 @@ which is included at the bottom of <tt>SparcRegisterInfo.cpp</tt>, the SPARC
register implementation. The code below shows only the generated integer
registers and associated register classes. The order of registers
in <tt>IntRegs</tt> reflects the order in the definition of <tt>IntRegs</tt> in
-the target description file. Take special note of the use
-of <tt>MethodBodies</tt> in <tt>SparcRegisterInfo.td</tt> to create code in
-<tt>SparcGenRegisterInfo.inc</tt>. <tt>MethodProtos</tt> generates similar code
-in <tt>SparcGenRegisterInfo.h.inc</tt>.
+the target description file.
</p>
<div class="doc_code">
@@ -866,13 +851,7 @@ namespace SP { // Register class instances
static const TargetRegisterClass* const IntRegsSuperclasses [] = {
NULL
};
-...
- IntRegsClass::iterator
- IntRegsClass::allocation_order_end(const MachineFunction &amp;MF) const {
- return end()-10 // Don't allocate special registers
- -1;
- }
-
+
IntRegsClass::IntRegsClass() : TargetRegisterClass(IntRegsRegClassID,
IntRegsVTs, IntRegsSubclasses, IntRegsSuperclasses, IntRegsSubRegClasses,
IntRegsSuperRegClasses, 4, 4, 1, IntRegs, IntRegs + 32) {}
@@ -880,6 +859,13 @@ namespace SP { // Register class instances
</pre>
</div>
+<p>
+The register allocators will avoid using reserved registers, and callee-saved
+registers are not used until all the volatile registers have been used. That
+is usually good enough, but in some cases it may be necessary to provide custom
+allocation orders.
+</p>
+
</div>
<!-- ======================================================================= -->
@@ -2540,7 +2526,7 @@ with assembler.
<a href="http://www.woo.com">Mason Woo</a> and <a href="http://misha.brukman.net">Misha Brukman</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a>
<br>
- Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
+ Last modified: $Date: 2011-06-16 01:28:14 +0200 (Thu, 16 Jun 2011) $
</address>
</body>
diff --git a/docs/index.html b/docs/index.html
index e53b4c3..fc43569 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -164,6 +164,7 @@ understand how to use the libraries produced when LLVM is compiled.</li>
<li><a href="HowToReleaseLLVM.html">How To Release LLVM To The Public</a> - This
is a guide to preparing LLVM releases. Most developers can ignore it.</li>
+
<li><a href="http://llvm.org/doxygen/">Doxygen generated
documentation</a> (<a
href="http://llvm.org/doxygen/inherits.html">classes</a>)
@@ -239,6 +240,10 @@ programs with link-time optimization on Linux.</li>
<li><a href="DebuggingJITedCode.html">The GDB JIT interface</a> - How to debug
JITed code with GDB.</li>
+
+<li><a href="BranchWeightMetadata.html">Branch Weight Metadata</a> - Describes
+how branch prediction information is represented as metadata in LLVM IR.</li>
+
</ul>
@@ -284,7 +289,7 @@ times each day, making it a high volume list.</li>
src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
<a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-05-30 05:36:58 +0200 (Mon, 30 May 2011) $
+ Last modified: $Date: 2011-07-06 20:31:02 +0200 (Wed, 06 Jul 2011) $
</address>
</body></html>
diff --git a/docs/tutorial/OCamlLangImpl3.html b/docs/tutorial/OCamlLangImpl3.html
index e52bb6c..c240bb9 100644
--- a/docs/tutorial/OCamlLangImpl3.html
+++ b/docs/tutorial/OCamlLangImpl3.html
@@ -95,8 +95,9 @@ an undeclared parameter):</p>
<pre>
exception Error of string
-let the_module = create_module (global_context ()) "my cool jit"
-let builder = builder (global_context ())
+let context = global_context ()
+let the_module = create_module context "my cool jit"
+let builder = builder context
let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
let double_type = double_type context
</pre>
@@ -176,9 +177,9 @@ variables</a>.</p>
let rhs_val = codegen_expr rhs in
begin
match op with
- | '+' -&gt; build_add lhs_val rhs_val "addtmp" builder
- | '-' -&gt; build_sub lhs_val rhs_val "subtmp" builder
- | '*' -&gt; build_mul lhs_val rhs_val "multmp" builder
+ | '+' -&gt; build_fadd lhs_val rhs_val "addtmp" builder
+ | '-' -&gt; build_fsub lhs_val rhs_val "subtmp" builder
+ | '*' -&gt; build_fmul lhs_val rhs_val "multmp" builder
| '&lt;' -&gt;
(* Convert bool 0/1 to double 0.0 or 1.0 *)
let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
@@ -1086,7 +1087,7 @@ main ()
<a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
<a href="mailto:idadesub@users.sourceforge.net">Erick Tryzelaar</a><br>
<a href="http://llvm.org/">The LLVM Compiler Infrastructure</a><br>
- Last modified: $Date: 2011-04-23 02:30:22 +0200 (Sat, 23 Apr 2011) $
+ Last modified: $Date: 2011-07-15 22:03:30 +0200 (Fri, 15 Jul 2011) $
</address>
</body>
</html>
diff --git a/examples/BrainF/BrainF.cpp b/examples/BrainF/BrainF.cpp
index 54f3553..67af099 100644
--- a/examples/BrainF/BrainF.cpp
+++ b/examples/BrainF/BrainF.cpp
@@ -55,9 +55,9 @@ void BrainF::header(LLVMContext& C) {
//Function prototypes
//declare void @llvm.memset.p0i8.i32(i8 *, i8, i32, i32, i1)
- const Type *Tys[] = { Type::getInt8PtrTy(C), Type::getInt32Ty(C) };
+ Type *Tys[] = { Type::getInt8PtrTy(C), Type::getInt32Ty(C) };
Function *memset_func = Intrinsic::getDeclaration(module, Intrinsic::memset,
- Tys, 2);
+ Tys);
//declare i32 @getchar()
getchar_func = cast<Function>(module->
@@ -99,7 +99,7 @@ void BrainF::header(LLVMContext& C) {
};
CallInst *memset_call = builder->
- CreateCall(memset_func, memset_params, array_endof(memset_params));
+ CreateCall(memset_func, memset_params);
memset_call->setTailCall(false);
}
@@ -171,7 +171,7 @@ void BrainF::header(LLVMContext& C) {
CallInst *puts_call =
CallInst::Create(puts_func,
- puts_params, array_endof(puts_params),
+ puts_params,
"", aberrorbb);
puts_call->setTailCall(false);
}
@@ -229,7 +229,7 @@ void BrainF::readloop(PHINode *phi, BasicBlock *oldbb, BasicBlock *testbb,
};
CallInst *putchar_call = builder->
CreateCall(putchar_func,
- putchar_params, array_endof(putchar_params));
+ putchar_params);
putchar_call->setTailCall(false);
}
break;
diff --git a/examples/ExceptionDemo/ExceptionDemo.cpp b/examples/ExceptionDemo/ExceptionDemo.cpp
index e5bd377..e5d9451 100644
--- a/examples/ExceptionDemo/ExceptionDemo.cpp
+++ b/examples/ExceptionDemo/ExceptionDemo.cpp
@@ -190,7 +190,7 @@ static llvm::ConstantInt *ourExceptionThrownState;
static llvm::ConstantInt *ourExceptionCaughtState;
typedef std::vector<std::string> ArgNames;
-typedef std::vector<const llvm::Type*> ArgTypes;
+typedef std::vector<llvm::Type*> ArgTypes;
//
// Code Generation Utilities
@@ -895,7 +895,7 @@ void generateStringPrint(llvm::LLVMContext &context,
llvm::Value *cast =
builder.CreatePointerCast(stringVar,
- builder.getInt8Ty()->getPointerTo());
+ builder.getInt8PtrTy());
builder.CreateCall(printFunct, cast);
}
@@ -939,7 +939,7 @@ void generateIntegerPrint(llvm::LLVMContext &context,
llvm::Value *cast =
builder.CreateBitCast(stringVar,
- builder.getInt8Ty()->getPointerTo());
+ builder.getInt8PtrTy());
builder.CreateCall2(&printFunct, &toPrint, cast);
}
@@ -987,7 +987,7 @@ static llvm::BasicBlock *createFinallyBlock(llvm::LLVMContext &context,
ourExceptionNotThrownState);
const llvm::PointerType *exceptionStorageType =
- builder.getInt8Ty()->getPointerTo();
+ builder.getInt8PtrTy();
*exceptionStorage =
createEntryBlockAlloca(toAddTo,
"exceptionStorage",
@@ -1239,7 +1239,7 @@ llvm::Function *createCatchWrappedInvokeFunction(llvm::Module &module,
llvm::Function *personality = module.getFunction("ourPersonality");
llvm::Value *functPtr =
builder.CreatePointerCast(personality,
- builder.getInt8Ty()->getPointerTo());
+ builder.getInt8PtrTy());
args.clear();
args.push_back(unwindException);
@@ -1622,7 +1622,7 @@ void runExceptionThrow(llvm::ExecutionEngine *engine,
// End test functions
//
-typedef llvm::ArrayRef<const llvm::Type*> TypeArray;
+typedef llvm::ArrayRef<llvm::Type*> TypeArray;
/// This initialization routine creates type info globals and
/// adds external function declarations to module.
@@ -1650,8 +1650,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
// Create our type info type
ourTypeInfoType = llvm::StructType::get(context,
- TypeArray(builder.getInt32Ty()));
-
+ TypeArray(builder.getInt32Ty()));
// Create OurException type
ourExceptionType = llvm::StructType::get(context,
TypeArray(ourTypeInfoType));
@@ -1661,7 +1660,9 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
// Note: Declaring only a portion of the _Unwind_Exception struct.
// Does this cause problems?
ourUnwindExceptionType =
- llvm::StructType::get(context, TypeArray(builder.getInt64Ty()));
+ llvm::StructType::get(context,
+ TypeArray(builder.getInt64Ty()));
+
struct OurBaseException_t dummyException;
// Calculate offset of OurException::unwindException member.
@@ -1727,7 +1728,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
argTypes.clear();
argTypes.push_back(builder.getInt32Ty());
- argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+ argTypes.push_back(builder.getInt8PtrTy());
argNames.clear();
@@ -1746,7 +1747,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
argTypes.clear();
argTypes.push_back(builder.getInt64Ty());
- argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+ argTypes.push_back(builder.getInt8PtrTy());
argNames.clear();
@@ -1764,7 +1765,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
retType = builder.getVoidTy();
argTypes.clear();
- argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+ argTypes.push_back(builder.getInt8PtrTy());
argNames.clear();
@@ -1800,7 +1801,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
retType = builder.getVoidTy();
argTypes.clear();
- argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+ argTypes.push_back(builder.getInt8PtrTy());
argNames.clear();
@@ -1815,7 +1816,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
// createOurException
- retType = builder.getInt8Ty()->getPointerTo();
+ retType = builder.getInt8PtrTy();
argTypes.clear();
argTypes.push_back(builder.getInt32Ty());
@@ -1836,7 +1837,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
retType = builder.getInt32Ty();
argTypes.clear();
- argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+ argTypes.push_back(builder.getInt8PtrTy());
argNames.clear();
@@ -1856,7 +1857,7 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
retType = builder.getInt32Ty();
argTypes.clear();
- argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+ argTypes.push_back(builder.getInt8PtrTy());
argNames.clear();
@@ -1879,8 +1880,8 @@ static void createStandardUtilityFunctions(unsigned numTypeInfos,
argTypes.push_back(builder.getInt32Ty());
argTypes.push_back(builder.getInt32Ty());
argTypes.push_back(builder.getInt64Ty());
- argTypes.push_back(builder.getInt8Ty()->getPointerTo());
- argTypes.push_back(builder.getInt8Ty()->getPointerTo());
+ argTypes.push_back(builder.getInt8PtrTy());
+ argTypes.push_back(builder.getInt8PtrTy());
argNames.clear();
diff --git a/examples/Kaleidoscope/Chapter3/toy.cpp b/examples/Kaleidoscope/Chapter3/toy.cpp
index 80d691d..33980f5 100644
--- a/examples/Kaleidoscope/Chapter3/toy.cpp
+++ b/examples/Kaleidoscope/Chapter3/toy.cpp
@@ -395,13 +395,13 @@ Value *CallExprAST::Codegen() {
if (ArgsV.back() == 0) return 0;
}
- return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
}
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector<const Type*> Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
Doubles, false);
diff --git a/examples/Kaleidoscope/Chapter4/toy.cpp b/examples/Kaleidoscope/Chapter4/toy.cpp
index a50d2a4..1bed182 100644
--- a/examples/Kaleidoscope/Chapter4/toy.cpp
+++ b/examples/Kaleidoscope/Chapter4/toy.cpp
@@ -403,13 +403,13 @@ Value *CallExprAST::Codegen() {
if (ArgsV.back() == 0) return 0;
}
- return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
}
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector<const Type*> Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
Doubles, false);
diff --git a/examples/Kaleidoscope/Chapter5/toy.cpp b/examples/Kaleidoscope/Chapter5/toy.cpp
index 5dcc7ed..58ab6f3 100644
--- a/examples/Kaleidoscope/Chapter5/toy.cpp
+++ b/examples/Kaleidoscope/Chapter5/toy.cpp
@@ -504,7 +504,7 @@ Value *CallExprAST::Codegen() {
if (ArgsV.back() == 0) return 0;
}
- return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
}
Value *IfExprAST::Codegen() {
@@ -653,8 +653,8 @@ Value *ForExprAST::Codegen() {
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector<const Type*> Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
Doubles, false);
diff --git a/examples/Kaleidoscope/Chapter6/toy.cpp b/examples/Kaleidoscope/Chapter6/toy.cpp
index c557699..b25e946 100644
--- a/examples/Kaleidoscope/Chapter6/toy.cpp
+++ b/examples/Kaleidoscope/Chapter6/toy.cpp
@@ -589,7 +589,7 @@ Value *BinaryExprAST::Codegen() {
assert(F && "binary operator not found!");
Value *Ops[] = { L, R };
- return Builder.CreateCall(F, Ops, Ops+2, "binop");
+ return Builder.CreateCall(F, Ops, "binop");
}
Value *CallExprAST::Codegen() {
@@ -608,7 +608,7 @@ Value *CallExprAST::Codegen() {
if (ArgsV.back() == 0) return 0;
}
- return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
}
Value *IfExprAST::Codegen() {
@@ -757,8 +757,8 @@ Value *ForExprAST::Codegen() {
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector<const Type*> Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
Doubles, false);
diff --git a/examples/Kaleidoscope/Chapter7/toy.cpp b/examples/Kaleidoscope/Chapter7/toy.cpp
index 6afd118..63a7bca 100644
--- a/examples/Kaleidoscope/Chapter7/toy.cpp
+++ b/examples/Kaleidoscope/Chapter7/toy.cpp
@@ -685,7 +685,7 @@ Value *BinaryExprAST::Codegen() {
assert(F && "binary operator not found!");
Value *Ops[] = { L, R };
- return Builder.CreateCall(F, Ops, Ops+2, "binop");
+ return Builder.CreateCall(F, Ops, "binop");
}
Value *CallExprAST::Codegen() {
@@ -704,7 +704,7 @@ Value *CallExprAST::Codegen() {
if (ArgsV.back() == 0) return 0;
}
- return Builder.CreateCall(CalleeF, ArgsV.begin(), ArgsV.end(), "calltmp");
+ return Builder.CreateCall(CalleeF, ArgsV, "calltmp");
}
Value *IfExprAST::Codegen() {
@@ -905,8 +905,8 @@ Value *VarExprAST::Codegen() {
Function *PrototypeAST::Codegen() {
// Make the function type: double(double,double) etc.
- std::vector<const Type*> Doubles(Args.size(),
- Type::getDoubleTy(getGlobalContext()));
+ std::vector<Type*> Doubles(Args.size(),
+ Type::getDoubleTy(getGlobalContext()));
FunctionType *FT = FunctionType::get(Type::getDoubleTy(getGlobalContext()),
Doubles, false);
diff --git a/include/llvm-c/Core.h b/include/llvm-c/Core.h
index 2eccc11..a4456dd 100644
--- a/include/llvm-c/Core.h
+++ b/include/llvm-c/Core.h
@@ -68,13 +68,6 @@ typedef struct LLVMOpaqueModule *LLVMModuleRef;
*/
typedef struct LLVMOpaqueType *LLVMTypeRef;
-/**
- * When building recursive types using LLVMRefineType, LLVMTypeRef values may
- * become invalid; use LLVMTypeHandleRef to resolve this problem. See the
- * llvm::AbstractTypeHolder class.
- */
-typedef struct LLVMOpaqueTypeHandle *LLVMTypeHandleRef;
-
typedef struct LLVMOpaqueValue *LLVMValueRef;
typedef struct LLVMOpaqueBasicBlock *LLVMBasicBlockRef;
typedef struct LLVMOpaqueBuilder *LLVMBuilderRef;
@@ -206,7 +199,6 @@ typedef enum {
LLVMStructTypeKind, /**< Structures */
LLVMArrayTypeKind, /**< Arrays */
LLVMPointerTypeKind, /**< Pointers */
- LLVMOpaqueTypeKind, /**< Opaque: type with unknown structure */
LLVMVectorTypeKind, /**< SIMD 'packed' format, or other vector type */
LLVMMetadataTypeKind, /**< Metadata */
LLVMX86_MMXTypeKind /**< X86 MMX */
@@ -320,12 +312,6 @@ void LLVMSetDataLayout(LLVMModuleRef M, const char *Triple);
const char *LLVMGetTarget(LLVMModuleRef M);
void LLVMSetTarget(LLVMModuleRef M, const char *Triple);
-/** See Module::addTypeName. */
-LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty);
-void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name);
-LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
-const char *LLVMGetTypeName(LLVMModuleRef M, LLVMTypeRef Ty);
-
/** See Module::dump. */
void LLVMDumpModule(LLVMModuleRef M);
@@ -401,9 +387,16 @@ LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
unsigned ElementCount, LLVMBool Packed);
LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes, unsigned ElementCount,
LLVMBool Packed);
+LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name);
+void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed);
+
unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy);
void LLVMGetStructElementTypes(LLVMTypeRef StructTy, LLVMTypeRef *Dest);
LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy);
+LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy);
+
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name);
/* Operations on array, pointer, and vector types (sequence types) */
LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount);
@@ -418,21 +411,12 @@ unsigned LLVMGetVectorSize(LLVMTypeRef VectorTy);
/* Operations on other types */
LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C);
-LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMX86MMXTypeInContext(LLVMContextRef C);
LLVMTypeRef LLVMVoidType(void);
LLVMTypeRef LLVMLabelType(void);
-LLVMTypeRef LLVMOpaqueType(void);
LLVMTypeRef LLVMX86MMXType(void);
-/* Operations on type handles */
-LLVMTypeHandleRef LLVMCreateTypeHandle(LLVMTypeRef PotentiallyAbstractTy);
-void LLVMRefineType(LLVMTypeRef AbstractTy, LLVMTypeRef ConcreteTy);
-LLVMTypeRef LLVMResolveTypeHandle(LLVMTypeHandleRef TypeHandle);
-void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle);
-
-
/*===-- Values ------------------------------------------------------------===*/
/* The bulk of LLVM's object model consists of values, which comprise a very
@@ -581,6 +565,9 @@ LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
LLVMValueRef *ConstantVals, unsigned Length);
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
LLVMBool Packed);
+LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
+ LLVMValueRef *ConstantVals,
+ unsigned Count);
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size);
/* Constant expressions */
@@ -1117,7 +1104,6 @@ namespace llvm {
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Module, LLVMModuleRef )
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef )
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(IRBuilder<>, LLVMBuilderRef )
- DEFINE_SIMPLE_CONVERSION_FUNCTIONS(PATypeHolder, LLVMTypeHandleRef )
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer, LLVMMemoryBufferRef )
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(LLVMContext, LLVMContextRef )
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use, LLVMUseRef )
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 2cd15c3..d216440 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -41,6 +41,11 @@ typedef struct LLVMStructLayout *LLVMStructLayoutRef;
#include "llvm/Config/Targets.def"
#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
+#define LLVM_TARGET(TargetName) \
+ void LLVMInitialize##TargetName##MCAsmInfo(void);
+#include "llvm/Config/Targets.def"
+#undef LLVM_TARGET /* Explicit undef to make SWIG happier */
+
/** LLVMInitializeAllTargetInfos - The main program should call this function if
it wants access to all available targets that LLVM is configured to
support. */
@@ -67,6 +72,7 @@ static inline LLVMBool LLVMInitializeNativeTarget(void) {
#ifdef LLVM_NATIVE_TARGET
LLVM_NATIVE_TARGETINFO();
LLVM_NATIVE_TARGET();
+ LLVM_NATIVE_MCASMINFO();
return 0;
#else
return 1;
@@ -141,12 +147,6 @@ unsigned LLVMElementAtOffset(LLVMTargetDataRef, LLVMTypeRef StructTy,
unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef, LLVMTypeRef StructTy,
unsigned Element);
-/** Struct layouts are speculatively cached. If a TargetDataRef is alive when
- types are being refined and removed, this method must be called whenever a
- struct type is removed to avoid a dangling pointer in this cache.
- See the method llvm::TargetData::InvalidateStructLayoutInfo. */
-void LLVMInvalidateStructLayout(LLVMTargetDataRef, LLVMTypeRef StructTy);
-
/** Deallocates a TargetData.
See the destructor llvm::TargetData::~TargetData. */
void LLVMDisposeTargetData(LLVMTargetDataRef);
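
For illustration (not from the patch): target initialization now has a third per-target entry point. A sketch, assuming the X86 backend is configured in:

    #include "llvm-c/Target.h"

    static void initX86(void) {
      LLVMInitializeX86TargetInfo();
      LLVMInitializeX86Target();
      LLVMInitializeX86MCAsmInfo();  /* new, generated from Targets.def */
    }

LLVMInitializeNativeTarget() above performs the same three steps for the host target through the LLVM_NATIVE_* macros.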
diff --git a/include/llvm-c/Transforms/IPO.h b/include/llvm-c/Transforms/IPO.h
index d16e858..89b1298 100644
--- a/include/llvm-c/Transforms/IPO.h
+++ b/include/llvm-c/Transforms/IPO.h
@@ -30,9 +30,6 @@ void LLVMAddConstantMergePass(LLVMPassManagerRef PM);
/** See llvm::createDeadArgEliminationPass function. */
void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM);
-/** See llvm::createDeadTypeEliminationPass function. */
-void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM);
-
/** See llvm::createFunctionAttrsPass function. */
void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM);
diff --git a/include/llvm/ADT/APFloat.h b/include/llvm/ADT/APFloat.h
index 21b8c86..d2566a4 100644
--- a/include/llvm/ADT/APFloat.h
+++ b/include/llvm/ADT/APFloat.h
@@ -109,6 +109,7 @@ namespace llvm {
typedef signed short exponent_t;
struct fltSemantics;
+ class APSInt;
class StringRef;
/* When bits of a floating point number are truncated, this enum is
@@ -283,6 +284,7 @@ namespace llvm {
opStatus convert(const fltSemantics &, roundingMode, bool *);
opStatus convertToInteger(integerPart *, unsigned int, bool,
roundingMode, bool *) const;
+ opStatus convertToInteger(APSInt&, roundingMode, bool *) const;
opStatus convertFromAPInt(const APInt &,
bool, roundingMode);
opStatus convertFromSignExtendedInteger(const integerPart *, unsigned int,
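
A sketch of the new overload (values invented): the APSInt argument supplies both the bit width and the signedness of the conversion.

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APSInt.h"

    void truncateExample() {
      llvm::APFloat F(3.75);
      llvm::APSInt Result(/*BitWidth=*/32, /*isUnsigned=*/false);
      bool IsExact;
      F.convertToInteger(Result, llvm::APFloat::rmTowardZero, &IsExact);
      // Result == 3 and IsExact == false: the fraction was discarded.
    }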
diff --git a/include/llvm/ADT/APInt.h b/include/llvm/ADT/APInt.h
index 2feef07..e68e579 100644
--- a/include/llvm/ADT/APInt.h
+++ b/include/llvm/ADT/APInt.h
@@ -1241,18 +1241,19 @@ public:
/// toString - Converts an APInt to a string and appends it to Str. Str is
/// commonly a SmallString.
- void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed) const;
+ void toString(SmallVectorImpl<char> &Str, unsigned Radix, bool Signed,
+ bool formatAsCLiteral = false) const;
/// Considers the APInt to be unsigned and converts it into a string in the
/// radix given. The radix can be 2, 8, 10 or 16.
void toStringUnsigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
- toString(Str, Radix, false);
+ toString(Str, Radix, false, false);
}
/// Considers the APInt to be signed and converts it into a string in the
/// radix given. The radix can be 2, 8, 10 or 16.
void toStringSigned(SmallVectorImpl<char> &Str, unsigned Radix = 10) const {
- toString(Str, Radix, true);
+ toString(Str, Radix, true, false);
}
/// toString - This returns the APInt as a std::string. Note that this is an
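
Sketch of the new flag, under the assumption that it prepends the C prefix matching the radix (e.g. "0x" for 16):

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/SmallString.h"

    void literalExample() {
      llvm::APInt V(32, 255);
      llvm::SmallString<16> S;
      V.toString(S, /*Radix=*/16, /*Signed=*/false, /*formatAsCLiteral=*/true);
      // S == "0xFF" under that assumption; the default (false) keeps the old
      // bare-digit output, so the two wrappers above behave as before.
    }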
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 97e42cb..6db866e 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -39,7 +39,7 @@ namespace llvm {
const T *Data;
/// The number of elements.
- size_t Length;
+ size_type Length;
public:
/// @name Constructors
@@ -56,6 +56,10 @@ namespace llvm {
/*implicit*/ ArrayRef(const T *data, size_t length)
: Data(data), Length(length) {}
+ /// Construct an ArrayRef from a range.
+ ArrayRef(const T *begin, const T *end)
+ : Data(begin), Length(end - begin) {}
+
/// Construct an ArrayRef from a SmallVector.
/*implicit*/ ArrayRef(const SmallVectorImpl<T> &Vec)
: Data(Vec.data()), Length(Vec.size()) {}
@@ -96,6 +100,16 @@ namespace llvm {
return Data[Length-1];
}
+ /// equals - Check for element-wise equality.
+ bool equals(ArrayRef RHS) const {
+ if (Length != RHS.Length)
+ return false;
+ for (size_type i = 0; i != Length; i++)
+ if (Data[i] != RHS.Data[i])
+ return false;
+ return true;
+ }
+
/// slice(n) - Chop off the first N elements of the array.
ArrayRef<T> slice(unsigned N) {
assert(N <= size() && "Invalid specifier");
@@ -125,8 +139,30 @@ namespace llvm {
}
/// @}
+ /// @name Conversion operators
+ /// @{
+ operator std::vector<T>() const {
+ return std::vector<T>(Data, Data+Length);
+ }
+
+ /// @}
};
+ /// @name ArrayRef Comparison Operators
+ /// @{
+
+ template<typename T>
+ inline bool operator==(ArrayRef<T> LHS, ArrayRef<T> RHS) {
+ return LHS.equals(RHS);
+ }
+
+ template<typename T>
+ inline bool operator!=(ArrayRef<T> LHS, ArrayRef<T> RHS) {
+ return !(LHS == RHS);
+ }
+
+ /// @}
+
// ArrayRefs can be treated like a POD type.
template <typename T> struct isPodLike;
template <typename T> struct isPodLike<ArrayRef<T> > {
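
The additions above in one sketch: the (begin, end) constructor, element-wise equality through the new operators, and the std::vector conversion.

    #include "llvm/ADT/ArrayRef.h"
    #include <vector>

    void arrayRefExample() {
      int Buf[] = {1, 2, 3};
      llvm::ArrayRef<int> R(Buf, Buf + 3);  // new range constructor
      llvm::ArrayRef<int> S(Buf, 3);        // existing (data, length) form
      bool Same = (R == S);                 // operator== calls equals()
      std::vector<int> V = R;               // conversion operator copies
      (void)Same; (void)V;
    }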
diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h
index 714355b..d7c0074 100644
--- a/include/llvm/ADT/ImmutableList.h
+++ b/include/llvm/ADT/ImmutableList.h
@@ -103,6 +103,14 @@ public:
/// isEmpty - Returns true if the list is empty.
bool isEmpty() const { return !X; }
+ bool contains(const T& V) const {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ if (*I == V)
+ return true;
+ }
+ return false;
+ }
+
/// isEqual - Returns true if two lists are equal. Because all lists created
/// from the same ImmutableListFactory are uniqued, this has O(1) complexity
/// because the contents of the list do not need to be compared. Note
diff --git a/include/llvm/ADT/PackedVector.h b/include/llvm/ADT/PackedVector.h
index 272322a..2eaddc2 100644
--- a/include/llvm/ADT/PackedVector.h
+++ b/include/llvm/ADT/PackedVector.h
@@ -90,7 +90,7 @@ public:
Vec.setValue(Vec.Bits, Idx, val);
return *this;
}
- operator T() {
+ operator T() const {
return Vec.getValue(Vec.Bits, Idx);
}
};
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index 8b0a13d..5f0a55b 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -410,7 +410,14 @@ public:
this->setEnd(this->end()+1);
// Push everything else over.
std::copy_backward(I, this->end()-1, this->end());
- *I = Elt;
+
+ // If we just moved the element we're inserting, be sure to update
+ // the reference.
+ const T *EltPtr = &Elt;
+ if (I <= EltPtr && EltPtr < this->EndX)
+ ++EltPtr;
+
+ *I = *EltPtr;
return I;
}
size_t EltNo = I-this->begin();
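
The case this fix guards against, sketched: inserting a reference to an element of the same vector. std::copy_backward shifts the tail first, so without the pointer adjustment *I could be assigned from an already-moved slot.

    #include "llvm/ADT/SmallVector.h"

    void selfInsertExample() {
      llvm::SmallVector<int, 4> V;
      V.push_back(10); V.push_back(20); V.push_back(30);
      V.insert(V.begin(), V[2]);  // the reference aliases the vector itself
      // With the fix, V is {30, 10, 20, 30}.
    }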
diff --git a/include/llvm/ADT/StringMap.h b/include/llvm/ADT/StringMap.h
index 934cacc..3507787 100644
--- a/include/llvm/ADT/StringMap.h
+++ b/include/llvm/ADT/StringMap.h
@@ -140,7 +140,7 @@ public:
/// StringMapEntry object.
const char *getKeyData() const {return reinterpret_cast<const char*>(this+1);}
- const char *first() const { return getKeyData(); }
+ StringRef first() const { return StringRef(getKeyData(), getKeyLength()); }
/// Create - Create a StringMapEntry for the specified key and default
/// construct the value.
@@ -307,7 +307,7 @@ public:
return ValueTy();
}
- ValueTy& operator[](StringRef Key) {
+ ValueTy &operator[](StringRef Key) {
return GetOrCreateValue(Key).getValue();
}
@@ -355,8 +355,7 @@ public:
/// exists, return it. Otherwise, default construct a value, insert it, and
/// return.
template <typename InitTy>
- StringMapEntry<ValueTy> &GetOrCreateValue(StringRef Key,
- InitTy Val) {
+ MapEntryTy &GetOrCreateValue(StringRef Key, InitTy Val) {
unsigned BucketNo = LookupBucketFor(Key);
ItemBucket &Bucket = TheTable[BucketNo];
if (Bucket.Item && Bucket.Item != getTombstoneVal())
@@ -378,22 +377,10 @@ public:
return *NewItem;
}
- StringMapEntry<ValueTy> &GetOrCreateValue(StringRef Key) {
+ MapEntryTy &GetOrCreateValue(StringRef Key) {
return GetOrCreateValue(Key, ValueTy());
}
- template <typename InitTy>
- StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
- const char *KeyEnd,
- InitTy Val) {
- return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart), Val);
- }
-
- StringMapEntry<ValueTy> &GetOrCreateValue(const char *KeyStart,
- const char *KeyEnd) {
- return GetOrCreateValue(StringRef(KeyStart, KeyEnd - KeyStart));
- }
-
/// remove - Remove the specified key/value pair from the map, but do not
/// erase it. This aborts if the key is not in the map.
void remove(MapEntryTy *KeyValue) {
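
Sketch of the visible effect of these changes: first() now carries the key length, and the (KeyStart, KeyEnd) overloads fold into the StringRef form.

    #include "llvm/ADT/StringMap.h"
    #include "llvm/Support/raw_ostream.h"

    void stringMapExample() {
      llvm::StringMap<int> M;
      M.GetOrCreateValue("hello", 42);
      for (llvm::StringMap<int>::iterator I = M.begin(), E = M.end();
           I != E; ++I)
        llvm::errs() << I->first() << " = " << I->getValue() << "\n";
    }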
diff --git a/include/llvm/ADT/Triple.h b/include/llvm/ADT/Triple.h
index 078033d..fd23608 100644
--- a/include/llvm/ADT/Triple.h
+++ b/include/llvm/ADT/Triple.h
@@ -95,7 +95,8 @@ public:
Solaris,
Win32,
Haiku,
- Minix
+ Minix,
+ RTEMS
};
enum EnvironmentType {
UnknownEnvironment,
@@ -237,19 +238,10 @@ public:
/// specialized because it is a common query.
unsigned getOSMajorVersion() const {
unsigned Maj, Min, Micro;
- getDarwinNumber(Maj, Min, Micro);
+ getOSVersion(Maj, Min, Micro);
return Maj;
}
- void getDarwinNumber(unsigned &Major, unsigned &Minor,
- unsigned &Micro) const {
- return getOSVersion(Major, Minor, Micro);
- }
-
- unsigned getDarwinMajorNumber() const {
- return getOSMajorVersion();
- }
-
/// isOSVersionLT - Helper function for doing comparisons against version
/// numbers included in the target triple.
bool isOSVersionLT(unsigned Major, unsigned Minor = 0,
@@ -275,7 +267,7 @@ public:
/// isOSDarwin - Is this a "Darwin" OS (OS X or iOS).
bool isOSDarwin() const {
- return isMacOSX() ||getOS() == Triple::IOS;
+ return isMacOSX() || getOS() == Triple::IOS;
}
/// isOSWindows - Is this a "Windows" OS.
@@ -288,7 +280,7 @@ public:
/// compatibility, which handles supporting skewed version numbering schemes
/// used by the "darwin" triples.
unsigned isMacOSXVersionLT(unsigned Major, unsigned Minor = 0,
- unsigned Micro = 0) const {
+ unsigned Micro = 0) const {
assert(isMacOSX() && "Not an OS X triple!");
// If this is OS X, expect a sane version number.
@@ -299,7 +291,7 @@ public:
assert(Major == 10 && "Unexpected major version");
return isOSVersionLT(Minor + 4, Micro, 0);
}
-
+
/// @}
/// @name Mutators
/// @{
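
With the Darwin-specific accessors gone, version queries go through the generic API. A sketch (the triple string is arbitrary):

    #include "llvm/ADT/Triple.h"

    bool targetsModernDarwin() {
      llvm::Triple T("x86_64-apple-darwin10");
      unsigned Maj, Min, Micro;
      T.getOSVersion(Maj, Min, Micro);  // replaces getDarwinNumber()
      return T.isOSDarwin() && !T.isMacOSXVersionLT(10, 6);
    }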
diff --git a/include/llvm/AbstractTypeUser.h b/include/llvm/AbstractTypeUser.h
deleted file mode 100644
index 81f5c5c..0000000
--- a/include/llvm/AbstractTypeUser.h
+++ /dev/null
@@ -1,205 +0,0 @@
-//===-- llvm/AbstractTypeUser.h - AbstractTypeUser Interface ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the AbstractTypeUser class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_ABSTRACT_TYPE_USER_H
-#define LLVM_ABSTRACT_TYPE_USER_H
-
-#if !defined(LLVM_TYPE_H) && !defined(LLVM_VALUE_H)
-#error Do not include this file directly. Include Type.h instead.
-#error Some versions of GCC (e.g. 3.4 and 4.1) can not handle the inlined method
-#error PATypeHolder::dropRef() correctly otherwise.
-#endif
-
-// This is the "master" include for <cassert> Whether this file needs it or not,
-// it must always include <cassert> for the files which include
-// llvm/AbstractTypeUser.h
-//
-// In this way, most every LLVM source file will have access to the assert()
-// macro without having to #include <cassert> directly.
-//
-#include <cassert>
-
-namespace llvm {
-
-class Value;
-class Type;
-class DerivedType;
-template<typename T> struct simplify_type;
-
-/// The AbstractTypeUser class is an interface to be implemented by classes who
-/// could possibly use an abstract type. Abstract types are denoted by the
-/// isAbstract flag set to true in the Type class. These are classes that
-/// contain an Opaque type in their structure somewhere.
-///
-/// Classes must implement this interface so that they may be notified when an
-/// abstract type is resolved. Abstract types may be resolved into more
-/// concrete types through: linking, parsing, and bitcode reading. When this
-/// happens, all of the users of the type must be updated to reference the new,
-/// more concrete type. They are notified through the AbstractTypeUser
-/// interface.
-///
-/// In addition to this, AbstractTypeUsers must keep the use list of the
-/// potentially abstract type that they reference up-to-date. To do this in a
-/// nice, transparent way, the PATypeHandle class is used to hold "Potentially
-/// Abstract Types", and keep the use list of the abstract types up-to-date.
-/// @brief LLVM Abstract Type User Representation
-class AbstractTypeUser {
-protected:
- virtual ~AbstractTypeUser(); // Derive from me
-
- /// setType - It's normally not possible to change a Value's type in place,
- /// but an AbstractTypeUser subclass that knows what its doing can be
- /// permitted to do so with care.
- void setType(Value *V, const Type *NewTy);
-
-public:
-
- /// refineAbstractType - The callback method invoked when an abstract type is
- /// resolved to another type. An object must override this method to update
- /// its internal state to reference NewType instead of OldType.
- ///
- virtual void refineAbstractType(const DerivedType *OldTy,
- const Type *NewTy) = 0;
-
- /// The other case which AbstractTypeUsers must be aware of is when a type
- /// makes the transition from being abstract (where it has clients on its
- /// AbstractTypeUsers list) to concrete (where it does not). This method
- /// notifies ATU's when this occurs for a type.
- ///
- virtual void typeBecameConcrete(const DerivedType *AbsTy) = 0;
-
- // for debugging...
- virtual void dump() const = 0;
-};
-
-
-/// PATypeHandle - Handle to a Type subclass. This class is used to keep the
-/// use list of abstract types up-to-date.
-///
-class PATypeHandle {
- const Type *Ty;
- AbstractTypeUser * const User;
-
- // These functions are defined at the bottom of Type.h. See the comment there
- // for justification.
- void addUser();
- void removeUser();
-public:
- // ctor - Add use to type if abstract. Note that Ty must not be null
- inline PATypeHandle(const Type *ty, AbstractTypeUser *user)
- : Ty(ty), User(user) {
- addUser();
- }
-
- // ctor - Add use to type if abstract.
- inline PATypeHandle(const PATypeHandle &T) : Ty(T.Ty), User(T.User) {
- addUser();
- }
-
- // dtor - Remove reference to type...
- inline ~PATypeHandle() { removeUser(); }
-
- // Automatic casting operator so that the handle may be used naturally
- inline operator Type *() const { return const_cast<Type*>(Ty); }
- inline Type *get() const { return const_cast<Type*>(Ty); }
-
- // operator= - Allow assignment to handle
- inline Type *operator=(const Type *ty) {
- if (Ty != ty) { // Ensure we don't accidentally drop last ref to Ty
- removeUser();
- Ty = ty;
- addUser();
- }
- return get();
- }
-
- // operator= - Allow assignment to handle
- inline const Type *operator=(const PATypeHandle &T) {
- return operator=(T.Ty);
- }
-
- inline bool operator==(const Type *ty) {
- return Ty == ty;
- }
-
- // operator-> - Allow user to dereference handle naturally...
- inline const Type *operator->() const { return Ty; }
-};
-
-
-/// PATypeHolder - Holder class for a potentially abstract type. This uses
-/// efficient union-find techniques to handle dynamic type resolution. Unless
-/// you need to do custom processing when types are resolved, you should always
-/// use PATypeHolders in preference to PATypeHandles.
-///
-class PATypeHolder {
- mutable const Type *Ty;
- void destroy();
-public:
- PATypeHolder() : Ty(0) {}
- PATypeHolder(const Type *ty) : Ty(ty) {
- addRef();
- }
- PATypeHolder(const PATypeHolder &T) : Ty(T.Ty) {
- addRef();
- }
-
- ~PATypeHolder() { dropRef(); }
-
- operator Type *() const { return get(); }
- Type *get() const;
-
- // operator-> - Allow user to dereference handle naturally...
- Type *operator->() const { return get(); }
-
- // operator= - Allow assignment to handle
- Type *operator=(const Type *ty) {
- if (Ty != ty) { // Don't accidentally drop last ref to Ty.
- dropRef();
- Ty = ty;
- addRef();
- }
- return get();
- }
- Type *operator=(const PATypeHolder &H) {
- return operator=(H.Ty);
- }
-
- /// getRawType - This should only be used to implement the vmcore library.
- ///
- const Type *getRawType() const { return Ty; }
-
-private:
- void addRef();
- void dropRef();
- friend class TypeMapBase;
-};
-
-// simplify_type - Allow clients to treat uses just like values when using
-// casting operators.
-template<> struct simplify_type<PATypeHolder> {
- typedef const Type* SimpleType;
- static SimpleType getSimplifiedValue(const PATypeHolder &Val) {
- return static_cast<SimpleType>(Val.get());
- }
-};
-template<> struct simplify_type<const PATypeHolder> {
- typedef const Type* SimpleType;
- static SimpleType getSimplifiedValue(const PATypeHolder &Val) {
- return static_cast<SimpleType>(Val.get());
- }
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Analysis/BlockFrequency.h b/include/llvm/Analysis/BlockFrequency.h
new file mode 100644
index 0000000..c4b1e08
--- /dev/null
+++ b/include/llvm/Analysis/BlockFrequency.h
@@ -0,0 +1,53 @@
+//========-------- BlockFrequency.h - Block Frequency Analysis -------========//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_BLOCKFREQUENCY_H
+#define LLVM_ANALYSIS_BLOCKFREQUENCY_H
+
+#include "llvm/Pass.h"
+#include <climits>
+
+namespace llvm {
+
+class BranchProbabilityInfo;
+template<class BlockT, class FunctionT, class BranchProbInfoT>
+class BlockFrequencyImpl;
+
+/// The BlockFrequency pass uses the BlockFrequencyImpl machinery to estimate
+/// IR basic block frequencies.
+class BlockFrequency : public FunctionPass {
+
+ BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo> *BFI;
+
+public:
+ static char ID;
+
+ BlockFrequency();
+
+ ~BlockFrequency();
+
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnFunction(Function &F);
+
+ /// getBlockFreq - Return the block frequency. The result is never 0; the
+ /// value is always positive. Note that the entry frequency is 1024, so only
+ /// comparisons between block frequencies are meaningful, not the absolute
+ /// values; this keeps the analysis in integer arithmetic and avoids floating
+ /// point.
+ uint32_t getBlockFreq(BasicBlock *BB);
+};
+
+}
+
+#endif
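
A sketch of a client of the new pass (MyPass is hypothetical; the hooks are the standard FunctionPass ones):

    #include "llvm/Analysis/BlockFrequency.h"

    void MyPass::getAnalysisUsage(llvm::AnalysisUsage &AU) const {
      AU.addRequired<llvm::BlockFrequency>();
      AU.setPreservesAll();
    }

    bool MyPass::runOnFunction(llvm::Function &F) {
      llvm::BlockFrequency &BF = getAnalysis<llvm::BlockFrequency>();
      // The entry block is pinned at 1024; only relative magnitudes matter.
      uint32_t EntryFreq = BF.getBlockFreq(&F.getEntryBlock());
      (void)EntryFreq;
      return false;
    }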
diff --git a/include/llvm/Analysis/BlockFrequencyImpl.h b/include/llvm/Analysis/BlockFrequencyImpl.h
new file mode 100644
index 0000000..6580fd1
--- /dev/null
+++ b/include/llvm/Analysis/BlockFrequencyImpl.h
@@ -0,0 +1,349 @@
+//===---- BlockFrequencyImpl.h - Machine Block Frequency Implementation ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Shared implementation of BlockFrequency for IR and Machine Instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H
+#define LLVM_ANALYSIS_BLOCKFREQUENCYIMPL_H
+
+#include "llvm/BasicBlock.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/BranchProbability.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <vector>
+#include <sstream>
+#include <string>
+
+namespace llvm {
+
+
+class BlockFrequency;
+class MachineBlockFrequency;
+
+/// BlockFrequencyImpl implements the block frequency algorithm for IR and
+/// Machine Instructions. The algorithm starts with the value 1024 (START_FREQ)
+/// for the entry block and then propagates frequencies using branch weights
+/// from (Machine)BranchProbabilityInfo. LoopInfo is not required because the
+/// algorithm finds "backedges" by itself.
+template<class BlockT, class FunctionT, class BlockProbInfoT>
+class BlockFrequencyImpl {
+
+ DenseMap<BlockT *, uint32_t> Freqs;
+
+ BlockProbInfoT *BPI;
+
+ FunctionT *Fn;
+
+ typedef GraphTraits< Inverse<BlockT *> > GT;
+
+ static const uint32_t START_FREQ = 1024;
+
+ std::string getBlockName(BasicBlock *BB) const {
+ return BB->getNameStr();
+ }
+
+ std::string getBlockName(MachineBasicBlock *MBB) const {
+ std::stringstream ss;
+ ss << "BB#" << MBB->getNumber();
+
+ if (const BasicBlock *BB = MBB->getBasicBlock())
+ ss << " derived from LLVM BB " << BB->getNameStr();
+
+ return ss.str();
+ }
+
+ void setBlockFreq(BlockT *BB, uint32_t Freq) {
+ Freqs[BB] = Freq;
+ DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") = " << Freq << "\n");
+ }
+
+ /// getEdgeFreq - Return edge frequency based on SRC frequency and Src -> Dst
+ /// edge probability.
+ uint32_t getEdgeFreq(BlockT *Src, BlockT *Dst) const {
+ BranchProbability Prob = BPI->getEdgeProbability(Src, Dst);
+ uint64_t N = Prob.getNumerator();
+ uint64_t D = Prob.getDenominator();
+ uint64_t Res = (N * getBlockFreq(Src)) / D;
+
+ assert(Res <= UINT32_MAX);
+ return (uint32_t) Res;
+ }
+
+ /// incBlockFreq - Increase BB block frequency by FREQ.
+ ///
+ void incBlockFreq(BlockT *BB, uint32_t Freq) {
+ Freqs[BB] += Freq;
+ DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") += " << Freq
+ << " --> " << Freqs[BB] << "\n");
+ }
+
+ /// divBlockFreq - Divide the frequency of BB by PROB; PROB must be non-zero.
+ ///
+ void divBlockFreq(BlockT *BB, BranchProbability Prob) {
+ uint64_t N = Prob.getNumerator();
+ assert(N && "Illegal division by zero!");
+ uint64_t D = Prob.getDenominator();
+ uint64_t Freq = (Freqs[BB] * D) / N;
+
+ // Should we assert it?
+ if (Freq > UINT32_MAX)
+ Freq = UINT32_MAX;
+
+ Freqs[BB] = (uint32_t) Freq;
+ DEBUG(dbgs() << "Frequency(" << getBlockName(BB) << ") /= (" << Prob
+ << ") --> " << Freqs[BB] << "\n");
+ }
+
+ // All blocks in postorder.
+ std::vector<BlockT *> POT;
+
+ // Map Block -> Position in reverse-postorder list.
+ DenseMap<BlockT *, unsigned> RPO;
+
+ // Cycle probability for each block.
+ DenseMap<BlockT *, uint32_t> CycleProb;
+
+ // (reverse-)postorder traversal iterators.
+ typedef typename std::vector<BlockT *>::iterator pot_iterator;
+ typedef typename std::vector<BlockT *>::reverse_iterator rpot_iterator;
+
+ pot_iterator pot_begin() { return POT.begin(); }
+ pot_iterator pot_end() { return POT.end(); }
+
+ rpot_iterator rpot_begin() { return POT.rbegin(); }
+ rpot_iterator rpot_end() { return POT.rend(); }
+
+ rpot_iterator rpot_at(BlockT *BB) {
+ rpot_iterator I = rpot_begin();
+ unsigned idx = RPO[BB];
+ assert(idx);
+ std::advance(I, idx - 1);
+
+ assert(*I == BB);
+ return I;
+ }
+
+
+ /// Return the probability of getting to the DST block through the
+ /// SRC -> DST edge.
+ ///
+ BranchProbability getBackEdgeProbability(BlockT *Src, BlockT *Dst) const {
+ uint32_t N = getEdgeFreq(Src, Dst);
+ uint32_t D = getBlockFreq(Dst);
+
+ return BranchProbability(N, D);
+ }
+
+ /// isReachable - Returns true if the block BB is reachable from the entry.
+ ///
+ bool isReachable(BlockT *BB) {
+ return RPO.count(BB);
+ }
+
+ /// isBackedge - Return true if the edge Src -> Dst is a backedge.
+ ///
+ bool isBackedge(BlockT *Src, BlockT *Dst) {
+ assert(isReachable(Src));
+ assert(isReachable(Dst));
+
+ unsigned a = RPO[Src];
+ unsigned b = RPO[Dst];
+
+ return a > b;
+ }
+
+ /// getSingleBlockPred - Return the single predecessor of BB, or NULL if
+ /// BB has zero or more than one predecessor.
+ BlockT *getSingleBlockPred(BlockT *BB) {
+ typename GT::ChildIteratorType
+ PI = GraphTraits< Inverse<BlockT *> >::child_begin(BB),
+ PE = GraphTraits< Inverse<BlockT *> >::child_end(BB);
+
+ if (PI == PE)
+ return 0;
+
+ BlockT *Pred = *PI;
+
+ ++PI;
+ if (PI != PE)
+ return 0;
+
+ return Pred;
+ }
+
+ void doBlock(BlockT *BB, BlockT *LoopHead,
+ SmallPtrSet<BlockT *, 8> &BlocksInLoop) {
+
+ DEBUG(dbgs() << "doBlock(" << getBlockName(BB) << ")\n");
+ setBlockFreq(BB, 0);
+
+ if (BB == LoopHead) {
+ setBlockFreq(BB, START_FREQ);
+ return;
+ }
+
+ if(BlockT *Pred = getSingleBlockPred(BB)) {
+ if (BlocksInLoop.count(Pred))
+ setBlockFreq(BB, getEdgeFreq(Pred, BB));
+ // TODO: else? irreducible, ignore it for now.
+ return;
+ }
+
+ bool isInLoop = false;
+ bool isLoopHead = false;
+
+ for (typename GT::ChildIteratorType
+ PI = GraphTraits< Inverse<BlockT *> >::child_begin(BB),
+ PE = GraphTraits< Inverse<BlockT *> >::child_end(BB);
+ PI != PE; ++PI) {
+ BlockT *Pred = *PI;
+
+ if (isReachable(Pred) && isBackedge(Pred, BB)) {
+ isLoopHead = true;
+ } else if (BlocksInLoop.count(Pred)) {
+ incBlockFreq(BB, getEdgeFreq(Pred, BB));
+ isInLoop = true;
+ }
+ // TODO: else? irreducible.
+ }
+
+ if (!isInLoop)
+ return;
+
+ if (!isLoopHead)
+ return;
+
+ assert(START_FREQ >= CycleProb[BB]);
+ uint32_t CProb = CycleProb[BB];
+ uint32_t Numerator = START_FREQ - CProb ? START_FREQ - CProb : 1;
+ divBlockFreq(BB, BranchProbability(Numerator, START_FREQ));
+ }
+
+ /// doLoop - Propagate block frequency down through the loop.
+ void doLoop(BlockT *Head, BlockT *Tail) {
+ DEBUG(dbgs() << "doLoop(" << getBlockName(Head) << ", "
+ << getBlockName(Tail) << ")\n");
+
+ SmallPtrSet<BlockT *, 8> BlocksInLoop;
+
+ for (rpot_iterator I = rpot_at(Head), E = rpot_end(); I != E; ++I) {
+ BlockT *BB = *I;
+ doBlock(BB, Head, BlocksInLoop);
+
+ BlocksInLoop.insert(BB);
+ }
+
+ // Compute loop's cyclic probability using backedges probabilities.
+ for (typename GT::ChildIteratorType
+ PI = GraphTraits< Inverse<BlockT *> >::child_begin(Head),
+ PE = GraphTraits< Inverse<BlockT *> >::child_end(Head);
+ PI != PE; ++PI) {
+ BlockT *Pred = *PI;
+ assert(Pred);
+ if (isReachable(Pred) && isBackedge(Pred, Head)) {
+ BranchProbability Prob = getBackEdgeProbability(Pred, Head);
+ uint64_t N = Prob.getNumerator();
+ uint64_t D = Prob.getDenominator();
+ uint64_t Res = (N * START_FREQ) / D;
+
+ assert(Res <= UINT32_MAX);
+ CycleProb[Head] += (uint32_t) Res;
+ }
+ }
+ }
+
+ friend class BlockFrequency;
+ friend class MachineBlockFrequency;
+
+ void doFunction(FunctionT *fn, BlockProbInfoT *bpi) {
+ Fn = fn;
+ BPI = bpi;
+
+ // Clear everything.
+ RPO.clear();
+ POT.clear();
+ CycleProb.clear();
+ Freqs.clear();
+
+ BlockT *EntryBlock = fn->begin();
+
+ copy(po_begin(EntryBlock), po_end(EntryBlock), back_inserter(POT));
+
+ unsigned RPOidx = 0;
+ for (rpot_iterator I = rpot_begin(), E = rpot_end(); I != E; ++I) {
+ BlockT *BB = *I;
+ RPO[BB] = ++RPOidx;
+ DEBUG(dbgs() << "RPO[" << getBlockName(BB) << "] = " << RPO[BB] << "\n");
+ }
+
+ // Traverse all blocks in postorder.
+ for (pot_iterator I = pot_begin(), E = pot_end(); I != E; ++I) {
+ BlockT *BB = *I;
+ BlockT *LastTail = 0;
+ DEBUG(dbgs() << "POT: " << getBlockName(BB) << "\n");
+
+ for (typename GT::ChildIteratorType
+ PI = GraphTraits< Inverse<BlockT *> >::child_begin(BB),
+ PE = GraphTraits< Inverse<BlockT *> >::child_end(BB);
+ PI != PE; ++PI) {
+
+ BlockT *Pred = *PI;
+ if (isReachable(Pred) && isBackedge(Pred, BB)
+ && (!LastTail || RPO[Pred] > RPO[LastTail]))
+ LastTail = Pred;
+ }
+
+ if (LastTail)
+ doLoop(BB, LastTail);
+ }
+
+ // Finally, treat the whole function as one big loop and traverse it once
+ // again.
+ doLoop(*(rpot_begin()), *(pot_begin()));
+ }
+
+public:
+ /// getBlockFreq - Return block frequency. Never return 0, value must be
+ /// positive.
+ uint32_t getBlockFreq(BlockT *BB) const {
+ typename DenseMap<BlockT *, uint32_t>::const_iterator I = Freqs.find(BB);
+ if (I != Freqs.end())
+ return I->second ? I->second : 1;
+ return 1;
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << "\n\n---- Block Freqs ----\n";
+ for (typename FunctionT::iterator I = Fn->begin(), E = Fn->end(); I != E;) {
+ BlockT *BB = I++;
+ OS << " " << getBlockName(BB) << " = " << getBlockFreq(BB) << "\n";
+
+ for (typename GraphTraits<BlockT *>::ChildIteratorType
+ SI = GraphTraits<BlockT *>::child_begin(BB),
+ SE = GraphTraits<BlockT *>::child_end(BB); SI != SE; ++SI) {
+ BlockT *Succ = *SI;
+ OS << " " << getBlockName(BB) << " -> " << getBlockName(Succ)
+ << " = " << getEdgeFreq(BB, Succ) << "\n";
+ }
+ }
+ }
+
+ void dump() const {
+ print(dbgs());
+ }
+};
+
+}
+
+#endif
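
A worked example of the fixed-point scheme above (branch weights invented):

    // entry: freq = START_FREQ = 1024
    // entry ->(3/4) left  : freq(left)  = 1024 * 3/4 = 768
    // entry ->(1/4) right : freq(right) = 1024 * 1/4 = 256
    // left, right -> join : freq(join)  = 768 + 256  = 1024
    //
    // For a loop head H whose backedges carry a combined cyclic
    // probability cp (scaled to START_FREQ), doBlock computes
    //   freq(H) = freq_in(H) * START_FREQ / (START_FREQ - cp)
    // i.e. the usual freq = in / (1 - p_back) in 1024-based fixed point;
    // the "? ... : 1" guard keeps the divisor non-zero when cp saturates.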
diff --git a/include/llvm/Analysis/BranchProbabilityInfo.h b/include/llvm/Analysis/BranchProbabilityInfo.h
index 91f289d..02ead98 100644
--- a/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -15,8 +15,9 @@
#define LLVM_ANALYSIS_BRANCHPROBABILITYINFO_H
#include "llvm/InitializePasses.h"
+#include "llvm/Pass.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/BranchProbability.h"
-#include "llvm/Analysis/LoopInfo.h"
namespace llvm {
@@ -25,6 +26,11 @@ class raw_ostream;
class BranchProbabilityInfo : public FunctionPass {
// Default weight value. Used when we don't have information about the edge.
+ // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
+ // the successors have a weight yet. But it doesn't make sense when providing
+ // a weight to an edge that may have siblings with non-zero weights. This can
+ // be handled in various ways, but it's probably fine for an edge with unknown
+ // weight to just "inherit" the non-zero weight of an adjacent successor.
static const uint32_t DEFAULT_WEIGHT = 16;
typedef std::pair<BasicBlock *, BasicBlock *> Edge;
@@ -41,10 +47,7 @@ public:
initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<LoopInfo>();
- AU.setPreservesAll();
- }
+ void getAnalysisUsage(AnalysisUsage &AU) const;
bool runOnFunction(Function &F);
diff --git a/include/llvm/Analysis/DIBuilder.h b/include/llvm/Analysis/DIBuilder.h
index 96c6587..a706cc8 100644
--- a/include/llvm/Analysis/DIBuilder.h
+++ b/include/llvm/Analysis/DIBuilder.h
@@ -135,6 +135,7 @@ namespace llvm {
unsigned Flags);
/// createMemberType - Create debugging information entry for a member.
+ /// @param Scope Member scope.
/// @param Name Member name.
/// @param File File where this member is defined.
/// @param LineNo Line number.
@@ -143,7 +144,7 @@ namespace llvm {
/// @param OffsetInBits Member offset.
/// @param Flags Flags to encode member attribute, e.g. private
/// @param Ty Parent type.
- DIType createMemberType(StringRef Name, DIFile File,
+ DIType createMemberType(DIDescriptor Scope, StringRef Name, DIFile File,
unsigned LineNo, uint64_t SizeInBits,
uint64_t AlignInBits, uint64_t OffsetInBits,
unsigned Flags, DIType Ty);
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 1b78fe4..e56d24d 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -37,8 +37,8 @@ class TargetData;
class IVStrideUse : public CallbackVH, public ilist_node<IVStrideUse> {
friend class IVUsers;
public:
- IVStrideUse(IVUsers *P, Instruction* U, Value *O, Value *PN)
- : CallbackVH(U), Parent(P), OperandValToReplace(O), Phi(PN) {
+ IVStrideUse(IVUsers *P, Instruction* U, Value *O)
+ : CallbackVH(U), Parent(P), OperandValToReplace(O) {
}
/// getUser - Return the user instruction for this use.
@@ -51,11 +51,6 @@ public:
setValPtr(NewUser);
}
- /// getPhi - Return the phi node that represents this IV.
- PHINode *getPhi() const {
- return cast<PHINode>(Phi);
- }
-
/// getOperandValToReplace - Return the Value of the operand in the user
/// instruction that this IVStrideUse is representing.
Value *getOperandValToReplace() const {
@@ -86,9 +81,6 @@ private:
/// that this IVStrideUse is representing.
WeakVH OperandValToReplace;
- /// Phi - The loop header phi that represents this IV.
- WeakVH Phi;
-
/// PostIncLoops - The set of loops for which Expr has been adjusted to
/// use post-inc mode. This corresponds with SCEVExpander's post-inc concept.
PostIncLoopSet PostIncLoops;
@@ -151,9 +143,9 @@ public:
/// AddUsersIfInteresting - Inspect the specified Instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
- bool AddUsersIfInteresting(Instruction *I, PHINode *Phi);
+ bool AddUsersIfInteresting(Instruction *I);
- IVStrideUse &AddUser(Instruction *User, Value *Operand, PHINode *Phi);
+ IVStrideUse &AddUser(Instruction *User, Value *Operand);
/// getReplacementExpr - Return a SCEV expression which computes the
/// value of the OperandValToReplace of the given IVStrideUse.
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index b56fe08..34860e7 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -90,18 +90,27 @@ namespace llvm {
/// get methods: These are static ctor methods for creating various
/// MemDepResult kinds.
static MemDepResult getDef(Instruction *Inst) {
+ assert(Inst && "Def requires inst");
return MemDepResult(PairTy(Inst, Def));
}
static MemDepResult getClobber(Instruction *Inst) {
+ assert(Inst && "Clobber requires inst");
return MemDepResult(PairTy(Inst, Clobber));
}
static MemDepResult getNonLocal() {
return MemDepResult(PairTy(0, NonLocal));
}
+ static MemDepResult getUnknown() {
+ return MemDepResult(PairTy(0, Clobber));
+ }
/// isClobber - Return true if this MemDepResult represents a query that is
/// an instruction clobber dependency.
- bool isClobber() const { return Value.getInt() == Clobber; }
+ bool isClobber() const { return Value.getInt() == Clobber && getInst(); }
+
+ /// isUnknown - Return true if this MemDepResult represents a query which
+ /// cannot and/or will not be computed.
+ bool isUnknown() const { return Value.getInt() == Clobber && !getInst(); }
/// isDef - Return true if this MemDepResult represents a query that is
/// an instruction definition dependency.
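
A sketch of a client distinguishing the new tri-state (MDA and Inst assumed in scope):

    llvm::MemDepResult R = MDA.getDependency(Inst);
    if (R.isUnknown()) {
      // Encoded as Clobber with a null instruction: the query could not
      // (or will not) be computed, so give up conservatively.
    } else if (R.isClobber()) {
      llvm::Instruction *C = R.getInst();  // non-null for a real clobber
      (void)C;
    }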
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index 0eff75f..a22bd12 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -88,6 +88,13 @@ namespace llvm {
//===--------------------------------------------------------------------===//
//
+ // createObjCARCAliasAnalysisPass - This pass implements ObjC-ARC-based
+ // alias analysis.
+ //
+ ImmutablePass *createObjCARCAliasAnalysisPass();
+
+ //===--------------------------------------------------------------------===//
+ //
// createProfileLoaderPass - This pass loads information from a profile dump
// file.
//
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 39d378e..a8c03b2 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -30,6 +30,10 @@ namespace llvm {
/// memory.
class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
ScalarEvolution &SE;
+
+ // New instructions receive a name that identifies them with the current pass.
+ const char* IVName;
+
std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
InsertedExpressions;
std::set<AssertingVH<Value> > InsertedValues;
@@ -67,9 +71,9 @@ namespace llvm {
public:
/// SCEVExpander - Construct a SCEVExpander in "canonical" mode.
- explicit SCEVExpander(ScalarEvolution &se)
- : SE(se), IVIncInsertLoop(0), CanonicalMode(true),
- Builder(se.getContext(), TargetFolder(se.TD)) {}
+ explicit SCEVExpander(ScalarEvolution &se, const char *name)
+ : SE(se), IVName(name), IVIncInsertLoop(0), IVIncInsertPos(0),
+ CanonicalMode(true), Builder(se.getContext(), TargetFolder(se.TD)) {}
/// clear - Erase the contents of the InsertedExpressions map so that users
/// trying to expand the same expression into multiple BasicBlocks or
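
Sketch: every expander is now constructed with a base name for the values it materializes (the name and the surrounding variables SE, S, Ty, InsertPt are illustrative):

    llvm::SCEVExpander Rewriter(SE, "indvar");
    llvm::Value *V = Rewriter.expandCodeFor(S, Ty, InsertPt);
    // Instructions created for S are named after "indvar", which makes the
    // originating pass visible in the IR.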
diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h
index 6df1693..6826330 100644
--- a/include/llvm/Analysis/ValueTracking.h
+++ b/include/llvm/Analysis/ValueTracking.h
@@ -15,6 +15,7 @@
#ifndef LLVM_ANALYSIS_VALUETRACKING_H
#define LLVM_ANALYSIS_VALUETRACKING_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/DataTypes.h"
#include <string>
@@ -108,18 +109,9 @@ namespace llvm {
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
Value *FindInsertedValue(Value *V,
- const unsigned *idx_begin,
- const unsigned *idx_end,
+ ArrayRef<unsigned> idx_range,
Instruction *InsertBefore = 0);
- /// This is a convenience wrapper for finding values indexed by a single index
- /// only.
- inline Value *FindInsertedValue(Value *V, const unsigned Idx,
- Instruction *InsertBefore = 0) {
- const unsigned Idxs[1] = { Idx };
- return FindInsertedValue(V, &Idxs[0], &Idxs[1], InsertBefore);
- }
-
/// GetPointerBaseWithConstantOffset - Analyze the specified pointer to see if
/// it can be expressed as a base pointer plus a constant offset. Return the
/// base and offset to the caller.
@@ -158,6 +150,10 @@ namespace llvm {
return GetUnderlyingObject(const_cast<Value *>(V), TD, MaxLookup);
}
+ /// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+ /// are lifetime markers.
+ bool onlyUsedByLifetimeMarkers(const Value *V);
+
} // end namespace llvm
#endif
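
Sketch: index paths are now a single ArrayRef argument, which subsumes the deleted one-index wrapper (Agg is a hypothetical aggregate value):

    unsigned Path[] = {1, 0};
    llvm::Value *Nested =
        llvm::FindInsertedValue(Agg, llvm::ArrayRef<unsigned>(Path, 2));
    unsigned Idx = 1;
    llvm::Value *Single =
        llvm::FindInsertedValue(Agg, llvm::ArrayRef<unsigned>(&Idx, 1));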
diff --git a/include/llvm/Assembly/Writer.h b/include/llvm/Assembly/Writer.h
index c5b2390..8d8befd 100644
--- a/include/llvm/Assembly/Writer.h
+++ b/include/llvm/Assembly/Writer.h
@@ -17,52 +17,12 @@
#ifndef LLVM_ASSEMBLY_WRITER_H
#define LLVM_ASSEMBLY_WRITER_H
-#include <string>
-
namespace llvm {
class Type;
class Module;
class Value;
class raw_ostream;
-template <typename T> class SmallVectorImpl;
-
-/// TypePrinting - Type printing machinery.
-class TypePrinting {
- void *TypeNames; // A map to remember type names.
- TypePrinting(const TypePrinting &); // DO NOT IMPLEMENT
- void operator=(const TypePrinting&); // DO NOT IMPLEMENT
-public:
- TypePrinting();
- ~TypePrinting();
-
- void clear();
-
- void print(const Type *Ty, raw_ostream &OS, bool IgnoreTopLevelName = false);
-
- void printAtLeastOneLevel(const Type *Ty, raw_ostream &OS) {
- print(Ty, OS, true);
- }
-
- /// hasTypeName - Return true if the type has a name in TypeNames, false
- /// otherwise.
- bool hasTypeName(const Type *Ty) const;
-
- /// addTypeName - Add a name for the specified type if it doesn't already have
- /// one. This name will be printed instead of the structural version of the
- /// type in order to make the output more concise.
- void addTypeName(const Type *Ty, const std::string &N);
-
-private:
- void CalcTypeName(const Type *Ty, SmallVectorImpl<const Type *> &TypeStack,
- raw_ostream &OS, bool IgnoreTopLevelName = false);
-};
-
-// WriteTypeSymbolic - This attempts to write the specified type as a symbolic
-// type, if there is an entry in the Module's symbol table for the specified
-// type or one of its component types.
-//
-void WriteTypeSymbolic(raw_ostream &, const Type *, const Module *M);
// WriteAsOperand - Write the name of the specified value out to the specified
// ostream. This can be useful when you just want to print int %reg126, not the
diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h
index 8b69d6e..233eab8 100644
--- a/include/llvm/Attributes.h
+++ b/include/llvm/Attributes.h
@@ -69,6 +69,9 @@ const Attributes Hotpatch = 1<<29; ///< Function should have special
///'hotpatch' sequence in prologue
const Attributes UWTable = 1<<30; ///< Function must be in a unwind
///table
+const Attributes NonLazyBind = 1U<<31; ///< Function is called early and/or
+ /// often, so lazy binding isn't
+ /// worthwhile.
/// Note that uwtable is about the ABI or the user mandating an entry in the
/// unwind table. The nounwind attribute is about an exception passing by the
@@ -90,7 +93,7 @@ const Attributes ParameterOnly = ByVal | Nest | StructRet | NoCapture;
const Attributes FunctionOnly = NoReturn | NoUnwind | ReadNone | ReadOnly |
NoInline | AlwaysInline | OptimizeForSize | StackProtect | StackProtectReq |
NoRedZone | NoImplicitFloat | Naked | InlineHint | StackAlignment |
- Hotpatch | UWTable;
+ Hotpatch | UWTable | NonLazyBind;
/// @brief Parameter attributes that do not apply to vararg call arguments.
const Attributes VarArgsIncompatible = StructRet;
diff --git a/include/llvm/BasicBlock.h b/include/llvm/BasicBlock.h
index 7e7c9e7..3b953c0 100644
--- a/include/llvm/BasicBlock.h
+++ b/include/llvm/BasicBlock.h
@@ -110,7 +110,7 @@ public:
Function *getParent() { return Parent; }
/// use_back - Specialize the methods defined in Value, as we know that an
- /// BasicBlock can only be used by Users (specifically PHI nodes, terminators,
+ /// BasicBlock can only be used by Users (specifically terminators
/// and BlockAddress's).
User *use_back() { return cast<User>(*use_begin());}
const User *use_back() const { return cast<User>(*use_begin());}
@@ -138,6 +138,12 @@ public:
return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbg();
}
+ // Same as above, but also skip lifetime intrinsics.
+ Instruction* getFirstNonPHIOrDbgOrLifetime();
+ const Instruction* getFirstNonPHIOrDbgOrLifetime() const {
+ return const_cast<BasicBlock*>(this)->getFirstNonPHIOrDbgOrLifetime();
+ }
+
/// removeFromParent - This method unlinks 'this' from the containing
/// function, but does not delete it.
///
@@ -248,6 +254,10 @@ public:
/// other than direct branches, switches, etc. to it.
bool hasAddressTaken() const { return getSubclassDataFromValue() != 0; }
+ /// replaceSuccessorsPhiUsesWith - Update all phi nodes in all our successors
+ /// to refer to basic block New instead of to us.
+ void replaceSuccessorsPhiUsesWith(BasicBlock *New);
+
private:
/// AdjustBlockAddressRefCount - BasicBlock stores the number of BlockAddress
/// objects using it. This is almost always 0, sometimes one, possibly but
diff --git a/include/llvm/Bitcode/BitstreamReader.h b/include/llvm/Bitcode/BitstreamReader.h
index 779ef5f..0437f53 100644
--- a/include/llvm/Bitcode/BitstreamReader.h
+++ b/include/llvm/Bitcode/BitstreamReader.h
@@ -194,6 +194,7 @@ public:
CurAbbrevs[i]->addRef();
// Copy block scope and bump ref counts.
+ BlockScope = RHS.BlockScope;
for (unsigned S = 0, e = static_cast<unsigned>(BlockScope.size());
S != e; ++S) {
std::vector<BitCodeAbbrev*> &Abbrevs = BlockScope[S].PrevAbbrevs;
@@ -375,10 +376,12 @@ public:
// Check that the block wasn't partially defined, and that the offset isn't
// bogus.
- if (AtEndOfStream() || NextChar+NumWords*4 > BitStream->getLastChar())
+ const unsigned char *const SkipTo = NextChar + NumWords*4;
+ if (AtEndOfStream() || SkipTo > BitStream->getLastChar() ||
+ SkipTo < BitStream->getFirstChar())
return true;
- NextChar += NumWords*4;
+ NextChar = SkipTo;
return false;
}
diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h
index 7692bd2..df68bd5 100644
--- a/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/include/llvm/Bitcode/LLVMBitCodes.h
@@ -29,13 +29,23 @@ namespace bitc {
// Module sub-block id's.
PARAMATTR_BLOCK_ID,
- TYPE_BLOCK_ID,
+
+ /// TYPE_BLOCK_ID_OLD - This is the type descriptor block in LLVM 2.9 and
+ /// earlier, replaced with TYPE_BLOCK_ID_NEW. FIXME: Remove in LLVM 3.1.
+ TYPE_BLOCK_ID_OLD,
+
CONSTANTS_BLOCK_ID,
FUNCTION_BLOCK_ID,
- TYPE_SYMTAB_BLOCK_ID,
+
+ /// TYPE_SYMTAB_BLOCK_ID_OLD - This is the type symbol table block from
+ /// LLVM 2.9 and earlier bitcode files. FIXME: Remove in LLVM 3.1.
+ TYPE_SYMTAB_BLOCK_ID_OLD,
+
VALUE_SYMTAB_BLOCK_ID,
METADATA_BLOCK_ID,
- METADATA_ATTACHMENT_ID
+ METADATA_ATTACHMENT_ID,
+
+ TYPE_BLOCK_ID_NEW
};
@@ -72,31 +82,38 @@ namespace bitc {
/// TYPE blocks have codes for each type primitive they use.
enum TypeCodes {
- TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries]
+ TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries]
// Type Codes
- TYPE_CODE_VOID = 2, // VOID
- TYPE_CODE_FLOAT = 3, // FLOAT
- TYPE_CODE_DOUBLE = 4, // DOUBLE
- TYPE_CODE_LABEL = 5, // LABEL
- TYPE_CODE_OPAQUE = 6, // OPAQUE
- TYPE_CODE_INTEGER = 7, // INTEGER: [width]
- TYPE_CODE_POINTER = 8, // POINTER: [pointee type]
- TYPE_CODE_FUNCTION = 9, // FUNCTION: [vararg, retty, paramty x N]
- TYPE_CODE_STRUCT = 10, // STRUCT: [ispacked, eltty x N]
- TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty]
- TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty]
+ TYPE_CODE_VOID = 2, // VOID
+ TYPE_CODE_FLOAT = 3, // FLOAT
+ TYPE_CODE_DOUBLE = 4, // DOUBLE
+ TYPE_CODE_LABEL = 5, // LABEL
+ TYPE_CODE_OPAQUE = 6, // OPAQUE
+ TYPE_CODE_INTEGER = 7, // INTEGER: [width]
+ TYPE_CODE_POINTER = 8, // POINTER: [pointee type]
+ TYPE_CODE_FUNCTION = 9, // FUNCTION: [vararg, retty, paramty x N]
+
+ // FIXME: This is the encoding used for structs in LLVM 2.9 and earlier.
+ // REMOVE this in LLVM 3.1
+ TYPE_CODE_STRUCT_OLD = 10, // STRUCT: [ispacked, eltty x N]
+ TYPE_CODE_ARRAY = 11, // ARRAY: [numelts, eltty]
+ TYPE_CODE_VECTOR = 12, // VECTOR: [numelts, eltty]
// These are not with the other floating point types because they're
// a late addition, and putting them in the right place breaks
// binary compatibility.
- TYPE_CODE_X86_FP80 = 13, // X86 LONG DOUBLE
- TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa)
- TYPE_CODE_PPC_FP128= 15, // PPC LONG DOUBLE (2 doubles)
+ TYPE_CODE_X86_FP80 = 13, // X86 LONG DOUBLE
+ TYPE_CODE_FP128 = 14, // LONG DOUBLE (112 bit mantissa)
+ TYPE_CODE_PPC_FP128= 15, // PPC LONG DOUBLE (2 doubles)
- TYPE_CODE_METADATA = 16, // METADATA
+ TYPE_CODE_METADATA = 16, // METADATA
- TYPE_CODE_X86_MMX = 17 // X86 MMX
+ TYPE_CODE_X86_MMX = 17, // X86 MMX
+
+ TYPE_CODE_STRUCT_ANON = 18, // STRUCT_ANON: [ispacked, eltty x N]
+ TYPE_CODE_STRUCT_NAME = 19, // STRUCT_NAME: [strchr x N]
+ TYPE_CODE_STRUCT_NAMED = 20 // STRUCT_NAMED: [ispacked, eltty x N]
};
// The type symbol table only has one code (TST_ENTRY_CODE).
@@ -112,20 +129,16 @@ namespace bitc {
enum MetadataCodes {
METADATA_STRING = 1, // MDSTRING: [values]
- // FIXME: Remove NODE in favor of NODE2 in LLVM 3.0
- METADATA_NODE = 2, // NODE with potentially invalid metadata
- // FIXME: Remove FN_NODE in favor of FN_NODE2 in LLVM 3.0
- METADATA_FN_NODE = 3, // FN_NODE with potentially invalid metadata
+ // 2 is unused.
+ // 3 is unused.
METADATA_NAME = 4, // STRING: [values]
- // FIXME: Remove NAMED_NODE in favor of NAMED_NODE2 in LLVM 3.0
- METADATA_NAMED_NODE = 5, // NAMED_NODE with potentially invalid metadata
+ // 5 is unused.
METADATA_KIND = 6, // [n x [id, name]]
- // FIXME: Remove ATTACHMENT in favor of ATTACHMENT2 in LLVM 3.0
- METADATA_ATTACHMENT = 7, // ATTACHMENT with potentially invalid metadata
- METADATA_NODE2 = 8, // NODE2: [n x (type num, value num)]
- METADATA_FN_NODE2 = 9, // FN_NODE2: [n x (type num, value num)]
- METADATA_NAMED_NODE2 = 10, // NAMED_NODE2: [n x mdnodes]
- METADATA_ATTACHMENT2 = 11 // [m x [value, [n x [id, mdnode]]]
+ // 7 is unused.
+ METADATA_NODE = 8, // NODE: [n x (type num, value num)]
+ METADATA_FN_NODE = 9, // FN_NODE: [n x (type num, value num)]
+ METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes]
+ METADATA_ATTACHMENT = 11 // [m x [value, [n x [id, mdnode]]]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
// constant and maintains an implicit current type value.
@@ -227,21 +240,18 @@ namespace bitc {
FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE
FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...]
- FUNC_CODE_INST_MALLOC = 17, // MALLOC: [instty, op, align]
- FUNC_CODE_INST_FREE = 18, // FREE: [opty, op]
+ // 17 is unused.
+ // 18 is unused.
FUNC_CODE_INST_ALLOCA = 19, // ALLOCA: [instty, op, align]
FUNC_CODE_INST_LOAD = 20, // LOAD: [opty, op, align, vol]
- // FIXME: Remove STORE in favor of STORE2 in LLVM 3.0
- FUNC_CODE_INST_STORE = 21, // STORE: [valty,val,ptr, align, vol]
- // FIXME: Remove CALL in favor of CALL2 in LLVM 3.0
- FUNC_CODE_INST_CALL = 22, // CALL with potentially invalid metadata
+ // 21 is unused.
+ // 22 is unused.
FUNC_CODE_INST_VAARG = 23, // VAARG: [valistty, valist, instty]
// This store code encodes the pointer type, rather than the value type;
// this is so information only available in the pointer type (e.g. address
// spaces) is retained.
- FUNC_CODE_INST_STORE2 = 24, // STORE: [ptrty,ptr,val, align, vol]
- // FIXME: Remove GETRESULT in favor of EXTRACTVAL in LLVM 3.0
- FUNC_CODE_INST_GETRESULT = 25, // GETRESULT: [ty, opval, n]
+ FUNC_CODE_INST_STORE = 24, // STORE: [ptrty,ptr,val, align, vol]
+ // 25 is unused.
FUNC_CODE_INST_EXTRACTVAL = 26, // EXTRACTVAL: [n x operands]
FUNC_CODE_INST_INSERTVAL = 27, // INSERTVAL: [n x operands]
// fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to
@@ -251,14 +261,12 @@ namespace bitc {
FUNC_CODE_INST_VSELECT = 29, // VSELECT: [ty,opval,opval,predty,pred]
FUNC_CODE_INST_INBOUNDS_GEP= 30, // INBOUNDS_GEP: [n x operands]
FUNC_CODE_INST_INDIRECTBR = 31, // INDIRECTBR: [opty, op0, op1, ...]
-
- // FIXME: Remove DEBUG_LOC in favor of DEBUG_LOC2 in LLVM 3.0
- FUNC_CODE_DEBUG_LOC = 32, // DEBUG_LOC with potentially invalid metadata
+ // 32 is unused.
FUNC_CODE_DEBUG_LOC_AGAIN = 33, // DEBUG_LOC_AGAIN
- FUNC_CODE_INST_CALL2 = 34, // CALL2: [attr, fnty, fnid, args...]
+ FUNC_CODE_INST_CALL = 34, // CALL: [attr, fnty, fnid, args...]
- FUNC_CODE_DEBUG_LOC2 = 35 // DEBUG_LOC2: [Line,Col,ScopeVal, IAVal]
+ FUNC_CODE_DEBUG_LOC = 35 // DEBUG_LOC: [Line,Col,ScopeVal, IAVal]
};
} // End bitc namespace
} // End llvm namespace
diff --git a/include/llvm/CodeGen/Analysis.h b/include/llvm/CodeGen/Analysis.h
index 78bf9fc..f8a7029 100644
--- a/include/llvm/CodeGen/Analysis.h
+++ b/include/llvm/CodeGen/Analysis.h
@@ -16,6 +16,7 @@
#include "llvm/Instructions.h"
#include "llvm/InlineAsm.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -37,6 +38,12 @@ unsigned ComputeLinearIndex(const Type *Ty,
const unsigned *IndicesEnd,
unsigned CurIndex = 0);
+inline unsigned ComputeLinearIndex(const Type *Ty,
+ ArrayRef<unsigned> Indices,
+ unsigned CurIndex = 0) {
+ return ComputeLinearIndex(Ty, Indices.begin(), Indices.end(), CurIndex);
+}
+
/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
/// EVTs that represent all the individual underlying
/// non-aggregate types that comprise it.
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index 5eea099..06c5c83 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -465,8 +465,8 @@ namespace llvm {
void EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned uid) const;
- void EmitLLVMUsedList(Constant *List);
- void EmitXXStructorList(Constant *List);
+ void EmitLLVMUsedList(const Constant *List);
+ void EmitXXStructorList(const Constant *List);
GCMetadataPrinter *GetOrCreateGCPrinter(GCStrategy *C);
};
}
diff --git a/include/llvm/CodeGen/FunctionLoweringInfo.h b/include/llvm/CodeGen/FunctionLoweringInfo.h
index 4421cc0..84bbf48 100644
--- a/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -24,6 +24,7 @@
#ifndef NDEBUG
#include "llvm/ADT/SmallSet.h"
#endif
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -57,7 +58,7 @@ public:
const Function *Fn;
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
-
+ BranchProbabilityInfo *BPI;
/// CanLowerReturn - true iff the function's return value can be lowered to
/// registers.
bool CanLowerReturn;
diff --git a/include/llvm/CodeGen/ISDOpcodes.h b/include/llvm/CodeGen/ISDOpcodes.h
index e765cad..459cecd 100644
--- a/include/llvm/CodeGen/ISDOpcodes.h
+++ b/include/llvm/CodeGen/ISDOpcodes.h
@@ -232,7 +232,7 @@ namespace ISD {
SMULO, UMULO,
// Simple binary floating point operators.
- FADD, FSUB, FMUL, FDIV, FREM,
+ FADD, FSUB, FMUL, FMA, FDIV, FREM,
// FCOPYSIGN(X, Y) - Return the value of X with the sign of Y. NOTE: This
// DAG node does not require that X and Y have the same type, just that they
@@ -580,7 +580,8 @@ namespace ISD {
// PREFETCH - This corresponds to a prefetch intrinsic. It takes a chain as
// its first operand. The other operands are the address to prefetch,
- // read / write specifier, and locality specifier.
+ // read / write specifier, locality specifier and instruction / data cache
+ // specifier.
PREFETCH,
// OUTCHAIN = MEMBARRIER(INCHAIN, load-load, load-store, store-load,
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index c931261..098dd0b 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -39,8 +39,6 @@ namespace {
(void) llvm::createGreedyRegisterAllocator();
(void) llvm::createDefaultPBQPRegisterAllocator();
- (void) llvm::createSimpleRegisterCoalescer();
-
llvm::linkOcamlGC();
llvm::linkShadowStackGC();
diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h
index ad12157..397e59e 100644
--- a/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/include/llvm/CodeGen/MachineBasicBlock.h
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/Support/DataTypes.h"
#include <functional>
namespace llvm {
@@ -27,6 +28,7 @@ class MCSymbol;
class SlotIndexes;
class StringRef;
class raw_ostream;
+class MachineBranchProbabilityInfo;
template <>
struct ilist_traits<MachineInstr> : public ilist_default_traits<MachineInstr> {
@@ -63,12 +65,19 @@ class MachineBasicBlock : public ilist_node<MachineBasicBlock> {
const BasicBlock *BB;
int Number;
MachineFunction *xParent;
-
+
/// Predecessors/Successors - Keep track of the predecessor / successor
/// basicblocks.
std::vector<MachineBasicBlock *> Predecessors;
std::vector<MachineBasicBlock *> Successors;
+
+ /// Weights - Keep track of the weights of the successor edges. This vector
+ /// has the same order as Successors, or is empty if the weights are not
+ /// used (i.e. the optimization is disabled).
+ std::vector<uint32_t> Weights;
+ typedef std::vector<uint32_t>::iterator weight_iterator;
+
/// LiveIns - Keep track of the physical registers that are livein of
/// the basicblock.
std::vector<unsigned> LiveIns;
@@ -244,11 +253,13 @@ public:
void updateTerminator();
// Machine-CFG mutators
-
+
/// addSuccessor - Add succ as a successor of this MachineBasicBlock.
- /// The Predecessors list of succ is automatically updated.
+ /// The Predecessors list of succ is automatically updated. The WEIGHT
+ /// parameter is stored in the Weights list and may be used by the
+ /// MachineBranchProbabilityInfo analysis to calculate branch probabilities.
///
- void addSuccessor(MachineBasicBlock *succ);
+ void addSuccessor(MachineBasicBlock *succ, uint32_t weight = 0);
/// removeSuccessor - Remove successor from the successors list of this
/// MachineBasicBlock. The Predecessors list of succ is automatically updated.
@@ -260,7 +271,12 @@ public:
/// updated. Return the iterator to the element after the one removed.
///
succ_iterator removeSuccessor(succ_iterator I);
-
+
+ /// replaceSuccessor - Replace successor OLD with NEW and update weight info.
+ ///
+ void replaceSuccessor(MachineBasicBlock *Old, MachineBasicBlock *New);
+
+
/// transferSuccessors - Transfers all the successors from MBB to this
/// machine basic block (i.e., copies all the successors fromMBB and
/// remove all the successors from fromMBB).
@@ -396,8 +412,22 @@ public:
/// getSymbol - Return the MCSymbol for this basic block.
///
MCSymbol *getSymbol() const;
-
-private: // Methods used to maintain doubly linked list of blocks...
+
+
+private:
+ /// getWeightIterator - Return weight iterator corresponding to the I
+ /// successor iterator.
+ weight_iterator getWeightIterator(succ_iterator I);
+
+ friend class MachineBranchProbabilityInfo;
+
+ /// getSuccWeight - Return the weight of the edge from this block to MBB.
+ /// This method should NOT be called directly; use the getEdgeWeight method
+ /// of the MachineBranchProbabilityInfo class instead.
+ uint32_t getSuccWeight(MachineBasicBlock *succ);
+
+
+ // Methods used to maintain doubly linked list of blocks...
friend struct ilist_traits<MachineBasicBlock>;
// Machine-CFG mutators
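
As a quick illustration of the weighted-successor API added above, here is a
minimal C++ sketch of wiring up a conditional branch; the blocks and the 96/4
weights are assumptions made for the example, not part of this patch:

    // Sketch only: Pred, IfTrue and IfFalse are assumed MachineBasicBlocks.
    void wireConditionalBranch(MachineBasicBlock *Pred,
                               MachineBasicBlock *IfTrue,
                               MachineBasicBlock *IfFalse) {
      // Weights are relative, not percentages; MachineBranchProbabilityInfo
      // later divides each weight by the sum over all successors.
      Pred->addSuccessor(IfTrue, 96);   // likely taken
      Pred->addSuccessor(IfFalse, 4);   // rarely taken
    }
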
diff --git a/include/llvm/CodeGen/MachineBlockFrequency.h b/include/llvm/CodeGen/MachineBlockFrequency.h
new file mode 100644
index 0000000..25bf1f0
--- /dev/null
+++ b/include/llvm/CodeGen/MachineBlockFrequency.h
@@ -0,0 +1,53 @@
+//====----- MachineBlockFrequency.h - MachineBlock Frequency Analysis ----====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEBLOCKFREQUENCY_H
+#define LLVM_CODEGEN_MACHINEBLOCKFREQUENCY_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <climits>
+
+namespace llvm {
+
+class MachineBranchProbabilityInfo;
+template<class BlockT, class FunctionT, class BranchProbInfoT>
+class BlockFrequencyImpl;
+
+/// The MachineBlockFrequency pass uses the BlockFrequencyImpl implementation
+/// to estimate machine basic block frequencies.
+class MachineBlockFrequency : public MachineFunctionPass {
+
+ BlockFrequencyImpl<MachineBasicBlock, MachineFunction, MachineBranchProbabilityInfo> *MBFI;
+
+public:
+ static char ID;
+
+ MachineBlockFrequency();
+
+ ~MachineBlockFrequency();
+
+ void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ bool runOnMachineFunction(MachineFunction &F);
+
+ /// getBlockFreq - Return the block frequency. The returned value is never 0;
+ /// it is always positive. Note that the initial frequency is 1024, which
+ /// means we should not rely on the value itself, but only on comparisons
+ /// with other block frequencies. We do this to avoid using floating point.
+ uint32_t getBlockFreq(MachineBasicBlock *MBB);
+};
+
+}
+
+#endif
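
A hedged usage sketch for the pass declared above: a hypothetical
MachineFunctionPass (MyPass is an invented name) requests the analysis and
compares block frequencies against the entry block:

    #include "llvm/CodeGen/MachineBlockFrequency.h"

    void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineBlockFrequency>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool MyPass::runOnMachineFunction(MachineFunction &MF) {
      MachineBlockFrequency &MBF = getAnalysis<MachineBlockFrequency>();
      // Frequencies are relative to the initial value of 1024, so only
      // comparisons between blocks are meaningful.
      uint32_t EntryFreq = MBF.getBlockFreq(&MF.front());
      for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
        if (MBF.getBlockFreq(&*I) > EntryFreq)
          ; // hotter than the entry block; a real pass would act on this
      return false;
    }
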
diff --git a/include/llvm/CodeGen/MachineBranchProbabilityInfo.h b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
new file mode 100644
index 0000000..d9673e2
--- /dev/null
+++ b/include/llvm/CodeGen/MachineBranchProbabilityInfo.h
@@ -0,0 +1,78 @@
+
+//==- MachineBranchProbabilityInfo.h - Machine Branch Probability Analysis -==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass is used to evaluate branch probabilities on machine basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H
+#define LLVM_CODEGEN_MACHINEBRANCHPROBABILITYINFO_H
+
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
+#include <climits>
+
+namespace llvm {
+
+class raw_ostream;
+class MachineBasicBlock;
+
+class MachineBranchProbabilityInfo : public ImmutablePass {
+
+ // Default weight value. Used when we don't have information about the edge.
+ // TODO: DEFAULT_WEIGHT makes sense during static predication, when none of
+ // the successors have a weight yet. But it doesn't make sense when providing
+ // weight to an edge that may have siblings with non-zero weights. This can
+ // be handled various ways, but it's probably fine for an edge with unknown
+ // weight to just "inherit" the non-zero weight of an adjacent successor.
+ static const uint32_t DEFAULT_WEIGHT = 16;
+
+ // Get sum of the block successors' weights.
+ uint32_t getSumForBlock(MachineBasicBlock *MBB) const;
+
+public:
+ static char ID;
+
+ MachineBranchProbabilityInfo() : ImmutablePass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeMachineBranchProbabilityInfoPass(Registry);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ }
+
+ // Return the edge weight. If we don't have any information about it, return
+ // DEFAULT_WEIGHT.
+ uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst) const;
+
+ // A 'Hot' edge is an edge whose probability is >= 80%.
+ bool isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const;
+
+ // Return a hot successor of the block MBB, or null if there isn't one.
+ MachineBasicBlock *getHotSucc(MachineBasicBlock *MBB) const;
+
+ // Return a probability as a fraction between 0 (0% probability) and
+ // 1 (100% probability); however, the value is never equal to 0, and it can
+ // be 1 if and only if the SRC block has a single successor.
+ BranchProbability getEdgeProbability(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const;
+
+ // Print a value between 0 (0% probability) and 1 (100% probability);
+ // however, the value is never equal to 0, and it can be 1 if and only if
+ // the SRC block has a single successor.
+ raw_ostream &printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const;
+};
+
+}
+
+
+#endif
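
To make the weight-to-probability arithmetic concrete: with the 96/4 successor
weights from the earlier sketch, the sum over the block's successors is 100, so
getEdgeProbability returns 96/100 and 4/100, and only the first edge is "hot"
(96/100 >= 80/100). A hedged client sketch, assuming a pass that added this
analysis to its AnalysisUsage:

    MachineBranchProbabilityInfo &MBPI =
      getAnalysis<MachineBranchProbabilityInfo>();
    BranchProbability Prob = MBPI.getEdgeProbability(Pred, IfTrue); // 96/100
    if (MBPI.isEdgeHot(Pred, IfTrue)) {
      // Taken: 96/100 is at least 80%.
    }
    MachineBasicBlock *Hot = MBPI.getHotSucc(Pred); // IfTrue here; may be null
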
diff --git a/include/llvm/CodeGen/MachineFunction.h b/include/llvm/CodeGen/MachineFunction.h
index f56c053..6e08f7b 100644
--- a/include/llvm/CodeGen/MachineFunction.h
+++ b/include/llvm/CodeGen/MachineFunction.h
@@ -345,7 +345,7 @@ public:
/// CreateMachineInstr - Allocate a new MachineInstr. Use this instead
/// of `new MachineInstr'.
///
- MachineInstr *CreateMachineInstr(const TargetInstrDesc &TID,
+ MachineInstr *CreateMachineInstr(const MCInstrDesc &MCID,
DebugLoc DL,
bool NoImp = false);
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index c36dd69..5b3d3ea 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -17,11 +17,12 @@
#define LLVM_CODEGEN_MACHINEINSTR_H
#include "llvm/CodeGen/MachineOperand.h"
-#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/DebugLoc.h"
#include <vector>
@@ -30,7 +31,6 @@ namespace llvm {
template <typename T> class SmallVectorImpl;
class AliasAnalysis;
-class TargetInstrDesc;
class TargetInstrInfo;
class TargetRegisterInfo;
class MachineFunction;
@@ -57,7 +57,7 @@ public:
// function frame setup code.
};
private:
- const TargetInstrDesc *TID; // Instruction descriptor.
+ const MCInstrDesc *MCID; // Instruction descriptor.
uint16_t NumImplicitOps; // Number of implicit operands (which
// are determined at construction time).
@@ -94,7 +94,7 @@ private:
MachineInstr(MachineFunction &, const MachineInstr &);
/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
- /// TID NULL and no operands.
+ /// MCID NULL and no operands.
MachineInstr();
// The next two constructors have DebugLoc and non-DebugLoc versions;
@@ -103,25 +103,25 @@ private:
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
/// implicit operands. It reserves space for the number of operands specified
- /// by the TargetInstrDesc. The version with a DebugLoc should be preferred.
- explicit MachineInstr(const TargetInstrDesc &TID, bool NoImp = false);
+ /// by the MCInstrDesc. The version with a DebugLoc should be preferred.
+ explicit MachineInstr(const MCInstrDesc &MCID, bool NoImp = false);
/// MachineInstr ctor - Work exactly the same as the ctor above, except that
/// the MachineInstr is created and added to the end of the specified basic
/// block. The version with a DebugLoc should be preferred.
- MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &TID);
+ MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &MCID);
/// MachineInstr ctor - This constructor create a MachineInstr and add the
/// implicit operands. It reserves space for number of operands specified by
- /// TargetInstrDesc. An explicit DebugLoc is supplied.
- explicit MachineInstr(const TargetInstrDesc &TID, const DebugLoc dl,
+ /// MCInstrDesc. An explicit DebugLoc is supplied.
+ explicit MachineInstr(const MCInstrDesc &MCID, const DebugLoc dl,
bool NoImp = false);
/// MachineInstr ctor - Work exactly the same as the ctor above, except that
/// the MachineInstr is created and added to the end of the specified basic
/// block.
MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
- const TargetInstrDesc &TID);
+ const MCInstrDesc &MCID);
~MachineInstr();
@@ -181,13 +181,22 @@ public:
///
DebugLoc getDebugLoc() const { return debugLoc; }
+ /// emitError - Emit an error referring to the source location of this
+ /// instruction. This should only be used for inline assembly that is somehow
+ /// impossible to compile. Other errors should have been handled much
+ /// earlier.
+ ///
+ /// If this method returns, the caller should try to recover from the error.
+ ///
+ void emitError(StringRef Msg) const;
+
/// getDesc - Returns the target instruction descriptor of this
/// MachineInstr.
- const TargetInstrDesc &getDesc() const { return *TID; }
+ const MCInstrDesc &getDesc() const { return *MCID; }
/// getOpcode - Returns the opcode of this MachineInstr.
///
- int getOpcode() const { return TID->Opcode; }
+ int getOpcode() const { return MCID->Opcode; }
/// Access to explicit operands of the instruction.
///
@@ -279,6 +288,9 @@ public:
bool isCopy() const {
return getOpcode() == TargetOpcode::COPY;
}
+ bool isFullCopy() const {
+ return isCopy() && !getOperand(0).getSubReg() && !getOperand(1).getSubReg();
+ }
/// isCopyLike - Return true if the instruction behaves like a copy.
/// This does not include native copy instructions.
@@ -464,8 +476,8 @@ public:
/// hasUnmodeledSideEffects - Return true if this instruction has side
/// effects that are not modeled by mayLoad / mayStore, etc.
- /// For all instructions, the property is encoded in TargetInstrDesc::Flags
- /// (see TargetInstrDesc::hasUnmodeledSideEffects(). The only exception is
+ /// For all instructions, the property is encoded in MCInstrDesc::Flags
+ /// (see MCInstrDesc::hasUnmodeledSideEffects(). The only exception is
/// INLINEASM instruction, in which case the side effect property is encoded
/// in one of its operands (see InlineAsm::Extra_HasSideEffect).
///
@@ -497,7 +509,7 @@ public:
/// setDesc - Replace the instruction descriptor (thus opcode) of
/// the current instruction with a new one.
///
- void setDesc(const TargetInstrDesc &tid) { TID = &tid; }
+ void setDesc(const MCInstrDesc &tid) { MCID = &tid; }
/// setDebugLoc - Replace current source information with new such.
/// Avoid using this, the constructor argument is preferable.
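
A small sketch of the new isFullCopy() predicate in use; it refines isCopy()
by also requiring that neither operand carries a subregister index (MI is an
assumed MachineInstr*):

    if (MI->isFullCopy()) {
      // Whole-register copy: both operands are plain registers.
      unsigned DstReg = MI->getOperand(0).getReg();
      unsigned SrcReg = MI->getOperand(1).getReg();
      (void)DstReg; (void)SrcReg; // e.g. a coalescer could try joining these
    } else if (MI->isCopy()) {
      // Subregister copy: operand 0 and/or 1 has a non-zero getSubReg().
    }
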
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index c8183a3..b989027 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -22,7 +22,7 @@
namespace llvm {
-class TargetInstrDesc;
+class MCInstrDesc;
class MDNode;
namespace RegState {
@@ -77,6 +77,11 @@ public:
return *this;
}
+ const MachineInstrBuilder &addCImm(const ConstantInt *Val) const {
+ MI->addOperand(MachineOperand::CreateCImm(Val));
+ return *this;
+ }
+
const MachineInstrBuilder &addFPImm(const ConstantFP *Val) const {
MI->addOperand(MachineOperand::CreateFPImm(Val));
return *this;
@@ -175,8 +180,8 @@ public:
///
inline MachineInstrBuilder BuildMI(MachineFunction &MF,
DebugLoc DL,
- const TargetInstrDesc &TID) {
- return MachineInstrBuilder(MF.CreateMachineInstr(TID, DL));
+ const MCInstrDesc &MCID) {
+ return MachineInstrBuilder(MF.CreateMachineInstr(MCID, DL));
}
/// BuildMI - This version of the builder sets up the first operand as a
@@ -184,9 +189,9 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF,
///
inline MachineInstrBuilder BuildMI(MachineFunction &MF,
DebugLoc DL,
- const TargetInstrDesc &TID,
+ const MCInstrDesc &MCID,
unsigned DestReg) {
- return MachineInstrBuilder(MF.CreateMachineInstr(TID, DL))
+ return MachineInstrBuilder(MF.CreateMachineInstr(MCID, DL))
.addReg(DestReg, RegState::Define);
}
@@ -197,9 +202,9 @@ inline MachineInstrBuilder BuildMI(MachineFunction &MF,
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
DebugLoc DL,
- const TargetInstrDesc &TID,
+ const MCInstrDesc &MCID,
unsigned DestReg) {
- MachineInstr *MI = BB.getParent()->CreateMachineInstr(TID, DL);
+ MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
return MachineInstrBuilder(MI).addReg(DestReg, RegState::Define);
}
@@ -211,8 +216,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
DebugLoc DL,
- const TargetInstrDesc &TID) {
- MachineInstr *MI = BB.getParent()->CreateMachineInstr(TID, DL);
+ const MCInstrDesc &MCID) {
+ MachineInstr *MI = BB.getParent()->CreateMachineInstr(MCID, DL);
BB.insert(I, MI);
return MachineInstrBuilder(MI);
}
@@ -223,8 +228,8 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock &BB,
///
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
DebugLoc DL,
- const TargetInstrDesc &TID) {
- return BuildMI(*BB, BB->end(), DL, TID);
+ const MCInstrDesc &MCID) {
+ return BuildMI(*BB, BB->end(), DL, MCID);
}
/// BuildMI - This version of the builder inserts the newly-built
@@ -233,9 +238,9 @@ inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
///
inline MachineInstrBuilder BuildMI(MachineBasicBlock *BB,
DebugLoc DL,
- const TargetInstrDesc &TID,
+ const MCInstrDesc &MCID,
unsigned DestReg) {
- return BuildMI(*BB, BB->end(), DL, TID, DestReg);
+ return BuildMI(*BB, BB->end(), DL, MCID, DestReg);
}
inline unsigned getDefRegState(bool B) {
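
Tying the MCInstrDesc rename and the new addCImm() together, a hedged sketch
of building an instruction that carries a wide immediate; the opcode
MyTarget::WIDE_MOVE is invented for the example, and TII, MBB, InsertPt, DL,
DestReg and Ctx are assumed to be in scope:

    APInt Wide(128, 42);                        // a >64-bit immediate
    ConstantInt *CI = ConstantInt::get(Ctx, Wide);
    BuildMI(MBB, InsertPt, DL, TII->get(MyTarget::WIDE_MOVE), DestReg)
      .addCImm(CI);                             // stored as MO_CImmediate
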
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index 140c6e8..fdef574 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -21,6 +21,7 @@ namespace llvm {
class BlockAddress;
class ConstantFP;
+class ConstantInt;
class GlobalValue;
class MachineBasicBlock;
class MachineInstr;
@@ -38,6 +39,7 @@ public:
enum MachineOperandType {
MO_Register, ///< Register operand.
MO_Immediate, ///< Immediate operand
+ MO_CImmediate, ///< Immediate >64-bit operand
MO_FPImmediate, ///< Floating-point immediate operand
MO_MachineBasicBlock, ///< MachineBasicBlock reference
MO_FrameIndex, ///< Abstract Stack Frame Index
@@ -111,6 +113,7 @@ private:
union {
MachineBasicBlock *MBB; // For MO_MachineBasicBlock.
const ConstantFP *CFP; // For MO_FPImmediate.
+ const ConstantInt *CI; // For MO_CImmediate. Integers > 64 bits.
int64_t ImmVal; // For MO_Immediate.
const MDNode *MD; // For MO_Metadata.
MCSymbol *Sym; // For MO_MCSymbol
@@ -173,6 +176,8 @@ public:
bool isReg() const { return OpKind == MO_Register; }
/// isImm - Tests if this is a MO_Immediate operand.
bool isImm() const { return OpKind == MO_Immediate; }
+ /// isCImm - Tests if this is a MO_CImmediate operand.
+ bool isCImm() const { return OpKind == MO_CImmediate; }
/// isFPImm - Tests if this is a MO_FPImmediate operand.
bool isFPImm() const { return OpKind == MO_FPImmediate; }
/// isMBB - Tests if this is a MO_MachineBasicBlock operand.
@@ -333,6 +338,11 @@ public:
return Contents.ImmVal;
}
+ const ConstantInt *getCImm() const {
+ assert(isCImm() && "Wrong MachineOperand accessor");
+ return Contents.CI;
+ }
+
const ConstantFP *getFPImm() const {
assert(isFPImm() && "Wrong MachineOperand accessor");
return Contents.CFP;
@@ -440,6 +450,12 @@ public:
return Op;
}
+ static MachineOperand CreateCImm(const ConstantInt *CI) {
+ MachineOperand Op(MachineOperand::MO_CImmediate);
+ Op.Contents.CI = CI;
+ return Op;
+ }
+
static MachineOperand CreateFPImm(const ConstantFP *CFP) {
MachineOperand Op(MachineOperand::MO_FPImmediate);
Op.Contents.CFP = CFP;
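
On the consuming side, a brief sketch of matching the new operand kind (MO is
an assumed MachineOperand reference):

    if (MO.isCImm()) {
      const APInt &Val = MO.getCImm()->getValue();
      if (Val.getBitWidth() > 64) {
        // Too wide for MO_Immediate's int64_t payload; work with the APInt.
      }
    } else if (MO.isImm()) {
      int64_t Imm = MO.getImm();                // the ordinary <= 64-bit case
      (void)Imm;
    }
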
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 74df8da..1079726 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -32,11 +32,6 @@ class MachineRegisterInfo {
IndexedMap<std::pair<const TargetRegisterClass*, MachineOperand*>,
VirtReg2IndexFunctor> VRegInfo;
- /// RegClassVRegMap - This vector acts as a map from TargetRegisterClass to
- /// virtual registers. For each target register class, it keeps a list of
- /// virtual registers belonging to the class.
- std::vector<unsigned> *RegClass2VRegMap;
-
/// RegAllocHints - This vector records register allocation hints for virtual
/// registers. For each virtual register, it keeps a register and hint type
/// pair making up the allocation hint. Hint type is target specific except
@@ -216,13 +211,6 @@ public:
///
unsigned getNumVirtRegs() const { return VRegInfo.size(); }
- /// getRegClassVirtRegs - Return the list of virtual registers of the given
- /// target register class.
- const std::vector<unsigned> &
- getRegClassVirtRegs(const TargetRegisterClass *RC) const {
- return RegClass2VRegMap[RC->getID()];
- }
-
/// setRegAllocationHint - Specify a register allocation hint for the
/// specified virtual register.
void setRegAllocationHint(unsigned Reg, unsigned Type, unsigned PrefReg) {
@@ -237,6 +225,14 @@ public:
return RegAllocHints[Reg];
}
+ /// getSimpleHint - Return the preferred register allocation hint, or 0 if a
+ /// standard simple hint (Type == 0) is not set.
+ unsigned getSimpleHint(unsigned Reg) const {
+ std::pair<unsigned, unsigned> Hint = getRegAllocationHint(Reg);
+ return Hint.first ? 0 : Hint.second;
+ }
+
+
//===--------------------------------------------------------------------===//
// Physical Register Use Info
//===--------------------------------------------------------------------===//
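
A hedged sketch of how an allocator might consult the new getSimpleHint()
accessor; tryAssign() is an invented helper, and MRI and VirtReg are assumed:

    // getSimpleHint() returns the hinted register only for the standard hint
    // type (Type == 0); target-specific hint types yield 0 here.
    unsigned Hint = MRI->getSimpleHint(VirtReg);
    if (Hint && MRI->getRegClass(VirtReg)->contains(Hint))
      tryAssign(VirtReg, Hint);                 // hypothetical helper
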
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 53aee7a..e7928cb 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -73,16 +73,9 @@ namespace llvm {
/// This pass is still in development
extern char &StrongPHIEliminationID;
- extern char &PreAllocSplittingID;
-
/// LiveStacks pass. An analysis keeping track of the liveness of stack slots.
extern char &LiveStacksID;
- /// SimpleRegisterCoalescing pass. Aggressively coalesces every register
- /// copy it can.
- ///
- extern char &SimpleRegisterCoalescingID;
-
/// TwoAddressInstruction pass - This pass reduces two-address instructions to
/// use two operands. This destroys SSA information but it is desired by
/// register allocators.
@@ -132,10 +125,10 @@ namespace llvm {
///
FunctionPass *createDefaultPBQPRegisterAllocator();
- /// SimpleRegisterCoalescing Pass - Coalesce all copies possible. Can run
+ /// RegisterCoalescer Pass - Coalesce all copies possible. Can run
/// independently of the register allocator.
///
- RegisterCoalescer *createSimpleRegisterCoalescer();
+ RegisterCoalescer *createRegisterCoalescer();
/// PrologEpilogCodeInserter Pass - This pass inserts prolog and epilog code,
/// and eliminates abstract frame references.
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index 8139c65..bce3ec7 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -161,7 +161,8 @@ namespace llvm {
PBQP::PBQPNum benefit);
};
- FunctionPass* createPBQPRegisterAllocator(std::auto_ptr<PBQPBuilder> builder);
+ FunctionPass* createPBQPRegisterAllocator(std::auto_ptr<PBQPBuilder> builder,
+ char *customPassID=0);
}
#endif /* LLVM_CODEGEN_REGALLOCPBQP_H */
diff --git a/include/llvm/CodeGen/RegisterCoalescer.h b/include/llvm/CodeGen/RegisterCoalescer.h
deleted file mode 100644
index af0b394..0000000
--- a/include/llvm/CodeGen/RegisterCoalescer.h
+++ /dev/null
@@ -1,244 +0,0 @@
-//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the abstract interface for register coalescers,
-// allowing them to interact with and query register allocators.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/IncludeFile.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/ADT/SmallPtrSet.h"
-
-#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
-#define LLVM_CODEGEN_REGISTER_COALESCER_H
-
-namespace llvm {
-
- class MachineFunction;
- class RegallocQuery;
- class AnalysisUsage;
- class MachineInstr;
- class TargetRegisterInfo;
- class TargetRegisterClass;
- class TargetInstrInfo;
-
- /// An abstract interface for register coalescers. Coalescers must
- /// implement this interface to be part of the coalescer analysis
- /// group.
- class RegisterCoalescer {
- public:
- static char ID; // Class identification, replacement for typeinfo
- RegisterCoalescer() {}
- virtual ~RegisterCoalescer(); // We want to be subclassed
-
- /// Run the coalescer on this function, providing interference
- /// data to query. Return whether we removed any copies.
- virtual bool coalesceFunction(MachineFunction &mf,
- RegallocQuery &ifd) = 0;
-
- /// Reset state. Can be used to allow a coalescer run by
- /// PassManager to be run again by the register allocator.
- virtual void reset(MachineFunction &mf) {}
-
- /// Register allocators must call this from their own
- /// getAnalysisUsage to cover the case where the coalescer is not
- /// a Pass in the proper sense and isn't managed by PassManager.
- /// PassManager needs to know which analyses to make available and
- /// which to invalidate when running the register allocator or any
- /// pass that might call coalescing. The long-term solution is to
- /// allow hierarchies of PassManagers.
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {}
- };
-
- /// An abstract interface for register allocators to interact with
- /// coalescers
- ///
- /// Example:
- ///
- /// This is simply an example of how to use the RegallocQuery
- /// interface. It is not meant to be used in production.
- ///
- /// class LinearScanRegallocQuery : public RegallocQuery {
- /// private:
- /// const LiveIntervals \&li;
- ///
- /// public:
- /// LinearScanRegallocQuery(LiveIntervals &intervals)
- /// : li(intervals) {}
- ///
- /// /// This is pretty slow and conservative, but since linear scan
- /// /// allocation doesn't pre-compute interference information it's
- /// /// the best we can do. Coalescers are always free to ignore this
- /// /// and implement their own discovery strategy. See
- /// /// SimpleRegisterCoalescing for an example.
- /// void getInterferences(IntervalSet &interferences,
- /// const LiveInterval &a) const {
- /// for(LiveIntervals::const_iterator iv = li.begin(),
- /// ivend = li.end();
- /// iv != ivend;
- /// ++iv) {
- /// if (interfere(a, iv->second)) {
- /// interferences.insert(&iv->second);
- /// }
- /// }
- /// }
- ///
- /// /// This is *really* slow and stupid. See above.
- /// int getNumberOfInterferences(const LiveInterval &a) const {
- /// IntervalSet intervals;
- /// getInterferences(intervals, a);
- /// return intervals.size();
- /// }
- /// };
- ///
- /// In the allocator:
- ///
- /// RegisterCoalescer &coalescer = getAnalysis<RegisterCoalescer>();
- ///
- /// // We don't reset the coalescer so if it's already been run this
- /// // takes almost no time.
- /// LinearScanRegallocQuery ifd(*li_);
- /// coalescer.coalesceFunction(fn, ifd);
- ///
- class RegallocQuery {
- public:
- typedef SmallPtrSet<const LiveInterval *, 8> IntervalSet;
-
- virtual ~RegallocQuery() {}
-
- /// Return whether two live ranges interfere.
- virtual bool interfere(const LiveInterval &a,
- const LiveInterval &b) const {
- // A naive test
- return a.overlaps(b);
- }
-
- /// Return the set of intervals that interfere with this one.
- virtual void getInterferences(IntervalSet &interferences,
- const LiveInterval &a) const = 0;
-
- /// This can often be cheaper than actually returning the
- /// interferences.
- virtual int getNumberOfInterferences(const LiveInterval &a) const = 0;
-
- /// Make any data structure updates necessary to reflect
- /// coalescing or other modifications.
- virtual void updateDataForMerge(const LiveInterval &a,
- const LiveInterval &b,
- const MachineInstr &copy) {}
-
- /// Allow the register allocator to communicate when it doesn't
- /// want a copy coalesced. This may be due to assumptions made by
- /// the allocator about various invariants and so this question is
- /// a matter of legality, not performance. Performance decisions
- /// about which copies to coalesce should be made by the
- /// coalescer.
- virtual bool isLegalToCoalesce(const MachineInstr &inst) const {
- return true;
- }
- };
-
-
- /// CoalescerPair - A helper class for register coalescers. When deciding if
- /// two registers can be coalesced, CoalescerPair can determine if a copy
- /// instruction would become an identity copy after coalescing.
- class CoalescerPair {
- const TargetInstrInfo &tii_;
- const TargetRegisterInfo &tri_;
-
- /// dstReg_ - The register that will be left after coalescing. It can be a
- /// virtual or physical register.
- unsigned dstReg_;
-
- /// srcReg_ - the virtual register that will be coalesced into dstReg.
- unsigned srcReg_;
-
- /// subIdx_ - The subregister index of srcReg_ in dstReg_. It is possible to
- /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
- /// virtual register.
- unsigned subIdx_;
-
- /// partial_ - True when the original copy was a partial subregister copy.
- bool partial_;
-
- /// crossClass_ - True when both regs are virtual, and newRC is constrained.
- bool crossClass_;
-
- /// flipped_ - True when DstReg and SrcReg are reversed from the original copy
- /// instruction.
- bool flipped_;
-
- /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
- /// is a physreg.
- const TargetRegisterClass *newRC_;
-
- /// compose - Compose subreg indices a and b, either may be 0.
- unsigned compose(unsigned, unsigned) const;
-
- /// isMoveInstr - Return true if MI is a move or subreg instruction.
- bool isMoveInstr(const MachineInstr *MI, unsigned &Src, unsigned &Dst,
- unsigned &SrcSub, unsigned &DstSub) const;
-
- public:
- CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
- : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
- partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
-
- /// setRegisters - set registers to match the copy instruction MI. Return
- /// false if MI is not a coalescable copy instruction.
- bool setRegisters(const MachineInstr*);
-
- /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
- /// because dstReg_ is a physical register, or subIdx_ is set.
- bool flip();
-
- /// isCoalescable - Return true if MI is a copy instruction that will become
- /// an identity copy after coalescing.
- bool isCoalescable(const MachineInstr*) const;
-
- /// isPhys - Return true if DstReg is a physical register.
- bool isPhys() const { return !newRC_; }
-
- /// isPartial - Return true if the original copy instruction did not copy the
- /// full register, but was a subreg operation.
- bool isPartial() const { return partial_; }
-
- /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller register class than DstReg's.
- bool isCrossClass() const { return crossClass_; }
-
- /// isFlipped - Return true when getSrcReg is the register being defined by
- /// the original copy instruction.
- bool isFlipped() const { return flipped_; }
-
- /// getDstReg - Return the register (virtual or physical) that will remain
- /// after coalescing.
- unsigned getDstReg() const { return dstReg_; }
-
- /// getSrcReg - Return the virtual register that will be coalesced away.
- unsigned getSrcReg() const { return srcReg_; }
-
- /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
- /// coalesced into, or 0.
- unsigned getSubIdx() const { return subIdx_; }
-
- /// getNewRC - Return the register class of the coalesced register.
- const TargetRegisterClass *getNewRC() const { return newRC_; }
- };
-}
-
-// Because of the way .a files work, we must force the SimpleRC
-// implementation to be pulled in if the RegisterCoalescing header is
-// included. Otherwise we run the risk of RegisterCoalescing being
-// used, but the default implementation not being linked into the tool
-// that uses it.
-FORCE_DEFINING_FILE_TO_BE_LINKED(RegisterCoalescer)
-FORCE_DEFINING_FILE_TO_BE_LINKED(SimpleRegisterCoalescing)
-
-#endif
diff --git a/include/llvm/CodeGen/RuntimeLibcalls.h b/include/llvm/CodeGen/RuntimeLibcalls.h
index 576be82..4bfd4ab 100644
--- a/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -22,7 +22,7 @@ namespace RTLIB {
/// RTLIB::Libcall enum - This enum defines all of the runtime library calls
/// the backend can emit. The various long double types cannot be merged,
/// because 80-bit library functions use "xf" and 128-bit use "tf".
- ///
+ ///
/// When adding PPCF128 functions here, note that their names generally need
/// to be overridden for Darwin with the xxx$LDBL128 form. See
/// PPCISelLowering.cpp.
@@ -46,6 +46,9 @@ namespace RTLIB {
MUL_I32,
MUL_I64,
MUL_I128,
+ MULO_I32,
+ MULO_I64,
+ MULO_I128,
SDIV_I8,
SDIV_I16,
SDIV_I32,
@@ -100,6 +103,10 @@ namespace RTLIB {
REM_F64,
REM_F80,
REM_PPCF128,
+ FMA_F32,
+ FMA_F64,
+ FMA_F80,
+ FMA_PPCF128,
POWI_F32,
POWI_F64,
POWI_F80,
diff --git a/include/llvm/CodeGen/ScheduleDAG.h b/include/llvm/CodeGen/ScheduleDAG.h
index 2f01948..1bbc6c5 100644
--- a/include/llvm/CodeGen/ScheduleDAG.h
+++ b/include/llvm/CodeGen/ScheduleDAG.h
@@ -34,7 +34,7 @@ namespace llvm {
class ScheduleDAG;
class SDNode;
class TargetInstrInfo;
- class TargetInstrDesc;
+ class MCInstrDesc;
class TargetMachine;
class TargetRegisterClass;
template<class Graph> class GraphWriter;
@@ -497,13 +497,19 @@ namespace llvm {
SUnit EntrySU; // Special node for the region entry.
SUnit ExitSU; // Special node for the region exit.
+#ifdef NDEBUG
+ static const bool StressSched = false;
+#else
+ bool StressSched;
+#endif
+
explicit ScheduleDAG(MachineFunction &mf);
virtual ~ScheduleDAG();
- /// getInstrDesc - Return the TargetInstrDesc of this SUnit.
+ /// getInstrDesc - Return the MCInstrDesc of this SUnit.
/// Return NULL for SDNodes without a machine opcode.
- const TargetInstrDesc *getInstrDesc(const SUnit *SU) const {
+ const MCInstrDesc *getInstrDesc(const SUnit *SU) const {
if (SU->isInstr()) return &SU->getInstr()->getDesc();
return getNodeDesc(SU->getNode());
}
@@ -573,8 +579,8 @@ namespace llvm {
void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
private:
- // Return the TargetInstrDesc of this SDNode or NULL.
- const TargetInstrDesc *getNodeDesc(const SDNode *Node) const;
+ // Return the MCInstrDesc of this SDNode or NULL.
+ const MCInstrDesc *getNodeDesc(const SDNode *Node) const;
};
class SUnitIterator : public std::iterator<std::forward_iterator_tag,
diff --git a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
index 118df28..060e89a 100644
--- a/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
+++ b/include/llvm/CodeGen/ScoreboardHazardRecognizer.h
@@ -25,7 +25,6 @@
namespace llvm {
class InstrItineraryData;
-class TargetInstrDesc;
class ScheduleDAG;
class SUnit;
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 1c42bef..3ec5ada 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -96,8 +96,12 @@ public:
return DbgValues.empty() && ByvalParmDbgValues.empty();
}
- SmallVector<SDDbgValue*,2> &getSDDbgValues(const SDNode *Node) {
- return DbgValMap[Node];
+ ArrayRef<SDDbgValue*> getSDDbgValues(const SDNode *Node) {
+ DenseMap<const SDNode*, SmallVector<SDDbgValue*, 2> >::iterator I =
+ DbgValMap.find(Node);
+ if (I != DbgValMap.end())
+ return I->second;
+ return ArrayRef<SDDbgValue*>();
}
typedef SmallVector<SDDbgValue*,32>::iterator DbgIterator;
@@ -898,7 +902,7 @@ public:
void AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter);
/// GetDbgValues - Get the debug values which reference the given SDNode.
- SmallVector<SDDbgValue*,2> &GetDbgValues(const SDNode* SD) {
+ ArrayRef<SDDbgValue*> GetDbgValues(const SDNode* SD) {
return DbgInfo->getSDDbgValues(SD);
}
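
The ArrayRef return type lets clients iterate a possibly empty debug-value
list without forcing a DenseMap insertion for nodes that have none. A minimal
sketch, assuming DAG and N are in scope:

    ArrayRef<SDDbgValue*> DVs = DAG.GetDbgValues(N);
    for (unsigned i = 0, e = DVs.size(); i != e; ++i)
      if (!DVs[i]->isInvalidated())
        ; // e.g. transfer the debug value to N's replacement node
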
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 9d265f1..a5c4201 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/ISDOpcodes.h"
@@ -496,11 +497,29 @@ public:
///
bool isOperandOf(SDNode *N) const;
- /// isPredecessorOf - Return true if this node is a predecessor of N. This
- /// node is either an operand of N or it can be reached by recursively
+ /// isPredecessorOf - Return true if this node is a predecessor of N.
+ /// NOTE: Implemented on top of hasPredecessor and every bit as
+ /// expensive. Use carefully.
+ bool isPredecessorOf(const SDNode *N) const { return N->hasPredecessor(this); }
+
+ /// hasPredecessor - Return true if N is a predecessor of this node.
+ /// N is either an operand of this node, or can be reached by recursively
+ /// traversing up the operands.
+ /// NOTE: This is an expensive method. Use it carefully.
+ bool hasPredecessor(const SDNode *N) const;
+
+ /// hasPredecessorHelper - Return true if N is a predecessor of this node.
+ /// N is either an operand of this node, or can be reached by recursively
/// traversing up the operands.
- /// NOTE: this is an expensive method. Use it carefully.
- bool isPredecessorOf(SDNode *N) const;
+ /// In this helper the Visited and Worklist sets are held externally to
+ /// cache predecessors over multiple invocations. If you want to test for
+ /// multiple predecessors, this method is preferable to multiple calls to
+ /// hasPredecessor. Be sure to clear Visited and Worklist if the DAG
+ /// changes.
+ /// NOTE: This is still very expensive. Use carefully.
+ bool hasPredecessorHelper(const SDNode *N,
+ SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallVector<const SDNode *, 16> &Worklist) const;
/// getNumOperands - Return the number of values used by this operation.
///
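
A short sketch of the cached-predecessor pattern described above: the same
Visited/Worklist pair is reused across queries as long as the DAG is unchanged
(Node, A and B are assumed SDNode pointers):

    SmallPtrSet<const SDNode *, 32> Visited;
    SmallVector<const SDNode *, 16> Worklist;
    bool APred = Node->hasPredecessorHelper(A, Visited, Worklist);
    bool BPred = Node->hasPredecessorHelper(B, Visited, Worklist);
    // If the DAG is mutated between queries, clear both containers first.
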
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 33ce675..6eb3180 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -140,6 +140,9 @@ namespace llvm {
return lie.getPointer();
}
+ /// Return true for a valid index.
+ operator bool() const { return isValid(); }
+
/// Print this index to the given raw_ostream.
void print(raw_ostream &os) const;
diff --git a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index 54e5751..711280e 100644
--- a/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -52,7 +52,7 @@ protected:
const MCSection *MergeableConst8Section;
const MCSection *MergeableConst16Section;
public:
- TargetLoweringObjectFileELF() {}
+ TargetLoweringObjectFileELF();
~TargetLoweringObjectFileELF() {}
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
@@ -131,7 +131,7 @@ class TargetLoweringObjectFileMachO : public TargetLoweringObjectFile {
const MCSection *LazySymbolPointerSection;
const MCSection *NonLazySymbolPointerSection;
public:
- TargetLoweringObjectFileMachO() {}
+ TargetLoweringObjectFileMachO();
~TargetLoweringObjectFileMachO() {}
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
@@ -207,7 +207,7 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
const MCSection *PDataSection;
const MCSection *XDataSection;
public:
- TargetLoweringObjectFileCOFF() {}
+ TargetLoweringObjectFileCOFF();
~TargetLoweringObjectFileCOFF() {}
virtual void Initialize(MCContext &Ctx, const TargetMachine &TM);
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 22d1622..424721b 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -83,7 +83,11 @@ namespace llvm {
isVoid = 35, // This has no value
- LAST_VALUETYPE = 36, // This always remains at the end of the list.
+ untyped = 36, // This value takes a register, but has
+ // unspecified type. The register class
+ // will be determined by the opcode.
+
+ LAST_VALUETYPE = 37, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index a1163f7..0cfb634 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -1,10 +1,10 @@
//===- ValueTypes.td - ValueType definitions ---------------*- tablegen -*-===//
-//
+//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
-//
+//
//===----------------------------------------------------------------------===//
//
// Value types - These values correspond to the register types defined in the
@@ -58,6 +58,7 @@ def v4f64 : ValueType<256, 32>; // 4 x f64 vector value
def x86mmx : ValueType<64 , 33>; // X86 MMX value
def FlagVT : ValueType<0 , 34>; // Pre-RA sched glue
def isVoid : ValueType<0 , 35>; // Produces no value
+def untyped: ValueType<8 , 36>; // Produces an untyped value
def MetadataVT: ValueType<0, 250>; // Metadata
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 755daa6..0b8a0ad 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -557,6 +557,9 @@
/* LLVM name for the native TargetInfo init function, if available */
#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
+/* LLVM name for the native MCAsmInfo init function, if available */
+#cmakedefine LLVM_NATIVE_MCASMINFO LLVMInitialize${LLVM_NATIVE_ARCH}MCAsmInfo
+
/* Define if this is Unixish platform */
#cmakedefine LLVM_ON_UNIX ${LLVM_ON_UNIX}
@@ -686,9 +689,6 @@
`char[]'. */
#undef YYTEXT_POINTER
-/* Define to a type to use for `mode_t' if it is not otherwise available. */
-#cmakedefine mode_t ${mode_t}
-
/* Define to a function replacing strtoll */
#cmakedefine strtoll ${strtoll}
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index 10a8935..0a716ea 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -573,6 +573,9 @@
/* LLVM name for the native AsmPrinter init function, if available */
#undef LLVM_NATIVE_ASMPRINTER
+/* LLVM name for the native MCAsmInfo init function, if available */
+#undef LLVM_NATIVE_MCASMINFO
+
/* LLVM name for the native Target init function, if available */
#undef LLVM_NATIVE_TARGET
diff --git a/include/llvm/Config/llvm-config.h.cmake b/include/llvm/Config/llvm-config.h.cmake
index ee81f7a..5f948a2 100644
--- a/include/llvm/Config/llvm-config.h.cmake
+++ b/include/llvm/Config/llvm-config.h.cmake
@@ -58,6 +58,9 @@
/* LLVM name for the native TargetInfo init function, if available */
#cmakedefine LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo
+/* LLVM name for the native MCAsmInfo init function, if available */
+#cmakedefine LLVM_NATIVE_MCASMINFO LLVMInitialize${LLVM_NATIVE_ARCH}MCAsmInfo
+
/* LLVM name for the native AsmPrinter init function, if available */
#cmakedefine LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter
diff --git a/include/llvm/Config/llvm-config.h.in b/include/llvm/Config/llvm-config.h.in
index 4766a7a..bc8ddce 100644
--- a/include/llvm/Config/llvm-config.h.in
+++ b/include/llvm/Config/llvm-config.h.in
@@ -58,6 +58,9 @@
/* LLVM name for the native TargetInfo init function, if available */
#undef LLVM_NATIVE_TARGETINFO
+/* LLVM name for the native MCAsmInfo init function, if available */
+#undef LLVM_NATIVE_MCASMINFO
+
/* LLVM name for the native AsmPrinter init function, if available */
#undef LLVM_NATIVE_ASMPRINTER
diff --git a/include/llvm/Constant.h b/include/llvm/Constant.h
index 5f32ce0..5e351c4 100644
--- a/include/llvm/Constant.h
+++ b/include/llvm/Constant.h
@@ -50,11 +50,11 @@ protected:
public:
/// isNullValue - Return true if this is the value that would be returned by
/// getNullValue.
- virtual bool isNullValue() const = 0;
+ bool isNullValue() const;
/// isNegativeZeroValue - Return true if the value is what would be returned
/// by getZeroValueForNegation.
- virtual bool isNegativeZeroValue() const { return isNullValue(); }
+ bool isNegativeZeroValue() const;
/// canTrap - Return true if evaluation of this constant could trap. This is
/// true for things like constant expressions that could divide by zero.
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index eabc3a5..01fca29 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -149,13 +149,7 @@ public:
static bool isValueValidForType(const Type *Ty, uint64_t V);
static bool isValueValidForType(const Type *Ty, int64_t V);
- /// This function will return true iff this constant represents the "null"
- /// value that would be returned by the getNullValue method.
- /// @returns true if this is the null integer value.
- /// @brief Determine if the value is null.
- virtual bool isNullValue() const {
- return Val == 0;
- }
+ bool isNegative() const { return Val.isNegative(); }
/// This is just a convenience method to make client code smaller for a
/// common code. It also correctly performs the comparison without the
@@ -263,22 +257,14 @@ public:
/// isValueValidForType - return true if Ty is big enough to represent V.
static bool isValueValidForType(const Type *Ty, const APFloat &V);
- inline const APFloat& getValueAPF() const { return Val; }
-
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue. For ConstantFP, this is +0.0, but not -0.0. To handle the
- /// two the same, use isZero().
- virtual bool isNullValue() const;
-
- /// isNegativeZeroValue - Return true if the value is what would be returned
- /// by getZeroValueForNegation.
- virtual bool isNegativeZeroValue() const {
- return Val.isZero() && Val.isNegative();
- }
+ inline const APFloat &getValueAPF() const { return Val; }
/// isZero - Return true if the value is positive or negative zero.
bool isZero() const { return Val.isZero(); }
+ /// isNegative - Return true if the sign bit is set.
+ bool isNegative() const { return Val.isNegative(); }
+
/// isNaN - Return true if the value is a NaN.
bool isNaN() const { return Val.isNaN(); }
@@ -324,10 +310,6 @@ protected:
public:
static ConstantAggregateZero* get(const Type *Ty);
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue.
- virtual bool isNullValue() const { return true; }
-
virtual void destroyConstant();
/// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -350,9 +332,7 @@ protected:
ConstantArray(const ArrayType *T, const std::vector<Constant*> &Val);
public:
// ConstantArray accessors
- static Constant *get(const ArrayType *T, const std::vector<Constant*> &V);
- static Constant *get(const ArrayType *T, Constant *const *Vals,
- unsigned NumVals);
+ static Constant *get(const ArrayType *T, ArrayRef<Constant*> V);
/// This method constructs a ConstantArray and initializes it with a text
/// string. The default behavior (AddNull==true) causes a null terminator to
@@ -389,10 +369,11 @@ public:
///
std::string getAsString() const;
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue. This always returns false because zero arrays are always
- /// created as ConstantAggregateZero objects.
- virtual bool isNullValue() const { return false; }
+ /// getAsCString - If isCString() is true for this array, then this method
+ /// converts the array (without the trailing null byte) to an std::string
+ /// and returns it. Otherwise, it asserts out.
+ ///
+ std::string getAsCString() const;
virtual void destroyConstant();
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
@@ -422,14 +403,29 @@ protected:
ConstantStruct(const StructType *T, const std::vector<Constant*> &Val);
public:
// ConstantStruct accessors
- static Constant *get(const StructType *T, const std::vector<Constant*> &V);
- static Constant *get(LLVMContext &Context,
- const std::vector<Constant*> &V, bool Packed);
- static Constant *get(LLVMContext &Context,
- Constant *const *Vals, unsigned NumVals, bool Packed);
- static Constant *get(LLVMContext &Context, bool Packed,
- Constant * Val, ...) END_WITH_NULL;
-
+ static Constant *get(const StructType *T, ArrayRef<Constant*> V);
+ static Constant *get(const StructType *T, ...) END_WITH_NULL;
+
+ /// getAnon - Return an anonymous struct that has the specified
+ /// elements. If the struct is possibly empty, then you must specify a
+ /// context.
+ static Constant *getAnon(ArrayRef<Constant*> V, bool Packed = false) {
+ return get(getTypeForElements(V, Packed), V);
+ }
+ static Constant *getAnon(LLVMContext &Ctx,
+ ArrayRef<Constant*> V, bool Packed = false) {
+ return get(getTypeForElements(Ctx, V, Packed), V);
+ }
+
+ /// getTypeForElements - Return an anonymous struct type to use for a constant
+ /// with the specified set of elements. The list must not be empty.
+ static StructType *getTypeForElements(ArrayRef<Constant*> V,
+ bool Packed = false);
+ /// getTypeForElements - This version of the method allows an empty list.
+ static StructType *getTypeForElements(LLVMContext &Ctx,
+ ArrayRef<Constant*> V,
+ bool Packed = false);
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -439,13 +435,6 @@ public:
return reinterpret_cast<const StructType*>(Value::getType());
}
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue. This always returns false because zero structs are always
- /// created as ConstantAggregateZero objects.
- virtual bool isNullValue() const {
- return false;
- }
-
virtual void destroyConstant();
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
@@ -476,8 +465,6 @@ protected:
public:
// ConstantVector accessors
static Constant *get(ArrayRef<Constant*> V);
- // FIXME: Eliminate this constructor form.
- static Constant *get(const VectorType *T, const std::vector<Constant*> &V);
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Constant);
@@ -489,11 +476,6 @@ public:
return reinterpret_cast<const VectorType*>(Value::getType());
}
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue. This always returns false because zero vectors are always
- /// created as ConstantAggregateZero objects.
- virtual bool isNullValue() const { return false; }
-
/// This function will return true iff every element in this vector constant
/// is set to all ones.
/// @returns true iff this constant's elements are all set to all ones.
@@ -542,10 +524,6 @@ public:
/// get() - Static factory methods - Return objects of the specified value
static ConstantPointerNull *get(const PointerType *T);
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue.
- virtual bool isNullValue() const { return true; }
-
virtual void destroyConstant();
/// getType - Specialize the getType() method to always return a PointerType,
@@ -582,10 +560,6 @@ public:
Function *getFunction() const { return (Function*)Op<0>().get(); }
BasicBlock *getBasicBlock() const { return (BasicBlock*)Op<1>().get(); }
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue.
- virtual bool isNullValue() const { return false; }
-
virtual void destroyConstant();
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
@@ -623,35 +597,6 @@ protected:
setValueSubclassData(Opcode);
}
- // These private methods are used by the type resolution code to create
- // ConstantExprs in intermediate forms.
- static Constant *getTy(const Type *Ty, unsigned Opcode,
- Constant *C1, Constant *C2,
- unsigned Flags = 0);
- static Constant *getCompareTy(unsigned short pred, Constant *C1,
- Constant *C2);
- static Constant *getSelectTy(const Type *Ty,
- Constant *C1, Constant *C2, Constant *C3);
- template<typename IndexTy>
- static Constant *getGetElementPtrTy(const Type *Ty, Constant *C,
- IndexTy const *Idxs, unsigned NumIdxs,
- bool InBounds);
- static Constant *getExtractElementTy(const Type *Ty, Constant *Val,
- Constant *Idx);
- static Constant *getInsertElementTy(const Type *Ty, Constant *Val,
- Constant *Elt, Constant *Idx);
- static Constant *getShuffleVectorTy(const Type *Ty, Constant *V1,
- Constant *V2, Constant *Mask);
- static Constant *getExtractValueTy(const Type *Ty, Constant *Agg,
- const unsigned *Idxs, unsigned NumIdxs);
- static Constant *getInsertValueTy(const Type *Ty, Constant *Agg,
- Constant *Val,
- const unsigned *Idxs, unsigned NumIdxs);
- template<typename IndexTy>
- static Constant *getGetElementPtrImpl(Constant *C,
- IndexTy const *IdxList,
- unsigned NumIdx, bool InBounds);
-
public:
// Static methods to construct a ConstantExpr of different kinds. Note that
// these methods may return a object that is not an instance of the
@@ -822,9 +767,7 @@ public:
/// Select constant expr
///
- static Constant *getSelect(Constant *C, Constant *V1, Constant *V2) {
- return getSelectTy(V1->getType(), C, V1, V2);
- }
+ static Constant *getSelect(Constant *C, Constant *V1, Constant *V2);
/// get - Return a binary or shift operator constant expression,
/// folding if possible.
@@ -846,7 +789,9 @@ public:
///
static Constant *getGetElementPtr(Constant *C,
Constant *const *IdxList, unsigned NumIdx,
- bool InBounds = false);
+ bool InBounds = false) {
+ return getGetElementPtr(C, (Value**)IdxList, NumIdx, InBounds);
+ }
static Constant *getGetElementPtr(Constant *C,
Value *const *IdxList, unsigned NumIdx,
bool InBounds = false);
@@ -867,14 +812,9 @@ public:
static Constant *getExtractElement(Constant *Vec, Constant *Idx);
static Constant *getInsertElement(Constant *Vec, Constant *Elt,Constant *Idx);
static Constant *getShuffleVector(Constant *V1, Constant *V2, Constant *Mask);
- static Constant *getExtractValue(Constant *Agg,
- const unsigned *IdxList, unsigned NumIdx);
+ static Constant *getExtractValue(Constant *Agg, ArrayRef<unsigned> Idxs);
static Constant *getInsertValue(Constant *Agg, Constant *Val,
- const unsigned *IdxList, unsigned NumIdx);
-
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue.
- virtual bool isNullValue() const { return false; }
+ ArrayRef<unsigned> Idxs);
/// getOpcode - Return the opcode at the root of this constant expression
unsigned getOpcode() const { return getSubclassDataFromValue(); }
@@ -895,10 +835,18 @@ public:
Constant *getWithOperandReplaced(unsigned OpNo, Constant *Op) const;
/// getWithOperands - This returns the current constant expression with the
- /// operands replaced with the specified values. The specified operands must
- /// match count and type with the existing ones.
- Constant *getWithOperands(ArrayRef<Constant*> Ops) const;
-
+ /// operands replaced with the specified values. The specified array must
+ /// have the same number of operands as our current one.
+ Constant *getWithOperands(ArrayRef<Constant*> Ops) const {
+ return getWithOperands(Ops, getType());
+ }
+
+ /// getWithOperands - This returns the current constant expression with the
+ /// operands replaced with the specified values and with the specified result
+ /// type. The specified array must have the same number of operands as our
+ /// current one.
+ Constant *getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const;
+
virtual void destroyConstant();
virtual void replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U);
@@ -950,10 +898,6 @@ public:
///
static UndefValue *get(const Type *T);
- /// isNullValue - Return true if this is the value that would be returned by
- /// getNullValue.
- virtual bool isNullValue() const { return false; }
-
virtual void destroyConstant();
/// Methods for support type inquiry through isa, cast, and dyn_cast:
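
A hedged sketch pulling the ArrayRef-based Constants.h changes together:
ConstantStruct::getAnon, the ArrayRef form of ConstantArray::get, and the new
getExtractValue signature (Ctx is an assumed LLVMContext):

    const Type *Int32Ty = Type::getInt32Ty(Ctx);
    Constant *Elts[] = { ConstantInt::get(Int32Ty, 1),
                         ConstantInt::get(Int32Ty, 2) };
    Constant *S = ConstantStruct::getAnon(Elts);  // anonymous, unpacked struct
    Constant *A = ConstantArray::get(ArrayType::get(Int32Ty, 2), Elts);
    unsigned Idxs[] = { 0 };
    Constant *First = ConstantExpr::getExtractValue(S, Idxs); // i32 1
    (void)A; (void)First;
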
diff --git a/include/llvm/DefaultPasses.h b/include/llvm/DefaultPasses.h
index e2e58a5b..2e4145b 100644
--- a/include/llvm/DefaultPasses.h
+++ b/include/llvm/DefaultPasses.h
@@ -29,7 +29,6 @@ extern unsigned char ConstantMergeID;
extern unsigned char CorrelatedValuePropagationID;
extern unsigned char DeadArgEliminationID;
extern unsigned char DeadStoreEliminationID;
-extern unsigned char DeadTypeEliminationID;
extern unsigned char EarlyCSEID;
extern unsigned char FunctionAttrsID;
extern unsigned char FunctionInliningID;
diff --git a/include/llvm/DerivedTypes.h b/include/llvm/DerivedTypes.h
index f1cb330..acb28de 100644
--- a/include/llvm/DerivedTypes.h
+++ b/include/llvm/DerivedTypes.h
@@ -19,76 +19,27 @@
#define LLVM_DERIVED_TYPES_H
#include "llvm/Type.h"
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
class Value;
-template<class ValType, class TypeClass> class TypeMap;
-class FunctionValType;
-class ArrayValType;
-class StructValType;
-class PointerValType;
-class VectorValType;
-class IntegerValType;
class APInt;
class LLVMContext;
-
-class DerivedType : public Type {
- friend class Type;
-
-protected:
- explicit DerivedType(LLVMContext &C, TypeID id) : Type(C, id) {}
-
- /// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type
- /// that the current type has transitioned from being abstract to being
- /// concrete.
- ///
- void notifyUsesThatTypeBecameConcrete();
-
- /// dropAllTypeUses - When this (abstract) type is resolved to be equal to
- /// another (more concrete) type, we must eliminate all references to other
- /// types, to avoid some circular reference problems.
- ///
- void dropAllTypeUses();
-
-public:
-
- //===--------------------------------------------------------------------===//
- // Abstract Type handling methods - These types have special lifetimes, which
- // are managed by (add|remove)AbstractTypeUser. See comments in
- // AbstractTypeUser.h for more information.
-
- /// refineAbstractTypeTo - This function is used to when it is discovered that
- /// the 'this' abstract type is actually equivalent to the NewType specified.
- /// This causes all users of 'this' to switch to reference the more concrete
- /// type NewType and for 'this' to be deleted.
- ///
- void refineAbstractTypeTo(const Type *NewType);
-
- void dump() const { Type::dump(); }
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const DerivedType *) { return true; }
- static inline bool classof(const Type *T) {
- return T->isDerivedType();
- }
-};
+template<typename T> class ArrayRef;
+class StringRef;
/// Class to represent integer types. Note that this class is also used to
/// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and
/// Int64Ty.
/// @brief Integer representation type
-class IntegerType : public DerivedType {
+class IntegerType : public Type {
friend class LLVMContextImpl;
protected:
- explicit IntegerType(LLVMContext &C, unsigned NumBits) :
- DerivedType(C, IntegerTyID) {
+ explicit IntegerType(LLVMContext &C, unsigned NumBits) : Type(C, IntegerTyID){
setSubclassData(NumBits);
}
- friend class TypeMap<IntegerValType, IntegerType>;
public:
/// This enum is just used to hold constants we need for IntegerType.
enum {
@@ -103,7 +54,7 @@ public:
/// that instance will be returned. Otherwise a new one will be created. Only
/// one instance with a given NumBits value is ever created.
/// @brief Get or create an IntegerType instance.
- static const IntegerType* get(LLVMContext &C, unsigned NumBits);
+ static IntegerType *get(LLVMContext &C, unsigned NumBits);
/// @brief Get the number of bits in this IntegerType
unsigned getBitWidth() const { return getSubclassData(); }
@@ -132,7 +83,7 @@ public:
/// @brief Is this a power-of-2 byte-width IntegerType ?
bool isPowerOf2ByteWidth() const;
- // Methods for support type inquiry through isa, cast, and dyn_cast:
+ // Methods for support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const IntegerType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == IntegerTyID;
@@ -142,34 +93,22 @@ public:
/// FunctionType - Class to represent function types
///
-class FunctionType : public DerivedType {
- friend class TypeMap<FunctionValType, FunctionType>;
- bool isVarArgs;
-
+class FunctionType : public Type {
FunctionType(const FunctionType &); // Do not implement
const FunctionType &operator=(const FunctionType &); // Do not implement
- FunctionType(const Type *Result, ArrayRef<const Type*> Params,
- bool IsVarArgs);
+ FunctionType(const Type *Result, ArrayRef<Type*> Params, bool IsVarArgs);
public:
/// FunctionType::get - This static method is the primary way of constructing
/// a FunctionType.
///
- static FunctionType *get(
- const Type *Result, ///< The result type
- ArrayRef<const Type*> Params, ///< The types of the parameters
- bool isVarArg ///< Whether this is a variable argument length function
- );
+ static FunctionType *get(const Type *Result,
+ ArrayRef<Type*> Params, bool isVarArg);
/// FunctionType::get - Create a FunctionType taking no parameters.
///
- static FunctionType *get(
- const Type *Result, ///< The result type
- bool isVarArg ///< Whether this is a variable argument length function
- ) {
- return get(Result, ArrayRef<const Type *>(), isVarArg);
- }
-
+ static FunctionType *get(const Type *Result, bool isVarArg);
+
/// isValidReturnType - Return true if the specified type is valid as a return
/// type.
static bool isValidReturnType(const Type *RetTy);
@@ -178,26 +117,22 @@ public:
/// argument type.
static bool isValidArgumentType(const Type *ArgTy);
- inline bool isVarArg() const { return isVarArgs; }
- inline const Type *getReturnType() const { return ContainedTys[0]; }
+ bool isVarArg() const { return getSubclassData(); }
+ Type *getReturnType() const { return ContainedTys[0]; }
typedef Type::subtype_iterator param_iterator;
param_iterator param_begin() const { return ContainedTys + 1; }
param_iterator param_end() const { return &ContainedTys[NumContainedTys]; }
- // Parameter type accessors...
- const Type *getParamType(unsigned i) const { return ContainedTys[i+1]; }
+ // Parameter type accessors.
+ Type *getParamType(unsigned i) const { return ContainedTys[i+1]; }
/// getNumParams - Return the number of fixed parameters this function type
/// requires. This does not consider varargs.
///
unsigned getNumParams() const { return NumContainedTys - 1; }
- // Implement the AbstractTypeUser interface.
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
- virtual void typeBecameConcrete(const DerivedType *AbsTy);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
+ // Methods to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const FunctionType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == FunctionTyID;
@@ -206,22 +141,21 @@ public:
/// CompositeType - Common super class of ArrayType, StructType, PointerType
-/// and VectorType
-class CompositeType : public DerivedType {
+/// and VectorType.
+class CompositeType : public Type {
protected:
- inline explicit CompositeType(LLVMContext &C, TypeID id) :
- DerivedType(C, id) { }
+ explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) { }
public:
/// getTypeAtIndex - Given an index value into the type, return the type of
/// the element.
///
- virtual const Type *getTypeAtIndex(const Value *V) const = 0;
- virtual const Type *getTypeAtIndex(unsigned Idx) const = 0;
- virtual bool indexValid(const Value *V) const = 0;
- virtual bool indexValid(unsigned Idx) const = 0;
+ Type *getTypeAtIndex(const Value *V) const;
+ Type *getTypeAtIndex(unsigned Idx) const;
+ bool indexValid(const Value *V) const;
+ bool indexValid(unsigned Idx) const;
- // Methods for support type inquiry through isa, cast, and dyn_cast:
+ // Methods to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const CompositeType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID ||
@@ -232,69 +166,114 @@ public:
};
-/// StructType - Class to represent struct types
+/// StructType - Class to represent struct types, both normal and packed.
+/// Besides being optionally packed, a struct can either be "anonymous", in
+/// which case it is uniqued by structural equivalence, or it can have an
+/// identity, in which case it is unique when created and may optionally be
+/// given a name.
///
class StructType : public CompositeType {
- friend class TypeMap<StructValType, StructType>;
StructType(const StructType &); // Do not implement
const StructType &operator=(const StructType &); // Do not implement
- StructType(LLVMContext &C, ArrayRef<const Type*> Types, bool isPacked);
+ StructType(LLVMContext &C)
+ : CompositeType(C, StructTyID), SymbolTableEntry(0) {}
+ enum {
+ // This is the contents of the SubClassData field.
+ SCDB_HasBody = 1,
+ SCDB_Packed = 2,
+ SCDB_IsAnonymous = 4
+ };
+
+ /// SymbolTableEntry - For a struct with an identity that also has a name,
+ /// this is a pointer to the symbol table entry (maintained by LLVMContext)
+ /// for the struct. This is null if the type is an anonymous struct or an
+ /// identified struct whose name is empty.
+ ///
+ void *SymbolTableEntry;
public:
+ ~StructType() {
+ delete [] ContainedTys; // Delete the body.
+ }
+
+ /// StructType::createNamed - This creates a named struct with no body
+ /// specified. If the name is empty, it creates an unnamed struct, which has
+ /// a unique identity but no actual name.
+ static StructType *createNamed(LLVMContext &Context, StringRef Name);
+
+ static StructType *createNamed(StringRef Name, ArrayRef<Type*> Elements,
+ bool isPacked = false);
+ static StructType *createNamed(LLVMContext &Context, StringRef Name,
+ ArrayRef<Type*> Elements,
+ bool isPacked = false);
+ static StructType *createNamed(StringRef Name, Type *elt1, ...) END_WITH_NULL;
+
/// StructType::get - This static method is the primary way to create a
/// StructType.
- ///
- static StructType *get(LLVMContext &Context,
- ArrayRef<const Type*> Params,
- bool isPacked=false);
+ static StructType *get(LLVMContext &Context, ArrayRef<Type*> Elements,
+ bool isPacked = false);
/// StructType::get - Create an empty structure type.
///
- static StructType *get(LLVMContext &Context, bool isPacked=false) {
- return get(Context, llvm::ArrayRef<const Type*>(), isPacked);
- }
+ static StructType *get(LLVMContext &Context, bool isPacked = false);
+
+ /// StructType::get - This static method is a convenience method for creating
+ /// structure types by specifying the elements as arguments. Note that this
+ /// method always returns a non-packed struct, and requires at least one
+ /// element type.
+ static StructType *get(Type *elt1, ...) END_WITH_NULL;
- /// StructType::get - This static method is a convenience method for
- /// creating structure types by specifying the elements as arguments.
- /// Note that this method always returns a non-packed struct. To get
- /// an empty struct, pass NULL, NULL.
- static StructType *get(LLVMContext &Context,
- const Type *type, ...) END_WITH_NULL;
+ bool isPacked() const { return (getSubclassData() & SCDB_Packed) != 0; }
+
+ /// isAnonymous - Return true if this type is uniqued by structural
+ /// equivalence, false if it has an identity.
+ bool isAnonymous() const {return (getSubclassData() & SCDB_IsAnonymous) != 0;}
+
+ /// isOpaque - Return true if this is a type with an identity that has no body
+ /// specified yet. Such types print as 'opaque' in .ll files.
+ bool isOpaque() const { return (getSubclassData() & SCDB_HasBody) == 0; }
+
+ /// hasName - Return true if this is a named struct that has a non-empty name.
+ bool hasName() const { return SymbolTableEntry != 0; }
+
+ /// getName - Return the name for this struct type if it has an identity.
+ /// This may return an empty string for an unnamed struct type. Do not call
+ /// this on an anonymous type.
+ StringRef getName() const;
+
+ /// setName - Change the name of this type to the specified name, or to a name
+ /// with a suffix if there is a collision. Do not call this on an anonymous
+ /// type.
+ void setName(StringRef Name);
+ /// setBody - Specify a body for an opaque type.
+ void setBody(ArrayRef<Type*> Elements, bool isPacked = false);
+ void setBody(Type *elt1, ...) END_WITH_NULL;
+
/// isValidElementType - Return true if the specified type is valid as an
/// element type.
static bool isValidElementType(const Type *ElemTy);
+
- // Iterator access to the elements
+ // Iterator access to the elements.
typedef Type::subtype_iterator element_iterator;
element_iterator element_begin() const { return ContainedTys; }
element_iterator element_end() const { return &ContainedTys[NumContainedTys];}
+ /// isLayoutIdentical - Return true if this is layout identical to the
+ /// specified struct.
+ bool isLayoutIdentical(const StructType *Other) const;
+
// Random access to the elements
unsigned getNumElements() const { return NumContainedTys; }
- const Type *getElementType(unsigned N) const {
+ Type *getElementType(unsigned N) const {
assert(N < NumContainedTys && "Element number out of range!");
return ContainedTys[N];
}
- /// getTypeAtIndex - Given an index value into the type, return the type of
- /// the element. For a structure type, this must be a constant value...
- ///
- virtual const Type *getTypeAtIndex(const Value *V) const;
- virtual const Type *getTypeAtIndex(unsigned Idx) const;
- virtual bool indexValid(const Value *V) const;
- virtual bool indexValid(unsigned Idx) const;
-
- // Implement the AbstractTypeUser interface.
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
- virtual void typeBecameConcrete(const DerivedType *AbsTy);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
+ // Methods to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const StructType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == StructTyID;
}
-
- bool isPacked() const { return (0 != getSubclassData()) ? true : false; }
};
/// SequentialType - This is the superclass of the array, pointer and vector
@@ -306,38 +285,21 @@ public:
/// components out in memory identically.
///
class SequentialType : public CompositeType {
- PATypeHandle ContainedType; ///< Storage for the single contained type
+ Type *ContainedType; ///< Storage for the single contained type.
SequentialType(const SequentialType &); // Do not implement!
const SequentialType &operator=(const SequentialType &); // Do not implement!
- // avoiding warning: 'this' : used in base member initializer list
- SequentialType* this_() { return this; }
protected:
- SequentialType(TypeID TID, const Type *ElType)
- : CompositeType(ElType->getContext(), TID), ContainedType(ElType, this_()) {
+ SequentialType(TypeID TID, Type *ElType)
+ : CompositeType(ElType->getContext(), TID), ContainedType(ElType) {
ContainedTys = &ContainedType;
NumContainedTys = 1;
}
public:
- inline const Type *getElementType() const { return ContainedTys[0]; }
-
- virtual bool indexValid(const Value *V) const;
- virtual bool indexValid(unsigned) const {
- return true;
- }
-
- /// getTypeAtIndex - Given an index value into the type, return the type of
- /// the element. For sequential types, there is only one subtype...
- ///
- virtual const Type *getTypeAtIndex(const Value *) const {
- return ContainedTys[0];
- }
- virtual const Type *getTypeAtIndex(unsigned) const {
- return ContainedTys[0];
- }
+ Type *getElementType() const { return ContainedTys[0]; }
- // Methods for support type inquiry through isa, cast, and dyn_cast:
+ // Methods to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const SequentialType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID ||
@@ -347,15 +309,14 @@ public:
};
-/// ArrayType - Class to represent array types
+/// ArrayType - Class to represent array types.
///
class ArrayType : public SequentialType {
- friend class TypeMap<ArrayValType, ArrayType>;
uint64_t NumElements;
ArrayType(const ArrayType &); // Do not implement
const ArrayType &operator=(const ArrayType &); // Do not implement
- ArrayType(const Type *ElType, uint64_t NumEl);
+ ArrayType(Type *ElType, uint64_t NumEl);
public:
/// ArrayType::get - This static method is the primary way to construct an
/// ArrayType
@@ -366,31 +327,26 @@ public:
/// element type.
static bool isValidElementType(const Type *ElemTy);
- inline uint64_t getNumElements() const { return NumElements; }
+ uint64_t getNumElements() const { return NumElements; }
- // Implement the AbstractTypeUser interface.
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
- virtual void typeBecameConcrete(const DerivedType *AbsTy);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
+ // Methods to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const ArrayType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID;
}
};
-/// VectorType - Class to represent vector types
+/// VectorType - Class to represent vector types.
///
class VectorType : public SequentialType {
- friend class TypeMap<VectorValType, VectorType>;
unsigned NumElements;
VectorType(const VectorType &); // Do not implement
const VectorType &operator=(const VectorType &); // Do not implement
- VectorType(const Type *ElType, unsigned NumEl);
+ VectorType(Type *ElType, unsigned NumEl);
public:
/// VectorType::get - This static method is the primary way to construct a
- /// VectorType
+ /// VectorType.
///
static VectorType *get(const Type *ElementType, unsigned NumElements);
@@ -400,7 +356,7 @@ public:
///
static VectorType *getInteger(const VectorType *VTy) {
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
+ Type *EltTy = IntegerType::get(VTy->getContext(), EltBits);
return VectorType::get(EltTy, VTy->getNumElements());
}
@@ -410,7 +366,7 @@ public:
///
static VectorType *getExtendedElementVectorType(const VectorType *VTy) {
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
- const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
+ Type *EltTy = IntegerType::get(VTy->getContext(), EltBits * 2);
return VectorType::get(EltTy, VTy->getNumElements());
}
@@ -422,7 +378,7 @@ public:
unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
assert((EltBits & 1) == 0 &&
"Cannot truncate vector element with odd bit-width");
- const Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
+ Type *EltTy = IntegerType::get(VTy->getContext(), EltBits / 2);
return VectorType::get(EltTy, VTy->getNumElements());
}
@@ -431,18 +387,14 @@ public:
static bool isValidElementType(const Type *ElemTy);
/// @brief Return the number of elements in the Vector type.
- inline unsigned getNumElements() const { return NumElements; }
+ unsigned getNumElements() const { return NumElements; }
/// @brief Return the number of bits in the Vector type.
- inline unsigned getBitWidth() const {
+ unsigned getBitWidth() const {
return NumElements * getElementType()->getPrimitiveSizeInBits();
}
- // Implement the AbstractTypeUser interface.
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
- virtual void typeBecameConcrete(const DerivedType *AbsTy);
-
- // Methods for support type inquiry through isa, cast, and dyn_cast:
+ // Methods to support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const VectorType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == VectorTyID;
@@ -450,15 +402,12 @@ public:
};
-/// PointerType - Class to represent pointers
+/// PointerType - Class to represent pointers.
///
class PointerType : public SequentialType {
- friend class TypeMap<PointerValType, PointerType>;
- unsigned AddressSpace;
-
PointerType(const PointerType &); // Do not implement
const PointerType &operator=(const PointerType &); // Do not implement
- explicit PointerType(const Type *ElType, unsigned AddrSpace);
+ explicit PointerType(Type *ElType, unsigned AddrSpace);
public:
/// PointerType::get - This constructs a pointer to an object of the specified
/// type in a numbered address space.
@@ -475,39 +424,15 @@ public:
static bool isValidElementType(const Type *ElemTy);
/// @brief Return the address space of the Pointer type.
- inline unsigned getAddressSpace() const { return AddressSpace; }
-
- // Implement the AbstractTypeUser interface.
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
- virtual void typeBecameConcrete(const DerivedType *AbsTy);
+ inline unsigned getAddressSpace() const { return getSubclassData(); }
- // Implement support type inquiry through isa, cast, and dyn_cast:
+ // Implement support for type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const PointerType *) { return true; }
static inline bool classof(const Type *T) {
return T->getTypeID() == PointerTyID;
}
};
-
-/// OpaqueType - Class to represent abstract types
-///
-class OpaqueType : public DerivedType {
- friend class LLVMContextImpl;
- OpaqueType(const OpaqueType &); // DO NOT IMPLEMENT
- const OpaqueType &operator=(const OpaqueType &); // DO NOT IMPLEMENT
- OpaqueType(LLVMContext &C);
-public:
- /// OpaqueType::get - Static factory method for the OpaqueType class...
- ///
- static OpaqueType *get(LLVMContext &C);
-
- // Implement support for type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const OpaqueType *) { return true; }
- static inline bool classof(const Type *T) {
- return T->getTypeID() == OpaqueTyID;
- }
-};
-
} // End llvm namespace
#endif
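
Taken together, these changes replace OpaqueType and refineAbstractTypeTo with identified structs whose bodies are filled in after creation. A hedged sketch of the new idiom for a self-referential type, using only declarations from this header (Ctx is an assumed LLVMContext):

  // An identified struct with no body; isOpaque() is true for now.
  StructType *Node = StructType::createNamed(Ctx, "Node");
  // Pointers to it can be formed before the body exists.
  PointerType *NodePtr = PointerType::get(Node, /*AddressSpace=*/0);
  // Fill in the body: { i32, Node* }. No abstract-type refinement needed.
  Type *Fields[] = { IntegerType::get(Ctx, 32), NodePtr };
  Node->setBody(Fields, /*isPacked=*/false);
  // FunctionType now takes ArrayRef<Type*> instead of ArrayRef<const Type*>.
  Type *Params[] = { NodePtr };
  FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Params, false);
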
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 3dc65e3..724b9f0 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -53,6 +53,7 @@ class RuntimeDyld {
// RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
// interface.
RuntimeDyldImpl *Dyld;
+ RTDyldMemoryManager *MM;
public:
RuntimeDyld(RTDyldMemoryManager*);
~RuntimeDyld();
diff --git a/include/llvm/Function.h b/include/llvm/Function.h
index 1edc176..0aa5b2a 100644
--- a/include/llvm/Function.h
+++ b/include/llvm/Function.h
@@ -128,8 +128,8 @@ public:
~Function();
- const Type *getReturnType() const; // Return the type of the ret val
- const FunctionType *getFunctionType() const; // Return the FunctionType for me
+ Type *getReturnType() const; // Return the type of the ret val
+ FunctionType *getFunctionType() const; // Return the FunctionType for me
/// getContext - Return a pointer to the LLVMContext associated with this
/// function, or NULL if this function is not bound to a context yet.
@@ -139,12 +139,6 @@ public:
/// arguments.
bool isVarArg() const;
- /// isDeclaration - Is the body of this function unknown? (The basic block
- /// list is empty if so.) This is true for function declarations, but not
- /// true for function definitions.
- ///
- virtual bool isDeclaration() const { return BasicBlocks.empty(); }
-
/// getIntrinsicID - This method returns the ID number of the specified
/// function, or Intrinsic::not_intrinsic if the function is not an
/// intrinsic, or if the pointer is null. This value is always defined to be
diff --git a/include/llvm/GlobalAlias.h b/include/llvm/GlobalAlias.h
index f4af5b1..c3d3c38 100644
--- a/include/llvm/GlobalAlias.h
+++ b/include/llvm/GlobalAlias.h
@@ -47,11 +47,6 @@ public:
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- /// isDeclaration - Is this global variable lacking an initializer? If so,
- /// the global variable is defined in some other translation unit, and is thus
- /// only a declaration here.
- virtual bool isDeclaration() const;
-
/// removeFromParent - This method unlinks 'this' from the containing module,
/// but does not delete it.
///
@@ -63,23 +58,23 @@ public:
virtual void eraseFromParent();
/// set/getAliasee - These methods retrieve and set the alias target.
- void setAliasee(Constant* GV);
- const Constant* getAliasee() const {
+ void setAliasee(Constant *GV);
+ const Constant *getAliasee() const {
return cast_or_null<Constant>(getOperand(0));
}
- Constant* getAliasee() {
+ Constant *getAliasee() {
return cast_or_null<Constant>(getOperand(0));
}
/// getAliasedGlobal() - The aliasee can be either a global or a bitcast of
/// a global. This method retrieves the global for both aliasee flavours.
- const GlobalValue* getAliasedGlobal() const;
+ const GlobalValue *getAliasedGlobal() const;
/// resolveAliasedGlobal() - This method tries to ultimately resolve the alias
/// by going through the aliasing chain and trying to find the very last
/// global. Returns NULL if a cycle was found. If stopOnWeak is false, the
/// whole aliasing chain is traversed; otherwise only strong aliases are
/// followed.
- const GlobalValue* resolveAliasedGlobal(bool stopOnWeak = true) const;
+ const GlobalValue *resolveAliasedGlobal(bool stopOnWeak = true) const;
// Methods to support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const GlobalAlias *) { return true; }
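
A short sketch of the accessors above; GA is an assumed GlobalAlias*:

  // Follow the aliasing chain to the underlying global; NULL means a cycle.
  const GlobalValue *Target = GA->resolveAliasedGlobal(/*stopOnWeak=*/false);
  if (!Target) {
    // A cycle was found in the aliasing chain.
  }
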
diff --git a/include/llvm/GlobalValue.h b/include/llvm/GlobalValue.h
index b184b8e..d0f0888 100644
--- a/include/llvm/GlobalValue.h
+++ b/include/llvm/GlobalValue.h
@@ -106,8 +106,8 @@ public:
bool use_empty_except_constants();
/// getType - Global values are always pointers.
- inline const PointerType *getType() const {
- return reinterpret_cast<const PointerType*>(User::getType());
+ inline PointerType *getType() const {
+ return reinterpret_cast<PointerType*>(User::getType());
}
static LinkageTypes getLinkOnceLinkage(bool ODR) {
@@ -258,16 +258,12 @@ public:
/// @}
- /// Override from Constant class. No GlobalValue's are null values so this
- /// always returns false.
- virtual bool isNullValue() const { return false; }
-
/// Override from Constant class.
virtual void destroyConstant();
/// isDeclaration - Return true if the primary definition of this global
- /// value is outside of the current translation unit...
- virtual bool isDeclaration() const = 0;
+ /// value is outside of the current translation unit.
+ bool isDeclaration() const;
/// removeFromParent - This method unlinks 'this' from the containing module,
/// but does not delete it.
diff --git a/include/llvm/GlobalVariable.h b/include/llvm/GlobalVariable.h
index 442e0c0..bbc09c1 100644
--- a/include/llvm/GlobalVariable.h
+++ b/include/llvm/GlobalVariable.h
@@ -68,11 +68,6 @@ public:
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- /// isDeclaration - Is this global variable lacking an initializer? If so,
- /// the global variable is defined in some other translation unit, and is thus
- /// only a declaration here.
- virtual bool isDeclaration() const { return getNumOperands() == 0; }
-
/// hasInitializer - Unless a global variable isExternal(), it has an
/// initializer. The initializer for the global variable/constant is held by
/// Initializer if an initializer is specified.
@@ -119,7 +114,7 @@ public:
/// illegal to call this method if the global is external, because we cannot
/// tell what the value is initialized to!
///
- inline /*const FIXME*/ Constant *getInitializer() const {
+ inline const Constant *getInitializer() const {
assert(hasInitializer() && "GV doesn't have initializer!");
return static_cast<Constant*>(Op<0>().get());
}
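
With the per-subclass overrides above deleted, every declaration test now funnels through the single non-virtual GlobalValue::isDeclaration(). A sketch of the unchanged call-site pattern, assuming an existing Module &M:

  unsigned NumDecls = 0;
  for (Module::global_iterator GI = M.global_begin(), GE = M.global_end();
       GI != GE; ++GI)
    if (GI->isDeclaration())  // no vtable dispatch after this patch
      ++NumDecls;
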
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 372eaba..4caf8f1 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -65,6 +65,7 @@ void initializeArgPromotionPass(PassRegistry&);
void initializeBasicAliasAnalysisPass(PassRegistry&);
void initializeBasicCallGraphPass(PassRegistry&);
void initializeBlockExtractorPassPass(PassRegistry&);
+void initializeBlockFrequencyPass(PassRegistry&);
void initializeBlockPlacementPass(PassRegistry&);
void initializeBranchProbabilityInfoPass(PassRegistry&);
void initializeBreakCriticalEdgesPass(PassRegistry&);
@@ -83,7 +84,6 @@ void initializeDAEPass(PassRegistry&);
void initializeDAHPass(PassRegistry&);
void initializeDCEPass(PassRegistry&);
void initializeDSEPass(PassRegistry&);
-void initializeDTEPass(PassRegistry&);
void initializeDeadInstEliminationPass(PassRegistry&);
void initializeDeadMachineInstructionElimPass(PassRegistry&);
void initializeDomOnlyPrinterPass(PassRegistry&);
@@ -140,10 +140,13 @@ void initializeLoopUnrollPass(PassRegistry&);
void initializeLoopUnswitchPass(PassRegistry&);
void initializeLoopIdiomRecognizePass(PassRegistry&);
void initializeLowerAtomicPass(PassRegistry&);
+void initializeLowerExpectIntrinsicPass(PassRegistry&);
void initializeLowerIntrinsicsPass(PassRegistry&);
void initializeLowerInvokePass(PassRegistry&);
void initializeLowerSetJmpPass(PassRegistry&);
void initializeLowerSwitchPass(PassRegistry&);
+void initializeMachineBlockFrequencyPass(PassRegistry&);
+void initializeMachineBranchProbabilityInfoPass(PassRegistry&);
void initializeMachineCSEPass(PassRegistry&);
void initializeMachineDominatorTreePass(PassRegistry&);
void initializeMachineLICMPass(PassRegistry&);
@@ -160,6 +163,10 @@ void initializeModuleDebugInfoPrinterPass(PassRegistry&);
void initializeNoAAPass(PassRegistry&);
void initializeNoProfileInfoPass(PassRegistry&);
void initializeNoPathProfileInfoPass(PassRegistry&);
+void initializeObjCARCAliasAnalysisPass(PassRegistry&);
+void initializeObjCARCExpandPass(PassRegistry&);
+void initializeObjCARCContractPass(PassRegistry&);
+void initializeObjCARCOptPass(PassRegistry&);
void initializeOptimalEdgeProfilerPass(PassRegistry&);
void initializeOptimizePHIsPass(PassRegistry&);
void initializePEIPass(PassRegistry&);
@@ -171,7 +178,6 @@ void initializePostDomOnlyViewerPass(PassRegistry&);
void initializePostDomPrinterPass(PassRegistry&);
void initializePostDomViewerPass(PassRegistry&);
void initializePostDominatorTreePass(PassRegistry&);
-void initializePreAllocSplittingPass(PassRegistry&);
void initializePreVerifierPass(PassRegistry&);
void initializePrintDbgInfoPass(PassRegistry&);
void initializePrintFunctionPassPass(PassRegistry&);
@@ -192,7 +198,6 @@ void initializeRegionOnlyPrinterPass(PassRegistry&);
void initializeRegionOnlyViewerPass(PassRegistry&);
void initializeRegionPrinterPass(PassRegistry&);
void initializeRegionViewerPass(PassRegistry&);
-void initializeRegisterCoalescerAnalysisGroup(PassRegistry&);
void initializeRenderMachineFunctionPass(PassRegistry&);
void initializeSCCPPass(PassRegistry&);
void initializeSROA_DTPass(PassRegistry&);
@@ -200,7 +205,7 @@ void initializeSROA_SSAUpPass(PassRegistry&);
void initializeScalarEvolutionAliasAnalysisPass(PassRegistry&);
void initializeScalarEvolutionPass(PassRegistry&);
void initializeSimpleInlinerPass(PassRegistry&);
-void initializeSimpleRegisterCoalescingPass(PassRegistry&);
+void initializeRegisterCoalescerPass(PassRegistry&);
void initializeSimplifyLibCallsPass(PassRegistry&);
void initializeSingleLoopExtractorPass(PassRegistry&);
void initializeSinkingPass(PassRegistry&);
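
All of these hooks share one shape: each takes the global PassRegistry and registers a single pass, typically at tool startup. A sketch covering the entries added by this patch:

  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeBlockFrequencyPass(Registry);
  initializeMachineBlockFrequencyPass(Registry);
  initializeMachineBranchProbabilityInfoPass(Registry);
  initializeLowerExpectIntrinsicPass(Registry);
  initializeObjCARCOptPass(Registry);
  initializeRegisterCoalescerPass(Registry); // replaces SimpleRegisterCoalescing
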
diff --git a/include/llvm/InlineAsm.h b/include/llvm/InlineAsm.h
index ed8f0f7..a98aff1 100644
--- a/include/llvm/InlineAsm.h
+++ b/include/llvm/InlineAsm.h
@@ -25,15 +25,16 @@ class PointerType;
class FunctionType;
class Module;
struct InlineAsmKeyType;
-template<class ValType, class TypeClass, class ConstantClass, bool HasLargeKey>
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
+ bool HasLargeKey>
class ConstantUniqueMap;
template<class ConstantClass, class TypeClass, class ValType>
struct ConstantCreator;
class InlineAsm : public Value {
friend struct ConstantCreator<InlineAsm, PointerType, InlineAsmKeyType>;
- friend class ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm,
- false>;
+ friend class ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&,
+ PointerType, InlineAsm, false>;
InlineAsm(const InlineAsm &); // do not implement
void operator=(const InlineAsm&); // do not implement
@@ -63,13 +64,13 @@ public:
/// getType - InlineAsms are always pointers.
///
- const PointerType *getType() const {
- return reinterpret_cast<const PointerType*>(Value::getType());
+ PointerType *getType() const {
+ return reinterpret_cast<PointerType*>(Value::getType());
}
/// getFunctionType - InlineAsms are always pointers to functions.
///
- const FunctionType *getFunctionType() const;
+ FunctionType *getFunctionType() const;
const std::string &getAsmString() const { return AsmString; }
const std::string &getConstraintString() const { return Constraints; }
@@ -187,25 +188,32 @@ public:
// in the backend.
enum {
+ // Fixed operands on an INLINEASM SDNode.
Op_InputChain = 0,
Op_AsmString = 1,
Op_MDNode = 2,
Op_ExtraInfo = 3, // HasSideEffects, IsAlignStack
Op_FirstOperand = 4,
+ // Fixed operands on an INLINEASM MachineInstr.
MIOp_AsmString = 0,
MIOp_ExtraInfo = 1, // HasSideEffects, IsAlignStack
MIOp_FirstOperand = 2,
+ // Interpretation of the MIOp_ExtraInfo bit field.
Extra_HasSideEffects = 1,
Extra_IsAlignStack = 2,
-
- Kind_RegUse = 1,
- Kind_RegDef = 2,
- Kind_Imm = 3,
- Kind_Mem = 4,
- Kind_RegDefEarlyClobber = 6,
-
+
+ // Inline asm operands map to multiple SDNode / MachineInstr operands.
+ // The first operand is an immediate describing the asm operand; the low
+ // bits hold the kind:
+ Kind_RegUse = 1, // Input register, "r".
+ Kind_RegDef = 2, // Output register, "=r".
+ Kind_RegDefEarlyClobber = 3, // Early-clobber output register, "=&r".
+ Kind_Clobber = 4, // Clobbered register, "~r".
+ Kind_Imm = 5, // Immediate.
+ Kind_Mem = 6, // Memory operand, "m".
+
Flag_MatchingOperand = 0x80000000
};
@@ -232,7 +240,10 @@ public:
static bool isRegDefEarlyClobberKind(unsigned Flag) {
return getKind(Flag) == Kind_RegDefEarlyClobber;
}
-
+ static bool isClobberKind(unsigned Flag) {
+ return getKind(Flag) == Kind_Clobber;
+ }
+
/// getNumOperandRegisters - Extract the number of registers field from the
/// inline asm operand flag.
static unsigned getNumOperandRegisters(unsigned Flag) {
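
Each inline-asm operand group is thus described by one immediate flag word: the kind in the low bits, a register count recoverable via getNumOperandRegisters(), and an optional Flag_MatchingOperand marker. A decoding sketch, assuming a flag word Flag and the getKind() helper the predicates above are built on:

  unsigned Kind = InlineAsm::getKind(Flag);        // Kind_RegUse .. Kind_Mem
  unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag);
  if (InlineAsm::isClobberKind(Flag)) {
    // New in this patch: NumRegs clobbered registers follow this descriptor.
  }
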
diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h
index 54dfe39..0bc9a3b 100644
--- a/include/llvm/Instructions.h
+++ b/include/llvm/Instructions.h
@@ -20,6 +20,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Attributes.h"
#include "llvm/CallingConv.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include <iterator>
@@ -76,7 +77,7 @@ public:
/// getAllocatedType - Return the type that is being allocated by the
/// instruction.
///
- const Type *getAllocatedType() const;
+ Type *getAllocatedType() const;
/// getAlignment - Return the alignment of the memory that is being allocated
/// by the instruction.
@@ -271,10 +272,10 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value)
// GetElementPtrInst Class
//===----------------------------------------------------------------------===//
-// checkType - Simple wrapper function to give a better assertion failure
+// checkGEPType - Simple wrapper function to give a better assertion failure
// message on bad indexes for a gep instruction.
//
-static inline const Type *checkType(const Type *Ty) {
+static inline const Type *checkGEPType(const Type *Ty) {
assert(Ty && "Invalid GetElementPtrInst indices for type!");
return Ty;
}
@@ -315,13 +316,13 @@ class GetElementPtrInst : public Instruction {
/// pointer type.
///
template<typename RandomAccessIterator>
- static const Type *getIndexedType(const Type *Ptr,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
- // This argument ensures that we
- // have an iterator we can do
- // arithmetic on in constant time
- std::random_access_iterator_tag) {
+ static Type *getIndexedType(const Type *Ptr,
+ RandomAccessIterator IdxBegin,
+ RandomAccessIterator IdxEnd,
+ // This argument ensures that we
+ // have an iterator we can do
+ // arithmetic on in constant time
+ std::random_access_iterator_tag) {
unsigned NumIdx = static_cast<unsigned>(std::distance(IdxBegin, IdxEnd));
if (NumIdx > 0)
@@ -446,24 +447,22 @@ public:
/// pointer type.
///
template<typename RandomAccessIterator>
- static const Type *getIndexedType(const Type *Ptr,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd) {
+ static Type *getIndexedType(const Type *Ptr, RandomAccessIterator IdxBegin,
+ RandomAccessIterator IdxEnd) {
return getIndexedType(Ptr, IdxBegin, IdxEnd,
typename std::iterator_traits<RandomAccessIterator>::
iterator_category());
}
- static const Type *getIndexedType(const Type *Ptr,
- Value* const *Idx, unsigned NumIdx);
+ // FIXME: Use ArrayRef
+ static Type *getIndexedType(const Type *Ptr,
+ Value* const *Idx, unsigned NumIdx);
+ static Type *getIndexedType(const Type *Ptr,
+ Constant* const *Idx, unsigned NumIdx);
- static const Type *getIndexedType(const Type *Ptr,
- Constant* const *Idx, unsigned NumIdx);
-
- static const Type *getIndexedType(const Type *Ptr,
- uint64_t const *Idx, unsigned NumIdx);
-
- static const Type *getIndexedType(const Type *Ptr, Value *Idx);
+ static Type *getIndexedType(const Type *Ptr,
+ uint64_t const *Idx, unsigned NumIdx);
+ static Type *getIndexedType(const Type *Ptr, Value *Idx);
inline op_iterator idx_begin() { return op_begin()+1; }
inline const_op_iterator idx_begin() const { return op_begin()+1; }
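
getIndexedType answers "what element type does this index list reach?" without materializing an instruction; per the FIXME, these overloads are slated to take ArrayRef. A sketch of the Value-array form, where PtrVal, Zero, and FieldNo are assumed to exist (the latter two being i32 constant Values):

  Value *Idxs[] = { Zero, FieldNo };
  Type *Elem = GetElementPtrInst::getIndexedType(PtrVal->getType(), Idxs, 2);
  // Elem is null if the indices are invalid for this pointer type.
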
@@ -538,7 +537,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
unsigned Values,
const Twine &NameStr,
Instruction *InsertBefore)
- : Instruction(PointerType::get(checkType(
+ : Instruction(PointerType::get(checkGEPType(
getIndexedType(Ptr->getType(),
IdxBegin, IdxEnd)),
cast<PointerType>(Ptr->getType())
@@ -557,7 +556,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr,
unsigned Values,
const Twine &NameStr,
BasicBlock *InsertAtEnd)
- : Instruction(PointerType::get(checkType(
+ : Instruction(PointerType::get(checkGEPType(
getIndexedType(Ptr->getType(),
IdxBegin, IdxEnd)),
cast<PointerType>(Ptr->getType())
@@ -843,46 +842,17 @@ public:
class CallInst : public Instruction {
AttrListPtr AttributeList; ///< parameter attributes for call
CallInst(const CallInst &CI);
- void init(Value *Func, Value* const *Params, unsigned NumParams);
- void init(Value *Func, Value *Actual1, Value *Actual2);
- void init(Value *Func, Value *Actual);
- void init(Value *Func);
-
- template<typename RandomAccessIterator>
- void init(Value *Func,
- RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd,
- const Twine &NameStr,
- // This argument ensures that we have an iterator we can
- // do arithmetic on in constant time
- std::random_access_iterator_tag) {
- unsigned NumArgs = (unsigned)std::distance(ArgBegin, ArgEnd);
-
- // This requires that the iterator points to contiguous memory.
- init(Func, NumArgs ? &*ArgBegin : 0, NumArgs);
- setName(NameStr);
- }
+ void init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr);
+ void init(Value *Func, const Twine &NameStr);
- /// Construct a CallInst given a range of arguments. RandomAccessIterator
- /// must be a random-access iterator pointing to contiguous storage
- /// (e.g. a std::vector<>::iterator). Checks are made for
- /// random-accessness but not for contiguous storage as that would
- /// incur runtime overhead.
+ /// Construct a CallInst given a range of arguments.
/// @brief Construct a CallInst from a range of arguments
- template<typename RandomAccessIterator>
- CallInst(Value *Func,
- RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
- const Twine &NameStr, Instruction *InsertBefore);
-
- /// Construct a CallInst given a range of arguments. RandomAccessIterator
- /// must be a random-access iterator pointing to contiguous storage
- /// (e.g. a std::vector<>::iterator). Checks are made for
- /// random-accessness but not for contiguous storage as that would
- /// incur runtime overhead.
+ inline CallInst(Value *Func, ArrayRef<Value *> Args,
+ const Twine &NameStr, Instruction *InsertBefore);
+
+ /// Construct a CallInst given a range of arguments.
/// @brief Construct a CallInst from a range of arguments
- template<typename RandomAccessIterator>
- inline CallInst(Value *Func,
- RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
+ inline CallInst(Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd);
CallInst(Value *F, Value *Actual, const Twine &NameStr,
@@ -895,31 +865,18 @@ class CallInst : public Instruction {
protected:
virtual CallInst *clone_impl() const;
public:
- template<typename RandomAccessIterator>
static CallInst *Create(Value *Func,
- RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd,
+ ArrayRef<Value *> Args,
const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
- return new(unsigned(ArgEnd - ArgBegin + 1))
- CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertBefore);
+ return new(unsigned(Args.size() + 1))
+ CallInst(Func, Args, NameStr, InsertBefore);
}
- template<typename RandomAccessIterator>
static CallInst *Create(Value *Func,
- RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd,
+ ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
- return new(unsigned(ArgEnd - ArgBegin + 1))
- CallInst(Func, ArgBegin, ArgEnd, NameStr, InsertAtEnd);
- }
- static CallInst *Create(Value *F, Value *Actual,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new(2) CallInst(F, Actual, NameStr, InsertBefore);
- }
- static CallInst *Create(Value *F, Value *Actual, const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new(2) CallInst(F, Actual, NameStr, InsertAtEnd);
+ return new(unsigned(Args.size() + 1))
+ CallInst(Func, Args, NameStr, InsertAtEnd);
}
static CallInst *Create(Value *F, const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
@@ -1094,32 +1051,24 @@ template <>
struct OperandTraits<CallInst> : public VariadicOperandTraits<CallInst, 1> {
};
-template<typename RandomAccessIterator>
-CallInst::CallInst(Value *Func,
- RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
+CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - (ArgEnd - ArgBegin + 1),
- unsigned(ArgEnd - ArgBegin + 1), InsertAtEnd) {
- init(Func, ArgBegin, ArgEnd, NameStr,
- typename std::iterator_traits<RandomAccessIterator>
- ::iterator_category());
+ OperandTraits<CallInst>::op_end(this) - (Args.size() + 1),
+ unsigned(Args.size() + 1), InsertAtEnd) {
+ init(Func, Args, NameStr);
}
-template<typename RandomAccessIterator>
-CallInst::CallInst(Value *Func,
- RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
+CallInst::CallInst(Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr, Instruction *InsertBefore)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - (ArgEnd - ArgBegin + 1),
- unsigned(ArgEnd - ArgBegin + 1), InsertBefore) {
- init(Func, ArgBegin, ArgEnd, NameStr,
- typename std::iterator_traits<RandomAccessIterator>
- ::iterator_category());
+ OperandTraits<CallInst>::op_end(this) - (Args.size() + 1),
+ unsigned(Args.size() + 1), InsertBefore) {
+ init(Func, Args, NameStr);
}
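
Call construction thus collapses from an iterator pair to a single ArrayRef, which converts implicitly from a C array, SmallVector, or std::vector; the removed (F, Actual) convenience overloads are subsumed by ArrayRef's one-element conversion. A sketch with assumed Callee, X, Y, and insertion point InsertPt:

  Value *Args[] = { X, Y };
  CallInst *Call = CallInst::Create(Callee, Args, "calltmp", InsertPt);
  CallInst *One  = CallInst::Create(Callee, X, "one", InsertPt); // single arg
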
@@ -1430,69 +1379,18 @@ class ExtractValueInst : public UnaryInstruction {
SmallVector<unsigned, 4> Indices;
ExtractValueInst(const ExtractValueInst &EVI);
- void init(const unsigned *Idx, unsigned NumIdx,
- const Twine &NameStr);
- void init(unsigned Idx, const Twine &NameStr);
-
- template<typename RandomAccessIterator>
- void init(RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
- const Twine &NameStr,
- // This argument ensures that we have an iterator we can
- // do arithmetic on in constant time
- std::random_access_iterator_tag) {
- unsigned NumIdx = static_cast<unsigned>(std::distance(IdxBegin, IdxEnd));
-
- // There's no fundamental reason why we require at least one index
- // (other than weirdness with &*IdxBegin being invalid; see
- // getelementptr's init routine for example). But there's no
- // present need to support it.
- assert(NumIdx > 0 && "ExtractValueInst must have at least one index");
-
- // This requires that the iterator points to contiguous memory.
- init(&*IdxBegin, NumIdx, NameStr); // FIXME: for the general case
- // we have to build an array here
- }
-
- /// getIndexedType - Returns the type of the element that would be extracted
- /// with an extractvalue instruction with the specified parameters.
- ///
- /// Null is returned if the indices are invalid for the specified type.
- ///
- static const Type *getIndexedType(const Type *Agg,
- const unsigned *Idx, unsigned NumIdx);
-
- template<typename RandomAccessIterator>
- static const Type *getIndexedType(const Type *Ptr,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
- // This argument ensures that we
- // have an iterator we can do
- // arithmetic on in constant time
- std::random_access_iterator_tag) {
- unsigned NumIdx = static_cast<unsigned>(std::distance(IdxBegin, IdxEnd));
-
- if (NumIdx > 0)
- // This requires that the iterator points to contiguous memory.
- return getIndexedType(Ptr, &*IdxBegin, NumIdx);
- else
- return getIndexedType(Ptr, (const unsigned *)0, NumIdx);
- }
+ void init(ArrayRef<unsigned> Idxs, const Twine &NameStr);
/// Constructors - Create an extractvalue instruction with a base aggregate
/// value and a list of indices. The first ctor can optionally insert before
/// an existing instruction, the second appends the new instruction to the
/// specified BasicBlock.
- template<typename RandomAccessIterator>
inline ExtractValueInst(Value *Agg,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr,
Instruction *InsertBefore);
- template<typename RandomAccessIterator>
inline ExtractValueInst(Value *Agg,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr, BasicBlock *InsertAtEnd);
// allocate space for exactly one operand
@@ -1503,54 +1401,25 @@ protected:
virtual ExtractValueInst *clone_impl() const;
public:
- template<typename RandomAccessIterator>
static ExtractValueInst *Create(Value *Agg,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
return new
- ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertBefore);
+ ExtractValueInst(Agg, Idxs, NameStr, InsertBefore);
}
- template<typename RandomAccessIterator>
static ExtractValueInst *Create(Value *Agg,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr,
BasicBlock *InsertAtEnd) {
- return new ExtractValueInst(Agg, IdxBegin, IdxEnd, NameStr, InsertAtEnd);
- }
-
- /// Constructors - These two creators are convenience methods because one
- /// index extractvalue instructions are much more common than those with
- /// more than one.
- static ExtractValueInst *Create(Value *Agg, unsigned Idx,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- unsigned Idxs[1] = { Idx };
- return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertBefore);
- }
- static ExtractValueInst *Create(Value *Agg, unsigned Idx,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- unsigned Idxs[1] = { Idx };
- return new ExtractValueInst(Agg, Idxs, Idxs + 1, NameStr, InsertAtEnd);
+ return new ExtractValueInst(Agg, Idxs, NameStr, InsertAtEnd);
}
/// getIndexedType - Returns the type of the element that would be extracted
/// with an extractvalue instruction with the specified parameters.
///
/// Null is returned if the indices are invalid for the specified type.
- ///
- template<typename RandomAccessIterator>
- static const Type *getIndexedType(const Type *Ptr,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd) {
- return getIndexedType(Ptr, IdxBegin, IdxEnd,
- typename std::iterator_traits<RandomAccessIterator>::
- iterator_category());
- }
- static const Type *getIndexedType(const Type *Ptr, unsigned Idx);
+ static Type *getIndexedType(const Type *Agg, ArrayRef<unsigned> Idxs);
typedef const unsigned* idx_iterator;
inline idx_iterator idx_begin() const { return Indices.begin(); }
@@ -1566,7 +1435,11 @@ public:
return 0U; // get index for modifying correct operand
}
- unsigned getNumIndices() const { // Note: always non-negative
+ ArrayRef<unsigned> getIndices() const {
+ return Indices;
+ }
+
+ unsigned getNumIndices() const {
return (unsigned)Indices.size();
}
@@ -1584,31 +1457,21 @@ public:
}
};
-template<typename RandomAccessIterator>
ExtractValueInst::ExtractValueInst(Value *Agg,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr,
Instruction *InsertBefore)
- : UnaryInstruction(checkType(getIndexedType(Agg->getType(),
- IdxBegin, IdxEnd)),
+ : UnaryInstruction(checkGEPType(getIndexedType(Agg->getType(), Idxs)),
ExtractValue, Agg, InsertBefore) {
- init(IdxBegin, IdxEnd, NameStr,
- typename std::iterator_traits<RandomAccessIterator>
- ::iterator_category());
+ init(Idxs, NameStr);
}
-template<typename RandomAccessIterator>
ExtractValueInst::ExtractValueInst(Value *Agg,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr,
BasicBlock *InsertAtEnd)
- : UnaryInstruction(checkType(getIndexedType(Agg->getType(),
- IdxBegin, IdxEnd)),
+ : UnaryInstruction(checkGEPType(getIndexedType(Agg->getType(), Idxs)),
ExtractValue, Agg, InsertAtEnd) {
- init(IdxBegin, IdxEnd, NameStr,
- typename std::iterator_traits<RandomAccessIterator>
- ::iterator_category());
+ init(Idxs, NameStr);
}
@@ -1624,44 +1487,19 @@ class InsertValueInst : public Instruction {
void *operator new(size_t, unsigned); // Do not implement
InsertValueInst(const InsertValueInst &IVI);
- void init(Value *Agg, Value *Val, const unsigned *Idx, unsigned NumIdx,
+ void init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
const Twine &NameStr);
- void init(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr);
-
- template<typename RandomAccessIterator>
- void init(Value *Agg, Value *Val,
- RandomAccessIterator IdxBegin, RandomAccessIterator IdxEnd,
- const Twine &NameStr,
- // This argument ensures that we have an iterator we can
- // do arithmetic on in constant time
- std::random_access_iterator_tag) {
- unsigned NumIdx = static_cast<unsigned>(std::distance(IdxBegin, IdxEnd));
-
- // There's no fundamental reason why we require at least one index
- // (other than weirdness with &*IdxBegin being invalid; see
- // getelementptr's init routine for example). But there's no
- // present need to support it.
- assert(NumIdx > 0 && "InsertValueInst must have at least one index");
-
- // This requires that the iterator points to contiguous memory.
- init(Agg, Val, &*IdxBegin, NumIdx, NameStr); // FIXME: for the general case
- // we have to build an array here
- }
/// Constructors - Create an insertvalue instruction with a base aggregate
/// value, a value to insert, and a list of indices. The first ctor can
/// optionally insert before an existing instruction, the second appends
/// the new instruction to the specified BasicBlock.
- template<typename RandomAccessIterator>
inline InsertValueInst(Value *Agg, Value *Val,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr,
Instruction *InsertBefore);
- template<typename RandomAccessIterator>
inline InsertValueInst(Value *Agg, Value *Val,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr, BasicBlock *InsertAtEnd);
/// Constructors - These two constructors are convenience methods because one
@@ -1679,37 +1517,17 @@ public:
return User::operator new(s, 2);
}
- template<typename RandomAccessIterator>
static InsertValueInst *Create(Value *Agg, Value *Val,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
- return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd,
- NameStr, InsertBefore);
+ return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertBefore);
}
- template<typename RandomAccessIterator>
static InsertValueInst *Create(Value *Agg, Value *Val,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr,
BasicBlock *InsertAtEnd) {
- return new InsertValueInst(Agg, Val, IdxBegin, IdxEnd,
- NameStr, InsertAtEnd);
- }
-
- /// Constructors - These two creators are convenience methods because one
- /// index insertvalue instructions are much more common than those with
- /// more than one.
- static InsertValueInst *Create(Value *Agg, Value *Val, unsigned Idx,
- const Twine &NameStr = "",
- Instruction *InsertBefore = 0) {
- return new InsertValueInst(Agg, Val, Idx, NameStr, InsertBefore);
- }
- static InsertValueInst *Create(Value *Agg, Value *Val, unsigned Idx,
- const Twine &NameStr,
- BasicBlock *InsertAtEnd) {
- return new InsertValueInst(Agg, Val, Idx, NameStr, InsertAtEnd);
+ return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertAtEnd);
}
/// Transparently provide more efficient getOperand methods.
@@ -1739,7 +1557,11 @@ public:
return 1U; // get index for modifying correct operand
}
- unsigned getNumIndices() const { // Note: always non-negative
+ ArrayRef<unsigned> getIndices() const {
+ return Indices;
+ }
+
+ unsigned getNumIndices() const {
return (unsigned)Indices.size();
}
@@ -1762,33 +1584,25 @@ struct OperandTraits<InsertValueInst> :
public FixedNumOperandTraits<InsertValueInst, 2> {
};
-template<typename RandomAccessIterator>
InsertValueInst::InsertValueInst(Value *Agg,
Value *Val,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr,
Instruction *InsertBefore)
: Instruction(Agg->getType(), InsertValue,
OperandTraits<InsertValueInst>::op_begin(this),
2, InsertBefore) {
- init(Agg, Val, IdxBegin, IdxEnd, NameStr,
- typename std::iterator_traits<RandomAccessIterator>
- ::iterator_category());
+ init(Agg, Val, Idxs, NameStr);
}
-template<typename RandomAccessIterator>
InsertValueInst::InsertValueInst(Value *Agg,
Value *Val,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &NameStr,
BasicBlock *InsertAtEnd)
: Instruction(Agg->getType(), InsertValue,
OperandTraits<InsertValueInst>::op_begin(this),
2, InsertAtEnd) {
- init(Agg, Val, IdxBegin, IdxEnd, NameStr,
- typename std::iterator_traits<RandomAccessIterator>
- ::iterator_category());
+ init(Agg, Val, Idxs, NameStr);
}
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value)
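
extractvalue/insertvalue creation follows the same pattern with ArrayRef<unsigned> index lists; the removed one-index Create overloads are likewise covered by ArrayRef's implicit one-element conversion. A sketch, with Agg, Elt, and InsertPt assumed:

  unsigned Idxs[] = { 1, 0 };
  ExtractValueInst *EV = ExtractValueInst::Create(Agg, Idxs, "fld", InsertPt);
  InsertValueInst *IV =
      InsertValueInst::Create(Agg, Elt, 1, "ins", InsertPt); // one index
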
@@ -1814,7 +1628,7 @@ class PHINode : public Instruction {
explicit PHINode(const Type *Ty, unsigned NumReservedValues,
const Twine &NameStr = "", Instruction *InsertBefore = 0)
: Instruction(Ty, Instruction::PHI, 0, 0, InsertBefore),
- ReservedSpace(NumReservedValues * 2) {
+ ReservedSpace(NumReservedValues) {
setName(NameStr);
OperandList = allocHungoffUses(ReservedSpace);
}
@@ -1822,11 +1636,16 @@ class PHINode : public Instruction {
PHINode(const Type *Ty, unsigned NumReservedValues, const Twine &NameStr,
BasicBlock *InsertAtEnd)
: Instruction(Ty, Instruction::PHI, 0, 0, InsertAtEnd),
- ReservedSpace(NumReservedValues * 2) {
+ ReservedSpace(NumReservedValues) {
setName(NameStr);
OperandList = allocHungoffUses(ReservedSpace);
}
protected:
+ // allocHungoffUses - This is more complicated than the generic
+ // User::allocHungoffUses, because we have to allocate Uses for the incoming
+ // values and pointers to the incoming blocks, all in one allocation.
+ Use *allocHungoffUses(unsigned) const;
+
virtual PHINode *clone_impl() const;
public:
/// Constructors - NumReservedValues is a hint for the number of incoming
@@ -1845,32 +1664,55 @@ public:
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+ // Block iterator interface. This provides access to the list of incoming
+ // basic blocks, which parallels the list of incoming values.
+
+ typedef BasicBlock **block_iterator;
+ typedef BasicBlock * const *const_block_iterator;
+
+ block_iterator block_begin() {
+ Use::UserRef *ref =
+ reinterpret_cast<Use::UserRef*>(op_begin() + ReservedSpace);
+ return reinterpret_cast<block_iterator>(ref + 1);
+ }
+
+ const_block_iterator block_begin() const {
+ const Use::UserRef *ref =
+ reinterpret_cast<const Use::UserRef*>(op_begin() + ReservedSpace);
+ return reinterpret_cast<const_block_iterator>(ref + 1);
+ }
+
+ block_iterator block_end() {
+ return block_begin() + getNumOperands();
+ }
+
+ const_block_iterator block_end() const {
+ return block_begin() + getNumOperands();
+ }
+
/// getNumIncomingValues - Return the number of incoming edges
///
- unsigned getNumIncomingValues() const { return getNumOperands()/2; }
+ unsigned getNumIncomingValues() const { return getNumOperands(); }
/// getIncomingValue - Return incoming value number x
///
Value *getIncomingValue(unsigned i) const {
- assert(i*2 < getNumOperands() && "Invalid value number!");
- return getOperand(i*2);
+ return getOperand(i);
}
void setIncomingValue(unsigned i, Value *V) {
- assert(i*2 < getNumOperands() && "Invalid value number!");
- setOperand(i*2, V);
+ setOperand(i, V);
}
static unsigned getOperandNumForIncomingValue(unsigned i) {
- return i*2;
+ return i;
}
static unsigned getIncomingValueNumForOperand(unsigned i) {
- assert(i % 2 == 0 && "Invalid incoming-value operand index!");
- return i/2;
+ return i;
}
/// getIncomingBlock - Return incoming basic block number @p i.
///
BasicBlock *getIncomingBlock(unsigned i) const {
- return cast<BasicBlock>(getOperand(i*2+1));
+ return block_begin()[i];
}
/// getIncomingBlock - Return incoming basic block corresponding
@@ -1878,7 +1720,7 @@ public:
///
BasicBlock *getIncomingBlock(const Use &U) const {
assert(this == U.getUser() && "Iterator doesn't point to PHI's Uses?");
- return cast<BasicBlock>((&U + 1)->get());
+ return getIncomingBlock(unsigned(&U - op_begin()));
}
/// getIncomingBlock - Return incoming basic block corresponding
@@ -1889,16 +1731,8 @@ public:
return getIncomingBlock(I.getUse());
}
-
void setIncomingBlock(unsigned i, BasicBlock *BB) {
- setOperand(i*2+1, (Value*)BB);
- }
- static unsigned getOperandNumForIncomingBlock(unsigned i) {
- return i*2+1;
- }
- static unsigned getIncomingBlockNumForOperand(unsigned i) {
- assert(i % 2 == 1 && "Invalid incoming-block operand index!");
- return i/2;
+ block_begin()[i] = BB;
}
/// addIncoming - Add an incoming value to the end of the PHI list
@@ -1908,13 +1742,12 @@ public:
assert(BB && "PHI node got a null basic block!");
assert(getType() == V->getType() &&
"All operands to PHI node must be the same type as the PHI node!");
- unsigned OpNo = NumOperands;
- if (OpNo+2 > ReservedSpace)
+ if (NumOperands == ReservedSpace)
growOperands(); // Get more space!
// Initialize some new operands.
- NumOperands = OpNo+2;
- OperandList[OpNo] = V;
- OperandList[OpNo+1] = (Value*)BB;
+ ++NumOperands;
+ setIncomingValue(NumOperands - 1, V);
+ setIncomingBlock(NumOperands - 1, BB);
}
/// removeIncomingValue - Remove an incoming value. This is useful if a
@@ -1937,14 +1770,16 @@ public:
/// block in the value list for this PHI. Returns -1 if no instance.
///
int getBasicBlockIndex(const BasicBlock *BB) const {
- Use *OL = OperandList;
- for (unsigned i = 0, e = getNumOperands(); i != e; i += 2)
- if (OL[i+1].get() == (const Value*)BB) return i/2;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+ if (block_begin()[i] == BB)
+ return i;
return -1;
}
Value *getIncomingValueForBlock(const BasicBlock *BB) const {
- return getIncomingValue(getBasicBlockIndex(BB));
+ int Idx = getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "Invalid basic block argument!");
+ return getIncomingValue(Idx);
}
/// hasConstantValue - If the specified PHI node always merges together the
@@ -2397,71 +2232,39 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
class InvokeInst : public TerminatorInst {
AttrListPtr AttributeList;
InvokeInst(const InvokeInst &BI);
- void init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
- Value* const *Args, unsigned NumArgs);
-
- template<typename RandomAccessIterator>
void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
- const Twine &NameStr,
- // This argument ensures that we have an iterator we can
- // do arithmetic on in constant time
- std::random_access_iterator_tag) {
- unsigned NumArgs = (unsigned)std::distance(ArgBegin, ArgEnd);
-
- // This requires that the iterator points to contiguous memory.
- init(Func, IfNormal, IfException, NumArgs ? &*ArgBegin : 0, NumArgs);
- setName(NameStr);
- }
+ ArrayRef<Value *> Args, const Twine &NameStr);
/// Construct an InvokeInst given a range of arguments.
- /// RandomAccessIterator must be a random-access iterator pointing to
- /// contiguous storage (e.g. a std::vector<>::iterator). Checks are
- /// made for random-accessness but not for contiguous storage as
- /// that would incur runtime overhead.
///
/// @brief Construct an InvokeInst from a range of arguments
- template<typename RandomAccessIterator>
inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
- unsigned Values,
+ ArrayRef<Value *> Args, unsigned Values,
const Twine &NameStr, Instruction *InsertBefore);
/// Construct an InvokeInst given a range of arguments.
- /// RandomAccessIterator must be a random-access iterator pointing to
- /// contiguous storage (e.g. a std::vector<>::iterator). Checks are
- /// made for random-accessness but not for contiguous storage as
- /// that would incur runtime overhead.
///
/// @brief Construct an InvokeInst from a range of arguments
- template<typename RandomAccessIterator>
inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- RandomAccessIterator ArgBegin, RandomAccessIterator ArgEnd,
- unsigned Values,
+ ArrayRef<Value *> Args, unsigned Values,
const Twine &NameStr, BasicBlock *InsertAtEnd);
protected:
virtual InvokeInst *clone_impl() const;
public:
- template<typename RandomAccessIterator>
static InvokeInst *Create(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
- RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd,
- const Twine &NameStr = "",
+ ArrayRef<Value *> Args, const Twine &NameStr = "",
Instruction *InsertBefore = 0) {
- unsigned Values(ArgEnd - ArgBegin + 3);
- return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd,
+ unsigned Values = unsigned(Args.size()) + 3;
+ return new(Values) InvokeInst(Func, IfNormal, IfException, Args,
Values, NameStr, InsertBefore);
}
- template<typename RandomAccessIterator>
static InvokeInst *Create(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
- RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd,
- const Twine &NameStr,
+ ArrayRef<Value *> Args, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
- unsigned Values(ArgEnd - ArgBegin + 3);
- return new(Values) InvokeInst(Func, IfNormal, IfException, ArgBegin, ArgEnd,
+ unsigned Values = unsigned(Args.size()) + 3;
+ return new(Values) InvokeInst(Func, IfNormal, IfException, Args,
Values, NameStr, InsertAtEnd);
}
@@ -2627,37 +2430,27 @@ template <>
struct OperandTraits<InvokeInst> : public VariadicOperandTraits<InvokeInst, 3> {
};
-template<typename RandomAccessIterator>
InvokeInst::InvokeInst(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
- RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd,
- unsigned Values,
+ ArrayRef<Value *> Args, unsigned Values,
const Twine &NameStr, Instruction *InsertBefore)
: TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
Instruction::Invoke,
OperandTraits<InvokeInst>::op_end(this) - Values,
Values, InsertBefore) {
- init(Func, IfNormal, IfException, ArgBegin, ArgEnd, NameStr,
- typename std::iterator_traits<RandomAccessIterator>
- ::iterator_category());
+ init(Func, IfNormal, IfException, Args, NameStr);
}
-template<typename RandomAccessIterator>
InvokeInst::InvokeInst(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
- RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd,
- unsigned Values,
+ ArrayRef<Value *> Args, unsigned Values,
const Twine &NameStr, BasicBlock *InsertAtEnd)
: TerminatorInst(cast<FunctionType>(cast<PointerType>(Func->getType())
->getElementType())->getReturnType(),
Instruction::Invoke,
OperandTraits<InvokeInst>::op_end(this) - Values,
Values, InsertAtEnd) {
- init(Func, IfNormal, IfException, ArgBegin, ArgEnd, NameStr,
- typename std::iterator_traits<RandomAccessIterator>
- ::iterator_category());
+ init(Func, IfNormal, IfException, Args, NameStr);
}
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
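
The net effect at call sites: an ArrayRef<Value*> replaces the iterator pair,
and the contiguous-storage requirement is satisfied by construction rather
than by convention. A hedged sketch (all names are placeholders):

    #include "llvm/Instructions.h"
    using namespace llvm;

    static InvokeInst *emitInvoke(Value *Callee, BasicBlock *NormalDest,
                                  BasicBlock *UnwindDest, Value *A0, Value *A1,
                                  BasicBlock *InsertAtEnd) {
      Value *Args[] = { A0, A1 };
      // ArrayRef is formed implicitly from the C array; the three extra
      // operand slots (callee plus two destinations) are added inside Create().
      return InvokeInst::Create(Callee, NormalDest, UnwindDest, Args, "inv",
                                InsertAtEnd);
    }
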
diff --git a/include/llvm/Intrinsics.h b/include/llvm/Intrinsics.h
index 5cfe551..46361ca 100644
--- a/include/llvm/Intrinsics.h
+++ b/include/llvm/Intrinsics.h
@@ -16,6 +16,7 @@
#ifndef LLVM_INTRINSICS_H
#define LLVM_INTRINSICS_H
+#include "llvm/ADT/ArrayRef.h"
#include <string>
namespace llvm {
@@ -44,12 +45,12 @@ namespace Intrinsic {
/// Intrinsic::getName(ID) - Return the LLVM name for an intrinsic, such as
/// "llvm.ppc.altivec.lvx".
- std::string getName(ID id, const Type **Tys = 0, unsigned numTys = 0);
+ std::string getName(ID id, ArrayRef<Type*> Tys = ArrayRef<Type*>());
/// Intrinsic::getType(ID) - Return the function type for an intrinsic.
///
const FunctionType *getType(LLVMContext &Context, ID id,
- const Type **Tys = 0, unsigned numTys = 0);
+ ArrayRef<Type*> Tys = ArrayRef<Type*>());
/// Intrinsic::isOverloaded(ID) - Returns true if the intrinsic can be
/// overloaded.
@@ -67,8 +68,8 @@ namespace Intrinsic {
/// overloaded intrinsic, Tys must provide exactly one type for each
/// overloaded type in the intrinsic.
- Function *getDeclaration(Module *M, ID id, const Type **Tys = 0,
- unsigned numTys = 0);
+ Function *getDeclaration(Module *M, ID id,
+ ArrayRef<Type*> Tys = ArrayRef<Type*>());
/// Map a GCC builtin name to an intrinsic ID.
ID getIntrinsicForGCCBuiltin(const char *Prefix, const char *BuiltinName);
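
Requesting an overloaded intrinsic follows the same pattern. A sketch, assuming
the de-constified Type accessors in this tree and the int_fma definition added
to Intrinsics.td below:

    #include "llvm/Intrinsics.h"
    #include "llvm/Module.h"
    #include "llvm/Type.h"
    using namespace llvm;

    // Declare (or look up) llvm.fma.f32 in M; the one type supplied resolves
    // the intrinsic's single overloaded slot.
    static Function *getFMAForFloat(Module *M) {
      Type *Tys[] = { Type::getFloatTy(M->getContext()) };
      return Intrinsic::getDeclaration(M, Intrinsic::fma, Tys);
    }
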
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index d8f249a..947cf1b 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -211,7 +211,8 @@ def int_stackrestore : Intrinsic<[], [llvm_ptr_ty]>,
// however it does conveniently prevent the prefetch from being reordered
// with respect to nearby accesses to the same memory.
def int_prefetch : Intrinsic<[],
- [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty,
+ llvm_i32_ty],
[IntrReadWriteArgMem, NoCapture<0>]>;
def int_pcmarker : Intrinsic<[], [llvm_i32_ty]>;
@@ -254,6 +255,12 @@ let Properties = [IntrReadMem] in {
def int_exp2 : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>]>;
}
+let Properties = [IntrNoMem] in {
+ def int_fma : Intrinsic<[llvm_anyfloat_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMMatchType<0>]>;
+}
+
// NOTE: these are internal interfaces.
def int_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
def int_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty]>;
@@ -265,6 +272,11 @@ def int_objectsize : Intrinsic<[llvm_anyint_ty], [llvm_ptr_ty, llvm_i1_ty],
[IntrNoMem]>,
GCCBuiltin<"__builtin_object_size">;
+//===------------------------- Expect Intrinsics --------------------------===//
+//
+def int_expect : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>,
+ LLVMMatchType<0>], [IntrNoMem]>;
+
//===-------------------- Bit Manipulation Intrinsics ---------------------===//
//
@@ -311,7 +323,7 @@ let Properties = [IntrNoMem] in {
def int_eh_sjlj_lsda : Intrinsic<[llvm_ptr_ty]>;
def int_eh_sjlj_callsite: Intrinsic<[], [llvm_i32_ty]>;
}
-def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_i32_ty], [IntrReadMem]>;
+def int_eh_sjlj_dispatch_setup : Intrinsic<[], [llvm_i32_ty]>;
def int_eh_sjlj_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>;
def int_eh_sjlj_longjmp : Intrinsic<[], [llvm_ptr_ty]>;
diff --git a/include/llvm/LLVMContext.h b/include/llvm/LLVMContext.h
index 3502ff7..65146c3 100644
--- a/include/llvm/LLVMContext.h
+++ b/include/llvm/LLVMContext.h
@@ -39,7 +39,8 @@ public:
// compile-time performance optimization, not a correctness optimization.
enum {
MD_dbg = 0, // "dbg"
- MD_tbaa = 1 // "tbaa"
+ MD_tbaa = 1, // "tbaa"
+ MD_prof = 2 // "prof"
};
/// getMDKindID - Return a unique non-zero ID for the specified metadata kind.
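
The fixed kind ID lets producers attach profile metadata without a string
lookup. A hedged sketch of tagging a conditional branch with "branch_weights"
(the helper name and weights are illustrative; Type accessors are assumed
de-constified as elsewhere in this tree):

    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    #include "llvm/LLVMContext.h"
    #include "llvm/Metadata.h"
    using namespace llvm;

    static void addBranchWeights(BranchInst *BI, uint64_t TrueWeight,
                                 uint64_t FalseWeight) {
      LLVMContext &Ctx = BI->getContext();
      Value *Ops[] = {
        MDString::get(Ctx, "branch_weights"),
        ConstantInt::get(Type::getInt32Ty(Ctx), TrueWeight),
        ConstantInt::get(Type::getInt32Ty(Ctx), FalseWeight)
      };
      BI->setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Ops));
    }
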
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index c02e161..8467d11 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -62,7 +62,6 @@ namespace {
(void) llvm::createDeadCodeEliminationPass();
(void) llvm::createDeadInstEliminationPass();
(void) llvm::createDeadStoreEliminationPass();
- (void) llvm::createDeadTypeEliminationPass();
(void) llvm::createDomOnlyPrinterPass();
(void) llvm::createDomPrinterPass();
(void) llvm::createDomOnlyViewerPass();
@@ -92,11 +91,16 @@ namespace {
(void) llvm::createLoopUnswitchPass();
(void) llvm::createLoopIdiomPass();
(void) llvm::createLoopRotatePass();
+ (void) llvm::createLowerExpectIntrinsicPass();
(void) llvm::createLowerInvokePass();
(void) llvm::createLowerSetJmpPass();
(void) llvm::createLowerSwitchPass();
(void) llvm::createNoAAPass();
(void) llvm::createNoProfileInfoPass();
+ (void) llvm::createObjCARCAliasAnalysisPass();
+ (void) llvm::createObjCARCExpandPass();
+ (void) llvm::createObjCARCContractPass();
+ (void) llvm::createObjCARCOptPass();
(void) llvm::createProfileEstimatorPass();
(void) llvm::createProfileVerifierPass();
(void) llvm::createPathProfileVerifierPass();
diff --git a/include/llvm/MC/MCAsmInfo.h b/include/llvm/MC/MCAsmInfo.h
index 775d22b..41c1717 100644
--- a/include/llvm/MC/MCAsmInfo.h
+++ b/include/llvm/MC/MCAsmInfo.h
@@ -37,6 +37,18 @@ namespace llvm {
//===------------------------------------------------------------------===//
// Properties to be set by the target writer, used to configure asm printer.
//
+
+ /// PointerSize - Pointer size in bytes.
+ /// Default is 4.
+ unsigned PointerSize;
+
+ /// IsLittleEndian - True if target is little endian.
+ /// Default is true.
+ bool IsLittleEndian;
+
+ /// StackGrowsUp - True if the target stack grows up.
+ /// Default is false.
+ bool StackGrowsUp;
/// HasSubsectionsViaSymbols - True if this target has the MachO
/// .subsections_via_symbols directive.
@@ -284,6 +296,10 @@ namespace llvm {
// use EmitLabelOffsetDifference.
bool DwarfUsesLabelOffsetForRanges;
+ /// DwarfRegNumForCFI - True if dwarf register numbers are printed
+ /// instead of symbolic register names in .cfi_* directives.
+ bool DwarfRegNumForCFI; // Defaults to false;
+
//===--- CBE Asm Translation Table -----------------------------------===//
const char *const *AsmTransCBE; // Defaults to empty
@@ -296,6 +312,21 @@ namespace llvm {
static unsigned getSLEB128Size(int Value);
static unsigned getULEB128Size(unsigned Value);
+ /// getPointerSize - Get the pointer size in bytes.
+ unsigned getPointerSize() const {
+ return PointerSize;
+ }
+
+ /// isLittleEndian - True if the target is little endian.
+ bool isLittleEndian() const {
+ return IsLittleEndian;
+ }
+
+ /// isStackGrowthDirectionUp - True if the target stack grows up.
+ bool isStackGrowthDirectionUp() const {
+ return StackGrowsUp;
+ }
+
bool hasSubsectionsViaSymbols() const { return HasSubsectionsViaSymbols; }
// Data directive accessors.
@@ -475,6 +506,9 @@ namespace llvm {
bool doesDwarfUsesLabelOffsetForRanges() const {
return DwarfUsesLabelOffsetForRanges;
}
+ bool useDwarfRegNumForCFI() const {
+ return DwarfRegNumForCFI;
+ }
const char *const *getAsmCBE() const {
return AsmTransCBE;
}
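
These additions let MC-layer clients answer basic layout questions without
reaching for TargetData. A minimal sketch:

    #include "llvm/MC/MCAsmInfo.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void describeTarget(const MCAsmInfo &MAI, raw_ostream &OS) {
      OS << "pointer size: " << MAI.getPointerSize() << " bytes\n"
         << "endianness:   " << (MAI.isLittleEndian() ? "little" : "big") << '\n'
         << "stack grows:  "
         << (MAI.isStackGrowthDirectionUp() ? "up" : "down") << '\n';
    }
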
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
index 070089e..43a9ce6 100644
--- a/include/llvm/MC/MCContext.h
+++ b/include/llvm/MC/MCContext.h
@@ -39,6 +39,9 @@ namespace llvm {
class MCContext {
MCContext(const MCContext&); // DO NOT IMPLEMENT
MCContext &operator=(const MCContext&); // DO NOT IMPLEMENT
+ public:
+ typedef StringMap<MCSymbol*, BumpPtrAllocator&> SymbolTable;
+ private:
/// The MCAsmInfo for this target.
const MCAsmInfo &MAI;
@@ -52,7 +55,7 @@ namespace llvm {
BumpPtrAllocator Allocator;
/// Symbols - Bindings of names to symbols.
- StringMap<MCSymbol*, BumpPtrAllocator&> Symbols;
+ SymbolTable Symbols;
/// UsedNames - Keeps track of names that were used for both declared
/// and artificial symbols.
@@ -142,6 +145,14 @@ namespace llvm {
/// LookupSymbol - Get the symbol for \p Name, or null.
MCSymbol *LookupSymbol(StringRef Name) const;
+ /// getSymbols - Get a reference to the symbol table for clients that
+ /// want to, for example, iterate over all symbols. 'const' because we
+ /// still want any modifications to the table itself to use the MCContext
+ /// APIs.
+ const SymbolTable &getSymbols() const {
+ return Symbols;
+ }
+
/// @}
/// @name Section Management
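
The typedef plus the const accessor make whole-table iteration straightforward.
A sketch (listSymbolNames is a hypothetical helper):

    #include "llvm/MC/MCContext.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void listSymbolNames(const MCContext &Ctx, raw_ostream &OS) {
      const MCContext::SymbolTable &Tab = Ctx.getSymbols();
      for (MCContext::SymbolTable::const_iterator I = Tab.begin(),
             E = Tab.end(); I != E; ++I)
        OS << I->getKey() << '\n';  // the StringMap key is the symbol name
    }
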
diff --git a/include/llvm/Target/TargetInstrDesc.h b/include/llvm/MC/MCInstrDesc.h
index 6e20e8a..4996914 100644
--- a/include/llvm/Target/TargetInstrDesc.h
+++ b/include/llvm/MC/MCInstrDesc.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/TargetInstrDesc.h - Instruction Descriptors -*- C++ -*-===//
+//===-- llvm/MC/MCInstrDesc.h - Instruction Descriptors --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,26 +7,23 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the TargetOperandInfo and TargetInstrDesc classes, which
+// This file defines the MCOperandInfo and MCInstrDesc classes, which
// are used to describe target instructions and their operands.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_TARGETINSTRDESC_H
-#define LLVM_TARGET_TARGETINSTRDESC_H
+#ifndef LLVM_MC_MCINSTRDESC_H
+#define LLVM_MC_MCINSTRDESC_H
#include "llvm/Support/DataTypes.h"
namespace llvm {
-class TargetRegisterClass;
-class TargetRegisterInfo;
-
//===----------------------------------------------------------------------===//
// Machine Operand Flags and Description
//===----------------------------------------------------------------------===//
-namespace TOI {
+namespace MCOI {
// Operand constraints
enum OperandConstraint {
TIED_TO = 0, // Must be allocated the same register as.
@@ -34,54 +31,57 @@ namespace TOI {
};
/// OperandFlags - These are flags set on operands, but should be considered
- /// private, all access should go through the TargetOperandInfo accessors.
+ /// private; all access should go through the MCOperandInfo accessors.
/// See the accessors for a description of what these are.
enum OperandFlags {
LookupPtrRegClass = 0,
Predicate,
OptionalDef
};
+
+ /// Operand Type - Operands are tagged with one of the values of this enum.
+ enum OperandType {
+ OPERAND_UNKNOWN,
+ OPERAND_IMMEDIATE,
+ OPERAND_REGISTER,
+ OPERAND_MEMORY,
+ OPERAND_PCREL
+ };
}
-/// TargetOperandInfo - This holds information about one operand of a machine
+/// MCOperandInfo - This holds information about one operand of a machine
/// instruction, indicating the register class for register operands, etc.
///
-class TargetOperandInfo {
+class MCOperandInfo {
public:
/// RegClass - This specifies the register class enumeration of the operand
/// if the operand is a register. If isLookupPtrRegClass is set, then this is
/// an index that is passed to TargetRegisterInfo::getPointerRegClass(x) to
/// get a dynamic register class.
- ///
- /// NOTE: This member should be considered to be private, all access should go
- /// through "getRegClass(TRI)" below.
short RegClass;
- /// Flags - These are flags from the TOI::OperandFlags enum.
+ /// Flags - These are flags from the MCOI::OperandFlags enum.
unsigned short Flags;
/// Lower 16 bits are used to specify which constraints are set. The higher 16
/// bits are used to specify the value of constraints (4 bits each).
unsigned Constraints;
+
+ /// OperandType - Information about the type of the operand.
+ MCOI::OperandType OperandType;
/// Currently no other information.
- /// getRegClass - Get the register class for the operand, handling resolution
- /// of "symbolic" pointer register classes etc. If this is not a register
- /// operand, this returns null.
- const TargetRegisterClass *getRegClass(const TargetRegisterInfo *TRI) const;
-
-
/// isLookupPtrRegClass - Set if this operand is a pointer value and it
/// requires a callback to look up its register class.
- bool isLookupPtrRegClass() const { return Flags&(1 <<TOI::LookupPtrRegClass);}
+ bool isLookupPtrRegClass() const { return Flags&(1 <<MCOI::LookupPtrRegClass);}
/// isPredicate - Set if this is one of the operands that make up
/// the predicate operand that controls an isPredicable() instruction.
- bool isPredicate() const { return Flags & (1 << TOI::Predicate); }
+ bool isPredicate() const { return Flags & (1 << MCOI::Predicate); }
/// isOptionalDef - Set if this operand is an optional def.
///
- bool isOptionalDef() const { return Flags & (1 << TOI::OptionalDef); }
+ bool isOptionalDef() const { return Flags & (1 << MCOI::OptionalDef); }
};
@@ -89,11 +89,11 @@ public:
// Machine Instruction Flags and Description
//===----------------------------------------------------------------------===//
-/// TargetInstrDesc flags - These should be considered private to the
-/// implementation of the TargetInstrDesc class. Clients should use the
-/// predicate methods on TargetInstrDesc, not use these directly. These
-/// all correspond to bitfields in the TargetInstrDesc::Flags field.
-namespace TID {
+/// MCInstrDesc flags - These should be considered private to the
+/// implementation of the MCInstrDesc class. Clients should use the predicate
+/// methods on MCInstrDesc, not use these directly. These all correspond to
+/// bitfields in the MCInstrDesc::Flags field.
+namespace MCID {
enum {
Variadic = 0,
HasOptionalDef,
@@ -123,29 +123,29 @@ namespace TID {
};
}
-/// TargetInstrDesc - Describe properties that are true of each
-/// instruction in the target description file. This captures information about
-/// side effects, register use and many other things. There is one instance of
-/// this struct for each target instruction class, and the MachineInstr class
-/// points to this struct directly to describe itself.
-class TargetInstrDesc {
+/// MCInstrDesc - Describe properties that are true of each instruction in the
+/// target description file. This captures information about side effects,
+/// register use and many other things. There is one instance of this struct
+/// for each target instruction class, and the MachineInstr class points to
+/// this struct directly to describe itself.
+class MCInstrDesc {
public:
unsigned short Opcode; // The opcode number
unsigned short NumOperands; // Num of args (may be more if variable_ops)
unsigned short NumDefs; // Num of args that are definitions
unsigned short SchedClass; // enum identifying instr sched class
+ unsigned short Size; // Number of bytes in encoding.
const char * Name; // Name of the instruction record in td file
unsigned Flags; // Flags identifying machine instr class
uint64_t TSFlags; // Target Specific Flag values
const unsigned *ImplicitUses; // Registers implicitly read by this instr
const unsigned *ImplicitDefs; // Registers implicitly defined by this instr
- const TargetRegisterClass **RCBarriers; // Reg classes completely "clobbered"
- const TargetOperandInfo *OpInfo; // 'NumOperands' entries about operands
+ const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands
/// getOperandConstraint - Returns the value of the specific constraint if
/// it is set. Returns -1 if it is not set.
int getOperandConstraint(unsigned OpNum,
- TOI::OperandConstraint Constraint) const {
+ MCOI::OperandConstraint Constraint) const {
if (OpNum < NumOperands &&
(OpInfo[OpNum].Constraints & (1 << Constraint))) {
unsigned Pos = 16 + Constraint * 4;
@@ -154,12 +154,6 @@ public:
return -1;
}
- /// getRegClass - Returns the register class constraint for OpNum, or NULL.
- const TargetRegisterClass *getRegClass(unsigned OpNum,
- const TargetRegisterInfo *TRI) const {
- return OpNum < NumOperands ? OpInfo[OpNum].getRegClass(TRI) : 0;
- }
-
/// getOpcode - Return the opcode number for this descriptor.
unsigned getOpcode() const {
return Opcode;
@@ -193,13 +187,13 @@ public:
/// operands but before the implicit definitions and uses (if any are
/// present).
bool isVariadic() const {
- return Flags & (1 << TID::Variadic);
+ return Flags & (1 << MCID::Variadic);
}
/// hasOptionalDef - Set if this instruction has an optional definition, e.g.
/// ARM instructions which can set condition code if 's' bit is set.
bool hasOptionalDef() const {
- return Flags & (1 << TID::HasOptionalDef);
+ return Flags & (1 << MCID::HasOptionalDef);
}
/// getImplicitUses - Return a list of registers that are potentially
@@ -214,7 +208,7 @@ public:
const unsigned *getImplicitUses() const {
return ImplicitUses;
}
-
+
/// getNumImplicitUses - Return the number of implicit uses this instruction
/// has.
unsigned getNumImplicitUses() const {
@@ -223,8 +217,7 @@ public:
for (; ImplicitUses[i]; ++i) /*empty*/;
return i;
}
-
-
+
/// getImplicitDefs - Return a list of registers that are potentially
/// written by any instance of this machine instruction. For example, on X86,
/// many instructions implicitly set the flags register. In this case, they
@@ -266,17 +259,6 @@ public:
return false;
}
- /// getRegClassBarriers - Return a list of register classes that are
- /// completely clobbered by this machine instruction. For example, on X86
- /// the call instructions will completely clobber all the registers in the
- /// fp stack and XMM classes.
- ///
- /// This method returns null if the instruction doesn't completely clobber
- /// any register class.
- const TargetRegisterClass **getRegClassBarriers() const {
- return RCBarriers;
- }
-
/// getSchedClass - Return the scheduling class for this instruction. The
/// scheduling class is an index into the InstrItineraryData table. This
/// returns zero if there is no known scheduling information for the
@@ -286,19 +268,25 @@ public:
return SchedClass;
}
+ /// getSize - Return the number of bytes in the encoding of this instruction,
+ /// or zero if the encoding size cannot be known from the opcode.
+ unsigned getSize() const {
+ return Size;
+ }
+
bool isReturn() const {
- return Flags & (1 << TID::Return);
+ return Flags & (1 << MCID::Return);
}
bool isCall() const {
- return Flags & (1 << TID::Call);
+ return Flags & (1 << MCID::Call);
}
/// isBarrier - Returns true if the specified instruction stops control flow
/// from executing the instruction immediately following it. Examples include
/// unconditional branches and return instructions.
bool isBarrier() const {
- return Flags & (1 << TID::Barrier);
+ return Flags & (1 << MCID::Barrier);
}
/// isTerminator - Returns true if this instruction part of the terminator for
@@ -308,7 +296,7 @@ public:
/// Various passes use this to insert code into the bottom of a basic block,
/// but before control flow occurs.
bool isTerminator() const {
- return Flags & (1 << TID::Terminator);
+ return Flags & (1 << MCID::Terminator);
}
/// isBranch - Returns true if this is a conditional, unconditional, or
@@ -316,13 +304,13 @@ public:
/// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to
/// get more information.
bool isBranch() const {
- return Flags & (1 << TID::Branch);
+ return Flags & (1 << MCID::Branch);
}
/// isIndirectBranch - Return true if this is an indirect branch, such as a
/// branch through a register.
bool isIndirectBranch() const {
- return Flags & (1 << TID::IndirectBranch);
+ return Flags & (1 << MCID::IndirectBranch);
}
/// isConditionalBranch - Return true if this is a branch which may fall
@@ -346,37 +334,37 @@ public:
/// values. There are various methods in TargetInstrInfo that can be used to
/// control and modify the predicate in this instruction.
bool isPredicable() const {
- return Flags & (1 << TID::Predicable);
+ return Flags & (1 << MCID::Predicable);
}
/// isCompare - Return true if this instruction is a comparison.
bool isCompare() const {
- return Flags & (1 << TID::Compare);
+ return Flags & (1 << MCID::Compare);
}
/// isMoveImmediate - Return true if this instruction is a move immediate
/// (including conditional moves) instruction.
bool isMoveImmediate() const {
- return Flags & (1 << TID::MoveImm);
+ return Flags & (1 << MCID::MoveImm);
}
/// isBitcast - Return true if this instruction is a bitcast instruction.
///
bool isBitcast() const {
- return Flags & (1 << TID::Bitcast);
+ return Flags & (1 << MCID::Bitcast);
}
/// isNotDuplicable - Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has a unique labels attached
/// to it, duplicating it would cause multiple definition errors.
bool isNotDuplicable() const {
- return Flags & (1 << TID::NotDuplicable);
+ return Flags & (1 << MCID::NotDuplicable);
}
/// hasDelaySlot - Returns true if the specified instruction has a delay slot
/// which must be filled by the code generator.
bool hasDelaySlot() const {
- return Flags & (1 << TID::DelaySlot);
+ return Flags & (1 << MCID::DelaySlot);
}
/// canFoldAsLoad - Return true for instructions that can be folded as
@@ -388,7 +376,7 @@ public:
/// This should only be set on instructions that return a value in their
/// only virtual register definition.
bool canFoldAsLoad() const {
- return Flags & (1 << TID::FoldableAsLoad);
+ return Flags & (1 << MCID::FoldableAsLoad);
}
//===--------------------------------------------------------------------===//
@@ -399,7 +387,7 @@ public:
/// Instructions with this flag set are not necessarily simple load
/// instructions, they may load a value and modify it, for example.
bool mayLoad() const {
- return Flags & (1 << TID::MayLoad);
+ return Flags & (1 << MCID::MayLoad);
}
@@ -408,7 +396,7 @@ public:
/// instructions, they may store a modified value based on their operands, or
/// may not actually modify anything, for example.
bool mayStore() const {
- return Flags & (1 << TID::MayStore);
+ return Flags & (1 << MCID::MayStore);
}
/// hasUnmodeledSideEffects - Return true if this instruction has side
@@ -425,7 +413,7 @@ public:
/// LLVM, etc.
///
bool hasUnmodeledSideEffects() const {
- return Flags & (1 << TID::UnmodeledSideEffects);
+ return Flags & (1 << MCID::UnmodeledSideEffects);
}
//===--------------------------------------------------------------------===//
@@ -443,7 +431,7 @@ public:
/// Also note that some instructions require non-trivial modification to
/// commute them.
bool isCommutable() const {
- return Flags & (1 << TID::Commutable);
+ return Flags & (1 << MCID::Commutable);
}
/// isConvertibleTo3Addr - Return true if this is a 2-address instruction
@@ -461,7 +449,7 @@ public:
/// instruction (e.g. shl reg, 4 on x86).
///
bool isConvertibleTo3Addr() const {
- return Flags & (1 << TID::ConvertibleTo3Addr);
+ return Flags & (1 << MCID::ConvertibleTo3Addr);
}
/// usesCustomInsertionHook - Return true if this instruction requires
@@ -473,7 +461,7 @@ public:
/// If this is true, the TargetLoweringInfo::InsertAtEndOfBasicBlock method
/// is used to insert this into the MachineBasicBlock.
bool usesCustomInsertionHook() const {
- return Flags & (1 << TID::UsesCustomInserter);
+ return Flags & (1 << MCID::UsesCustomInserter);
}
/// isRematerializable - Returns true if this instruction is a candidate for
@@ -481,7 +469,7 @@ public:
/// flag is set, the isReallyTriviallyReMaterializable() method is called to
/// verify the instruction is really rematable.
bool isRematerializable() const {
- return Flags & (1 << TID::Rematerializable);
+ return Flags & (1 << MCID::Rematerializable);
}
/// isAsCheapAsAMove - Returns true if this instruction has the same cost (or
@@ -491,7 +479,7 @@ public:
/// more than moving the instruction into the appropriate register. Note, we
/// are not marking copies from and to the same register class with this flag.
bool isAsCheapAsAMove() const {
- return Flags & (1 << TID::CheapAsAMove);
+ return Flags & (1 << MCID::CheapAsAMove);
}
/// hasExtraSrcRegAllocReq - Returns true if this instruction source operands
@@ -501,7 +489,7 @@ public:
/// Post-register allocation passes should not attempt to change allocations
/// for sources of instructions with this flag.
bool hasExtraSrcRegAllocReq() const {
- return Flags & (1 << TID::ExtraSrcRegAllocReq);
+ return Flags & (1 << MCID::ExtraSrcRegAllocReq);
}
/// hasExtraDefRegAllocReq - Returns true if this instruction def operands
@@ -511,7 +499,7 @@ public:
/// Post-register allocation passes should not attempt to change allocations
/// for definitions of instructions with this flag.
bool hasExtraDefRegAllocReq() const {
- return Flags & (1 << TID::ExtraDefRegAllocReq);
+ return Flags & (1 << MCID::ExtraDefRegAllocReq);
}
};
diff --git a/include/llvm/MC/MCInstrInfo.h b/include/llvm/MC/MCInstrInfo.h
new file mode 100644
index 0000000..a63e5fa
--- /dev/null
+++ b/include/llvm/MC/MCInstrInfo.h
@@ -0,0 +1,51 @@
+//===-- llvm/MC/MCInstrInfo.h - Target Instruction Info ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the target machine instruction set.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCINSTRINFO_H
+#define LLVM_MC_MCINSTRINFO_H
+
+#include "llvm/MC/MCInstrDesc.h"
+#include <cassert>
+
+namespace llvm {
+
+//---------------------------------------------------------------------------
+///
+/// MCInstrInfo - Interface to description of machine instruction set
+///
+class MCInstrInfo {
+ const MCInstrDesc *Desc; // Raw array to allow static init'n
+ unsigned NumOpcodes; // Number of entries in the desc array
+
+public:
+ /// InitMCInstrInfo - Initialize MCInstrInfo, called by TableGen
+ /// auto-generated routines. *DO NOT USE*.
+ void InitMCInstrInfo(const MCInstrDesc *D, unsigned NO) {
+ Desc = D;
+ NumOpcodes = NO;
+ }
+
+ unsigned getNumOpcodes() const { return NumOpcodes; }
+
+ /// get - Return the machine instruction descriptor that corresponds to the
+ /// specified instruction opcode.
+ ///
+ const MCInstrDesc &get(unsigned Opcode) const {
+ assert(Opcode < NumOpcodes && "Invalid opcode!");
+ return Desc[Opcode];
+ }
+};
+
+} // End llvm namespace
+
+#endif
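
Clients that only need static per-opcode facts can now stay entirely in the MC
layer. A sketch combining the new wrapper with the renamed flag predicates
(isSideEffectFree is a hypothetical helper):

    #include "llvm/MC/MCInstrInfo.h"
    using namespace llvm;

    static bool isSideEffectFree(const MCInstrInfo &MII, unsigned Opcode) {
      const MCInstrDesc &Desc = MII.get(Opcode);
      // The MCID:: flag names replace the old TID:: ones one-for-one.
      return !Desc.mayStore() && !Desc.isCall() &&
             !Desc.hasUnmodeledSideEffects();
    }
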
diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/MC/MCInstrItineraries.h
index 6011402..e942892 100644
--- a/include/llvm/Target/TargetInstrItineraries.h
+++ b/include/llvm/MC/MCInstrItineraries.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/TargetInstrItineraries.h - Scheduling -------*- C++ -*-===//
+//===-- llvm/MC/MCInstrItineraries.h - Scheduling ---------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,8 +13,8 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_TARGETINSTRITINERARIES_H
-#define LLVM_TARGET_TARGETINSTRITINERARIES_H
+#ifndef LLVM_MC_MCINSTRITINERARIES_H
+#define LLVM_MC_MCINSTRITINERARIES_H
#include <algorithm>
diff --git a/include/llvm/MC/MCMachObjectWriter.h b/include/llvm/MC/MCMachObjectWriter.h
index ec51031..9bb598f 100644
--- a/include/llvm/MC/MCMachObjectWriter.h
+++ b/include/llvm/MC/MCMachObjectWriter.h
@@ -10,11 +10,20 @@
#ifndef LLVM_MC_MCMACHOBJECTWRITER_H
#define LLVM_MC_MCMACHOBJECTWRITER_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/Object/MachOFormat.h"
#include "llvm/Support/DataTypes.h"
+#include <vector>
namespace llvm {
+class MCSectionData;
+class MachObjectWriter;
+
class MCMachObjectTargetWriter {
const unsigned Is64Bit : 1;
const uint32_t CPUType;
@@ -48,8 +57,191 @@ public:
}
/// @}
+
+ /// @name API
+ /// @{
+
+ virtual void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) = 0;
+
+ /// @}
};
+class MachObjectWriter : public MCObjectWriter {
+ /// MachSymbolData - Helper struct for containing some precomputed information
+ /// on symbols.
+ struct MachSymbolData {
+ MCSymbolData *SymbolData;
+ uint64_t StringIndex;
+ uint8_t SectionIndex;
+
+ // Support lexicographic sorting.
+ bool operator<(const MachSymbolData &RHS) const;
+ };
+
+ /// The target specific Mach-O writer instance.
+ llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter;
+
+ /// @name Relocation Data
+ /// @{
+
+ llvm::DenseMap<const MCSectionData*,
+ std::vector<object::macho::RelocationEntry> > Relocations;
+ llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
+
+ /// @}
+ /// @name Symbol Table Data
+ /// @{
+
+ SmallString<256> StringTable;
+ std::vector<MachSymbolData> LocalSymbolData;
+ std::vector<MachSymbolData> ExternalSymbolData;
+ std::vector<MachSymbolData> UndefinedSymbolData;
+
+ /// @}
+
+public:
+ MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS,
+ bool _IsLittleEndian)
+ : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) {
+ }
+
+ /// @name Utility Methods
+ /// @{
+
+ bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
+
+ SectionAddrMap SectionAddress;
+
+ SectionAddrMap &getSectionAddressMap() { return SectionAddress; }
+
+ uint64_t getSectionAddress(const MCSectionData* SD) const {
+ return SectionAddress.lookup(SD);
+ }
+ uint64_t getSymbolAddress(const MCSymbolData* SD,
+ const MCAsmLayout &Layout) const;
+
+ uint64_t getFragmentAddress(const MCFragment *Fragment,
+ const MCAsmLayout &Layout) const;
+
+ uint64_t getPaddingSize(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const;
+
+ bool doesSymbolRequireExternRelocation(const MCSymbolData *SD);
+
+ /// @}
+
+ /// @name Target Writer Proxy Accessors
+ /// @{
+
+ bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
+ bool isARM() const {
+ uint32_t CPUType = TargetObjectWriter->getCPUType() &
+ ~object::mach::CTFM_ArchMask;
+ return CPUType == object::mach::CTM_ARM;
+ }
+
+ /// @}
+
+ void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
+ bool SubsectionsViaSymbols);
+
+ /// WriteSegmentLoadCommand - Write a segment load command.
+ ///
+ /// \arg NumSections - The number of sections in this segment.
+ /// \arg SectionDataSize - The total size of the sections.
+ void WriteSegmentLoadCommand(unsigned NumSections,
+ uint64_t VMSize,
+ uint64_t SectionDataStartOffset,
+ uint64_t SectionDataSize);
+
+ void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCSectionData &SD, uint64_t FileOffset,
+ uint64_t RelocationsStart, unsigned NumRelocations);
+
+ void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
+ uint32_t StringTableOffset,
+ uint32_t StringTableSize);
+
+ void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+ uint32_t NumLocalSymbols,
+ uint32_t FirstExternalSymbol,
+ uint32_t NumExternalSymbols,
+ uint32_t FirstUndefinedSymbol,
+ uint32_t NumUndefinedSymbols,
+ uint32_t IndirectSymbolOffset,
+ uint32_t NumIndirectSymbols);
+
+ void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout);
+
+ // FIXME: We really need to improve the relocation validation. Basically, we
+ // want to implement a separate computation which evaluates the relocation
+ // entry as the linker would, and verifies that the resultant fixup value is
+ // exactly what the encoder wanted. This will catch several classes of
+ // problems:
+ //
+ // - Relocation entry bugs, the two algorithms are unlikely to have the same
+ // exact bug.
+ //
+ // - Relaxation issues, where we forget to relax something.
+ //
+ // - Input errors, where something cannot be correctly encoded. 'as' allows
+ // these through in many cases.
+
+ void addRelocation(const MCSectionData *SD,
+ object::macho::RelocationEntry &MRE) {
+ Relocations[SD].push_back(MRE);
+ }
+
+ void RecordScatteredRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+
+ void RecordTLVPRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
+
+ void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+
+ void BindIndirectSymbols(MCAssembler &Asm);
+
+ /// ComputeSymbolTable - Compute the symbol table data
+ ///
+ /// \param StringTable [out] - The string table data.
+ /// \param LocalSymbolData, ExternalSymbolData, UndefinedSymbolData [out] -
+ /// The partitioned, lexicographically sorted symbol table entries.
+ void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+ std::vector<MachSymbolData> &LocalSymbolData,
+ std::vector<MachSymbolData> &ExternalSymbolData,
+ std::vector<MachSymbolData> &UndefinedSymbolData);
+
+ void computeSectionAddresses(const MCAssembler &Asm,
+ const MCAsmLayout &Layout);
+
+ void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout);
+
+ virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const;
+
+ void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout);
+};
+
+
/// \brief Construct a new Mach-O writer instance.
///
/// This routine takes ownership of the target writer subclass.
diff --git a/include/llvm/MC/MCObjectStreamer.h b/include/llvm/MC/MCObjectStreamer.h
index 8b0d87a..a89933b 100644
--- a/include/llvm/MC/MCObjectStreamer.h
+++ b/include/llvm/MC/MCObjectStreamer.h
@@ -73,7 +73,8 @@ public:
virtual void EmitValueToOffset(const MCExpr *Offset, unsigned char Value);
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label);
+ const MCSymbol *Label,
+ unsigned PointerSize);
virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
const MCSymbol *Label);
virtual void Finish();
diff --git a/include/llvm/MC/MCParser/MCParsedAsmOperand.h b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
index 91f5773..2556e5f 100644
--- a/include/llvm/MC/MCParser/MCParsedAsmOperand.h
+++ b/include/llvm/MC/MCParser/MCParsedAsmOperand.h
@@ -28,10 +28,20 @@ public:
/// getEndLoc - Get the location of the last token of this operand.
virtual SMLoc getEndLoc() const = 0;
- /// dump - Print a debug representation of the operand to the given stream.
- virtual void dump(raw_ostream &OS) const = 0;
+ /// print - Print a debug representation of the operand to the given stream.
+ virtual void print(raw_ostream &OS) const = 0;
+ /// dump - Print to the debug stream.
+ virtual void dump() const;
};
+//===----------------------------------------------------------------------===//
+// Debugging Support
+
+inline raw_ostream& operator<<(raw_ostream &OS, const MCParsedAsmOperand &MO) {
+ MO.print(OS);
+ return OS;
+}
+
} // end namespace llvm.
#endif
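
Under the split, targets implement print() and inherit dump() and streaming.
A minimal sketch of a conforming operand (the struct and its fields are
placeholders):

    #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    namespace {
    struct DummyOperand : public MCParsedAsmOperand {
      SMLoc Start, End;
      virtual SMLoc getStartLoc() const { return Start; }
      virtual SMLoc getEndLoc() const { return End; }
      virtual void print(raw_ostream &OS) const { OS << "<dummy>"; }
    };
    } // end anonymous namespace

    // Usage: errs() << Op; now routes through print() via operator<<.
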
diff --git a/include/llvm/MC/MCRegisterInfo.h b/include/llvm/MC/MCRegisterInfo.h
new file mode 100644
index 0000000..caf98bb
--- /dev/null
+++ b/include/llvm/MC/MCRegisterInfo.h
@@ -0,0 +1,129 @@
+//===-- llvm/MC/MCRegisterInfo.h - Target Register Description --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes an abstract interface used to get information about a
+// target machine's register file. This information is used for a variety of
+// purposes, especially register allocation.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCREGISTERINFO_H
+#define LLVM_MC_MCREGISTERINFO_H
+
+#include <cassert>
+
+namespace llvm {
+
+/// MCRegisterDesc - This record contains all of the information known about
+/// a particular register. The Overlaps field contains a pointer to a zero
+/// terminated array of registers that this register aliases, starting with
+/// itself. This is needed for architectures like X86 which have AL alias AX
+/// alias EAX. The SubRegs field is a zero terminated array of registers that
+/// are sub-registers of the specific register, e.g. AL, AH are sub-registers of
+/// AX. The SuperRegs field is a zero terminated array of registers that are
+/// super-registers of the specific register, e.g. RAX, EAX, are super-registers
+/// of AX.
+///
+struct MCRegisterDesc {
+ const char *Name; // Printable name for the reg (for debugging)
+ const unsigned *Overlaps; // Overlapping registers, described above
+ const unsigned *SubRegs; // Sub-register set, described above
+ const unsigned *SuperRegs; // Super-register set, described above
+};
+
+/// MCRegisterInfo base class - We assume that the target defines a static
+/// array of MCRegisterDesc objects that represent all of the machine
+/// registers that the target has. As such, we simply have to track a pointer
+/// to this array so that we can turn register number into a register
+/// descriptor.
+///
+/// Note this class is designed to be a base class of TargetRegisterInfo, which
+/// is the interface used by codegen. However, specific targets *should never*
+/// specialize this class. MCRegisterInfo should only contain getters to access
+/// TableGen generated physical register data. It must not be extended with
+/// virtual methods.
+///
+class MCRegisterInfo {
+private:
+ const MCRegisterDesc *Desc; // Pointer to the descriptor array
+ unsigned NumRegs; // Number of entries in the array
+
+public:
+ /// InitMCRegisterInfo - Initialize MCRegisterInfo, called by TableGen
+ /// auto-generated routines. *DO NOT USE*.
+ void InitMCRegisterInfo(const MCRegisterDesc *D, unsigned NR) {
+ Desc = D;
+ NumRegs = NR;
+ }
+
+ const MCRegisterDesc &operator[](unsigned RegNo) const {
+ assert(RegNo < NumRegs &&
+ "Attempting to access record for invalid register number!");
+ return Desc[RegNo];
+ }
+
+ /// Provide a get method, equivalent to [], but more useful if we have a
+ /// pointer to this object.
+ ///
+ const MCRegisterDesc &get(unsigned RegNo) const {
+ return operator[](RegNo);
+ }
+
+ /// getAliasSet - Return the set of registers aliased by the specified
+ /// register, or a null list if there are none. The list returned is zero
+ /// terminated.
+ ///
+ const unsigned *getAliasSet(unsigned RegNo) const {
+ // The Overlaps set always begins with Reg itself.
+ return get(RegNo).Overlaps + 1;
+ }
+
+ /// getOverlaps - Return a list of registers that overlap Reg, including
+ /// itself. This is the same as the alias set except Reg is included in the
+ /// list.
+ /// These are exactly the registers in { x | regsOverlap(x, Reg) }.
+ ///
+ const unsigned *getOverlaps(unsigned RegNo) const {
+ return get(RegNo).Overlaps;
+ }
+
+ /// getSubRegisters - Return the list of registers that are sub-registers of
+ /// the specified register, or a null list if there are none. The list
+ /// returned is zero terminated and sorted according to super-sub register
+ /// relations. e.g. X86::RAX's sub-register list is EAX, AX, AL, AH.
+ ///
+ const unsigned *getSubRegisters(unsigned RegNo) const {
+ return get(RegNo).SubRegs;
+ }
+
+ /// getSuperRegisters - Return the list of registers that are super-registers
+ /// of the specified register, or a null list if there are none. The list
+ /// returned is zero terminated and sorted according to super-sub register
+ /// relations. e.g. X86::AL's super-register list is AX, EAX, RAX.
+ ///
+ const unsigned *getSuperRegisters(unsigned RegNo) const {
+ return get(RegNo).SuperRegs;
+ }
+
+ /// getName - Return the human-readable symbolic target-specific name for the
+ /// specified physical register.
+ const char *getName(unsigned RegNo) const {
+ return get(RegNo).Name;
+ }
+
+ /// getNumRegs - Return the number of registers this target has (useful for
+ /// sizing arrays holding per register information)
+ unsigned getNumRegs() const {
+ return NumRegs;
+ }
+};
+
+} // End llvm namespace
+
+#endif
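
Because every list is zero-terminated, traversal needs no separate count. A
sketch printing a register's sub-registers (MRI is assumed to have been
initialized by the TableGen-generated routine):

    #include "llvm/MC/MCRegisterInfo.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static void printSubRegs(const MCRegisterInfo &MRI, unsigned Reg,
                             raw_ostream &OS) {
      OS << MRI.getName(Reg) << " sub-registers:";
      for (const unsigned *SR = MRI.getSubRegisters(Reg); *SR; ++SR)
        OS << ' ' << MRI.getName(*SR);
      OS << '\n';
    }
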
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index c05a925..7bdba5f 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -460,7 +460,8 @@ namespace llvm {
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label) = 0;
+ const MCSymbol *Label,
+ unsigned PointerSize) = 0;
virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
const MCSymbol *Label) {
@@ -547,6 +548,9 @@ namespace llvm {
///
/// \param ShowInst - Whether to show the MCInst representation inline with
/// the assembly.
+ ///
+ /// \param DecodeLSDA - If true, emit comments that translate the LSDA into a
+ /// human-readable format. Only usable with CFI.
MCStreamer *createAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
bool isVerboseAsm,
bool useLoc,
diff --git a/include/llvm/MC/MCSubtargetInfo.h b/include/llvm/MC/MCSubtargetInfo.h
new file mode 100644
index 0000000..3b53f20
--- /dev/null
+++ b/include/llvm/MC/MCSubtargetInfo.h
@@ -0,0 +1,79 @@
+//==-- llvm/MC/MCSubtargetInfo.h - Subtarget Information ---------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the subtarget options of a Target machine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSUBTARGETINFO_H
+#define LLVM_MC_MCSUBTARGETINFO_H
+
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include <string>
+
+namespace llvm {
+
+class StringRef;
+
+//===----------------------------------------------------------------------===//
+///
+/// MCSubtargetInfo - Generic base class for all target subtargets.
+///
+class MCSubtargetInfo {
+ std::string TargetTriple; // Target triple
+ const SubtargetFeatureKV *ProcFeatures; // Processor feature list
+ const SubtargetFeatureKV *ProcDesc; // Processor descriptions
+ const SubtargetInfoKV *ProcItins; // Scheduling itineraries
+ const InstrStage *Stages; // Instruction stages
+ const unsigned *OperandCycles; // Operand cycles
+ const unsigned *ForwardingPathes; // Forwarding paths
+ unsigned NumFeatures; // Number of processor features
+ unsigned NumProcs; // Number of processors
+ uint64_t FeatureBits; // Feature bits for current CPU + FS
+
+public:
+ void InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
+ const SubtargetFeatureKV *PF,
+ const SubtargetFeatureKV *PD,
+ const SubtargetInfoKV *PI, const InstrStage *IS,
+ const unsigned *OC, const unsigned *FP,
+ unsigned NF, unsigned NP);
+
+ /// getTargetTriple - Return the target triple string.
+ StringRef getTargetTriple() const {
+ return TargetTriple;
+ }
+
+ /// getFeatureBits - Return the feature bits.
+ ///
+ uint64_t getFeatureBits() const {
+ return FeatureBits;
+ }
+
+ /// ReInitMCSubtargetInfo - Change the CPU (optionally supplemented with a
+ /// feature string), then recompute and return the feature bits.
+ uint64_t ReInitMCSubtargetInfo(StringRef CPU, StringRef FS);
+
+ /// ToggleFeature - Toggle a feature and return the recomputed feature
+ /// bits. This version does not change the implied bits.
+ uint64_t ToggleFeature(uint64_t FB);
+
+ /// ToggleFeature - Toggle a feature and return the recomputed feature
+ /// bits. This version will also change all implied bits.
+ uint64_t ToggleFeature(StringRef FS);
+
+ /// getInstrItineraryForCPU - Get scheduling itinerary of a CPU.
+ ///
+ InstrItineraryData getInstrItineraryForCPU(StringRef CPU) const;
+};
+
+} // End llvm namespace
+
+#endif
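
A typical client-side feature check is a plain bit test against the current
mask. A minimal sketch (the mask constant would come from the target's
TableGen output):

    #include "llvm/MC/MCSubtargetInfo.h"
    using namespace llvm;

    static bool hasAllFeatures(const MCSubtargetInfo &STI, uint64_t Mask) {
      return (STI.getFeatureBits() & Mask) == Mask;
    }
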
diff --git a/include/llvm/Target/SubtargetFeature.h b/include/llvm/MC/SubtargetFeature.h
index 4213d9b..1a7dc92 100644
--- a/include/llvm/Target/SubtargetFeature.h
+++ b/include/llvm/MC/SubtargetFeature.h
@@ -1,4 +1,4 @@
-//===-- llvm/Target/SubtargetFeature.h - CPU characteristics ----*- C++ -*-===//
+//===-- llvm/MC/SubtargetFeature.h - CPU characteristics --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,17 +15,16 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_SUBTARGETFEATURE_H
-#define LLVM_TARGET_SUBTARGETFEATURE_H
+#ifndef LLVM_MC_SUBTARGETFEATURE_H
+#define LLVM_MC_SUBTARGETFEATURE_H
-#include <string>
#include <vector>
-#include <cstring>
#include "llvm/ADT/Triple.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
class raw_ostream;
+ class StringRef;
//===----------------------------------------------------------------------===//
///
@@ -75,32 +74,30 @@ struct SubtargetInfoKV {
class SubtargetFeatures {
std::vector<std::string> Features; // Subtarget features as a vector
public:
- explicit SubtargetFeatures(const std::string &Initial = std::string());
+ explicit SubtargetFeatures(const StringRef Initial = "");
/// Features string accessors.
std::string getString() const;
- void setString(const std::string &Initial);
-
- /// Set the CPU string. Replaces previous setting. Setting to "" clears CPU.
- void setCPU(const std::string &String);
-
- /// Setting CPU string only if no string is set.
- void setCPUIfNone(const std::string &String);
-
- /// Returns current CPU string.
- const std::string & getCPU() const;
/// Adding Features.
- void AddFeature(const std::string &String, bool IsEnabled = true);
+ void AddFeature(const StringRef String, bool IsEnabled = true);
- /// Get feature bits.
- uint64_t getBits(const SubtargetFeatureKV *CPUTable,
- size_t CPUTableSize,
- const SubtargetFeatureKV *FeatureTable,
+ /// ToggleFeature - Toggle a feature and return the newly updated feature
+ /// bits.
+ uint64_t ToggleFeature(uint64_t Bits, const StringRef String,
+ const SubtargetFeatureKV *FeatureTable,
size_t FeatureTableSize);
+
+ /// Get feature bits of a CPU.
+ uint64_t getFeatureBits(const StringRef CPU,
+ const SubtargetFeatureKV *CPUTable,
+ size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize);
- /// Get info pointer
- void *getInfo(const SubtargetInfoKV *Table, size_t TableSize);
+ /// Get scheduling itinerary of a CPU.
+ void *getItinerary(const StringRef CPU,
+ const SubtargetInfoKV *Table, size_t TableSize);
/// Print feature string.
void print(raw_ostream &OS) const;
@@ -110,8 +107,7 @@ public:
/// Retrieve a formatted string of the default features for the specified
/// target triple.
- void getDefaultSubtargetFeatures(const std::string &CPU,
- const Triple& Triple);
+ void getDefaultSubtargetFeatures(const Triple& Triple);
};
} // End namespace llvm
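
The StringRef-based interface keeps the class a thin builder of
"+feat,-feat" strings. A sketch (the feature names are illustrative, not tied
to any one target):

    #include "llvm/MC/SubtargetFeature.h"
    #include <string>
    using namespace llvm;

    static std::string exampleFeatureString() {
      SubtargetFeatures Features;
      Features.AddFeature("sse2");          // appends "+sse2"
      Features.AddFeature("3dnow", false);  // appends "-3dnow"
      return Features.getString();          // "+sse2,-3dnow"
    }
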
diff --git a/include/llvm/Module.h b/include/llvm/Module.h
index aef8eb8..47d23f3 100644
--- a/include/llvm/Module.h
+++ b/include/llvm/Module.h
@@ -28,6 +28,10 @@ namespace llvm {
class FunctionType;
class GVMaterializer;
class LLVMContext;
+class StructType;
+template<typename T> struct DenseMapInfo;
+template<typename KeyT, typename ValueT,
+ typename KeyInfoT, typename ValueInfoT> class DenseMap;
template<> struct ilist_traits<Function>
: public SymbolTableListTraits<Function, Module> {
@@ -145,7 +149,6 @@ private:
NamedMDListType NamedMDList; ///< The named metadata in the module
std::string GlobalScopeAsm; ///< Inline Asm at global scope.
ValueSymbolTable *ValSymTab; ///< Symbol table for values
- TypeSymbolTable *TypeSymTab; ///< Symbol table for types
OwningPtr<GVMaterializer> Materializer; ///< Used to materialize GlobalValues
std::string ModuleID; ///< Human readable identifier for the module
std::string TargetTriple; ///< Platform target triple Module compiled on
@@ -231,7 +234,7 @@ public:
/// @name Generic Value Accessors
/// @{
- /// getNamedValue - Return the first global value in the module with
+ /// getNamedValue - Return the global value in the module with
/// the specified name, of arbitrary type. This method returns null
/// if a global with the specified name is not found.
GlobalValue *getNamedValue(StringRef Name) const;
@@ -244,6 +247,18 @@ public:
/// custom metadata IDs registered in this LLVMContext.
void getMDKindNames(SmallVectorImpl<StringRef> &Result) const;
+
+ typedef DenseMap<StructType*, unsigned, DenseMapInfo<StructType*>,
+ DenseMapInfo<unsigned> > NumeredTypesMapTy;
+
+ /// findUsedStructTypes - Walk the entire module and find all of the
+ /// struct types that are in use, returning them in a vector.
+ void findUsedStructTypes(std::vector<StructType*> &StructTypes) const;
+
+ /// getTypeByName - Return the type with the specified name, or null if there
+ /// is none by that name.
+ StructType *getTypeByName(StringRef Name) const;
+
/// @}
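
With the type symbol table gone, named types are plain StructTypes looked up
on the Module itself. A hedged sketch combining the two new accessors
("struct.widget" and usesNamedStruct are placeholders):

    #include "llvm/DerivedTypes.h"
    #include "llvm/Module.h"
    #include <vector>
    using namespace llvm;

    static bool usesNamedStruct(Module &M, StringRef Name) {
      StructType *STy = M.getTypeByName(Name);  // e.g. "struct.widget"
      if (!STy)
        return false;
      std::vector<StructType*> Used;
      M.findUsedStructTypes(Used);
      for (unsigned i = 0, e = Used.size(); i != e; ++i)
        if (Used[i] == STy)
          return true;
      return false;
    }
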
/// @name Function Accessors
/// @{
@@ -296,7 +311,7 @@ public:
GlobalVariable *getGlobalVariable(StringRef Name,
bool AllowInternal = false) const;
- /// getNamedGlobal - Return the first global variable in the module with the
+ /// getNamedGlobal - Return the global variable in the module with the
/// specified name, of arbitrary type. This method returns null if a global
/// with the specified name is not found.
GlobalVariable *getNamedGlobal(StringRef Name) const {
@@ -316,7 +331,7 @@ public:
/// @name Global Alias Accessors
/// @{
- /// getNamedAlias - Return the first global alias in the module with the
+ /// getNamedAlias - Return the global alias in the module with the
/// specified name, of arbitrary type. This method returns null if a global
/// with the specified name is not found.
GlobalAlias *getNamedAlias(StringRef Name) const;
@@ -325,12 +340,12 @@ public:
/// @name Named Metadata Accessors
/// @{
- /// getNamedMetadata - Return the first NamedMDNode in the module with the
+ /// getNamedMetadata - Return the NamedMDNode in the module with the
/// specified name. This method returns null if a NamedMDNode with the
/// specified name is not found.
NamedMDNode *getNamedMetadata(const Twine &Name) const;
- /// getOrInsertNamedMetadata - Return the first named MDNode in the module
+ /// getOrInsertNamedMetadata - Return the named MDNode in the module
/// with the specified name. This method returns a new NamedMDNode if a
/// NamedMDNode with the specified name is not found.
NamedMDNode *getOrInsertNamedMetadata(StringRef Name);
@@ -340,23 +355,6 @@ public:
void eraseNamedMetadata(NamedMDNode *NMD);
/// @}
-/// @name Type Accessors
-/// @{
-
- /// addTypeName - Insert an entry in the symbol table mapping Str to Type. If
- /// there is already an entry for this name, true is returned and the symbol
- /// table is not modified.
- bool addTypeName(StringRef Name, const Type *Ty);
-
- /// getTypeName - If there is at least one entry in the symbol table for the
- /// specified type, return it.
- std::string getTypeName(const Type *Ty) const;
-
- /// getTypeByName - Return the type with the specified name in this module, or
- /// null if there is none by that name.
- const Type *getTypeByName(StringRef Name) const;
-
-/// @}
/// @name Materialization
/// @{
@@ -429,41 +427,26 @@ public:
const ValueSymbolTable &getValueSymbolTable() const { return *ValSymTab; }
/// Get the Module's symbol table of global variable and function identifiers.
ValueSymbolTable &getValueSymbolTable() { return *ValSymTab; }
- /// Get the symbol table of types
- const TypeSymbolTable &getTypeSymbolTable() const { return *TypeSymTab; }
- /// Get the Module's symbol table of types
- TypeSymbolTable &getTypeSymbolTable() { return *TypeSymTab; }
/// @}
/// @name Global Variable Iteration
/// @{
- /// Get an iterator to the first global variable
global_iterator global_begin() { return GlobalList.begin(); }
- /// Get a constant iterator to the first global variable
const_global_iterator global_begin() const { return GlobalList.begin(); }
- /// Get an iterator to the last global variable
global_iterator global_end () { return GlobalList.end(); }
- /// Get a constant iterator to the last global variable
const_global_iterator global_end () const { return GlobalList.end(); }
- /// Determine if the list of globals is empty.
bool global_empty() const { return GlobalList.empty(); }
/// @}
/// @name Function Iteration
/// @{
- /// Get an iterator to the first function.
iterator begin() { return FunctionList.begin(); }
- /// Get a constant iterator to the first function.
const_iterator begin() const { return FunctionList.begin(); }
- /// Get an iterator to the last function.
iterator end () { return FunctionList.end(); }
- /// Get a constant iterator to the last function.
const_iterator end () const { return FunctionList.end(); }
- /// Determine how many functions are in the Module's list of functions.
size_t size() const { return FunctionList.size(); }
- /// Determine if the list of functions is empty.
bool empty() const { return FunctionList.empty(); }
/// @}
@@ -487,17 +470,11 @@ public:
/// @name Alias Iteration
/// @{
- /// Get an iterator to the first alias.
alias_iterator alias_begin() { return AliasList.begin(); }
- /// Get a constant iterator to the first alias.
const_alias_iterator alias_begin() const { return AliasList.begin(); }
- /// Get an iterator to the last alias.
alias_iterator alias_end () { return AliasList.end(); }
- /// Get a constant iterator to the last alias.
const_alias_iterator alias_end () const { return AliasList.end(); }
- /// Determine how many aliases are in the Module's list of aliases.
size_t alias_size () const { return AliasList.size(); }
- /// Determine if the list of aliases is empty.
bool alias_empty() const { return AliasList.empty(); }
@@ -505,24 +482,17 @@ public:
/// @name Named Metadata Iteration
/// @{
- /// Get an iterator to the first named metadata.
named_metadata_iterator named_metadata_begin() { return NamedMDList.begin(); }
- /// Get a constant iterator to the first named metadata.
const_named_metadata_iterator named_metadata_begin() const {
return NamedMDList.begin();
}
- /// Get an iterator to the last named metadata.
named_metadata_iterator named_metadata_end() { return NamedMDList.end(); }
- /// Get a constant iterator to the last named metadata.
const_named_metadata_iterator named_metadata_end() const {
return NamedMDList.end();
}
- /// Determine how many NamedMDNodes are in the Module's list of named
- /// metadata.
size_t named_metadata_size() const { return NamedMDList.size(); }
- /// Determine if the list of named metadata is empty.
bool named_metadata_empty() const { return NamedMDList.empty(); }
@@ -530,11 +500,13 @@ public:
/// @name Utility functions for printing and dumping Module objects
/// @{
- /// Print the module to an output stream with AssemblyAnnotationWriter.
+ /// Print the module to an output stream with an optional
+ /// AssemblyAnnotationWriter.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW) const;
/// Dump the module to stderr (for debugging).
void dump() const;
+
/// This function causes all the subinstructions to "let go" of all references
/// that they are maintaining. This allows one to 'delete' a whole class at
/// a time, even though there may be circular references... first all
diff --git a/include/llvm/Object/Binary.h b/include/llvm/Object/Binary.h
new file mode 100644
index 0000000..cd092fd
--- /dev/null
+++ b/include/llvm/Object/Binary.h
@@ -0,0 +1,67 @@
+//===- Binary.h - A generic binary file -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the Binary class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_BINARY_H
+#define LLVM_OBJECT_BINARY_H
+
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/Object/Error.h"
+
+namespace llvm {
+
+class MemoryBuffer;
+class StringRef;
+
+namespace object {
+
+class Binary {
+private:
+ Binary(); // = delete
+ Binary(const Binary &other); // = delete
+
+ unsigned int TypeID;
+
+protected:
+ MemoryBuffer *Data;
+
+ Binary(unsigned int Type, MemoryBuffer *Source);
+
+ enum {
+ isArchive,
+
+ // Object and children.
+ isObject,
+ isCOFF,
+ isELF,
+ isMachO,
+ lastObject
+ };
+
+public:
+ virtual ~Binary();
+
+ StringRef getData() const;
+ StringRef getFileName() const;
+
+ // Cast methods.
+ unsigned int getType() const { return TypeID; }
+ static inline bool classof(const Binary *v) { return true; }
+};
+
+error_code createBinary(MemoryBuffer *Source, OwningPtr<Binary> &Result);
+error_code createBinary(StringRef Path, OwningPtr<Binary> &Result);
+
+}
+}
+
+#endif
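A minimal sketch of how the new entry points compose with LLVM's RTTI (dumpFormat is a hypothetical helper; only calls declared above or in well-known Support headers are used):

  #include "llvm/Object/Binary.h"
  #include "llvm/Object/ObjectFile.h"
  #include "llvm/Support/Casting.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  using namespace llvm::object;

  static error_code dumpFormat(StringRef Path) {
    OwningPtr<Binary> Bin;
    if (error_code ec = createBinary(Path, Bin))
      return ec;                          // unreadable or unrecognized file
    // dyn_cast works through the classof methods defined above.
    if (ObjectFile *Obj = dyn_cast<ObjectFile>(Bin.get()))
      outs() << Obj->getFileFormatName() << "\n";
    return object_error::success;
  }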
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
new file mode 100644
index 0000000..121f9e8
--- /dev/null
+++ b/include/llvm/Object/COFF.h
@@ -0,0 +1,117 @@
+//===- COFF.h - COFF object file implementation -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the COFFObjectFile class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_COFF_H
+#define LLVM_OBJECT_COFF_H
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/COFF.h"
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace object {
+
+struct coff_file_header {
+ support::ulittle16_t Machine;
+ support::ulittle16_t NumberOfSections;
+ support::ulittle32_t TimeDateStamp;
+ support::ulittle32_t PointerToSymbolTable;
+ support::ulittle32_t NumberOfSymbols;
+ support::ulittle16_t SizeOfOptionalHeader;
+ support::ulittle16_t Characteristics;
+};
+
+struct coff_symbol {
+ struct StringTableOffset {
+ support::ulittle32_t Zeroes;
+ support::ulittle32_t Offset;
+ };
+
+ union {
+ char ShortName[8];
+ StringTableOffset Offset;
+ } Name;
+
+ support::ulittle32_t Value;
+ support::little16_t SectionNumber;
+
+ struct {
+ support::ulittle8_t BaseType;
+ support::ulittle8_t ComplexType;
+ } Type;
+
+ support::ulittle8_t StorageClass;
+ support::ulittle8_t NumberOfAuxSymbols;
+};
+
+struct coff_section {
+ char Name[8];
+ support::ulittle32_t VirtualSize;
+ support::ulittle32_t VirtualAddress;
+ support::ulittle32_t SizeOfRawData;
+ support::ulittle32_t PointerToRawData;
+ support::ulittle32_t PointerToRelocations;
+ support::ulittle32_t PointerToLinenumbers;
+ support::ulittle16_t NumberOfRelocations;
+ support::ulittle16_t NumberOfLinenumbers;
+ support::ulittle32_t Characteristics;
+};
+
+class COFFObjectFile : public ObjectFile {
+private:
+ const coff_file_header *Header;
+ const coff_section *SectionTable;
+ const coff_symbol *SymbolTable;
+ const char *StringTable;
+ uint32_t StringTableSize;
+
+ error_code getSection(int32_t index,
+ const coff_section *&Res) const;
+ error_code getString(uint32_t offset, StringRef &Res) const;
+
+ const coff_symbol *toSymb(DataRefImpl Symb) const;
+ const coff_section *toSec(DataRefImpl Sec) const;
+
+protected:
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+ virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+ bool &Result) const;
+
+public:
+ COFFObjectFile(MemoryBuffer *Object, error_code &ec);
+ virtual symbol_iterator begin_symbols() const;
+ virtual symbol_iterator end_symbols() const;
+ virtual section_iterator begin_sections() const;
+ virtual section_iterator end_sections() const;
+
+ virtual uint8_t getBytesInAddress() const;
+ virtual StringRef getFileFormatName() const;
+ virtual unsigned getArch() const;
+};
+
+}
+}
+
+#endif
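Because every multi-byte field is declared with the support::ulittle*_t wrappers, these structs can be overlaid directly on the mapped file and read portably even on big-endian hosts. A sketch, assuming Buf is a MemoryBuffer holding a COFF image:

  const coff_file_header *FH =
      reinterpret_cast<const coff_file_header *>(Buf->getBufferStart());
  if (FH->Machine == COFF::IMAGE_FILE_MACHINE_I386)  // from Support/COFF.h
    outs() << "i386 COFF, " << unsigned(FH->NumberOfSections) << " sections\n";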
diff --git a/include/llvm/Object/Error.h b/include/llvm/Object/Error.h
new file mode 100644
index 0000000..fbaf71c
--- /dev/null
+++ b/include/llvm/Object/Error.h
@@ -0,0 +1,50 @@
+//===- Error.h - system_error extensions for Object -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This declares a new error_category for the Object library.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_ERROR_H
+#define LLVM_OBJECT_ERROR_H
+
+#include "llvm/Support/system_error.h"
+
+namespace llvm {
+namespace object {
+
+const error_category &object_category();
+
+struct object_error {
+  enum _ {
+    success = 0,
+    invalid_file_type,
+    parse_failed,
+    unexpected_eof
+  };
+ _ v_;
+
+ object_error(_ v) : v_(v) {}
+ explicit object_error(int v) : v_(_(v)) {}
+ operator int() const {return v_;}
+};
+
+inline error_code make_error_code(object_error e) {
+ return error_code(static_cast<int>(e), object_category());
+}
+
+} // end namespace object.
+
+template <> struct is_error_code_enum<object::object_error> : true_type { };
+
+template <> struct is_error_code_enum<object::object_error::_> : true_type { };
+
+} // end namespace llvm.
+
+#endif
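The two is_error_code_enum specializations let object_error values convert to error_code implicitly; a small sketch of the round trip (includes omitted):

  error_code ec = object_error::parse_failed;      // uses make_error_code
  if (ec == object_error::parse_failed)
    errs() << "object error: " << ec.message() << "\n";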
diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h
index eee9d44..98ac067 100644
--- a/include/llvm/Object/ObjectFile.h
+++ b/include/llvm/Object/ObjectFile.h
@@ -14,15 +14,14 @@
#ifndef LLVM_OBJECT_OBJECT_FILE_H
#define LLVM_OBJECT_OBJECT_FILE_H
+#include "llvm/Object/Binary.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MemoryBuffer.h"
#include <cstring>
namespace llvm {
-
-class MemoryBuffer;
-class StringRef;
-
namespace object {
class ObjectFile;
@@ -31,7 +30,7 @@ union DataRefImpl {
struct {
uint32_t a, b;
} d;
- intptr_t p;
+ uintptr_t p;
};
static bool operator ==(const DataRefImpl &a, const DataRefImpl &b) {
@@ -40,52 +39,80 @@ static bool operator ==(const DataRefImpl &a, const DataRefImpl &b) {
return std::memcmp(&a, &b, sizeof(DataRefImpl)) == 0;
}
+class RelocationRef {
+ DataRefImpl RelocationPimpl;
+ const ObjectFile *OwningObject;
+
+public:
+ RelocationRef() : OwningObject(NULL) {
+ std::memset(&RelocationPimpl, 0, sizeof(RelocationPimpl));
+ }
+
+ RelocationRef(DataRefImpl RelocationP, const ObjectFile *Owner);
+
+ bool operator==(const RelocationRef &Other) const;
+
+ error_code getNext(RelocationRef &Result);
+};
+
/// SymbolRef - This is a value type class that represents a single symbol in
/// the list of symbols in the object file.
class SymbolRef {
+ friend class SectionRef;
DataRefImpl SymbolPimpl;
const ObjectFile *OwningObject;
public:
+ SymbolRef() : OwningObject(NULL) {
+ std::memset(&SymbolPimpl, 0, sizeof(SymbolPimpl));
+ }
+
SymbolRef(DataRefImpl SymbolP, const ObjectFile *Owner);
bool operator==(const SymbolRef &Other) const;
- SymbolRef getNext() const;
+ error_code getNext(SymbolRef &Result) const;
- StringRef getName() const;
- uint64_t getAddress() const;
- uint64_t getSize() const;
+ error_code getName(StringRef &Result) const;
+ error_code getAddress(uint64_t &Result) const;
+ error_code getSize(uint64_t &Result) const;
  /// Returns the ASCII char that should be displayed in a symbol table dump via
/// nm for this symbol.
- char getNMTypeChar() const;
+ error_code getNMTypeChar(char &Result) const;
/// Returns true for symbols that are internal to the object file format such
/// as section symbols.
- bool isInternal() const;
+ error_code isInternal(bool &Result) const;
};
/// SectionRef - This is a value type class that represents a single section in
/// the list of sections in the object file.
class SectionRef {
+ friend class SymbolRef;
DataRefImpl SectionPimpl;
const ObjectFile *OwningObject;
public:
+ SectionRef() : OwningObject(NULL) {
+ std::memset(&SectionPimpl, 0, sizeof(SectionPimpl));
+ }
+
SectionRef(DataRefImpl SectionP, const ObjectFile *Owner);
bool operator==(const SectionRef &Other) const;
- SectionRef getNext() const;
+ error_code getNext(SectionRef &Result) const;
- StringRef getName() const;
- uint64_t getAddress() const;
- uint64_t getSize() const;
- StringRef getContents() const;
+ error_code getName(StringRef &Result) const;
+ error_code getAddress(uint64_t &Result) const;
+ error_code getSize(uint64_t &Result) const;
+ error_code getContents(StringRef &Result) const;
// FIXME: Move to the normalization layer when it's created.
- bool isText() const;
+ error_code isText(bool &Result) const;
+
+ error_code containsSymbol(SymbolRef S, bool &Result) const;
};
const uint64_t UnknownAddressOrSize = ~0ULL;
@@ -93,38 +120,44 @@ const uint64_t UnknownAddressOrSize = ~0ULL;
/// ObjectFile - This class is the base class for all object file types.
/// Concrete instances of this object are created by createObjectFile, which
/// figures out which type to create.
-class ObjectFile {
+class ObjectFile : public Binary {
private:
ObjectFile(); // = delete
ObjectFile(const ObjectFile &other); // = delete
protected:
- MemoryBuffer *MapFile;
- const uint8_t *base;
+ ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec);
- ObjectFile(MemoryBuffer *Object);
+ const uint8_t *base() const {
+ return reinterpret_cast<const uint8_t *>(Data->getBufferStart());
+ }
// These functions are for SymbolRef to call internally. The main goal of
// this is to allow SymbolRef::SymbolPimpl to point directly to the symbol
// entry in the memory mapped object file. SymbolPimpl cannot contain any
// virtual functions because then it could not point into the memory mapped
// file.
+ //
+  // Implementations assume that the DataRefImpl is valid and has not been
+  // modified externally; otherwise the behavior is undefined.
friend class SymbolRef;
- virtual SymbolRef getSymbolNext(DataRefImpl Symb) const = 0;
- virtual StringRef getSymbolName(DataRefImpl Symb) const = 0;
- virtual uint64_t getSymbolAddress(DataRefImpl Symb) const = 0;
- virtual uint64_t getSymbolSize(DataRefImpl Symb) const = 0;
- virtual char getSymbolNMTypeChar(DataRefImpl Symb) const = 0;
- virtual bool isSymbolInternal(DataRefImpl Symb) const = 0;
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const = 0;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const = 0;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const =0;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const = 0;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const = 0;
+ virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const = 0;
// Same as above for SectionRef.
friend class SectionRef;
- virtual SectionRef getSectionNext(DataRefImpl Sec) const = 0;
- virtual StringRef getSectionName(DataRefImpl Sec) const = 0;
- virtual uint64_t getSectionAddress(DataRefImpl Sec) const = 0;
- virtual uint64_t getSectionSize(DataRefImpl Sec) const = 0;
- virtual StringRef getSectionContents(DataRefImpl Sec) const = 0;
- virtual bool isSectionText(DataRefImpl Sec) const = 0;
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const = 0;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const = 0;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const =0;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const = 0;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res)const=0;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const = 0;
+ virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+ bool &Result) const = 0;
public:
@@ -139,6 +172,10 @@ public:
return &Current;
}
+ const content_type &operator*() const {
+ return Current;
+ }
+
bool operator==(const content_iterator &other) const {
return Current == other.Current;
}
@@ -147,8 +184,12 @@ public:
return !(*this == other);
}
- content_iterator& operator++() { // Preincrement
- Current = Current.getNext();
+ content_iterator& increment(error_code &err) {
+ content_type next;
+ if (error_code ec = Current.getNext(next))
+ err = ec;
+ else
+ Current = next;
return *this;
}
};
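The error_code-based increment replaces operator++, so a symbol walk now threads one error_code through the loop. A sketch against the interface above (Obj is an ObjectFile*):

  error_code ec;
  for (ObjectFile::symbol_iterator I = Obj->begin_symbols(),
                                   E = Obj->end_symbols();
       I != E; I.increment(ec)) {
    if (ec) break;                   // malformed entry: stop the walk
    StringRef Name;
    if (I->getName(Name)) break;     // non-zero error_code means failure
    outs() << Name << "\n";
  }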
@@ -156,8 +197,6 @@ public:
typedef content_iterator<SymbolRef> symbol_iterator;
typedef content_iterator<SectionRef> section_iterator;
- virtual ~ObjectFile();
-
virtual symbol_iterator begin_symbols() const = 0;
virtual symbol_iterator end_symbols() const = 0;
@@ -171,8 +210,6 @@ public:
virtual StringRef getFileFormatName() const = 0;
virtual /* Triple::ArchType */ unsigned getArch() const = 0;
- StringRef getFilename() const;
-
/// @returns Pointer to ObjectFile subclass to handle this type of object.
/// @param ObjectPath The path to the object file. ObjectPath.isObject must
/// return true.
@@ -180,12 +217,16 @@ public:
static ObjectFile *createObjectFile(StringRef ObjectPath);
static ObjectFile *createObjectFile(MemoryBuffer *Object);
-private:
+ static inline bool classof(const Binary *v) {
+ return v->getType() >= isObject &&
+ v->getType() < lastObject;
+ }
+ static inline bool classof(const ObjectFile *v) { return true; }
+
+public:
static ObjectFile *createCOFFObjectFile(MemoryBuffer *Object);
static ObjectFile *createELFObjectFile(MemoryBuffer *Object);
static ObjectFile *createMachOObjectFile(MemoryBuffer *Object);
- static ObjectFile *createArchiveObjectFile(MemoryBuffer *Object);
- static ObjectFile *createLibObjectFile(MemoryBuffer *Object);
};
// Inline function definitions.
@@ -197,28 +238,28 @@ inline bool SymbolRef::operator==(const SymbolRef &Other) const {
return SymbolPimpl == Other.SymbolPimpl;
}
-inline SymbolRef SymbolRef::getNext() const {
- return OwningObject->getSymbolNext(SymbolPimpl);
+inline error_code SymbolRef::getNext(SymbolRef &Result) const {
+ return OwningObject->getSymbolNext(SymbolPimpl, Result);
}
-inline StringRef SymbolRef::getName() const {
- return OwningObject->getSymbolName(SymbolPimpl);
+inline error_code SymbolRef::getName(StringRef &Result) const {
+ return OwningObject->getSymbolName(SymbolPimpl, Result);
}
-inline uint64_t SymbolRef::getAddress() const {
- return OwningObject->getSymbolAddress(SymbolPimpl);
+inline error_code SymbolRef::getAddress(uint64_t &Result) const {
+ return OwningObject->getSymbolAddress(SymbolPimpl, Result);
}
-inline uint64_t SymbolRef::getSize() const {
- return OwningObject->getSymbolSize(SymbolPimpl);
+inline error_code SymbolRef::getSize(uint64_t &Result) const {
+ return OwningObject->getSymbolSize(SymbolPimpl, Result);
}
-inline char SymbolRef::getNMTypeChar() const {
- return OwningObject->getSymbolNMTypeChar(SymbolPimpl);
+inline error_code SymbolRef::getNMTypeChar(char &Result) const {
+ return OwningObject->getSymbolNMTypeChar(SymbolPimpl, Result);
}
-inline bool SymbolRef::isInternal() const {
- return OwningObject->isSymbolInternal(SymbolPimpl);
+inline error_code SymbolRef::isInternal(bool &Result) const {
+ return OwningObject->isSymbolInternal(SymbolPimpl, Result);
}
@@ -232,28 +273,33 @@ inline bool SectionRef::operator==(const SectionRef &Other) const {
return SectionPimpl == Other.SectionPimpl;
}
-inline SectionRef SectionRef::getNext() const {
- return OwningObject->getSectionNext(SectionPimpl);
+inline error_code SectionRef::getNext(SectionRef &Result) const {
+ return OwningObject->getSectionNext(SectionPimpl, Result);
+}
+
+inline error_code SectionRef::getName(StringRef &Result) const {
+ return OwningObject->getSectionName(SectionPimpl, Result);
}
-inline StringRef SectionRef::getName() const {
- return OwningObject->getSectionName(SectionPimpl);
+inline error_code SectionRef::getAddress(uint64_t &Result) const {
+ return OwningObject->getSectionAddress(SectionPimpl, Result);
}
-inline uint64_t SectionRef::getAddress() const {
- return OwningObject->getSectionAddress(SectionPimpl);
+inline error_code SectionRef::getSize(uint64_t &Result) const {
+ return OwningObject->getSectionSize(SectionPimpl, Result);
}
-inline uint64_t SectionRef::getSize() const {
- return OwningObject->getSectionSize(SectionPimpl);
+inline error_code SectionRef::getContents(StringRef &Result) const {
+ return OwningObject->getSectionContents(SectionPimpl, Result);
}
-inline StringRef SectionRef::getContents() const {
- return OwningObject->getSectionContents(SectionPimpl);
+inline error_code SectionRef::isText(bool &Result) const {
+ return OwningObject->isSectionText(SectionPimpl, Result);
}
-inline bool SectionRef::isText() const {
- return OwningObject->isSectionText(SectionPimpl);
+inline error_code SectionRef::containsSymbol(SymbolRef S, bool &Result) const {
+ return OwningObject->sectionContainsSymbol(SectionPimpl, S.SymbolPimpl,
+ Result);
}
} // end namespace object
diff --git a/include/llvm/Support/BranchProbability.h b/include/llvm/Support/BranchProbability.h
index 7ba6491..2e81490 100644
--- a/include/llvm/Support/BranchProbability.h
+++ b/include/llvm/Support/BranchProbability.h
@@ -19,15 +19,9 @@
namespace llvm {
class raw_ostream;
-class BranchProbabilityInfo;
-class MachineBranchProbabilityInfo;
-class MachineBasicBlock;
// This class represents Branch Probability as a non-negative fraction.
class BranchProbability {
- friend class BranchProbabilityInfo;
- friend class MachineBranchProbabilityInfo;
- friend class MachineBasicBlock;
// Numerator
uint32_t N;
@@ -35,9 +29,17 @@ class BranchProbability {
// Denominator
uint32_t D;
+public:
BranchProbability(uint32_t n, uint32_t d);
-public:
+ uint32_t getNumerator() const { return N; }
+ uint32_t getDenominator() const { return D; }
+
+ // Return (1 - Probability).
+ BranchProbability getCompl() {
+ return BranchProbability(D - N, D);
+ }
+
raw_ostream &print(raw_ostream &OS) const;
void dump() const;
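With the constructor and the accessors now public, code outside the old friend classes can form and complement probabilities directly; for example:

  BranchProbability Taken(3, 8);                  // 3/8
  BranchProbability NotTaken = Taken.getCompl();  // (8-3)/8 = 5/8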
diff --git a/include/llvm/Support/CFG.h b/include/llvm/Support/CFG.h
index d2ea123..29313ef 100644
--- a/include/llvm/Support/CFG.h
+++ b/include/llvm/Support/CFG.h
@@ -33,7 +33,7 @@ class PredIterator : public std::iterator<std::forward_iterator_tag,
USE_iterator It;
inline void advancePastNonTerminators() {
- // Loop to ignore non terminator uses (for example PHI nodes).
+    // Loop to ignore non-terminator uses (for example BlockAddresses).
while (!It.atEnd() && !isa<TerminatorInst>(*It))
++It;
}
@@ -109,11 +109,18 @@ public:
// TODO: This can be random access iterator, only operator[] missing.
explicit inline SuccIterator(Term_ T) : Term(T), idx(0) {// begin iterator
- assert(T && "getTerminator returned null!");
}
inline SuccIterator(Term_ T, bool) // end iterator
- : Term(T), idx(Term->getNumSuccessors()) {
- assert(T && "getTerminator returned null!");
+ : Term(T) {
+ if (Term)
+ idx = Term->getNumSuccessors();
+ else
+      // Term == NULL happens if a basic block is not fully constructed and
+ // consequently getTerminator() returns NULL. In this case we construct a
+ // SuccIterator which describes a basic block that has zero successors.
+ // Defining SuccIterator for incomplete and malformed CFGs is especially
+ // useful for debugging.
+ idx = 0;
}
inline const Self &operator=(const Self &I) {
@@ -201,6 +208,7 @@ public:
/// Get the source BB of this iterator.
inline BB_ *getSource() {
+ assert(Term && "Source not available, if basic block was malformed");
return Term->getParent();
}
};
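One consequence worth noting: succ_begin() and succ_end() now agree on a block whose terminator has not been created yet, instead of asserting. A sketch (Ctx and F are an existing LLVMContext and Function):

  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);  // no terminator yet
  assert(succ_begin(BB) == succ_end(BB) && "half-built block: no successors");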
diff --git a/include/llvm/Support/ConstantFolder.h b/include/llvm/Support/ConstantFolder.h
index d0eaa3e..7330235 100644
--- a/include/llvm/Support/ConstantFolder.h
+++ b/include/llvm/Support/ConstantFolder.h
@@ -210,14 +210,14 @@ public:
return ConstantExpr::getShuffleVector(V1, V2, Mask);
}
- Constant *CreateExtractValue(Constant *Agg, const unsigned *IdxList,
- unsigned NumIdx) const {
- return ConstantExpr::getExtractValue(Agg, IdxList, NumIdx);
+ Constant *CreateExtractValue(Constant *Agg,
+ ArrayRef<unsigned> IdxList) const {
+ return ConstantExpr::getExtractValue(Agg, IdxList);
}
Constant *CreateInsertValue(Constant *Agg, Constant *Val,
- const unsigned *IdxList, unsigned NumIdx) const {
- return ConstantExpr::getInsertValue(Agg, Val, IdxList, NumIdx);
+ ArrayRef<unsigned> IdxList) const {
+ return ConstantExpr::getInsertValue(Agg, Val, IdxList);
}
};
diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index 98a05a4..2ee9f87 100644
--- a/include/llvm/Support/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -61,7 +61,10 @@ namespace llvm {
/// getFromDILocation - Translate the DILocation quad into a DebugLoc.
static DebugLoc getFromDILocation(MDNode *N);
-
+
+ /// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
+ static DebugLoc getFromDILexicalBlock(MDNode *N);
+
/// isUnknown - Return true if this is an unknown location.
bool isUnknown() const { return ScopeIdx == 0; }
@@ -94,6 +97,8 @@ namespace llvm {
return LineCol == DL.LineCol && ScopeIdx == DL.ScopeIdx;
}
bool operator!=(const DebugLoc &DL) const { return !(*this == DL); }
+
+ void dump(const LLVMContext &Ctx) const;
};
template <>
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index cc72bd5..be48112 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -487,11 +487,11 @@ enum {
SHT_REL = 9, // Relocation entries; no explicit addends.
SHT_SHLIB = 10, // Reserved.
SHT_DYNSYM = 11, // Symbol table.
- SHT_INIT_ARRAY = 14, // Pointers to initialisation functions.
+ SHT_INIT_ARRAY = 14, // Pointers to initialization functions.
SHT_FINI_ARRAY = 15, // Pointers to termination functions.
SHT_PREINIT_ARRAY = 16, // Pointers to pre-init functions.
SHT_GROUP = 17, // Section group.
- SHT_SYMTAB_SHNDX = 18, // Indicies for SHN_XINDEX entries.
+ SHT_SYMTAB_SHNDX = 18, // Indices for SHN_XINDEX entries.
SHT_LOOS = 0x60000000, // Lowest operating system-specific type.
SHT_HIOS = 0x6fffffff, // Highest operating system-specific type.
SHT_LOPROC = 0x70000000, // Lowest processor architecture-specific type.
@@ -630,7 +630,7 @@ enum {
STT_FUNC = 2, // Symbol is executable code (function, etc.)
STT_SECTION = 3, // Symbol refers to a section
STT_FILE = 4, // Local, absolute symbol that refers to a file
- STT_COMMON = 5, // An uninitialised common block
+ STT_COMMON = 5, // An uninitialized common block
STT_TLS = 6, // Thread local data object
STT_LOPROC = 13, // Lowest processor-specific symbol type
STT_HIPROC = 15 // Highest processor-specific symbol type
@@ -804,7 +804,7 @@ enum {
DT_RELENT = 19, // Size of a Rel relocation entry.
DT_PLTREL = 20, // Type of relocation entry used for linking.
DT_DEBUG = 21, // Reserved for debugger.
- DT_TEXTREL = 22, // Relocations exist for non-writable segements.
+ DT_TEXTREL = 22, // Relocations exist for non-writable segments.
DT_JMPREL = 23, // Address of relocations associated with PLT.
DT_BIND_NOW = 24, // Process all relocations before execution.
DT_INIT_ARRAY = 25, // Pointer to array of initialization functions.
diff --git a/include/llvm/Support/Endian.h b/include/llvm/Support/Endian.h
index f62eab0..af1b506 100644
--- a/include/llvm/Support/Endian.h
+++ b/include/llvm/Support/Endian.h
@@ -14,7 +14,6 @@
#ifndef LLVM_SUPPORT_ENDIAN_H
#define LLVM_SUPPORT_ENDIAN_H
-#include "llvm/Config/config.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
#include "llvm/Support/type_traits.h"
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 6a7c277..91cd78e 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -82,7 +82,7 @@ public:
InsertPt = I;
SetCurrentDebugLocation(I->getDebugLoc());
}
-
+
/// SetInsertPoint - This specifies that created instructions should be
/// inserted at the specified point.
void SetInsertPoint(BasicBlock *TheBB, BasicBlock::iterator IP) {
@@ -90,6 +90,19 @@ public:
InsertPt = IP;
}
+ /// SetInsertPoint(Use) - Find the nearest point that dominates this use, and
+ /// specify that created instructions should be inserted at this point.
+ void SetInsertPoint(Use &U) {
+ Instruction *UseInst = cast<Instruction>(U.getUser());
+ if (PHINode *Phi = dyn_cast<PHINode>(UseInst)) {
+ BasicBlock *PredBB = Phi->getIncomingBlock(U);
+ assert(U != PredBB->getTerminator() && "critical edge not split");
+ SetInsertPoint(PredBB, PredBB->getTerminator());
+ return;
+ }
+ SetInsertPoint(UseInst);
+ }
+
/// SetCurrentDebugLocation - Set location information used by debugging
/// information.
void SetCurrentDebugLocation(const DebugLoc &L) {
@@ -98,7 +111,7 @@ public:
/// getCurrentDebugLocation - Get location information used by debugging
/// information.
- const DebugLoc &getCurrentDebugLocation() const { return CurDbgLocation; }
+ DebugLoc getCurrentDebugLocation() const { return CurDbgLocation; }
/// SetInstDebugLocation - If this builder has a current debug location, set
/// it on the specified instruction.
@@ -109,8 +122,8 @@ public:
/// getCurrentFunctionReturnType - Get the return type of the current function
/// that we're emitting into.
- const Type *getCurrentFunctionReturnType() const;
-
+ Type *getCurrentFunctionReturnType() const;
+
/// InsertPoint - A saved insertion point.
class InsertPoint {
BasicBlock *Block;
@@ -198,7 +211,7 @@ public:
ConstantInt *getInt64(uint64_t C) {
return ConstantInt::get(getInt64Ty(), C);
}
-
+
/// getInt - Get a constant integer value.
ConstantInt *getInt(const APInt &AI) {
return ConstantInt::get(Context, AI);
@@ -209,46 +222,46 @@ public:
//===--------------------------------------------------------------------===//
/// getInt1Ty - Fetch the type representing a single bit
- const IntegerType *getInt1Ty() {
+ IntegerType *getInt1Ty() {
return Type::getInt1Ty(Context);
}
/// getInt8Ty - Fetch the type representing an 8-bit integer.
- const IntegerType *getInt8Ty() {
+ IntegerType *getInt8Ty() {
return Type::getInt8Ty(Context);
}
/// getInt16Ty - Fetch the type representing a 16-bit integer.
- const IntegerType *getInt16Ty() {
+ IntegerType *getInt16Ty() {
return Type::getInt16Ty(Context);
}
  /// getInt32Ty - Fetch the type representing a 32-bit integer.
- const IntegerType *getInt32Ty() {
+ IntegerType *getInt32Ty() {
return Type::getInt32Ty(Context);
}
/// getInt64Ty - Fetch the type representing a 64-bit integer.
- const IntegerType *getInt64Ty() {
+ IntegerType *getInt64Ty() {
return Type::getInt64Ty(Context);
}
/// getFloatTy - Fetch the type representing a 32-bit floating point value.
- const Type *getFloatTy() {
+ Type *getFloatTy() {
return Type::getFloatTy(Context);
}
/// getDoubleTy - Fetch the type representing a 64-bit floating point value.
- const Type *getDoubleTy() {
+ Type *getDoubleTy() {
return Type::getDoubleTy(Context);
}
/// getVoidTy - Fetch the type representing void.
- const Type *getVoidTy() {
+ Type *getVoidTy() {
return Type::getVoidTy(Context);
}
- const PointerType *getInt8PtrTy(unsigned AddrSpace = 0) {
+ PointerType *getInt8PtrTy(unsigned AddrSpace = 0) {
return Type::getInt8PtrTy(Context, AddrSpace);
}
@@ -263,7 +276,7 @@ public:
bool isVolatile = false, MDNode *TBAATag = 0) {
return CreateMemSet(Ptr, Val, getInt64(Size), Align, isVolatile, TBAATag);
}
-
+
CallInst *CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
bool isVolatile = false, MDNode *TBAATag = 0);
@@ -274,7 +287,7 @@ public:
bool isVolatile = false, MDNode *TBAATag = 0) {
return CreateMemCpy(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
}
-
+
CallInst *CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
bool isVolatile = false, MDNode *TBAATag = 0);
@@ -285,9 +298,9 @@ public:
bool isVolatile = false, MDNode *TBAATag = 0) {
return CreateMemMove(Dst, Src, getInt64(Size), Align, isVolatile, TBAATag);
}
-
+
CallInst *CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
- bool isVolatile = false, MDNode *TBAATag = 0);
+ bool isVolatile = false, MDNode *TBAATag = 0);
/// CreateLifetimeStart - Create a lifetime.start intrinsic. If the pointer
/// isn't i8* it will be converted.
@@ -341,7 +354,13 @@ public:
SetInsertPoint(IP);
SetCurrentDebugLocation(IP->getDebugLoc());
}
-
+
+ explicit IRBuilder(Use &U)
+ : IRBuilderBase(U->getContext()), Folder() {
+ SetInsertPoint(U);
+ SetCurrentDebugLocation(cast<Instruction>(U.getUser())->getDebugLoc());
+ }
+
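A sketch of the new constructor in action: build a replacement for the value flowing into use U, inserting on the correct edge even when the user is a PHI (X and Y are assumed in-scope values):

  IRBuilder<> B(U);                // picks the insertion point from the use
  Value *Repl = B.CreateAdd(X, Y);
  U.set(Repl);                     // redirect the use to the new value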
IRBuilder(BasicBlock *TheBB, BasicBlock::iterator IP, const T& F)
: IRBuilderBase(TheBB->getContext()), Folder(F) {
SetInsertPoint(TheBB, IP);
@@ -430,34 +449,30 @@ public:
InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, const Twine &Name = "") {
- Value *Args[] = { 0 };
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
- Args), Name);
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest,
+ ArrayRef<Value *>()),
+ Name);
}
InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, Value *Arg1,
const Twine &Name = "") {
- Value *Args[] = { Arg1 };
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
- Args+1), Name);
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Arg1),
+ Name);
}
InvokeInst *CreateInvoke3(Value *Callee, BasicBlock *NormalDest,
BasicBlock *UnwindDest, Value *Arg1,
Value *Arg2, Value *Arg3,
const Twine &Name = "") {
Value *Args[] = { Arg1, Arg2, Arg3 };
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
- Args+3), Name);
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
+ Name);
}
/// CreateInvoke - Create an invoke instruction.
- template<typename RandomAccessIterator>
InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
- BasicBlock *UnwindDest,
- RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd,
+ BasicBlock *UnwindDest, ArrayRef<Value *> Args,
const Twine &Name = "") {
- return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest,
- ArgBegin, ArgEnd), Name);
+ return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args),
+ Name);
}
UnwindInst *CreateUnwind() {
@@ -1107,33 +1122,27 @@ public:
CallInst *CreateCall2(Value *Callee, Value *Arg1, Value *Arg2,
const Twine &Name = "") {
Value *Args[] = { Arg1, Arg2 };
- return Insert(CallInst::Create(Callee, Args, Args+2), Name);
+ return Insert(CallInst::Create(Callee, Args), Name);
}
CallInst *CreateCall3(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
const Twine &Name = "") {
Value *Args[] = { Arg1, Arg2, Arg3 };
- return Insert(CallInst::Create(Callee, Args, Args+3), Name);
+ return Insert(CallInst::Create(Callee, Args), Name);
}
CallInst *CreateCall4(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
Value *Arg4, const Twine &Name = "") {
Value *Args[] = { Arg1, Arg2, Arg3, Arg4 };
- return Insert(CallInst::Create(Callee, Args, Args+4), Name);
+ return Insert(CallInst::Create(Callee, Args), Name);
}
CallInst *CreateCall5(Value *Callee, Value *Arg1, Value *Arg2, Value *Arg3,
Value *Arg4, Value *Arg5, const Twine &Name = "") {
Value *Args[] = { Arg1, Arg2, Arg3, Arg4, Arg5 };
- return Insert(CallInst::Create(Callee, Args, Args+5), Name);
+ return Insert(CallInst::Create(Callee, Args), Name);
}
- CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Arg,
+ CallInst *CreateCall(Value *Callee, ArrayRef<Value *> Args,
const Twine &Name = "") {
- return Insert(CallInst::Create(Callee, Arg.begin(), Arg.end(), Name));
- }
-
- template<typename RandomAccessIterator>
- CallInst *CreateCall(Value *Callee, RandomAccessIterator ArgBegin,
- RandomAccessIterator ArgEnd, const Twine &Name = "") {
- return Insert(CallInst::Create(Callee, ArgBegin, ArgEnd), Name);
+ return Insert(CallInst::Create(Callee, Args, Name));
}
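With the ArrayRef overload, an argument list can come straight from a C array or SmallVector instead of the removed iterator-pair form; e.g. (A, B, C assumed in scope):

  Value *Args[] = { A, B, C };
  CallInst *CI = Builder.CreateCall(Callee, Args, "res");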
Value *CreateSelect(Value *C, Value *True, Value *False,
@@ -1175,43 +1184,21 @@ public:
return Insert(new ShuffleVectorInst(V1, V2, Mask), Name);
}
- Value *CreateExtractValue(Value *Agg, unsigned Idx,
- const Twine &Name = "") {
- if (Constant *AggC = dyn_cast<Constant>(Agg))
- return Insert(Folder.CreateExtractValue(AggC, &Idx, 1), Name);
- return Insert(ExtractValueInst::Create(Agg, Idx), Name);
- }
-
- template<typename RandomAccessIterator>
Value *CreateExtractValue(Value *Agg,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &Name = "") {
if (Constant *AggC = dyn_cast<Constant>(Agg))
- return Insert(Folder.CreateExtractValue(AggC, IdxBegin, IdxEnd-IdxBegin),
- Name);
- return Insert(ExtractValueInst::Create(Agg, IdxBegin, IdxEnd), Name);
+ return Insert(Folder.CreateExtractValue(AggC, Idxs), Name);
+ return Insert(ExtractValueInst::Create(Agg, Idxs), Name);
}
- Value *CreateInsertValue(Value *Agg, Value *Val, unsigned Idx,
- const Twine &Name = "") {
- if (Constant *AggC = dyn_cast<Constant>(Agg))
- if (Constant *ValC = dyn_cast<Constant>(Val))
- return Insert(Folder.CreateInsertValue(AggC, ValC, &Idx, 1), Name);
- return Insert(InsertValueInst::Create(Agg, Val, Idx), Name);
- }
-
- template<typename RandomAccessIterator>
Value *CreateInsertValue(Value *Agg, Value *Val,
- RandomAccessIterator IdxBegin,
- RandomAccessIterator IdxEnd,
+ ArrayRef<unsigned> Idxs,
const Twine &Name = "") {
if (Constant *AggC = dyn_cast<Constant>(Agg))
if (Constant *ValC = dyn_cast<Constant>(Val))
- return Insert(Folder.CreateInsertValue(AggC, ValC, IdxBegin,
- IdxEnd - IdxBegin),
- Name);
- return Insert(InsertValueInst::Create(Agg, Val, IdxBegin, IdxEnd), Name);
+ return Insert(Folder.CreateInsertValue(AggC, ValC, Idxs), Name);
+ return Insert(InsertValueInst::Create(Agg, Val, Idxs), Name);
}
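Index lists follow the same pattern; a sketch extracting element {1, 0} of an aggregate and writing it back (Agg assumed in scope):

  unsigned Idxs[] = { 1, 0 };
  Value *Elt = Builder.CreateExtractValue(Agg, Idxs, "elt");
  Value *Upd = Builder.CreateInsertValue(Agg, Elt, Idxs, "upd");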
//===--------------------------------------------------------------------===//
@@ -1238,7 +1225,7 @@ public:
Value *CreatePtrDiff(Value *LHS, Value *RHS, const Twine &Name = "") {
assert(LHS->getType() == RHS->getType() &&
"Pointer subtraction operand types must match!");
- const PointerType *ArgType = cast<PointerType>(LHS->getType());
+ PointerType *ArgType = cast<PointerType>(LHS->getType());
Value *LHS_int = CreatePtrToInt(LHS, Type::getInt64Ty(Context));
Value *RHS_int = CreatePtrToInt(RHS, Type::getInt64Ty(Context));
Value *Difference = CreateSub(LHS_int, RHS_int);
diff --git a/include/llvm/Support/NoFolder.h b/include/llvm/Support/NoFolder.h
index 5ead26e..94359a5 100644
--- a/include/llvm/Support/NoFolder.h
+++ b/include/llvm/Support/NoFolder.h
@@ -22,6 +22,7 @@
#ifndef LLVM_SUPPORT_NOFOLDER_H
#define LLVM_SUPPORT_NOFOLDER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
@@ -269,15 +270,14 @@ public:
return new ShuffleVectorInst(V1, V2, Mask);
}
- Instruction *CreateExtractValue(Constant *Agg, const unsigned *IdxList,
- unsigned NumIdx) const {
- return ExtractValueInst::Create(Agg, IdxList, IdxList+NumIdx);
+ Instruction *CreateExtractValue(Constant *Agg,
+ ArrayRef<unsigned> IdxList) const {
+ return ExtractValueInst::Create(Agg, IdxList);
}
Instruction *CreateInsertValue(Constant *Agg, Constant *Val,
- const unsigned *IdxList,
- unsigned NumIdx) const {
- return InsertValueInst::Create(Agg, Val, IdxList, IdxList+NumIdx);
+ ArrayRef<unsigned> IdxList) const {
+ return InsertValueInst::Create(Agg, Val, IdxList);
}
};
diff --git a/include/llvm/Support/PassManagerBuilder.h b/include/llvm/Support/PassManagerBuilder.h
index 513bb88..b0cec6e 100644
--- a/include/llvm/Support/PassManagerBuilder.h
+++ b/include/llvm/Support/PassManagerBuilder.h
@@ -67,7 +67,12 @@ public:
/// EP_LoopOptimizerEnd - This extension point allows adding loop passes to
/// the end of the loop optimizer.
- EP_LoopOptimizerEnd
+ EP_LoopOptimizerEnd,
+
+ /// EP_ScalarOptimizerLate - This extension point allows adding optimization
+ /// passes after most of the main optimizations, but before the last
+ /// cleanup-ish optimizations.
+ EP_ScalarOptimizerLate
};
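A sketch of hooking the new extension point, assuming the existing addExtension registration hook and a hypothetical createMyLatePass:

  static void addLatePass(const PassManagerBuilder &, PassManagerBase &PM) {
    PM.add(createMyLatePass());   // hypothetical scalar cleanup pass
  }

  PassManagerBuilder PMB;
  PMB.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate, addLatePass);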
/// The Optimization Level - Specify the basic optimization level.
@@ -147,6 +152,7 @@ public:
FPM.add(createCFGSimplificationPass());
FPM.add(createScalarReplAggregatesPass());
FPM.add(createEarlyCSEPass());
+ FPM.add(createLowerExpectIntrinsicPass());
}
/// populateModulePassManager - This sets up the primary pass manager.
@@ -223,13 +229,16 @@ public:
MPM.add(createJumpThreadingPass()); // Thread jumps
MPM.add(createCorrelatedValuePropagationPass());
MPM.add(createDeadStoreEliminationPass()); // Delete dead stores
+
+ addExtensionsToPM(EP_ScalarOptimizerLate, MPM);
+
MPM.add(createAggressiveDCEPass()); // Delete dead instructions
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Clean up after everything.
if (!DisableUnitAtATime) {
+ // FIXME: We shouldn't bother with this anymore.
MPM.add(createStripDeadPrototypesPass()); // Get rid of dead prototypes
- MPM.add(createDeadTypeEliminationPass()); // Eliminate dead types
// GlobalOpt already deletes dead functions and globals, at -O3 try a
// late pass of GlobalDCE. It is capable of deleting dead cycles.
diff --git a/include/llvm/Support/TargetFolder.h b/include/llvm/Support/TargetFolder.h
index 20ca557..3233a98 100644
--- a/include/llvm/Support/TargetFolder.h
+++ b/include/llvm/Support/TargetFolder.h
@@ -21,6 +21,7 @@
#include "llvm/Constants.h"
#include "llvm/InstrTypes.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Analysis/ConstantFolding.h"
namespace llvm {
@@ -226,14 +227,14 @@ public:
return Fold(ConstantExpr::getShuffleVector(V1, V2, Mask));
}
- Constant *CreateExtractValue(Constant *Agg, const unsigned *IdxList,
- unsigned NumIdx) const {
- return Fold(ConstantExpr::getExtractValue(Agg, IdxList, NumIdx));
+ Constant *CreateExtractValue(Constant *Agg,
+ ArrayRef<unsigned> IdxList) const {
+ return Fold(ConstantExpr::getExtractValue(Agg, IdxList));
}
Constant *CreateInsertValue(Constant *Agg, Constant *Val,
- const unsigned *IdxList, unsigned NumIdx) const {
- return Fold(ConstantExpr::getInsertValue(Agg, Val, IdxList, NumIdx));
+ ArrayRef<unsigned> IdxList) const {
+ return Fold(ConstantExpr::getInsertValue(Agg, Val, IdxList));
}
};
diff --git a/include/llvm/Support/TypeBuilder.h b/include/llvm/Support/TypeBuilder.h
index ea63da0..1800778 100644
--- a/include/llvm/Support/TypeBuilder.h
+++ b/include/llvm/Support/TypeBuilder.h
@@ -18,6 +18,7 @@
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <limits.h>
+#include <vector>
namespace llvm {
@@ -50,7 +51,7 @@ namespace llvm {
/// namespace llvm {
/// template<bool xcompile> class TypeBuilder<MyType, xcompile> {
/// public:
-/// static const StructType *get(LLVMContext &Context) {
+/// static StructType *get(LLVMContext &Context) {
/// // If you cache this result, be sure to cache it separately
/// // for each LLVMContext.
/// return StructType::get(
@@ -103,7 +104,7 @@ template<typename T, bool cross> class TypeBuilder<const volatile T, cross>
// Pointers
template<typename T, bool cross> class TypeBuilder<T*, cross> {
public:
- static const PointerType *get(LLVMContext &Context) {
+ static PointerType *get(LLVMContext &Context) {
return PointerType::getUnqual(TypeBuilder<T,cross>::get(Context));
}
};
@@ -114,14 +115,14 @@ template<typename T, bool cross> class TypeBuilder<T&, cross> {};
// Arrays
template<typename T, size_t N, bool cross> class TypeBuilder<T[N], cross> {
public:
- static const ArrayType *get(LLVMContext &Context) {
+ static ArrayType *get(LLVMContext &Context) {
return ArrayType::get(TypeBuilder<T, cross>::get(Context), N);
}
};
/// LLVM uses an array of length 0 to represent an unknown-length array.
template<typename T, bool cross> class TypeBuilder<T[], cross> {
public:
- static const ArrayType *get(LLVMContext &Context) {
+ static ArrayType *get(LLVMContext &Context) {
return ArrayType::get(TypeBuilder<T, cross>::get(Context), 0);
}
};
@@ -151,7 +152,7 @@ public:
#define DEFINE_INTEGRAL_TYPEBUILDER(T) \
template<> class TypeBuilder<T, false> { \
public: \
- static const IntegerType *get(LLVMContext &Context) { \
+ static IntegerType *get(LLVMContext &Context) { \
return IntegerType::get(Context, sizeof(T) * CHAR_BIT); \
} \
}; \
@@ -180,14 +181,14 @@ DEFINE_INTEGRAL_TYPEBUILDER(unsigned long long);
template<uint32_t num_bits, bool cross>
class TypeBuilder<types::i<num_bits>, cross> {
public:
- static const IntegerType *get(LLVMContext &C) {
+ static IntegerType *get(LLVMContext &C) {
return IntegerType::get(C, num_bits);
}
};
template<> class TypeBuilder<float, false> {
public:
- static const Type *get(LLVMContext& C) {
+ static Type *get(LLVMContext& C) {
return Type::getFloatTy(C);
}
};
@@ -195,7 +196,7 @@ template<> class TypeBuilder<float, true> {};
template<> class TypeBuilder<double, false> {
public:
- static const Type *get(LLVMContext& C) {
+ static Type *get(LLVMContext& C) {
return Type::getDoubleTy(C);
}
};
@@ -203,32 +204,32 @@ template<> class TypeBuilder<double, true> {};
template<bool cross> class TypeBuilder<types::ieee_float, cross> {
public:
- static const Type *get(LLVMContext& C) { return Type::getFloatTy(C); }
+ static Type *get(LLVMContext& C) { return Type::getFloatTy(C); }
};
template<bool cross> class TypeBuilder<types::ieee_double, cross> {
public:
- static const Type *get(LLVMContext& C) { return Type::getDoubleTy(C); }
+ static Type *get(LLVMContext& C) { return Type::getDoubleTy(C); }
};
template<bool cross> class TypeBuilder<types::x86_fp80, cross> {
public:
- static const Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); }
+ static Type *get(LLVMContext& C) { return Type::getX86_FP80Ty(C); }
};
template<bool cross> class TypeBuilder<types::fp128, cross> {
public:
- static const Type *get(LLVMContext& C) { return Type::getFP128Ty(C); }
+ static Type *get(LLVMContext& C) { return Type::getFP128Ty(C); }
};
template<bool cross> class TypeBuilder<types::ppc_fp128, cross> {
public:
- static const Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); }
+ static Type *get(LLVMContext& C) { return Type::getPPC_FP128Ty(C); }
};
template<bool cross> class TypeBuilder<types::x86_mmx, cross> {
public:
- static const Type *get(LLVMContext& C) { return Type::getX86_MMXTy(C); }
+ static Type *get(LLVMContext& C) { return Type::getX86_MMXTy(C); }
};
template<bool cross> class TypeBuilder<void, cross> {
public:
- static const Type *get(LLVMContext &C) {
+ static Type *get(LLVMContext &C) {
return Type::getVoidTy(C);
}
};
@@ -246,14 +247,14 @@ template<> class TypeBuilder<const volatile void*, false>
template<typename R, bool cross> class TypeBuilder<R(), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
+ static FunctionType *get(LLVMContext &Context) {
return FunctionType::get(TypeBuilder<R, cross>::get(Context), false);
}
};
template<typename R, typename A1, bool cross> class TypeBuilder<R(A1), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(1);
params.push_back(TypeBuilder<A1, cross>::get(Context));
return FunctionType::get(TypeBuilder<R, cross>::get(Context),
@@ -263,8 +264,8 @@ public:
template<typename R, typename A1, typename A2, bool cross>
class TypeBuilder<R(A1, A2), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(2);
params.push_back(TypeBuilder<A1, cross>::get(Context));
params.push_back(TypeBuilder<A2, cross>::get(Context));
@@ -275,8 +276,8 @@ public:
template<typename R, typename A1, typename A2, typename A3, bool cross>
class TypeBuilder<R(A1, A2, A3), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(3);
params.push_back(TypeBuilder<A1, cross>::get(Context));
params.push_back(TypeBuilder<A2, cross>::get(Context));
@@ -290,8 +291,8 @@ template<typename R, typename A1, typename A2, typename A3, typename A4,
bool cross>
class TypeBuilder<R(A1, A2, A3, A4), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(4);
params.push_back(TypeBuilder<A1, cross>::get(Context));
params.push_back(TypeBuilder<A2, cross>::get(Context));
@@ -306,8 +307,8 @@ template<typename R, typename A1, typename A2, typename A3, typename A4,
typename A5, bool cross>
class TypeBuilder<R(A1, A2, A3, A4, A5), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(5);
params.push_back(TypeBuilder<A1, cross>::get(Context));
params.push_back(TypeBuilder<A2, cross>::get(Context));
@@ -321,15 +322,15 @@ public:
template<typename R, bool cross> class TypeBuilder<R(...), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
+ static FunctionType *get(LLVMContext &Context) {
return FunctionType::get(TypeBuilder<R, cross>::get(Context), true);
}
};
template<typename R, typename A1, bool cross>
class TypeBuilder<R(A1, ...), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(1);
params.push_back(TypeBuilder<A1, cross>::get(Context));
return FunctionType::get(TypeBuilder<R, cross>::get(Context), params, true);
@@ -338,8 +339,8 @@ public:
template<typename R, typename A1, typename A2, bool cross>
class TypeBuilder<R(A1, A2, ...), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(2);
params.push_back(TypeBuilder<A1, cross>::get(Context));
params.push_back(TypeBuilder<A2, cross>::get(Context));
@@ -350,8 +351,8 @@ public:
template<typename R, typename A1, typename A2, typename A3, bool cross>
class TypeBuilder<R(A1, A2, A3, ...), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(3);
params.push_back(TypeBuilder<A1, cross>::get(Context));
params.push_back(TypeBuilder<A2, cross>::get(Context));
@@ -365,8 +366,8 @@ template<typename R, typename A1, typename A2, typename A3, typename A4,
bool cross>
class TypeBuilder<R(A1, A2, A3, A4, ...), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(4);
params.push_back(TypeBuilder<A1, cross>::get(Context));
params.push_back(TypeBuilder<A2, cross>::get(Context));
@@ -381,8 +382,8 @@ template<typename R, typename A1, typename A2, typename A3, typename A4,
typename A5, bool cross>
class TypeBuilder<R(A1, A2, A3, A4, A5, ...), cross> {
public:
- static const FunctionType *get(LLVMContext &Context) {
- std::vector<const Type*> params;
+ static FunctionType *get(LLVMContext &Context) {
+ std::vector<Type*> params;
params.reserve(5);
params.push_back(TypeBuilder<A1, cross>::get(Context));
params.push_back(TypeBuilder<A2, cross>::get(Context));
diff --git a/include/llvm/Support/system_error.h b/include/llvm/Support/system_error.h
index 47759b9..2c15b69 100644
--- a/include/llvm/Support/system_error.h
+++ b/include/llvm/Support/system_error.h
@@ -222,7 +222,7 @@ template <> struct hash<std::error_code>;
*/
-#include "llvm/Config/config.h"
+#include "llvm/Config/llvm-config.h"
#include "llvm/Support/type_traits.h"
#include <cerrno>
#include <string>
@@ -669,7 +669,7 @@ const error_category& generic_category();
const error_category& system_category();
/// Get the error_category used for errno values from POSIX functions. This is
-/// the same as the system_category on POISIX systems, but is the same as the
+/// the same as the system_category on POSIX systems, but is the same as the
/// generic_category on Windows.
const error_category& posix_category();
diff --git a/include/llvm/Target/Target.td b/include/llvm/Target/Target.td
index ab6a4e2..018ccbd 100644
--- a/include/llvm/Target/Target.td
+++ b/include/llvm/Target/Target.td
@@ -26,11 +26,19 @@ class SubRegIndex {
string Namespace = "";
}
+// RegAltNameIndex - The alternate name set to use for register operands of
+// this register class when printing.
+class RegAltNameIndex {
+ string Namespace = "";
+}
+def NoRegAltName : RegAltNameIndex;
+
// Register - You should define one instance of this class for each register
// in the target machine. String n will become the "name" of the register.
-class Register<string n> {
+class Register<string n, list<string> altNames = []> {
string Namespace = "";
string AsmName = n;
+ list<string> AltNames = altNames;
// Aliases - A list of registers that this register overlaps with. A read or
// modification of this register can potentially read or modify the aliased
@@ -48,6 +56,10 @@ class Register<string n> {
// SubRegs.
list<SubRegIndex> SubRegIndices = [];
+ // RegAltNameIndices - The alternate name indices which are valid for this
+ // register.
+ list<RegAltNameIndex> RegAltNameIndices = [];
+
// CompositeIndices - Specify subreg indices that don't correspond directly to
// a register in SubRegs and are not inherited. The following formats are
// supported:
@@ -92,7 +104,7 @@ class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
// registers by register allocators.
//
class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
- list<Register> regList> {
+ dag regList, RegAltNameIndex idx = NoRegAltName> {
string Namespace = namespace;
// RegType - Specify the list ValueType of the registers in this register
@@ -122,7 +134,12 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// allocation_order_* method are not specified, this also defines the order of
// allocation used by the register allocator.
//
- list<Register> MemberList = regList;
+ dag MemberList = regList;
+
+ // AltNameIndex - The alternate register name to use when printing operands
+ // of this register class. Every register in the register class must have
+ // a valid alternate name for the given index.
+ RegAltNameIndex altNameIndex = idx;
// SubRegClasses - Specify the register class of subregisters as a list of
// dags: (RegClass SubRegIndex, SubRegindex, ...)
@@ -133,11 +150,91 @@ class RegisterClass<string namespace, list<ValueType> regTypes, int alignment,
// model instruction operand constraints, and should have isAllocatable = 0.
bit isAllocatable = 1;
- // MethodProtos/MethodBodies - These members can be used to insert arbitrary
- // code into a generated register class. The normal usage of this is to
- // overload virtual methods.
- code MethodProtos = [{}];
- code MethodBodies = [{}];
+ // AltOrders - List of alternative allocation orders. The default order is
+ // MemberList itself, and that is good enough for most targets since the
+ // register allocators automatically remove reserved registers and move
+ // callee-saved registers to the end.
+ list<dag> AltOrders = [];
+
+ // AltOrderSelect - The body of a function that selects the allocation order
+ // to use in a given machine function. The code will be inserted in a
+ // function like this:
+ //
+ // static inline unsigned f(const MachineFunction &MF) { ... }
+ //
+ // The function should return 0 to select the default order defined by
+ // MemberList, 1 to select the first AltOrders entry and so on.
+ code AltOrderSelect = [{}];
+}
+
+// The MemberList in a RegisterClass is a dag of set operations. TableGen
+// evaluates these set operations and expands them into register lists. These
+// operators are the most common; see test/TableGen/SetTheory.td for more
+// examples of what is possible:
+//
+// (add R0, R1, R2) - Set union. Each argument can be an individual register, a
+// register class, or a sub-expression. This is also the way to simply list
+// registers.
+//
+// (sub GPR, SP) - Set difference. Subtract the later arguments from the first.
+//
+// (and GPR, CSR) - Set intersection. All registers from the first set that are
+// also in the second set.
+//
+// (sequence "R%u", 0, 15) -> [R0, R1, ..., R15]. Generate a sequence of
+// numbered registers.
+//
+// (shl GPR, 4) - Remove the first N elements.
+//
+// (trunc GPR, 4) - Truncate after the first N elements.
+//
+// (rotl GPR, 1) - Rotate N places to the left.
+//
+// (rotr GPR, 1) - Rotate N places to the right.
+//
+// (decimate GPR, 2) - Pick every N'th element, starting with the first.
+//
+// All of these operators work on ordered sets, not lists. That means
+// duplicates are removed from sub-expressions.
+
+// Set operators. The rest is defined in TargetSelectionDAG.td.
+def sequence;
+def decimate;
+
+// RegisterTuples - Automatically generate super-registers by forming tuples of
+// sub-registers. This is useful for modeling register sequence constraints
+// with pseudo-registers that are larger than the architectural registers.
+//
+// The sub-register lists are zipped together:
+//
+// def EvenOdd : RegisterTuples<[sube, subo], [(add R0, R2), (add R1, R3)]>;
+//
+// Generates the same registers as:
+//
+// let SubRegIndices = [sube, subo] in {
+// def R0_R1 : RegisterWithSubRegs<"", [R0, R1]>;
+// def R2_R3 : RegisterWithSubRegs<"", [R2, R3]>;
+// }
+//
+// The generated pseudo-registers inherit super-classes and fields from their
+// first sub-register. Most fields from the Register class are inferred, and
+// the AsmName and Dwarf numbers are cleared.
+//
+// RegisterTuples instances can be used in other set operations to form
+// register classes and so on. This is the only way of using the generated
+// registers.
+class RegisterTuples<list<SubRegIndex> Indices, list<dag> Regs> {
+ // SubRegs - N lists of registers to be zipped up. Super-registers are
+ // synthesized from the first element of each SubRegs list, the second
+ // element and so on.
+ list<dag> SubRegs = Regs;
+
+ // SubRegIndices - N SubRegIndex instances. This provides the names of the
+ // sub-registers in the synthesized super-registers.
+ list<SubRegIndex> SubRegIndices = Indices;
+
+ // Compose sub-register indices like in a normal Register.
+ list<dag> CompositeIndices = [];
}
@@ -196,7 +293,12 @@ class Instruction {
// code.
list<Predicate> Predicates = [];
- // Code size.
+ // Size - Size of encoded instruction, or zero if the size cannot be determined
+ // from the opcode.
+ int Size = 0;
+
+ // Code size, for instruction selection.
+ // FIXME: What does this actually mean?
int CodeSize = 0;
// Added complexity passed onto matching pattern.
@@ -227,6 +329,9 @@ class Instruction {
bit isAsCheapAsAMove = 0; // As cheap (or cheaper) than a move instruction.
bit hasExtraSrcRegAllocReq = 0; // Sources have special regalloc requirement?
bit hasExtraDefRegAllocReq = 0; // Defs have special regalloc requirement?
+ bit isPseudo = 0; // Is this instruction a pseudo-instruction?
+ // If so, won't have encoding information for
+ // the [MC]CodeEmitter stuff.
// Side effect flags - When set, the flags have these meanings:
//
@@ -241,6 +346,11 @@ class Instruction {
// Is this instruction a "real" instruction (with a distinct machine
// encoding), or is it a pseudo instruction used for codegen modeling
// purposes.
+ // FIXME: For now this is distinct from isPseudo, above, as code-gen-only
+ // instructions can (and often do) still have encoding information
+ // associated with them. Once we've migrated all of them over to true
+ // pseudo-instructions that are lowered to real instructions prior to
+ // the printer/emitter, we can remove this attribute and just use isPseudo.
bit isCodeGenOnly = 0;
// Is this instruction a pseudo instruction for use by the assembler parser.
@@ -268,6 +378,14 @@ class Instruction {
///@}
}
+/// PseudoInstExpansion - Expansion information for a pseudo-instruction:
+/// which instruction it expands to and how the operands map from the
+/// pseudo.
+class PseudoInstExpansion<dag Result> {
+ dag ResultInst = Result; // The instruction to generate.
+ bit isPseudo = 1;
+}
+
/// Predicates - These are extra conditionals which are turned into instruction
/// selector matching code. Currently each predicate is just a string.
class Predicate<string cond> {
@@ -277,6 +395,15 @@ class Predicate<string cond> {
/// matcher, this is true. Targets should set this by inheriting their
/// feature from the AssemblerPredicate class in addition to Predicate.
bit AssemblerMatcherPredicate = 0;
+
+ /// AssemblerCondString - Name of the subtarget feature being tested, used
+ /// as an alternative condition string for the assembler matcher.
+ /// e.g. "ModeThumb" is translated to "(Bits & ModeThumb) != 0".
+ /// "!ModeThumb" is translated to "(Bits & ModeThumb) == 0".
+ /// It can also list multiple features separated by ",".
+ /// e.g. "ModeThumb,FeatureThumb2" is translated to
+ /// "(Bits & ModeThumb) != 0 && (Bits & FeatureThumb2) != 0".
+ string AssemblerCondString = "";
}
/// NoHonorSignDependentRounding - This predicate is true if support for
@@ -373,6 +500,7 @@ class Operand<ValueType ty> {
string EncoderMethod = "";
string DecoderMethod = "";
string AsmOperandLowerMethod = ?;
+ string OperandType = "OPERAND_UNKNOWN";
dag MIOperandInfo = (ops);
// ParserMatchClass - The "match class" that operands of this type fit
@@ -386,6 +514,25 @@ class Operand<ValueType ty> {
AsmOperandClass ParserMatchClass = ImmAsmOperand;
}
+class RegisterOperand<RegisterClass regclass, string pm = "printOperand"> {
+ // RegClass - The register class of the operand.
+ RegisterClass RegClass = regclass;
+ // PrintMethod - The target method to call to print register operands of
+ // this type. The method normally will just use an alt-name index to look
+ // up the name to print. Defaults to the generic printOperand().
+ string PrintMethod = pm;
+ // ParserMatchClass - The "match class" that operands of this type fit
+ // in. Match classes are used to define the order in which instructions are
+ // matched, to ensure that which instruction gets matched is deterministic.
+ //
+ // The target specific parser must be able to classify a parsed operand into
+ // a unique class, which does not partially overlap with any other classes. It
+ // can match a subset of some other class, in which case the AsmOperandClass
+ // should declare the other operand as one of its super classes.
+ AsmOperandClass ParserMatchClass;
+}
+
+let OperandType = "OPERAND_IMMEDIATE" in {
def i1imm : Operand<i1>;
def i8imm : Operand<i8>;
def i16imm : Operand<i16>;
@@ -394,6 +541,7 @@ def i64imm : Operand<i64>;
def f32imm : Operand<f32>;
def f64imm : Operand<f64>;
+}
/// zero_reg definition - Special node to stand for the zero register.
///
@@ -566,8 +714,9 @@ def DefaultAsmParser : AsmParser;
/// AssemblerPredicate - This is a Predicate that can be used when the assembler
/// matches instructions and aliases.
-class AssemblerPredicate {
+class AssemblerPredicate<string cond> {
bit AssemblerMatcherPredicate = 1;
+ string AssemblerCondString = cond;
}
diff --git a/include/llvm/Target/TargetAsmInfo.h b/include/llvm/Target/TargetAsmInfo.h
index 743a2d4..5a526dc 100644
--- a/include/llvm/Target/TargetAsmInfo.h
+++ b/include/llvm/Target/TargetAsmInfo.h
@@ -20,6 +20,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
namespace llvm {
+ template <typename T> class ArrayRef;
class MCSection;
class MCContext;
class MachineFunction;
@@ -27,30 +28,14 @@ namespace llvm {
class TargetLoweringObjectFile;
class TargetAsmInfo {
- unsigned PointerSize;
- bool IsLittleEndian;
- TargetFrameLowering::StackDirection StackDir;
- const TargetRegisterInfo *TRI;
std::vector<MachineMove> InitialFrameState;
+ const TargetRegisterInfo *TRI;
+ const TargetFrameLowering *TFI;
const TargetLoweringObjectFile *TLOF;
public:
explicit TargetAsmInfo(const TargetMachine &TM);
- /// getPointerSize - Get the pointer size in bytes.
- unsigned getPointerSize() const {
- return PointerSize;
- }
-
- /// islittleendian - True if the target is little endian.
- bool isLittleEndian() const {
- return IsLittleEndian;
- }
-
- TargetFrameLowering::StackDirection getStackGrowthDirection() const {
- return StackDir;
- }
-
const MCSection *getDwarfLineSection() const {
return TLOF->getDwarfLineSection();
}
@@ -59,6 +44,10 @@ public:
return TLOF->getEHFrameSection();
}
+ const MCSection *getCompactUnwindSection() const {
+ return TLOF->getCompactUnwindSection();
+ }
+
const MCSection *getDwarfFrameSection() const {
return TLOF->getDwarfFrameSection();
}
@@ -79,6 +68,12 @@ public:
return TLOF->isFunctionEHFrameSymbolPrivate();
}
+ int getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor,
+ bool IsEH) const {
+ return TFI->getCompactUnwindEncoding(Instrs, DataAlignmentFactor, IsEH);
+ }
+
const unsigned *getCalleeSavedRegs(MachineFunction *MF = 0) const {
return TRI->getCalleeSavedRegs(MF);
}
diff --git a/include/llvm/Target/TargetAsmParser.h b/include/llvm/Target/TargetAsmParser.h
index 9ff50cb..df84231 100644
--- a/include/llvm/Target/TargetAsmParser.h
+++ b/include/llvm/Target/TargetAsmParser.h
@@ -15,7 +15,6 @@
namespace llvm {
class MCStreamer;
class StringRef;
-class Target;
class SMLoc;
class AsmToken;
class MCParsedAsmOperand;
@@ -26,23 +25,19 @@ class TargetAsmParser : public MCAsmParserExtension {
TargetAsmParser(const TargetAsmParser &); // DO NOT IMPLEMENT
void operator=(const TargetAsmParser &); // DO NOT IMPLEMENT
protected: // Can only create subclasses.
- TargetAsmParser(const Target &);
+ TargetAsmParser();
- /// The Target that this machine was created for.
- const Target &TheTarget;
-
- /// The current set of available features.
+ /// AvailableFeatures - The current set of available features.
unsigned AvailableFeatures;
public:
virtual ~TargetAsmParser();
- const Target &getTarget() const { return TheTarget; }
-
unsigned getAvailableFeatures() const { return AvailableFeatures; }
void setAvailableFeatures(unsigned Value) { AvailableFeatures = Value; }
- virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) = 0;
+ virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) = 0;
/// ParseInstruction - Parse one assembly instruction.
///
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index 32e3e2b..c280810 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -259,7 +259,7 @@ public:
/// getIntPtrType - Return an unsigned integer type that is the same size as
/// or greater than the host pointer size.
///
- const IntegerType *getIntPtrType(LLVMContext &C) const;
+ IntegerType *getIntPtrType(LLVMContext &C) const;
/// getIndexedOffset - return the offset from the beginning of the type for
/// the specified indices. This is used to implement getelementptr.
@@ -272,12 +272,6 @@ public:
/// information is lazily cached.
const StructLayout *getStructLayout(const StructType *Ty) const;
- /// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout
- /// objects. If a TargetData object is alive when types are being refined and
- /// removed, this method must be called whenever a StructType is removed to
- /// avoid a dangling pointer in this cache.
- void InvalidateStructLayoutInfo(const StructType *Ty) const;
-
/// getPreferredAlignment - Return the preferred alignment of the specified
/// global. This includes an explicitly requested alignment (if the global
/// has one).
diff --git a/include/llvm/Target/TargetFrameLowering.h b/include/llvm/Target/TargetFrameLowering.h
index e104b16..e3d77cf 100644
--- a/include/llvm/Target/TargetFrameLowering.h
+++ b/include/llvm/Target/TargetFrameLowering.h
@@ -15,6 +15,8 @@
#define LLVM_TARGET_TARGETFRAMELOWERING_H
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/ADT/ArrayRef.h"
#include <utility>
#include <vector>
@@ -189,6 +191,14 @@ public:
///
virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
}
+
+ /// getCompactUnwindEncoding - Get the compact unwind encoding for the
+ /// function. Return 0 if a compact unwind encoding isn't available.
+ virtual uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor,
+ bool IsEH) const {
+ return 0;
+ }
};
} // End llvm namespace
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 418f3fe..f663566 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -14,7 +14,7 @@
#ifndef LLVM_TARGET_TARGETINSTRINFO_H
#define LLVM_TARGET_TARGETINSTRINFO_H
-#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -32,6 +32,7 @@ class SelectionDAG;
class ScheduleDAG;
class TargetRegisterClass;
class TargetRegisterInfo;
+class BranchProbability;
template<class T> class SmallVectorImpl;
@@ -40,25 +41,22 @@ template<class T> class SmallVectorImpl;
///
/// TargetInstrInfo - Interface to description of machine instruction set
///
-class TargetInstrInfo {
- const TargetInstrDesc *Descriptors; // Raw array to allow static init'n
- unsigned NumOpcodes; // Number of entries in the desc array
-
+class TargetInstrInfo : public MCInstrInfo {
TargetInstrInfo(const TargetInstrInfo &); // DO NOT IMPLEMENT
void operator=(const TargetInstrInfo &); // DO NOT IMPLEMENT
public:
- TargetInstrInfo(const TargetInstrDesc *desc, unsigned NumOpcodes);
+ TargetInstrInfo(int CFSetupOpcode = -1, int CFDestroyOpcode = -1)
+ : CallFrameSetupOpcode(CFSetupOpcode),
+ CallFrameDestroyOpcode(CFDestroyOpcode) {
+ }
+
virtual ~TargetInstrInfo();
- unsigned getNumOpcodes() const { return NumOpcodes; }
-
- /// get - Return the machine instruction descriptor that corresponds to the
- /// specified instruction opcode.
- ///
- const TargetInstrDesc &get(unsigned Opcode) const {
- assert(Opcode < NumOpcodes && "Invalid opcode!");
- return Descriptors[Opcode];
- }
+ /// getRegClass - Given a machine instruction descriptor, returns the register
+ /// class constraint for OpNum, or NULL.
+ const TargetRegisterClass *getRegClass(const MCInstrDesc &TID,
+ unsigned OpNum,
+ const TargetRegisterInfo *TRI) const;
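For context, a sketch of how a client might call the new hook; it assumes MachineInstr::getDesc now yields the MC descriptor per the parallel MC-layer changes, and the operand index 0 is just an example:

  // Query the register class constraint of operand 0 through the new
  // MCInstrDesc-based interface instead of the old TargetInstrDesc one.
  const MCInstrDesc &Desc = MI->getDesc();
  const TargetRegisterClass *RC = TII->getRegClass(Desc, 0, TRI);
  if (RC) {
    // Operand 0 must be assigned a register from RC.
  }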
/// isTriviallyReMaterializable - Return true if the instruction is trivially
/// rematerializable, meaning it has no side effects and requires no operands
@@ -93,6 +91,15 @@ private:
AliasAnalysis *AA) const;
public:
+ /// getCallFrameSetup/DestroyOpcode - These methods return the opcode of the
+ /// frame setup/destroy instructions if they exist (-1 otherwise). Some
+ /// targets use pseudo instructions in order to abstract away the difference
+ /// between operating with a frame pointer and operating without, through the
+ /// use of these two instructions.
+ ///
+ int getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; }
+ int getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; }
+
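A hedged sketch of the intended use of these accessors (illustrative, not code from this patch; MF, MBB, TII, and TRI are assumed in scope):

  // Recognize call frame pseudos via TargetInstrInfo, where these opcode
  // accessors now live (they previously lived on TargetRegisterInfo).
  int SetupOp = TII->getCallFrameSetupOpcode();
  int DestroyOp = TII->getCallFrameDestroyOpcode();
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
    MachineBasicBlock::iterator MI = I++;   // MI may be erased below
    if (MI->getOpcode() == SetupOp || MI->getOpcode() == DestroyOp)
      TRI->eliminateCallFramePseudoInstr(MF, MBB, MI);
  }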
/// isCoalescableExtInstr - Return true if the instruction is a "coalescable"
/// extension instruction. That is, it's like a copy where it's legal for the
/// source to overlap the destination. e.g. X86::MOVSX64rr32. If this returns
@@ -315,7 +322,7 @@ public:
virtual
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
unsigned ExtraPredCycles,
- float Probability, float Confidence) const {
+ const BranchProbability &Probability) const {
return false;
}
@@ -330,7 +337,7 @@ public:
unsigned NumTCycles, unsigned ExtraTCycles,
MachineBasicBlock &FMBB,
unsigned NumFCycles, unsigned ExtraFCycles,
- float Probability, float Confidence) const {
+ const BranchProbability &Probability) const {
return false;
}
@@ -342,7 +349,7 @@ public:
/// will be properly predicted.
virtual bool
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
- float Probability, float Confidence) const {
+ const BranchProbability &Probability) const {
return false;
}
@@ -663,6 +670,9 @@ public:
virtual
bool hasLowDefLatency(const InstrItineraryData *ItinData,
const MachineInstr *DefMI, unsigned DefIdx) const;
+
+private:
+ int CallFrameSetupOpcode, CallFrameDestroyOpcode;
};
/// TargetInstrInfoImpl - This is the default implementation of
@@ -671,8 +681,9 @@ public:
/// libcodegen, not in libtarget.
class TargetInstrInfoImpl : public TargetInstrInfo {
protected:
- TargetInstrInfoImpl(const TargetInstrDesc *desc, unsigned NumOpcodes)
- : TargetInstrInfo(desc, NumOpcodes) {}
+ TargetInstrInfoImpl(int CallFrameSetupOpcode = -1,
+ int CallFrameDestroyOpcode = -1)
+ : TargetInstrInfo(CallFrameSetupOpcode, CallFrameDestroyOpcode) {}
public:
virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) const;
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 3e36fb7..533c3ac 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -1421,13 +1421,6 @@ public:
/// is for this target.
virtual ConstraintType getConstraintType(const std::string &Constraint) const;
- /// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
- /// return a list of registers that can be used to satisfy the constraint.
- /// This should only be used for C_RegisterClass constraints.
- virtual std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
/// getRegForInlineAsmConstraint - Given a physical register constraint (e.g.
/// {edx}), return the register number and the register class for the
/// register.
@@ -1547,6 +1540,8 @@ public:
//===--------------------------------------------------------------------===//
// Div utility functions
//
+ SDValue BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const;
SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG,
std::vector<SDNode*>* Created) const;
SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG,
@@ -1836,9 +1831,8 @@ private:
// Build a new vector type and check if it is legal.
MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
-
// Found a legal promoted vector type.
- if (ValueTypeActions.getTypeAction(NVT) == TypeLegal)
+ if (NVT != MVT() && ValueTypeActions.getTypeAction(NVT) == TypeLegal)
return LegalizeKind(TypePromoteInteger,
EVT::getVectorVT(Context, EltVT, NumElts));
}
@@ -1853,6 +1847,7 @@ private:
// If there is no simple vector type with this many elements then there
// cannot be a larger legal vector type. Note that this assumes that
// there are no skipped intermediate vector types in the simple types.
+ if (!EltVT.isSimple()) break;
MVT LargerVector = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts);
if (LargerVector == MVT()) break;
diff --git a/include/llvm/Target/TargetLoweringObjectFile.h b/include/llvm/Target/TargetLoweringObjectFile.h
index 3991035..2e1d6b9 100644
--- a/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/include/llvm/Target/TargetLoweringObjectFile.h
@@ -68,6 +68,11 @@ protected:
/// LSDASection - If exception handling is supported by the target, this is
/// the section the Language Specific Data Area information is emitted to.
const MCSection *LSDASection;
+
+ /// CompactUnwindSection - If exception handling is supported by the target
+ /// and the target can support a compact representation of the CIE and FDE,
+ /// this is the section to emit them into.
+ const MCSection *CompactUnwindSection;
// Dwarf sections for debug info. If a target supports debug info, these must
// be set.
@@ -102,8 +107,8 @@ protected:
/// private linkage, aka an L or .L label) or false if it should be a normal
/// non-.globl label. This defaults to true.
bool IsFunctionEHFrameSymbolPrivate;
+
public:
-
MCContext &getContext() const { return *Ctx; }
virtual ~TargetLoweringObjectFile();
@@ -121,7 +126,6 @@ public:
bool getSupportsWeakOmittedEHFrame() const {
return SupportsWeakOmittedEHFrame;
}
-
bool getCommDirectiveSupportsAlignment() const {
return CommDirectiveSupportsAlignment;
}
@@ -132,6 +136,7 @@ public:
const MCSection *getStaticCtorSection() const { return StaticCtorSection; }
const MCSection *getStaticDtorSection() const { return StaticDtorSection; }
const MCSection *getLSDASection() const { return LSDASection; }
+ const MCSection *getCompactUnwindSection() const{return CompactUnwindSection;}
virtual const MCSection *getEHFrameSection() const = 0;
virtual void emitPersonalityValue(MCStreamer &Streamer,
const TargetMachine &TM,
diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h
index 78f770c..ac41a58 100644
--- a/include/llvm/Target/TargetMachine.h
+++ b/include/llvm/Target/TargetMachine.h
@@ -14,29 +14,30 @@
#ifndef LLVM_TARGET_TARGETMACHINE_H
#define LLVM_TARGET_TARGETMACHINE_H
-#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <string>
namespace llvm {
-class Target;
+class InstrItineraryData;
+class JITCodeEmitter;
class MCAsmInfo;
+class MCContext;
+class Pass;
+class PassManager;
+class PassManagerBase;
+class Target;
class TargetData;
-class TargetSubtarget;
+class TargetELFWriterInfo;
+class TargetFrameLowering;
class TargetInstrInfo;
class TargetIntrinsicInfo;
class TargetJITInfo;
class TargetLowering;
-class TargetSelectionDAGInfo;
-class TargetFrameLowering;
-class JITCodeEmitter;
-class MCContext;
class TargetRegisterInfo;
-class PassManagerBase;
-class PassManager;
-class Pass;
-class TargetELFWriterInfo;
+class TargetSelectionDAGInfo;
+class TargetSubtargetInfo;
class formatted_raw_ostream;
class raw_ostream;
@@ -91,15 +92,22 @@ class TargetMachine {
TargetMachine(const TargetMachine &); // DO NOT IMPLEMENT
void operator=(const TargetMachine &); // DO NOT IMPLEMENT
protected: // Can only create subclasses.
- TargetMachine(const Target &);
+ TargetMachine(const Target &T, StringRef TargetTriple,
+ StringRef CPU, StringRef FS);
/// getSubtargetImpl - virtual method implemented by subclasses that returns
- /// a reference to that target's TargetSubtarget-derived member variable.
- virtual const TargetSubtarget *getSubtargetImpl() const { return 0; }
+ /// a reference to that target's TargetSubtargetInfo-derived member variable.
+ virtual const TargetSubtargetInfo *getSubtargetImpl() const { return 0; }
/// TheTarget - The Target that this machine was created for.
const Target &TheTarget;
+ /// TargetTriple, TargetCPU, TargetFS - Triple string, CPU name, and target
+ /// feature strings the TargetMachine instance is created with.
+ std::string TargetTriple;
+ std::string TargetCPU;
+ std::string TargetFS;
+
/// AsmInfo - Contains target specific asm information.
///
const MCAsmInfo *AsmInfo;
@@ -115,6 +123,10 @@ public:
const Target &getTarget() const { return TheTarget; }
+ const StringRef getTargetTriple() const { return TargetTriple; }
+ const StringRef getTargetCPU() const { return TargetCPU; }
+ const StringRef getTargetFeatureString() const { return TargetFS; }
+
// Interfaces to the major aspects of target machine information:
// -- Instruction opcode and operand information
// -- Pipelines and scheduling information
@@ -132,7 +144,7 @@ public:
const MCAsmInfo *getMCAsmInfo() const { return AsmInfo; }
/// getSubtarget - This method returns a pointer to the specified type of
- /// TargetSubtarget. In debug builds, it verifies that the object being
+ /// TargetSubtargetInfo. In debug builds, it verifies that the object being
/// returned is of the correct type.
template<typename STC> const STC &getSubtarget() const {
return *static_cast<const STC*>(getSubtargetImpl());
@@ -295,10 +307,9 @@ public:
/// implemented with the LLVM target-independent code generator.
///
class LLVMTargetMachine : public TargetMachine {
- std::string TargetTriple;
-
protected: // Can only create subclasses.
- LLVMTargetMachine(const Target &T, const std::string &TargetTriple);
+ LLVMTargetMachine(const Target &T, StringRef TargetTriple,
+ StringRef CPU, StringRef FS);
private:
/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for
@@ -311,9 +322,6 @@ private:
virtual void setCodeModelForStatic();
public:
-
- const std::string &getTargetTriple() const { return TargetTriple; }
-
/// addPassesToEmitFile - Add passes to the specified pass manager to get the
/// specified file emitted. Typically this will involve several steps of code
/// generation. If OptLevel is None, the code generator should emit code as
diff --git a/include/llvm/Target/TargetOpcodes.h b/include/llvm/Target/TargetOpcodes.h
index 01fba66..37f7b2f 100644
--- a/include/llvm/Target/TargetOpcodes.h
+++ b/include/llvm/Target/TargetOpcodes.h
@@ -71,6 +71,10 @@ namespace TargetOpcode {
/// REG_SEQUENCE - This variadic instruction is used to form a register that
/// represents a consecutive sequence of sub-registers. It's used as a register
/// coalescing / allocation aid and must be eliminated before code emission.
+ /// In SDNode form, the first operand encodes the register class created by
+ /// the REG_SEQUENCE, while each subsequent pair names a vreg + subreg index
+ /// pair. Once it has been lowered to a MachineInstr, the regclass operand
+ /// is no longer present.
/// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5
/// After register coalescing, references of v1024 should be replaced with
/// v1027:3, v1025 with v1027:4, etc.
diff --git a/include/llvm/Target/TargetOptions.h b/include/llvm/Target/TargetOptions.h
index beed039..55d50d9 100644
--- a/include/llvm/Target/TargetOptions.h
+++ b/include/llvm/Target/TargetOptions.h
@@ -133,8 +133,8 @@ namespace llvm {
/// as their parent function, etc.), using an alternate ABI if necessary.
extern bool GuaranteedTailCallOpt;
- /// StackAlignment - Override default stack alignment for target.
- extern unsigned StackAlignment;
+ /// StackAlignmentOverride - Override default stack alignment for target.
+ extern unsigned StackAlignmentOverride;
/// RealignStack - This flag indicates whether the stack should be
/// automatically realigned, if needed.
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index f6a8414..8d827f1 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -16,8 +16,10 @@
#ifndef LLVM_TARGET_TARGETREGISTERINFO_H
#define LLVM_TARGET_TARGETREGISTERINFO_H
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include <cassert>
#include <functional>
@@ -26,30 +28,10 @@ namespace llvm {
class BitVector;
class MachineFunction;
-class MachineMove;
class RegScavenger;
template<class T> class SmallVectorImpl;
class raw_ostream;
-/// TargetRegisterDesc - This record contains all of the information known about
-/// a particular register. The Overlaps field contains a pointer to a zero
-/// terminated array of registers that this register aliases, starting with
-/// itself. This is needed for architectures like X86 which have AL alias AX
-/// alias EAX. The SubRegs field is a zero terminated array of registers that
-/// are sub-registers of the specific register, e.g. AL, AH are sub-registers of
-/// AX. The SuperRegs field is a zero terminated array of registers that are
-/// super-registers of the specific register, e.g. RAX, EAX, are super-registers
-/// of AX.
-///
-struct TargetRegisterDesc {
- const char *Name; // Printable name for the reg (for debugging)
- const unsigned *Overlaps; // Overlapping registers, described above
- const unsigned *SubRegs; // Sub-register set, described above
- const unsigned *SuperRegs; // Super-register set, described above
- unsigned CostPerUse; // Extra cost of instructions using register.
- bool inAllocatableClass; // Register belongs to an allocatable regclass.
-};
-
class TargetRegisterClass {
public:
typedef const unsigned* iterator;
@@ -236,27 +218,23 @@ public:
return SuperClasses[0] != 0;
}
- /// allocation_order_begin/end - These methods define a range of registers
- /// which specify the registers in this class that are valid to register
- /// allocate, and the preferred order to allocate them in. For example,
- /// callee saved registers should be at the end of the list, because it is
- /// cheaper to allocate caller saved registers.
- ///
- /// These methods take a MachineFunction argument, which can be used to tune
- /// the allocatable registers based on the characteristics of the function,
- /// subtarget, or other criteria.
+ /// getRawAllocationOrder - Returns the preferred order for allocating
+ /// registers from this register class in MF. The raw order comes directly
+ /// from the .td file and may include reserved registers that are not
+ /// allocatable. Register allocators should also make sure to allocate
+ /// callee-saved registers only after all the volatiles are used. The
+ /// RegisterClassInfo class provides filtered allocation orders with
+ /// callee-saved registers moved to the end.
///
- /// Register allocators should account for the fact that an allocation
- /// order iterator may return a reserved register and always check
- /// if the register is allocatable (getAllocatableSet()) before using it.
+ /// The MachineFunction argument can be used to tune the allocatable
+ /// registers based on the characteristics of the function, subtarget, or
+ /// other criteria.
///
- /// By default, these methods return all registers in the class.
+ /// By default, this method returns all registers in the class.
///
- virtual iterator allocation_order_begin(const MachineFunction &MF) const {
- return begin();
- }
- virtual iterator allocation_order_end(const MachineFunction &MF) const {
- return end();
+ virtual
+ ArrayRef<unsigned> getRawAllocationOrder(const MachineFunction &MF) const {
+ return ArrayRef<unsigned>(begin(), getNumRegs());
}
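A sketch of how an allocator without hints might consume the raw order, filtering reserved registers itself as the comment above requires; RC, TRI, and MF are assumed in scope, and tryAssign is a hypothetical helper:

  ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
  BitVector Reserved = TRI->getReservedRegs(MF);
  for (unsigned i = 0, e = Order.size(); i != e; ++i)
    if (!Reserved.test(Order[i]))
      tryAssign(Order[i]);   // hypothetical: try this physreg next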
/// getSize - Return the size of the register in bytes, which is also the size
@@ -277,6 +255,12 @@ public:
bool isAllocatable() const { return Allocatable; }
};
+/// TargetRegisterInfoDesc - Extra information, not in MCRegisterDesc, about
+/// registers. These are used by codegen, not by MC.
+struct TargetRegisterInfoDesc {
+ unsigned CostPerUse; // Extra cost of instructions using register.
+ bool inAllocatableClass; // Register belongs to an allocatable regclass.
+};
/// TargetRegisterInfo base class - We assume that the target defines a static
/// array of TargetRegisterDesc objects that represent all of the machine
@@ -284,34 +268,19 @@ public:
/// to this array so that we can turn register number into a register
/// descriptor.
///
-class TargetRegisterInfo {
-protected:
- const unsigned* SubregHash;
- const unsigned SubregHashSize;
- const unsigned* AliasesHash;
- const unsigned AliasesHashSize;
+class TargetRegisterInfo : public MCRegisterInfo {
public:
typedef const TargetRegisterClass * const * regclass_iterator;
private:
- const TargetRegisterDesc *Desc; // Pointer to the descriptor array
+ const TargetRegisterInfoDesc *InfoDesc; // Extra desc array for codegen
const char *const *SubRegIndexNames; // Names of subreg indexes.
- unsigned NumRegs; // Number of entries in the array
-
regclass_iterator RegClassBegin, RegClassEnd; // List of regclasses
- int CallFrameSetupOpcode, CallFrameDestroyOpcode;
-
protected:
- TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
+ TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
regclass_iterator RegClassBegin,
regclass_iterator RegClassEnd,
- const char *const *subregindexnames,
- int CallFrameSetupOpcode = -1,
- int CallFrameDestroyOpcode = -1,
- const unsigned* subregs = 0,
- const unsigned subregsize = 0,
- const unsigned* aliases = 0,
- const unsigned aliasessize = 0);
+ const char *const *subregindexnames);
virtual ~TargetRegisterInfo();
public:
@@ -391,71 +360,16 @@ public:
BitVector getAllocatableSet(const MachineFunction &MF,
const TargetRegisterClass *RC = NULL) const;
- const TargetRegisterDesc &operator[](unsigned RegNo) const {
- assert(RegNo < NumRegs &&
- "Attempting to access record for invalid register number!");
- return Desc[RegNo];
- }
-
- /// Provide a get method, equivalent to [], but more useful if we have a
- /// pointer to this object.
- ///
- const TargetRegisterDesc &get(unsigned RegNo) const {
- return operator[](RegNo);
- }
-
- /// getAliasSet - Return the set of registers aliased by the specified
- /// register, or a null list of there are none. The list returned is zero
- /// terminated.
- ///
- const unsigned *getAliasSet(unsigned RegNo) const {
- // The Overlaps set always begins with Reg itself.
- return get(RegNo).Overlaps + 1;
- }
-
- /// getOverlaps - Return a list of registers that overlap Reg, including
- /// itself. This is the same as the alias set except Reg is included in the
- /// list.
- /// These are exactly the registers in { x | regsOverlap(x, Reg) }.
- ///
- const unsigned *getOverlaps(unsigned RegNo) const {
- return get(RegNo).Overlaps;
- }
-
- /// getSubRegisters - Return the list of registers that are sub-registers of
- /// the specified register, or a null list of there are none. The list
- /// returned is zero terminated and sorted according to super-sub register
- /// relations. e.g. X86::RAX's sub-register list is EAX, AX, AL, AH.
- ///
- const unsigned *getSubRegisters(unsigned RegNo) const {
- return get(RegNo).SubRegs;
- }
-
- /// getSuperRegisters - Return the list of registers that are super-registers
- /// of the specified register, or a null list of there are none. The list
- /// returned is zero terminated and sorted according to super-sub register
- /// relations. e.g. X86::AL's super-register list is RAX, EAX, AX.
- ///
- const unsigned *getSuperRegisters(unsigned RegNo) const {
- return get(RegNo).SuperRegs;
- }
-
- /// getName - Return the human-readable symbolic target-specific name for the
- /// specified physical register.
- const char *getName(unsigned RegNo) const {
- return get(RegNo).Name;
- }
-
/// getCostPerUse - Return the additional cost of using this register instead
/// of other registers in its class.
unsigned getCostPerUse(unsigned RegNo) const {
- return get(RegNo).CostPerUse;
+ return InfoDesc[RegNo].CostPerUse;
}
- /// getNumRegs - Return the number of registers this target has (useful for
- /// sizing arrays holding per register information)
- unsigned getNumRegs() const {
- return NumRegs;
+ /// isInAllocatableClass - Return true if the register is a member of an
+ /// allocatable register class.
+ bool isInAllocatableClass(unsigned RegNo) const {
+ return InfoDesc[RegNo].inAllocatableClass;
}
/// getSubRegIndexName - Return the human-readable symbolic target-specific
@@ -468,49 +382,28 @@ public:
/// regsOverlap - Returns true if the two registers are equal or alias each
/// other. The registers may be virtual registers.
bool regsOverlap(unsigned regA, unsigned regB) const {
- if (regA == regB)
- return true;
-
+ if (regA == regB) return true;
if (isVirtualRegister(regA) || isVirtualRegister(regB))
return false;
-
- // regA and regB are distinct physical registers. Do they alias?
- size_t index = (regA + regB * 37) & (AliasesHashSize-1);
- unsigned ProbeAmt = 0;
- while (AliasesHash[index*2] != 0 &&
- AliasesHash[index*2+1] != 0) {
- if (AliasesHash[index*2] == regA && AliasesHash[index*2+1] == regB)
- return true;
-
- index = (index + ProbeAmt) & (AliasesHashSize-1);
- ProbeAmt += 2;
+ for (const unsigned *regList = getOverlaps(regA)+1; *regList; ++regList) {
+ if (*regList == regB) return true;
}
-
return false;
}
/// isSubRegister - Returns true if regB is a sub-register of regA.
///
bool isSubRegister(unsigned regA, unsigned regB) const {
- // SubregHash is a simple quadratically probed hash table.
- size_t index = (regA + regB * 37) & (SubregHashSize-1);
- unsigned ProbeAmt = 2;
- while (SubregHash[index*2] != 0 &&
- SubregHash[index*2+1] != 0) {
- if (SubregHash[index*2] == regA && SubregHash[index*2+1] == regB)
- return true;
-
- index = (index + ProbeAmt) & (SubregHashSize-1);
- ProbeAmt += 2;
- }
-
- return false;
+ return isSuperRegister(regB, regA);
}
/// isSuperRegister - Returns true if regB is a super-register of regA.
///
bool isSuperRegister(unsigned regA, unsigned regB) const {
- return isSubRegister(regB, regA);
+ for (const unsigned *regList = getSuperRegisters(regA); *regList; ++regList) {
+ if (*regList == regB) return true;
+ }
+ return false;
}
/// getCalleeSavedRegs - Return a null-terminated list of all of the
@@ -599,7 +492,7 @@ public:
}
/// getRegClass - Returns the register class associated with the enumeration
- /// value. See class TargetOperandInfo.
+ /// value. See class MCOperandInfo.
const TargetRegisterClass *getRegClass(unsigned i) const {
assert(i < getNumRegClasses() && "Register Class ID out of range");
return RegClassBegin[i];
@@ -642,14 +535,17 @@ public:
return 0;
}
- /// getAllocationOrder - Returns the register allocation order for a specified
- /// register class in the form of a pair of TargetRegisterClass iterators.
- virtual std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
- getAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const {
- return std::make_pair(RC->allocation_order_begin(MF),
- RC->allocation_order_end(MF));
+ /// getRawAllocationOrder - Returns the register allocation order for a
+ /// specified register class with a target-dependent hint. The returned list
+ /// may contain reserved registers that cannot be allocated.
+ ///
+ /// Register allocators need only call this function to resolve
+ /// target-dependent hints, but it should work without hinting as well.
+ virtual ArrayRef<unsigned>
+ getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
+ return RC->getRawAllocationOrder(MF);
}
/// ResolveRegAllocHint - Resolves the specified register allocation hint
@@ -762,15 +658,6 @@ public:
return false; // Must return a value in order to compile with VS 2005
}
- /// getCallFrameSetup/DestroyOpcode - These methods return the opcode of the
- /// frame setup/destroy instructions if they exist (-1 otherwise). Some
- /// targets use pseudo instructions in order to abstract away the difference
- /// between operating with a frame pointer and operating without, through the
- /// use of these two instructions.
- ///
- int getCallFrameSetupOpcode() const { return CallFrameSetupOpcode; }
- int getCallFrameDestroyOpcode() const { return CallFrameDestroyOpcode; }
-
/// eliminateCallFramePseudoInstr - This method is called during prolog/epilog
/// code insertion to eliminate call frame setup and destroy pseudo
/// instructions (but only if the Target is using them). It is responsible
@@ -782,9 +669,6 @@ public:
eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const {
- assert(getCallFrameSetupOpcode()== -1 && getCallFrameDestroyOpcode()== -1 &&
- "eliminateCallFramePseudoInstr must be implemented if using"
- " call frame setup/destroy pseudo instructions!");
assert(0 && "Call Frame Pseudo Instructions do not exist on this target!");
}
@@ -836,6 +720,12 @@ public:
virtual int getSEHRegNum(unsigned i) const {
return i;
}
+
+ /// getCompactUnwindRegNum - This function maps the register to the number for
+ /// compact unwind encoding. Return -1 if the register isn't valid.
+ virtual int getCompactUnwindRegNum(unsigned, bool) const {
+ return -1;
+ }
};
diff --git a/include/llvm/Target/TargetRegistry.h b/include/llvm/Target/TargetRegistry.h
index a464822..7e0ce19 100644
--- a/include/llvm/Target/TargetRegistry.h
+++ b/include/llvm/Target/TargetRegistry.h
@@ -33,7 +33,10 @@ namespace llvm {
class MCContext;
class MCDisassembler;
class MCInstPrinter;
+ class MCInstrInfo;
+ class MCRegisterInfo;
class MCStreamer;
+ class MCSubtargetInfo;
class TargetAsmBackend;
class TargetAsmLexer;
class TargetAsmParser;
@@ -63,10 +66,16 @@ namespace llvm {
typedef unsigned (*TripleMatchQualityFnTy)(const std::string &TT);
- typedef MCAsmInfo *(*AsmInfoCtorFnTy)(const Target &T,
- StringRef TT);
+ typedef MCAsmInfo *(*MCAsmInfoCtorFnTy)(const Target &T,
+ StringRef TT);
+ typedef MCInstrInfo *(*MCInstrInfoCtorFnTy)(void);
+ typedef MCRegisterInfo *(*MCRegInfoCtorFnTy)(void);
+ typedef MCSubtargetInfo *(*MCSubtargetInfoCtorFnTy)(StringRef TT,
+ StringRef CPU,
+ StringRef Features);
typedef TargetMachine *(*TargetMachineCtorTy)(const Target &T,
const std::string &TT,
+ const std::string &CPU,
const std::string &Features);
typedef AsmPrinter *(*AsmPrinterCtorTy)(TargetMachine &TM,
MCStreamer &Streamer);
@@ -74,15 +83,14 @@ namespace llvm {
const std::string &TT);
typedef TargetAsmLexer *(*AsmLexerCtorTy)(const Target &T,
const MCAsmInfo &MAI);
- typedef TargetAsmParser *(*AsmParserCtorTy)(const Target &T,MCAsmParser &P,
- TargetMachine &TM);
+ typedef TargetAsmParser *(*AsmParserCtorTy)(MCSubtargetInfo &STI,
+ MCAsmParser &P);
typedef MCDisassembler *(*MCDisassemblerCtorTy)(const Target &T);
typedef MCInstPrinter *(*MCInstPrinterCtorTy)(const Target &T,
- TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI);
- typedef MCCodeEmitter *(*CodeEmitterCtorTy)(const Target &T,
- TargetMachine &TM,
+ typedef MCCodeEmitter *(*CodeEmitterCtorTy)(const MCInstrInfo &II,
+ const MCSubtargetInfo &STI,
MCContext &Ctx);
typedef MCStreamer *(*ObjectStreamerCtorTy)(const Target &T,
const std::string &TT,
@@ -120,7 +128,21 @@ namespace llvm {
/// HasJIT - Whether this target supports the JIT.
bool HasJIT;
- AsmInfoCtorFnTy AsmInfoCtorFn;
+ /// MCAsmInfoCtorFn - Constructor function for this target's MCAsmInfo, if
+ /// registered.
+ MCAsmInfoCtorFnTy MCAsmInfoCtorFn;
+
+ /// MCInstrInfoCtorFn - Constructor function for this target's MCInstrInfo,
+ /// if registered.
+ MCInstrInfoCtorFnTy MCInstrInfoCtorFn;
+
+ /// MCRegInfoCtorFn - Constructor function for this target's MCRegisterInfo,
+ /// if registered.
+ MCRegInfoCtorFnTy MCRegInfoCtorFn;
+
+ /// MCSubtargetInfoCtorFn - Constructor function for this target's
+ /// MCSubtargetInfo, if registered.
+ MCSubtargetInfoCtorFnTy MCSubtargetInfoCtorFn;
/// TargetMachineCtorFn - Construction function for this target's
/// TargetMachine, if registered.
@@ -218,17 +240,49 @@ namespace llvm {
/// @name Feature Constructors
/// @{
- /// createAsmInfo - Create a MCAsmInfo implementation for the specified
+ /// createMCAsmInfo - Create a MCAsmInfo implementation for the specified
/// target triple.
///
/// \arg Triple - This argument is used to determine the target machine
/// feature set; it should always be provided. Generally this should be
/// either the target triple from the module, or the target triple of the
/// host if that does not exist.
- MCAsmInfo *createAsmInfo(StringRef Triple) const {
- if (!AsmInfoCtorFn)
+ MCAsmInfo *createMCAsmInfo(StringRef Triple) const {
+ if (!MCAsmInfoCtorFn)
+ return 0;
+ return MCAsmInfoCtorFn(*this, Triple);
+ }
+
+ /// createMCInstrInfo - Create a MCInstrInfo implementation.
+ ///
+ MCInstrInfo *createMCInstrInfo() const {
+ if (!MCInstrInfoCtorFn)
+ return 0;
+ return MCInstrInfoCtorFn();
+ }
+
+ /// createMCRegInfo - Create a MCRegisterInfo implementation.
+ ///
+ MCRegisterInfo *createMCRegInfo() const {
+ if (!MCRegInfoCtorFn)
+ return 0;
+ return MCRegInfoCtorFn();
+ }
+
+ /// createMCSubtargetInfo - Create a MCSubtargetInfo implementation.
+ ///
+ /// \arg Triple - This argument is used to determine the target machine
+ /// feature set; it should always be provided. Generally this should be
+ /// either the target triple from the module, or the target triple of the
+ /// host if that does not exist.
+ /// \arg CPU - This specifies the name of the target CPU.
+ /// \arg Features - This specifies the string representation of the
+ /// additional target features.
+ MCSubtargetInfo *createMCSubtargetInfo(StringRef Triple, StringRef CPU,
+ StringRef Features) const {
+ if (!MCSubtargetInfoCtorFn)
return 0;
- return AsmInfoCtorFn(*this, Triple);
+ return MCSubtargetInfoCtorFn(Triple, CPU, Features);
}
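Taken together, a client-side sketch of constructing the new MC-layer objects; TargetRegistry::lookupTarget and OwningPtr are assumed available from elsewhere in the tree, and the triple and CPU strings are placeholders:

  std::string Err;
  const Target *T =
    TargetRegistry::lookupTarget("x86_64-apple-darwin", Err);
  if (!T) return;
  OwningPtr<MCInstrInfo> MII(T->createMCInstrInfo());
  OwningPtr<MCRegisterInfo> MRI(T->createMCRegInfo());
  OwningPtr<MCSubtargetInfo> STI(
    T->createMCSubtargetInfo("x86_64-apple-darwin", "corei7", ""));
  // Each pointer may be null if the target never registered that hook.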
/// createTargetMachine - Create a target specific machine implementation
@@ -239,10 +293,11 @@ namespace llvm {
/// either the target triple from the module, or the target triple of the
/// host if that does not exist.
TargetMachine *createTargetMachine(const std::string &Triple,
+ const std::string &CPU,
const std::string &Features) const {
if (!TargetMachineCtorFn)
return 0;
- return TargetMachineCtorFn(*this, Triple, Features);
+ return TargetMachineCtorFn(*this, Triple, CPU, Features);
}
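Continuing the sketch above, the new CPU argument is simply threaded through; the CPU and feature strings are illustrative:

  TargetMachine *TM =
    T->createTargetMachine("x86_64-apple-darwin", "corei7", "+sse3");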
/// createAsmBackend - Create a target specific assembly backend.
@@ -267,11 +322,11 @@ namespace llvm {
///
/// \arg Parser - The target independent parser implementation to use for
/// parsing and lexing.
- TargetAsmParser *createAsmParser(MCAsmParser &Parser,
- TargetMachine &TM) const {
+ TargetAsmParser *createAsmParser(MCSubtargetInfo &STI,
+ MCAsmParser &Parser) const {
if (!AsmParserCtorFn)
return 0;
- return AsmParserCtorFn(*this, Parser, TM);
+ return AsmParserCtorFn(STI, Parser);
}
/// createAsmPrinter - Create a target specific assembly printer pass. This
@@ -288,20 +343,21 @@ namespace llvm {
return MCDisassemblerCtorFn(*this);
}
- MCInstPrinter *createMCInstPrinter(TargetMachine &TM,
- unsigned SyntaxVariant,
+ MCInstPrinter *createMCInstPrinter(unsigned SyntaxVariant,
const MCAsmInfo &MAI) const {
if (!MCInstPrinterCtorFn)
return 0;
- return MCInstPrinterCtorFn(*this, TM, SyntaxVariant, MAI);
+ return MCInstPrinterCtorFn(*this, SyntaxVariant, MAI);
}
/// createCodeEmitter - Create a target specific code emitter.
- MCCodeEmitter *createCodeEmitter(TargetMachine &TM, MCContext &Ctx) const {
+ MCCodeEmitter *createCodeEmitter(const MCInstrInfo &II,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) const {
if (!CodeEmitterCtorFn)
return 0;
- return CodeEmitterCtorFn(*this, TM, Ctx);
+ return CodeEmitterCtorFn(II, STI, Ctx);
}
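And the code emitter now takes the MC-layer objects directly rather than a TargetMachine; reusing the objects from the earlier snippet (Ctx is an MCContext assumed to be in scope):

  MCCodeEmitter *CE = T->createCodeEmitter(*MII, *STI, Ctx);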
/// createObjectStreamer - Create a target specific MCStreamer.
@@ -429,7 +485,7 @@ namespace llvm {
Target::TripleMatchQualityFnTy TQualityFn,
bool HasJIT = false);
- /// RegisterAsmInfo - Register a MCAsmInfo implementation for the
+ /// RegisterMCAsmInfo - Register a MCAsmInfo implementation for the
/// given target.
///
/// Clients are responsible for ensuring that registration doesn't occur
@@ -438,10 +494,56 @@ namespace llvm {
///
/// @param T - The target being registered.
/// @param Fn - A function to construct a MCAsmInfo for the target.
- static void RegisterAsmInfo(Target &T, Target::AsmInfoCtorFnTy Fn) {
+ static void RegisterMCAsmInfo(Target &T, Target::MCAsmInfoCtorFnTy Fn) {
+ // Ignore duplicate registration.
+ if (!T.MCAsmInfoCtorFn)
+ T.MCAsmInfoCtorFn = Fn;
+ }
+
+ /// RegisterMCInstrInfo - Register a MCInstrInfo implementation for the
+ /// given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct a MCInstrInfo for the target.
+ static void RegisterMCInstrInfo(Target &T, Target::MCInstrInfoCtorFnTy Fn) {
+ // Ignore duplicate registration.
+ if (!T.MCInstrInfoCtorFn)
+ T.MCInstrInfoCtorFn = Fn;
+ }
+
+ /// RegisterMCRegInfo - Register a MCRegisterInfo implementation for the
+ /// given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct a MCRegisterInfo for the target.
+ static void RegisterMCRegInfo(Target &T, Target::MCRegInfoCtorFnTy Fn) {
+ // Ignore duplicate registration.
+ if (!T.MCRegInfoCtorFn)
+ T.MCRegInfoCtorFn = Fn;
+ }
+
+ /// RegisterMCSubtargetInfo - Register a MCSubtargetInfo implementation for
+ /// the given target.
+ ///
+ /// Clients are responsible for ensuring that registration doesn't occur
+ /// while another thread is attempting to access the registry. Typically
+ /// this is done by initializing all targets at program startup.
+ ///
+ /// @param T - The target being registered.
+ /// @param Fn - A function to construct a MCSubtargetInfo for the target.
+ static void RegisterMCSubtargetInfo(Target &T,
+ Target::MCSubtargetInfoCtorFnTy Fn) {
// Ignore duplicate registration.
- if (!T.AsmInfoCtorFn)
- T.AsmInfoCtorFn = Fn;
+ if (!T.MCSubtargetInfoCtorFn)
+ T.MCSubtargetInfoCtorFn = Fn;
}
/// RegisterTargetMachine - Register a TargetMachine implementation for the
@@ -620,18 +722,18 @@ namespace llvm {
}
};
- /// RegisterAsmInfo - Helper template for registering a target assembly info
+ /// RegisterMCAsmInfo - Helper template for registering a target assembly info
/// implementation. This invokes the static "Create" method on the class to
/// actually do the construction. Usage:
///
/// extern "C" void LLVMInitializeFooTarget() {
/// extern Target TheFooTarget;
- /// RegisterAsmInfo<FooMCAsmInfo> X(TheFooTarget);
+ /// RegisterMCAsmInfo<FooMCAsmInfo> X(TheFooTarget);
/// }
template<class MCAsmInfoImpl>
- struct RegisterAsmInfo {
- RegisterAsmInfo(Target &T) {
- TargetRegistry::RegisterAsmInfo(T, &Allocator);
+ struct RegisterMCAsmInfo {
+ RegisterMCAsmInfo(Target &T) {
+ TargetRegistry::RegisterMCAsmInfo(T, &Allocator);
}
private:
static MCAsmInfo *Allocator(const Target &T, StringRef TT) {
@@ -640,20 +742,119 @@ namespace llvm {
};
- /// RegisterAsmInfoFn - Helper template for registering a target assembly info
+ /// RegisterMCAsmInfoFn - Helper template for registering a target assembly info
/// implementation. This invokes the specified function to do the
/// construction. Usage:
///
/// extern "C" void LLVMInitializeFooTarget() {
/// extern Target TheFooTarget;
- /// RegisterAsmInfoFn X(TheFooTarget, TheFunction);
+ /// RegisterMCAsmInfoFn X(TheFooTarget, TheFunction);
/// }
- struct RegisterAsmInfoFn {
- RegisterAsmInfoFn(Target &T, Target::AsmInfoCtorFnTy Fn) {
- TargetRegistry::RegisterAsmInfo(T, Fn);
+ struct RegisterMCAsmInfoFn {
+ RegisterMCAsmInfoFn(Target &T, Target::MCAsmInfoCtorFnTy Fn) {
+ TargetRegistry::RegisterMCAsmInfo(T, Fn);
}
};
+ /// RegisterMCInstrInfo - Helper template for registering a target instruction
+ /// info implementation. This invokes the static "Create" method on the class
+ /// to actually do the construction. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterMCInstrInfo<FooMCInstrInfo> X(TheFooTarget);
+ /// }
+ template<class MCInstrInfoImpl>
+ struct RegisterMCInstrInfo {
+ RegisterMCInstrInfo(Target &T) {
+ TargetRegistry::RegisterMCInstrInfo(T, &Allocator);
+ }
+ private:
+ static MCInstrInfo *Allocator() {
+ return new MCInstrInfoImpl();
+ }
+ };
+
+ /// RegisterMCInstrInfoFn - Helper template for registering a target
+ /// instruction info implementation. This invokes the specified function to
+ /// do the construction. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterMCInstrInfoFn X(TheFooTarget, TheFunction);
+ /// }
+ struct RegisterMCInstrInfoFn {
+ RegisterMCInstrInfoFn(Target &T, Target::MCInstrInfoCtorFnTy Fn) {
+ TargetRegistry::RegisterMCInstrInfo(T, Fn);
+ }
+ };
+
+ /// RegisterMCRegInfo - Helper template for registering a target register info
+ /// implementation. This invokes the static "Create" method on the class to
+ /// actually do the construction. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterMCRegInfo<FooMCRegInfo> X(TheFooTarget);
+ /// }
+ template<class MCRegisterInfoImpl>
+ struct RegisterMCRegInfo {
+ RegisterMCRegInfo(Target &T) {
+ TargetRegistry::RegisterMCRegInfo(T, &Allocator);
+ }
+ private:
+ static MCRegisterInfo *Allocator() {
+ return new MCRegisterInfoImpl();
+ }
+ };
+
+ /// RegisterMCRegInfoFn - Helper template for registering a target register
+ /// info implementation. This invokes the specified function to do the
+ /// construction. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterMCRegInfoFn X(TheFooTarget, TheFunction);
+ /// }
+ struct RegisterMCRegInfoFn {
+ RegisterMCRegInfoFn(Target &T, Target::MCRegInfoCtorFnTy Fn) {
+ TargetRegistry::RegisterMCRegInfo(T, Fn);
+ }
+ };
+
+ /// RegisterMCSubtargetInfo - Helper template for registering a target
+ /// subtarget info implementation. This invokes the static "Create" method
+ /// on the class to actually do the construction. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterMCSubtargetInfo<FooMCSubtargetInfo> X(TheFooTarget);
+ /// }
+ template<class MCSubtargetInfoImpl>
+ struct RegisterMCSubtargetInfo {
+ RegisterMCSubtargetInfo(Target &T) {
+ TargetRegistry::RegisterMCSubtargetInfo(T, &Allocator);
+ }
+ private:
+ static MCSubtargetInfo *Allocator(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ return new MCSubtargetInfoImpl();
+ }
+ };
+
+ /// RegisterMCSubtargetInfoFn - Helper template for registering a target
+ /// subtarget info implementation. This invokes the specified function to
+ /// do the construction. Usage:
+ ///
+ /// extern "C" void LLVMInitializeFooTarget() {
+ /// extern Target TheFooTarget;
+ /// RegisterMCSubtargetInfoFn X(TheFooTarget, TheFunction);
+ /// }
+ struct RegisterMCSubtargetInfoFn {
+ RegisterMCSubtargetInfoFn(Target &T, Target::MCSubtargetInfoCtorFnTy Fn) {
+ TargetRegistry::RegisterMCSubtargetInfo(T, Fn);
+ }
+ };
/// RegisterTargetMachine - Helper template for registering a target machine
/// implementation, for use in the target machine initialization
@@ -671,8 +872,9 @@ namespace llvm {
private:
static TargetMachine *Allocator(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS) {
- return new TargetMachineImpl(T, TT, FS);
+ return new TargetMachineImpl(T, TT, CPU, FS);
}
};
@@ -731,9 +933,8 @@ namespace llvm {
}
private:
- static TargetAsmParser *Allocator(const Target &T, MCAsmParser &P,
- TargetMachine &TM) {
- return new AsmParserImpl(T, P, TM);
+ static TargetAsmParser *Allocator(MCSubtargetInfo &STI, MCAsmParser &P) {
+ return new AsmParserImpl(STI, P);
}
};
@@ -772,9 +973,10 @@ namespace llvm {
}
private:
- static MCCodeEmitter *Allocator(const Target &T, TargetMachine &TM,
+ static MCCodeEmitter *Allocator(const MCInstrInfo &II,
+ const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new CodeEmitterImpl(T, TM, Ctx);
+ return new CodeEmitterImpl();
}
};
diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h
index c5ab90b..272ee09 100644
--- a/include/llvm/Target/TargetSelect.h
+++ b/include/llvm/Target/TargetSelect.h
@@ -26,6 +26,18 @@ extern "C" {
#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target();
#include "llvm/Config/Targets.def"
+#define LLVM_TARGET(TargetName) \
+ void LLVMInitialize##TargetName##MCAsmInfo();
+#include "llvm/Config/Targets.def"
+
+#define LLVM_TARGET(TargetName) \
+ void LLVMInitialize##TargetName##MCInstrInfo();
+#include "llvm/Config/Targets.def"
+
+#define LLVM_TARGET(TargetName) \
+ void LLVMInitialize##TargetName##MCSubtargetInfo();
+#include "llvm/Config/Targets.def"
+
// Declare all of the available assembly printer initialization functions.
#define LLVM_ASM_PRINTER(TargetName) void LLVMInitialize##TargetName##AsmPrinter();
#include "llvm/Config/AsmPrinters.def"
@@ -35,7 +47,8 @@ extern "C" {
#include "llvm/Config/AsmParsers.def"
// Declare all of the available disassembler initialization functions.
-#define LLVM_DISASSEMBLER(TargetName) void LLVMInitialize##TargetName##Disassembler();
+#define LLVM_DISASSEMBLER(TargetName) \
+ void LLVMInitialize##TargetName##Disassembler();
#include "llvm/Config/Disassemblers.def"
}
@@ -63,6 +76,38 @@ namespace llvm {
#include "llvm/Config/Targets.def"
}
+ /// InitializeAllMCAsmInfos - The main program should call this function
+ /// if it wants access to all available assembly infos for targets that
+ /// LLVM is configured to support, to make them available via the
+ /// TargetRegistry.
+ ///
+ /// It is legal for a client to make multiple calls to this function.
+ inline void InitializeAllMCAsmInfos() {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##MCAsmInfo();
+#include "llvm/Config/Targets.def"
+ }
+
+ /// InitializeAllMCInstrInfos - The main program should call this function
+ /// if it wants access to all available instruction infos for targets that
+ /// LLVM is configured to support, to make them available via the
+ /// TargetRegistry.
+ ///
+ /// It is legal for a client to make multiple calls to this function.
+ inline void InitializeAllMCInstrInfos() {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##MCInstrInfo();
+#include "llvm/Config/Targets.def"
+ }
+
+ /// InitializeAllMCSubtargetInfos - The main program should call this function
+ /// if it wants access to all available subtarget infos for targets that LLVM
+ /// is configured to support, to make them available via the TargetRegistry.
+ ///
+ /// It is legal for a client to make multiple calls to this function.
+ inline void InitializeAllMCSubtargetInfos() {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##MCSubtargetInfo();
+#include "llvm/Config/Targets.def"
+ }
+
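A minimal sketch of the startup sequence an MC-only client might now use; the ordering shown is an assumption, and only functions declared in this header are called:

  InitializeAllTargetInfos();
  InitializeAllTargets();
  InitializeAllMCAsmInfos();
  InitializeAllMCInstrInfos();
  InitializeAllMCSubtargetInfos();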
/// InitializeAllAsmPrinters - The main program should call this function if
/// it wants all asm printers that LLVM is configured to support, to make them
/// available via the TargetRegistry.
@@ -103,6 +148,7 @@ namespace llvm {
#ifdef LLVM_NATIVE_TARGET
LLVM_NATIVE_TARGETINFO();
LLVM_NATIVE_TARGET();
+ LLVM_NATIVE_MCASMINFO();
return false;
#else
return true;
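
A minimal sketch of a tool initializing the new MC-level tables next to the existing target initializers; all of these calls are declared in this header, and repeated calls are legal:

  #include "llvm/Target/TargetSelect.h"

  int main() {
    llvm::InitializeAllTargetInfos();
    llvm::InitializeAllTargets();
    llvm::InitializeAllMCAsmInfos();
    llvm::InitializeAllMCInstrInfos();
    llvm::InitializeAllMCSubtargetInfos();
    llvm::InitializeAllAsmPrinters();
    return 0;
  }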
diff --git a/include/llvm/Target/TargetSelectionDAG.td b/include/llvm/Target/TargetSelectionDAG.td
index 672117f..9d1ef2c 100644
--- a/include/llvm/Target/TargetSelectionDAG.td
+++ b/include/llvm/Target/TargetSelectionDAG.td
@@ -197,8 +197,8 @@ def SDTSubVecInsert : SDTypeProfile<1, 3, [ // subvector insert
SDTCisSubVecOfVec<2, 1>, SDTCisSameAs<0,1>, SDTCisInt<3>
]>;
-def SDTPrefetch : SDTypeProfile<0, 3, [ // prefetch
- SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>
+def SDTPrefetch : SDTypeProfile<0, 4, [ // prefetch
+ SDTCisPtrTy<0>, SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>, SDTCisInt<1>
]>;
def SDTMemBarrier : SDTypeProfile<0, 5, [ // memory barrier
@@ -353,6 +353,7 @@ def fsub : SDNode<"ISD::FSUB" , SDTFPBinOp>;
def fmul : SDNode<"ISD::FMUL" , SDTFPBinOp, [SDNPCommutative]>;
def fdiv : SDNode<"ISD::FDIV" , SDTFPBinOp>;
def frem : SDNode<"ISD::FREM" , SDTFPBinOp>;
+def fma : SDNode<"ISD::FMA" , SDTFPTernaryOp>;
def fabs : SDNode<"ISD::FABS" , SDTFPUnaryOp>;
def fgetsign : SDNode<"ISD::FGETSIGN" , SDTFPToIntOp>;
def fneg : SDNode<"ISD::FNEG" , SDTFPUnaryOp>;
diff --git a/include/llvm/Target/TargetSubtarget.h b/include/llvm/Target/TargetSubtargetInfo.h
index 22b09ba..9556c7a 100644
--- a/include/llvm/Target/TargetSubtarget.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -1,4 +1,4 @@
-//==-- llvm/Target/TargetSubtarget.h - Target Information --------*- C++ -*-==//
+//==-- llvm/Target/TargetSubtargetInfo.h - Target Information ----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TARGET_TARGETSUBTARGET_H
-#define LLVM_TARGET_TARGETSUBTARGET_H
+#ifndef LLVM_TARGET_TARGETSUBTARGETINFO_H
+#define LLVM_TARGET_TARGETSUBTARGETINFO_H
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -25,22 +26,22 @@ template <typename T> class SmallVectorImpl;
//===----------------------------------------------------------------------===//
///
-/// TargetSubtarget - Generic base class for all target subtargets. All
+/// TargetSubtargetInfo - Generic base class for all target subtargets. All
/// Target-specific options that control code generation and printing should
-/// be exposed through a TargetSubtarget-derived class.
+/// be exposed through a TargetSubtargetInfo-derived class.
///
-class TargetSubtarget {
- TargetSubtarget(const TargetSubtarget&); // DO NOT IMPLEMENT
- void operator=(const TargetSubtarget&); // DO NOT IMPLEMENT
+class TargetSubtargetInfo : public MCSubtargetInfo {
+ TargetSubtargetInfo(const TargetSubtargetInfo&); // DO NOT IMPLEMENT
+ void operator=(const TargetSubtargetInfo&); // DO NOT IMPLEMENT
protected: // Can only create subclasses...
- TargetSubtarget();
+ TargetSubtargetInfo();
public:
// AntiDepBreakMode - Type of anti-dependence breaking that should
// be performed before post-RA scheduling.
typedef enum { ANTIDEP_NONE, ANTIDEP_CRITICAL, ANTIDEP_ALL } AntiDepBreakMode;
typedef SmallVectorImpl<TargetRegisterClass*> RegClassVector;
- virtual ~TargetSubtarget();
+ virtual ~TargetSubtargetInfo();
/// getSpecialAddressLatency - For targets where it is beneficial to
/// backschedule instructions that compute addresses, return a value
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index d12fd1d..f025e180 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -74,13 +74,6 @@ ModulePass *createGlobalOptimizerPass();
//===----------------------------------------------------------------------===//
-/// createDeadTypeEliminationPass - Return a new pass that eliminates symbol
-/// table entries for types that are never used.
-///
-ModulePass *createDeadTypeEliminationPass();
-
-
-//===----------------------------------------------------------------------===//
/// createGlobalDCEPass - This transform is designed to eliminate unreachable
/// internal globals (functions or global variables)
///
diff --git a/include/llvm/Transforms/Scalar.h b/include/llvm/Transforms/Scalar.h
index de46a8d..2187d4e 100644
--- a/include/llvm/Transforms/Scalar.h
+++ b/include/llvm/Transforms/Scalar.h
@@ -338,11 +338,37 @@ Pass *createCorrelatedValuePropagationPass();
//===----------------------------------------------------------------------===//
//
+// ObjCARCExpand - ObjC ARC preliminary simplifications.
+//
+Pass *createObjCARCExpandPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ObjCARCContract - Late ObjC ARC cleanups.
+//
+Pass *createObjCARCContractPass();
+
+//===----------------------------------------------------------------------===//
+//
+// ObjCARCOpt - ObjC ARC optimization.
+//
+Pass *createObjCARCOptPass();
+
+//===----------------------------------------------------------------------===//
+//
// InstructionSimplifier - Remove redundant instructions.
//
FunctionPass *createInstructionSimplifierPass();
extern char &InstructionSimplifierID;
+
+//===----------------------------------------------------------------------===//
+//
+// LowerExpectIntrinsics - Removes llvm.expect intrinsics and creates
+// "block_weights" metadata.
+FunctionPass *createLowerExpectIntrinsicPass();
+
+
} // End llvm namespace
#endif
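
A minimal sketch of wiring the new passes into a PassManager; the function name, module, and pass ordering here are assumptions for illustration, not mandated by this header:

  #include "llvm/Module.h"
  #include "llvm/PassManager.h"
  #include "llvm/Transforms/Scalar.h"

  void runNewPasses(llvm::Module &M) {
    llvm::PassManager PM;
    PM.add(llvm::createObjCARCExpandPass());        // preliminary ARC simplifications
    PM.add(llvm::createObjCARCOptPass());           // main ARC optimization
    PM.add(llvm::createObjCARCContractPass());      // late ARC cleanups
    PM.add(llvm::createLowerExpectIntrinsicPass()); // llvm.expect -> "block_weights"
    PM.run(M);
  }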
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 853de2d..674c2d0 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -107,12 +107,6 @@ BasicBlock *CloneBasicBlock(const BasicBlock *BB,
const Twine &NameSuffix = "", Function *F = 0,
ClonedCodeInfo *CodeInfo = 0);
-
-/// CloneLoop - Clone Loop. Clone dominator info for loop insiders. Populate
-/// VMap using old blocks to new blocks mapping.
-Loop *CloneLoop(Loop *L, LPPassManager *LPM, LoopInfo *LI,
- ValueToValueMapTy &VMap, Pass *P);
-
/// CloneFunction - Return a copy of the specified function, but without
/// embedding the function into another module. Also, any references specified
/// in the VMap are changed to refer to their mapped value instead of the
diff --git a/include/llvm/Transforms/Utils/SSAUpdater.h b/include/llvm/Transforms/Utils/SSAUpdater.h
index 51c8467..063d413 100644
--- a/include/llvm/Transforms/Utils/SSAUpdater.h
+++ b/include/llvm/Transforms/Utils/SSAUpdater.h
@@ -122,12 +122,9 @@ private:
class LoadAndStorePromoter {
protected:
SSAUpdater &SSA;
- DbgDeclareInst *DDI;
- DIBuilder *DIB;
public:
LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
- SSAUpdater &S, DbgDeclareInst *DDI, DIBuilder *DIB,
- StringRef Name = StringRef());
+ SSAUpdater &S, StringRef Name = StringRef());
virtual ~LoadAndStorePromoter() {}
/// run - This does the promotion. Insts is a list of loads and stores to
@@ -161,6 +158,10 @@ public:
virtual void instructionDeleted(Instruction *I) const {
}
+ /// updateDebugInfo - This is called to update debug info associated with the
+ /// instruction.
+ virtual void updateDebugInfo(Instruction *I) const {
+ }
};
} // End llvm namespace
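
With the DbgDeclareInst/DIBuilder constructor arguments gone, clients override the new updateDebugInfo hook instead. A minimal sketch, with a hypothetical subclass name and body:

  class ExamplePromoter : public LoadAndStorePromoter {
  public:
    ExamplePromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S)
      : LoadAndStorePromoter(Insts, S, "example") {}

    virtual void updateDebugInfo(Instruction *I) const {
      // Hypothetical: rewrite any dbg.declare/dbg.value that refers to I here.
    }
  };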
diff --git a/include/llvm/Transforms/Utils/ValueMapper.h b/include/llvm/Transforms/Utils/ValueMapper.h
index d612213..2194373 100644
--- a/include/llvm/Transforms/Utils/ValueMapper.h
+++ b/include/llvm/Transforms/Utils/ValueMapper.h
@@ -22,6 +22,18 @@ namespace llvm {
class Instruction;
typedef ValueMap<const Value *, TrackingVH<Value> > ValueToValueMapTy;
+ /// ValueMapTypeRemapper - This is a class that can be implemented by clients
+ /// to remap types when cloning constants and instructions.
+ class ValueMapTypeRemapper {
+ virtual void Anchor(); // Out of line method.
+ public:
+ virtual ~ValueMapTypeRemapper() {}
+
+ /// remapType - The client should implement this method if they want to
+ /// remap types while mapping values.
+ virtual Type *remapType(Type *SrcTy) = 0;
+ };
+
/// RemapFlags - These are flags that the value mapping APIs allow.
enum RemapFlags {
RF_None = 0,
@@ -42,9 +54,27 @@ namespace llvm {
}
Value *MapValue(const Value *V, ValueToValueMapTy &VM,
- RemapFlags Flags = RF_None);
+ RemapFlags Flags = RF_None,
+ ValueMapTypeRemapper *TypeMapper = 0);
+
void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
- RemapFlags Flags = RF_None);
+ RemapFlags Flags = RF_None,
+ ValueMapTypeRemapper *TypeMapper = 0);
+
+ /// MapValue - provide versions that preserve type safety for MDNode and
+ /// Constants.
+ inline MDNode *MapValue(const MDNode *V, ValueToValueMapTy &VM,
+ RemapFlags Flags = RF_None,
+ ValueMapTypeRemapper *TypeMapper = 0) {
+ return (MDNode*)MapValue((const Value*)V, VM, Flags, TypeMapper);
+ }
+ inline Constant *MapValue(const Constant *V, ValueToValueMapTy &VM,
+ RemapFlags Flags = RF_None,
+ ValueMapTypeRemapper *TypeMapper = 0) {
+ return (Constant*)MapValue((const Value*)V, VM, Flags, TypeMapper);
+ }
+
+
} // End llvm namespace
#endif
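
A minimal sketch of a client remapper for the extended signatures; the identity mapping is a placeholder, where a real client would translate types between modules or contexts:

  struct IdentityTypeRemapper : public ValueMapTypeRemapper {
    virtual Type *remapType(Type *SrcTy) {
      return SrcTy; // placeholder: return the type unchanged
    }
  };

  // Usage (OldV is an assumed input value):
  //   ValueToValueMapTy VM;
  //   IdentityTypeRemapper TR;
  //   Value *NewV = MapValue(OldV, VM, RF_None, &TR);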
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index 3bda91d..e4ff3e1 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -6,86 +6,64 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the Type class. For more "Type"
+// stuff, look in DerivedTypes.h.
+//
+//===----------------------------------------------------------------------===//
#ifndef LLVM_TYPE_H
#define LLVM_TYPE_H
-#include "llvm/AbstractTypeUser.h"
#include "llvm/Support/Casting.h"
-#include "llvm/ADT/GraphTraits.h"
-#include <string>
-#include <vector>
namespace llvm {
-class DerivedType;
class PointerType;
class IntegerType;
-class TypeMapBase;
class raw_ostream;
class Module;
class LLVMContext;
+class LLVMContextImpl;
+template<class GraphType> struct GraphTraits;
-/// This file contains the declaration of the Type class. For more "Type" type
-/// stuff, look in DerivedTypes.h.
-///
/// The instances of the Type class are immutable: once they are created,
/// they are never changed. Also note that only one instance of a particular
/// type is ever created. Thus seeing if two types are equal is a matter of
/// doing a trivial pointer comparison. To enforce that no two equal instances
/// are created, Type instances can only be created via static factory methods
-/// in class Type and in derived classes.
+/// in class Type and in derived classes. Once allocated, Types are never
+/// free'd.
///
-/// Once allocated, Types are never free'd, unless they are an abstract type
-/// that is resolved to a more concrete type.
-///
-/// Types themself don't have a name, and can be named either by:
-/// - using SymbolTable instance, typically from some Module,
-/// - using convenience methods in the Module class (which uses module's
-/// SymbolTable too).
-///
-/// Opaque types are simple derived types with no state. There may be many
-/// different Opaque type objects floating around, but two are only considered
-/// identical if they are pointer equals of each other. This allows us to have
-/// two opaque types that end up resolving to different concrete types later.
-///
-/// Opaque types are also kinda weird and scary and different because they have
-/// to keep a list of uses of the type. When, through linking, parsing, or
-/// bitcode reading, they become resolved, they need to find and update all
-/// users of the unknown type, causing them to reference a new, more concrete
-/// type. Opaque types are deleted when their use list dwindles to zero users.
-///
-/// @brief Root of type hierarchy
-class Type : public AbstractTypeUser {
+class Type {
public:
- //===-------------------------------------------------------------------===//
+ //===--------------------------------------------------------------------===//
/// Definitions of all of the base types for the Type system. Based on this
- /// value, you can cast to a "DerivedType" subclass (see DerivedTypes.h)
+ /// value, you can cast to a class defined in DerivedTypes.h.
/// Note: If you add an element to this, you need to add an element to the
/// Type::getPrimitiveType function, or else things will break!
  /// Also update LLVMTypeKind and LLVMGetTypeKind() in the C binding.
///
enum TypeID {
- // PrimitiveTypes .. make sure LastPrimitiveTyID stays up to date
+ // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
VoidTyID = 0, ///< 0: type with no size
- FloatTyID, ///< 1: 32 bit floating point type
- DoubleTyID, ///< 2: 64 bit floating point type
- X86_FP80TyID, ///< 3: 80 bit floating point type (X87)
- FP128TyID, ///< 4: 128 bit floating point type (112-bit mantissa)
- PPC_FP128TyID, ///< 5: 128 bit floating point type (two 64-bits)
+ FloatTyID, ///< 1: 32-bit floating point type
+ DoubleTyID, ///< 2: 64-bit floating point type
+ X86_FP80TyID, ///< 3: 80-bit floating point type (X87)
+ FP128TyID, ///< 4: 128-bit floating point type (112-bit mantissa)
+ PPC_FP128TyID, ///< 5: 128-bit floating point type (two 64-bits, PowerPC)
LabelTyID, ///< 6: Labels
MetadataTyID, ///< 7: Metadata
- X86_MMXTyID, ///< 8: MMX vectors (64 bits)
+ X86_MMXTyID, ///< 8: MMX vectors (64 bits, X86 specific)
- // Derived types... see DerivedTypes.h file...
- // Make sure FirstDerivedTyID stays up to date!!!
+ // Derived types... see DerivedTypes.h file.
+ // Make sure FirstDerivedTyID stays up to date!
IntegerTyID, ///< 9: Arbitrary bit width integers
FunctionTyID, ///< 10: Functions
StructTyID, ///< 11: Structures
ArrayTyID, ///< 12: Arrays
PointerTyID, ///< 13: Pointers
- OpaqueTyID, ///< 14: Opaque: type with unknown structure
- VectorTyID, ///< 15: SIMD 'packed' format, or other vector type
+ VectorTyID, ///< 14: SIMD 'packed' format, or other vector type
NumTypeIDs, // Must remain as last defined ID
LastPrimitiveTyID = X86_MMXTyID,
@@ -93,101 +71,52 @@ public:
};
private:
- TypeID ID : 8; // The current base type of this type.
- bool Abstract : 1; // True if type contains an OpaqueType
- unsigned SubclassData : 23; //Space for subclasses to store data
-
- /// RefCount - This counts the number of PATypeHolders that are pointing to
- /// this type. When this number falls to zero, if the type is abstract and
- /// has no AbstractTypeUsers, the type is deleted. This is only sensical for
- /// derived types.
- ///
- mutable unsigned RefCount;
-
/// Context - This refers to the LLVMContext in which this type was uniqued.
LLVMContext &Context;
- friend class LLVMContextImpl;
-
- const Type *getForwardedTypeInternal() const;
- // Some Type instances are allocated as arrays, some aren't. So we provide
- // this method to get the right kind of destruction for the type of Type.
- void destroy() const; // const is a lie, this does "delete this"!
+ TypeID ID : 8; // The current base type of this type.
+ unsigned SubclassData : 24; // Space for subclasses to store data
protected:
- explicit Type(LLVMContext &C, TypeID id) :
- ID(id), Abstract(false), SubclassData(0),
- RefCount(0), Context(C),
- ForwardType(0), NumContainedTys(0),
- ContainedTys(0) {}
- virtual ~Type() {
- assert(AbstractTypeUsers.empty() && "Abstract types remain");
- }
-
- /// Types can become nonabstract later, if they are refined.
- ///
- inline void setAbstract(bool Val) { Abstract = Val; }
-
- unsigned getRefCount() const { return RefCount; }
+ friend class LLVMContextImpl;
+ explicit Type(LLVMContext &C, TypeID tid)
+ : Context(C), ID(tid), SubclassData(0),
+ NumContainedTys(0), ContainedTys(0) {}
+ ~Type() {}
unsigned getSubclassData() const { return SubclassData; }
- void setSubclassData(unsigned val) { SubclassData = val; }
-
- /// ForwardType - This field is used to implement the union find scheme for
- /// abstract types. When types are refined to other types, this field is set
- /// to the more refined type. Only abstract types can be forwarded.
- mutable const Type *ForwardType;
-
-
- /// AbstractTypeUsers - Implement a list of the users that need to be notified
- /// if I am a type, and I get resolved into a more concrete type.
- ///
- mutable std::vector<AbstractTypeUser *> AbstractTypeUsers;
+ void setSubclassData(unsigned val) {
+ SubclassData = val;
+ // Ensure we don't have any accidental truncation.
+ assert(SubclassData == val && "Subclass data too large for field");
+ }
- /// NumContainedTys - Keeps track of how many PATypeHandle instances there
- /// are at the end of this type instance for the list of contained types. It
- /// is the subclasses responsibility to set this up. Set to 0 if there are no
- /// contained types in this type.
+ /// NumContainedTys - Keeps track of how many Type*'s there are in the
+ /// ContainedTys list.
unsigned NumContainedTys;
- /// ContainedTys - A pointer to the array of Types (PATypeHandle) contained
- /// by this Type. For example, this includes the arguments of a function
- /// type, the elements of a structure, the pointee of a pointer, the element
- /// type of an array, etc. This pointer may be 0 for types that don't
- /// contain other types (Integer, Double, Float). In general, the subclass
- /// should arrange for space for the PATypeHandles to be included in the
- /// allocation of the type object and set this pointer to the address of the
- /// first element. This allows the Type class to manipulate the ContainedTys
- /// without understanding the subclass's placement for this array. keeping
- /// it here also allows the subtype_* members to be implemented MUCH more
- /// efficiently, and dynamically very few types do not contain any elements.
- PATypeHandle *ContainedTys;
+ /// ContainedTys - A pointer to the array of Types contained by this Type.
+ /// For example, this includes the arguments of a function type, the elements
+ /// of a structure, the pointee of a pointer, the element type of an array,
+ /// etc. This pointer may be 0 for types that don't contain other types
+ /// (Integer, Double, Float).
+ Type * const *ContainedTys;
public:
void print(raw_ostream &O) const;
-
- /// @brief Debugging support: print to stderr
void dump() const;
- /// @brief Debugging support: print to stderr (use type names from context
- /// module).
- void dump(const Module *Context) const;
-
- /// getContext - Fetch the LLVMContext in which this type was uniqued.
+ /// getContext - Return the LLVMContext in which this type was uniqued.
LLVMContext &getContext() const { return Context; }
//===--------------------------------------------------------------------===//
- // Property accessors for dealing with types... Some of these virtual methods
- // are defined in private classes defined in Type.cpp for primitive types.
+ // Accessors for working with types.
//
- /// getDescription - Return the string representation of the type.
- std::string getDescription() const;
-
/// getTypeID - Return the type id for the type. This will return one
/// of the TypeID enum elements defined above.
///
- inline TypeID getTypeID() const { return ID; }
+ TypeID getTypeID() const { return ID; }
/// isVoidTy - Return true if this is 'void'.
bool isVoidTy() const { return ID == VoidTyID; }
@@ -209,8 +138,10 @@ public:
/// isFloatingPointTy - Return true if this is one of the five floating point
/// types
- bool isFloatingPointTy() const { return ID == FloatTyID || ID == DoubleTyID ||
- ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID; }
+ bool isFloatingPointTy() const {
+ return ID == FloatTyID || ID == DoubleTyID ||
+ ID == X86_FP80TyID || ID == FP128TyID || ID == PPC_FP128TyID;
+ }
/// isX86_MMXTy - Return true if this is X86 MMX.
bool isX86_MMXTy() const { return ID == X86_MMXTyID; }
@@ -253,19 +184,10 @@ public:
///
bool isPointerTy() const { return ID == PointerTyID; }
- /// isOpaqueTy - True if this is an instance of OpaqueType.
- ///
- bool isOpaqueTy() const { return ID == OpaqueTyID; }
-
/// isVectorTy - True if this is an instance of VectorType.
///
bool isVectorTy() const { return ID == VectorTyID; }
- /// isAbstract - True if the type is either an Opaque type, or is a derived
- /// type that includes an opaque type somewhere in it.
- ///
- inline bool isAbstract() const { return Abstract; }
-
/// canLosslesslyBitCastTo - Return true if this type could be converted
/// with a lossless BitCast to type 'Ty'. For example, i8* to i32*. BitCasts
/// are valid for types of the same size only where no re-interpretation of
@@ -280,24 +202,22 @@ public:
/// Here are some useful little methods to query what type derived types are
/// Note that all other types can just compare to see if this == Type::xxxTy;
///
- inline bool isPrimitiveType() const { return ID <= LastPrimitiveTyID; }
- inline bool isDerivedType() const { return ID >= FirstDerivedTyID; }
+ bool isPrimitiveType() const { return ID <= LastPrimitiveTyID; }
+ bool isDerivedType() const { return ID >= FirstDerivedTyID; }
/// isFirstClassType - Return true if the type is "first class", meaning it
/// is a valid type for a Value.
///
- inline bool isFirstClassType() const {
- // There are more first-class kinds than non-first-class kinds, so a
- // negative test is simpler than a positive one.
- return ID != FunctionTyID && ID != VoidTyID && ID != OpaqueTyID;
+ bool isFirstClassType() const {
+ return ID != FunctionTyID && ID != VoidTyID;
}
/// isSingleValueType - Return true if the type is a valid type for a
- /// virtual register in codegen. This includes all first-class types
- /// except struct and array types.
+ /// register in codegen. This includes all first-class types except struct
+ /// and array types.
///
- inline bool isSingleValueType() const {
- return (ID != VoidTyID && ID <= LastPrimitiveTyID) ||
+ bool isSingleValueType() const {
+ return (ID != VoidTyID && isPrimitiveType()) ||
ID == IntegerTyID || ID == PointerTyID || ID == VectorTyID;
}
@@ -306,7 +226,7 @@ public:
/// extractvalue instruction. This includes struct and array types, but
/// does not include vector types.
///
- inline bool isAggregateType() const {
+ bool isAggregateType() const {
return ID == StructTyID || ID == ArrayTyID;
}
@@ -323,9 +243,8 @@ public:
// it doesn't have a size.
if (ID != StructTyID && ID != ArrayTyID && ID != VectorTyID)
return false;
- // If it is something that can have a size and it's concrete, it definitely
- // has a size, otherwise we have to try harder to decide.
- return !isAbstract() || isSizedDerivedType();
+ // Otherwise we have to try harder to decide.
+ return isSizedDerivedType();
}
/// getPrimitiveSizeInBits - Return the basic size of this type if it is a
@@ -350,28 +269,14 @@ public:
/// have a stable mantissa (e.g. ppc long double), this method returns -1.
int getFPMantissaWidth() const;
- /// getForwardedType - Return the type that this type has been resolved to if
- /// it has been resolved to anything. This is used to implement the
- /// union-find algorithm for type resolution, and shouldn't be used by general
- /// purpose clients.
- const Type *getForwardedType() const {
- if (!ForwardType) return 0;
- return getForwardedTypeInternal();
- }
-
- /// getVAArgsPromotedType - Return the type an argument of this type
- /// will be promoted to if passed through a variable argument
- /// function.
- const Type *getVAArgsPromotedType(LLVMContext &C) const;
-
/// getScalarType - If this is a vector type, return the element type,
- /// otherwise return this.
+ /// otherwise return 'this'.
const Type *getScalarType() const;
//===--------------------------------------------------------------------===//
- // Type Iteration support
+ // Type Iteration support.
//
- typedef PATypeHandle *subtype_iterator;
+ typedef Type * const *subtype_iterator;
subtype_iterator subtype_begin() const { return ContainedTys; }
subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
@@ -379,9 +284,9 @@ public:
  /// (defined at the end of the file).  For derived types, this returns the
/// types 'contained' in the derived type.
///
- const Type *getContainedType(unsigned i) const {
+ Type *getContainedType(unsigned i) const {
assert(i < NumContainedTys && "Index out of range!");
- return ContainedTys[i].get();
+ return ContainedTys[i];
}
/// getNumContainedTypes - Return the number of types in the derived type.
@@ -394,139 +299,76 @@ public:
//
/// getPrimitiveType - Return a type based on an identifier.
- static const Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
+ static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
//===--------------------------------------------------------------------===//
- // These are the builtin types that are always available...
+ // These are the builtin types that are always available.
//
- static const Type *getVoidTy(LLVMContext &C);
- static const Type *getLabelTy(LLVMContext &C);
- static const Type *getFloatTy(LLVMContext &C);
- static const Type *getDoubleTy(LLVMContext &C);
- static const Type *getMetadataTy(LLVMContext &C);
- static const Type *getX86_FP80Ty(LLVMContext &C);
- static const Type *getFP128Ty(LLVMContext &C);
- static const Type *getPPC_FP128Ty(LLVMContext &C);
- static const Type *getX86_MMXTy(LLVMContext &C);
- static const IntegerType *getIntNTy(LLVMContext &C, unsigned N);
- static const IntegerType *getInt1Ty(LLVMContext &C);
- static const IntegerType *getInt8Ty(LLVMContext &C);
- static const IntegerType *getInt16Ty(LLVMContext &C);
- static const IntegerType *getInt32Ty(LLVMContext &C);
- static const IntegerType *getInt64Ty(LLVMContext &C);
+ static Type *getVoidTy(LLVMContext &C);
+ static Type *getLabelTy(LLVMContext &C);
+ static Type *getFloatTy(LLVMContext &C);
+ static Type *getDoubleTy(LLVMContext &C);
+ static Type *getMetadataTy(LLVMContext &C);
+ static Type *getX86_FP80Ty(LLVMContext &C);
+ static Type *getFP128Ty(LLVMContext &C);
+ static Type *getPPC_FP128Ty(LLVMContext &C);
+ static Type *getX86_MMXTy(LLVMContext &C);
+ static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
+ static IntegerType *getInt1Ty(LLVMContext &C);
+ static IntegerType *getInt8Ty(LLVMContext &C);
+ static IntegerType *getInt16Ty(LLVMContext &C);
+ static IntegerType *getInt32Ty(LLVMContext &C);
+ static IntegerType *getInt64Ty(LLVMContext &C);
//===--------------------------------------------------------------------===//
// Convenience methods for getting pointer types with one of the above builtin
// types as pointee.
//
- static const PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getIntNPtrTy(LLVMContext &C, unsigned N,
- unsigned AS = 0);
- static const PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
- static const PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
+ static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
+ static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Type *) { return true; }
- void addRef() const {
- assert(isAbstract() && "Cannot add a reference to a non-abstract type!");
- ++RefCount;
- }
-
- void dropRef() const {
- assert(isAbstract() && "Cannot drop a reference to a non-abstract type!");
- assert(RefCount && "No objects are currently referencing this object!");
-
- // If this is the last PATypeHolder using this object, and there are no
- // PATypeHandles using it, the type is dead, delete it now.
- if (--RefCount == 0 && AbstractTypeUsers.empty())
- this->destroy();
- }
-
- /// addAbstractTypeUser - Notify an abstract type that there is a new user of
- /// it. This function is called primarily by the PATypeHandle class.
- ///
- void addAbstractTypeUser(AbstractTypeUser *U) const;
-
- /// removeAbstractTypeUser - Notify an abstract type that a user of the class
- /// no longer has a handle to the type. This function is called primarily by
- /// the PATypeHandle class. When there are no users of the abstract type, it
- /// is annihilated, because there is no way to get a reference to it ever
- /// again.
- ///
- void removeAbstractTypeUser(AbstractTypeUser *U) const;
-
/// getPointerTo - Return a pointer to the current type. This is equivalent
/// to PointerType::get(Foo, AddrSpace).
- const PointerType *getPointerTo(unsigned AddrSpace = 0) const;
+ PointerType *getPointerTo(unsigned AddrSpace = 0) const;
private:
/// isSizedDerivedType - Derived types like structures and arrays are sized
/// iff all of the members of the type are sized as well. Since asking for
/// their size is relatively uncommon, move this operation out of line.
bool isSizedDerivedType() const;
-
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
- virtual void typeBecameConcrete(const DerivedType *AbsTy);
-
-protected:
- // PromoteAbstractToConcrete - This is an internal method used to calculate
- // change "Abstract" from true to false when types are refined.
- void PromoteAbstractToConcrete();
- friend class TypeMapBase;
};
-//===----------------------------------------------------------------------===//
-// Define some inline methods for the AbstractTypeUser.h:PATypeHandle class.
-// These are defined here because they MUST be inlined, yet are dependent on
-// the definition of the Type class.
-//
-inline void PATypeHandle::addUser() {
- assert(Ty && "Type Handle has a null type!");
- if (Ty->isAbstract())
- Ty->addAbstractTypeUser(User);
-}
-inline void PATypeHandle::removeUser() {
- if (Ty->isAbstract())
- Ty->removeAbstractTypeUser(User);
-}
-
-// Define inline methods for PATypeHolder.
-
-/// get - This implements the forwarding part of the union-find algorithm for
-/// abstract types. Before every access to the Type*, we check to see if the
-/// type we are pointing to is forwarding to a new type. If so, we drop our
-/// reference to the type.
-///
-inline Type* PATypeHolder::get() const {
- if (Ty == 0) return 0;
- const Type *NewTy = Ty->getForwardedType();
- if (!NewTy) return const_cast<Type*>(Ty);
- return *const_cast<PATypeHolder*>(this) = NewTy;
-}
-
-inline void PATypeHolder::addRef() {
- if (Ty && Ty->isAbstract())
- Ty->addRef();
-}
-
-inline void PATypeHolder::dropRef() {
- if (Ty && Ty->isAbstract())
- Ty->dropRef();
+// Printing of types.
+static inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
+ T.print(OS);
+ return OS;
}
+// Allow isa<PointerType>(x) to work without DerivedTypes.h being included.
+template <> struct isa_impl<PointerType, Type> {
+ static inline bool doit(const Type &Ty) {
+ return Ty.getTypeID() == Type::PointerTyID;
+ }
+};
+
//===----------------------------------------------------------------------===//
// Provide specializations of GraphTraits to be able to treat a type as a
-// graph of sub types...
+// graph of sub types.
+
template <> struct GraphTraits<Type*> {
typedef Type NodeType;
@@ -554,14 +396,6 @@ template <> struct GraphTraits<const Type*> {
}
};
-template <> struct isa_impl<PointerType, Type> {
- static inline bool doit(const Type &Ty) {
- return Ty.getTypeID() == Type::PointerTyID;
- }
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const Type &T);
-
} // End llvm namespace
#endif
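
A minimal sketch of the simplified, non-const Type API after this rewrite, using only declarations visible in this header:

  #include "llvm/LLVMContext.h"
  #include "llvm/Type.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  void demoTypes() {
    LLVMContext Ctx;
    Type *I32 = Type::getInt32Ty(Ctx);   // factory methods now return Type*
    Type *P = I32->getPointerTo();       // i32*
    if (isa<PointerType>(P))             // works without DerivedTypes.h
      errs() << *P << "\n";              // streams via the new operator<<
  }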
diff --git a/include/llvm/TypeSymbolTable.h b/include/llvm/TypeSymbolTable.h
deleted file mode 100644
index 89ad534..0000000
--- a/include/llvm/TypeSymbolTable.h
+++ /dev/null
@@ -1,152 +0,0 @@
-//===-- llvm/TypeSymbolTable.h - Implement a Type Symtab --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the name/type symbol table for LLVM.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TYPE_SYMBOL_TABLE_H
-#define LLVM_TYPE_SYMBOL_TABLE_H
-
-#include "llvm/Type.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
-#include <map>
-
-namespace llvm {
-
-/// This class provides a symbol table of name/type pairs with operations to
-/// support constructing, searching and iterating over the symbol table. The
-/// class derives from AbstractTypeUser so that the contents of the symbol
-/// table can be updated when abstract types become concrete.
-class TypeSymbolTable : public AbstractTypeUser {
-
-/// @name Types
-/// @{
-public:
-
- /// @brief A mapping of names to types.
- typedef std::map<const std::string, const Type*> TypeMap;
-
- /// @brief An iterator over the TypeMap.
- typedef TypeMap::iterator iterator;
-
- /// @brief A const_iterator over the TypeMap.
- typedef TypeMap::const_iterator const_iterator;
-
-/// @}
-/// @name Constructors
-/// @{
-public:
-
- TypeSymbolTable():LastUnique(0) {}
- ~TypeSymbolTable();
-
-/// @}
-/// @name Accessors
-/// @{
-public:
-
- /// Generates a unique name for a type based on the \p BaseName by
- /// incrementing an integer and appending it to the name, if necessary
- /// @returns the unique name
- /// @brief Get a unique name for a type
- std::string getUniqueName(StringRef BaseName) const;
-
- /// This method finds the type with the given \p name in the type map
- /// and returns it.
- /// @returns null if the name is not found, otherwise the Type
- /// associated with the \p name.
- /// @brief Lookup a type by name.
- Type *lookup(StringRef name) const;
-
- /// Lookup the type associated with name.
- /// @returns end() if the name is not found, or an iterator at the entry for
- /// Type.
- iterator find(StringRef Name) {
- return tmap.find(Name);
- }
-
- /// Lookup the type associated with name.
- /// @returns end() if the name is not found, or an iterator at the entry for
- /// Type.
- const_iterator find(StringRef Name) const {
- return tmap.find(Name);
- }
-
- /// @returns true iff the symbol table is empty.
- /// @brief Determine if the symbol table is empty
- inline bool empty() const { return tmap.empty(); }
-
- /// @returns the size of the symbol table
- /// @brief The number of name/type pairs is returned.
- inline unsigned size() const { return unsigned(tmap.size()); }
-
- /// This function can be used from the debugger to display the
- /// content of the symbol table while debugging.
- /// @brief Print out symbol table on stderr
- void dump() const;
-
-/// @}
-/// @name Iteration
-/// @{
-public:
- /// Get an iterator to the start of the symbol table
- inline iterator begin() { return tmap.begin(); }
-
- /// @brief Get a const_iterator to the start of the symbol table
- inline const_iterator begin() const { return tmap.begin(); }
-
- /// Get an iterator to the end of the symbol table.
- inline iterator end() { return tmap.end(); }
-
- /// Get a const_iterator to the end of the symbol table.
- inline const_iterator end() const { return tmap.end(); }
-
-/// @}
-/// @name Mutators
-/// @{
-public:
-
- /// Inserts a type into the symbol table with the specified name. There can be
- /// a many-to-one mapping between names and types. This method allows a type
- /// with an existing entry in the symbol table to get a new name.
- /// @brief Insert a type under a new name.
- void insert(StringRef Name, const Type *Typ);
-
- /// Remove a type at the specified position in the symbol table.
- /// @returns the removed Type.
- /// @returns the Type that was erased from the symbol table.
- Type* remove(iterator TI);
-
-/// @}
-/// @name AbstractTypeUser Methods
-/// @{
-private:
- /// This function is called when one of the types in the type plane
- /// is refined.
- virtual void refineAbstractType(const DerivedType *OldTy, const Type *NewTy);
-
- /// This function marks a type as being concrete (defined).
- virtual void typeBecameConcrete(const DerivedType *AbsTy);
-
-/// @}
-/// @name Internal Data
-/// @{
-private:
- TypeMap tmap; ///< This is the mapping of names to types.
- mutable uint32_t LastUnique; ///< Counter for tracking unique names
-
-/// @}
-
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/include/llvm/Use.h b/include/llvm/Use.h
index ccbdd7f..a496325 100644
--- a/include/llvm/Use.h
+++ b/include/llvm/Use.h
@@ -60,6 +60,10 @@ public:
/// that also works with less standard-compliant compilers
void swap(Use &RHS);
+ // A type for the word following an array of hung-off Uses in memory, which is
+ // a pointer back to their User with the bottom bit set.
+ typedef PointerIntPair<User*, 1, unsigned> UserRef;
+
private:
/// Copy ctor - do not implement
Use(const Use &U);
@@ -108,13 +112,16 @@ public:
Use *getNext() const { return Next; }
+ /// initTags - initialize the waymarking tags on an array of Uses, so that
+ /// getUser() can find the User from any of those Uses.
+ static Use *initTags(Use *Start, Use *Stop);
+
/// zap - This is used to destroy Use operands when the number of operands of
/// a User changes.
static void zap(Use *Start, const Use *Stop, bool del = false);
private:
const Use* getImpliedUser() const;
- static Use *initTags(Use *Start, Use *Stop);
Value *Val;
Use *Next;
@@ -136,7 +143,6 @@ private:
}
friend class Value;
- friend class User;
};
// simplify_type - Allow clients to treat uses just like values when using
@@ -208,15 +214,6 @@ public:
unsigned getOperandNo() const;
};
-//===----------------------------------------------------------------------===//
-// AugmentedUse layout struct
-//===----------------------------------------------------------------------===//
-
-struct AugmentedUse : public Use {
- PointerIntPair<User*, 1, unsigned> ref;
- AugmentedUse(); // not implemented
-};
-
} // End llvm namespace
#endif
diff --git a/include/llvm/Value.h b/include/llvm/Value.h
index 3a1c3ca..08fa1c9 100644
--- a/include/llvm/Value.h
+++ b/include/llvm/Value.h
@@ -14,7 +14,6 @@
#ifndef LLVM_VALUE_H
#define LLVM_VALUE_H
-#include "llvm/AbstractTypeUser.h"
#include "llvm/Use.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
@@ -32,7 +31,6 @@ class GlobalVariable;
class GlobalAlias;
class InlineAsm;
class ValueSymbolTable;
-class TypeSymbolTable;
template<typename ValueTy> class StringMapEntry;
template <typename ValueTy = Value>
class AssertingVH;
@@ -43,6 +41,7 @@ class ValueHandleBase;
class LLVMContext;
class Twine;
class MDNode;
+class Type;
//===----------------------------------------------------------------------===//
// Value Class
@@ -77,12 +76,11 @@ private:
/// This field is initialized to zero by the ctor.
unsigned short SubclassData;
- PATypeHolder VTy;
+ Type *VTy;
Use *UseList;
friend class ValueSymbolTable; // Allow ValueSymbolTable to directly mod Name.
friend class ValueHandleBase;
- friend class AbstractTypeUser;
ValueName *Name;
void operator=(const Value &); // Do not implement
@@ -107,13 +105,13 @@ public:
/// All values are typed, get the type of this value.
///
- inline const Type *getType() const { return VTy; }
+ Type *getType() const { return VTy; }
/// All values hold a context through their type.
LLVMContext &getContext() const;
// All values can potentially be named...
- inline bool hasName() const { return Name != 0; }
+ bool hasName() const { return Name != 0; }
ValueName *getValueName() const { return Name; }
/// getName() - Return a constant reference to the value's name. This is cheap
@@ -149,10 +147,6 @@ public:
///
void replaceAllUsesWith(Value *V);
- // uncheckedReplaceAllUsesWith - Just like replaceAllUsesWith but dangerous.
- // Only use when in type resolution situations!
- void uncheckedReplaceAllUsesWith(Value *V);
-
//----------------------------------------------------------------------
// Methods for handling the chain of uses of this Value.
//
@@ -279,10 +273,6 @@ public:
return true; // Values are always values.
}
- /// getRawType - This should only be used to implement the vmcore library.
- ///
- const Type *getRawType() const { return VTy.getRawType(); }
-
/// stripPointerCasts - This method strips off any unneeded pointer
/// casts from the specified value, returning the original uncasted value.
/// Note that the returned value has pointer type if the specified value does.
@@ -310,6 +300,15 @@ public:
/// load, store, and alloca instructions, and global values.
static const unsigned MaximumAlignment = 1u << 29;
+ /// mutateType - Mutate the type of this Value to be of the specified type.
+ /// Note that this is an extremely dangerous operation which can create
+ /// completely invalid IR very easily. It is strongly recommended that you
+ /// recreate IR objects with the right types instead of mutating them in
+ /// place.
+ void mutateType(Type *Ty) {
+ VTy = Ty;
+ }
+
protected:
unsigned short getSubclassDataFromValue() const { return SubclassData; }
void setValueSubclassData(unsigned short D) { SubclassData = D; }
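
A sketch of the (deliberately dangerous) pattern mutateType enables; as the comment above says, recreating the IR object with the right type is almost always preferable:

  void retypeValue(llvm::Value *V, llvm::Type *NewTy) {
    // No verification happens here; every user of V must already expect NewTy.
    V->mutateType(NewTy);
  }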
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index e57ba78..71e0a83 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -23,6 +23,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeAliasSetPrinterPass(Registry);
initializeNoAAPass(Registry);
initializeBasicAliasAnalysisPass(Registry);
+ initializeBlockFrequencyPass(Registry);
initializeBranchProbabilityInfoPass(Registry);
initializeCFGViewerPass(Registry);
initializeCFGPrinterPass(Registry);
diff --git a/lib/Analysis/BlockFrequency.cpp b/lib/Analysis/BlockFrequency.cpp
new file mode 100644
index 0000000..4b86d1d
--- /dev/null
+++ b/lib/Analysis/BlockFrequency.cpp
@@ -0,0 +1,59 @@
+//=======-------- BlockFrequency.cpp - Block Frequency Analysis -------=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/Analysis/BlockFrequency.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(BlockFrequency, "block-freq", "Block Frequency Analysis",
+ true, true)
+INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
+INITIALIZE_PASS_END(BlockFrequency, "block-freq", "Block Frequency Analysis",
+ true, true)
+
+char BlockFrequency::ID = 0;
+
+
+BlockFrequency::BlockFrequency() : FunctionPass(ID) {
+ initializeBlockFrequencyPass(*PassRegistry::getPassRegistry());
+ BFI = new BlockFrequencyImpl<BasicBlock, Function, BranchProbabilityInfo>();
+}
+
+BlockFrequency::~BlockFrequency() {
+ delete BFI;
+}
+
+void BlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<BranchProbabilityInfo>();
+ AU.setPreservesAll();
+}
+
+bool BlockFrequency::runOnFunction(Function &F) {
+ BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
+ BFI->doFunction(&F, &BPI);
+ return false;
+}
+
+/// getBlockFreq - Return the block frequency. Never returns 0; the value is
+/// always positive. Note that the entry frequency is 1024, so the value is
+/// only meaningful relative to other block frequencies, not as an absolute
+/// number. We do this to avoid using floating point arithmetic.
+///
+uint32_t BlockFrequency::getBlockFreq(BasicBlock *BB) {
+ return BFI->getBlockFreq(BB);
+}
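
A minimal sketch of a pass consuming the new analysis; the pass name is hypothetical, and frequencies are only meaningful relative to the entry frequency of 1024:

  #include "llvm/Pass.h"
  #include "llvm/Function.h"
  #include "llvm/Analysis/BlockFrequency.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  struct BlockFreqPrinter : public FunctionPass {
    static char ID;
    BlockFreqPrinter() : FunctionPass(ID) {}

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<BlockFrequency>();
      AU.setPreservesAll();
    }

    virtual bool runOnFunction(Function &F) {
      BlockFrequency &BF = getAnalysis<BlockFrequency>();
      for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
        errs() << BB->getName() << ": " << BF.getBlockFreq(BB) << "\n";
      return false;
    }
  };
  char BlockFreqPrinter::ID = 0;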
diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp
index 812fac0..e39cd22 100644
--- a/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/lib/Analysis/BranchProbabilityInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/Instructions.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -25,7 +26,7 @@ INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob",
char BranchProbabilityInfo::ID = 0;
-
+namespace {
// Please note that BranchProbabilityAnalysis is not a FunctionPass.
// It is created by BranchProbabilityInfo (which is a FunctionPass), which
// provides a clear interface. Thanks to that, all heuristics and other
@@ -143,6 +144,7 @@ public:
bool runOnFunction(Function &F);
};
+} // end anonymous namespace
// Calculate Edge Weights using "Return Heuristics". Predict a successor which
// leads directly to Return Instruction will not be taken.
@@ -167,7 +169,7 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
Value *Cond = BI->getCondition();
ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
- if (!CI)
+ if (!CI || !CI->isEquality())
return;
Value *LHS = CI->getOperand(0);
@@ -184,7 +186,7 @@ void BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) {
// p == 0 -> isProb = false
// p != q -> isProb = true
// p == q -> isProb = false;
- bool isProb = !CI->isEquality();
+ bool isProb = CI->getPredicate() == ICmpInst::ICMP_NE;
if (!isProb)
std::swap(Taken, NonTaken);
@@ -256,6 +258,10 @@ bool BranchProbabilityAnalysis::runOnFunction(Function &F) {
return false;
}
+void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfo>();
+ AU.setPreservesAll();
+}
bool BranchProbabilityInfo::runOnFunction(Function &F) {
LoopInfo &LI = getAnalysis<LoopInfo>();
@@ -347,8 +353,8 @@ getEdgeProbability(BasicBlock *Src, BasicBlock *Dst) const {
raw_ostream &
BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src,
BasicBlock *Dst) const {
- BranchProbability Prob = getEdgeProbability(Src, Dst);
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr()
<< " probability is " << Prob
<< (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
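
A minimal sketch of querying these interfaces from a pass; Src and Dst are assumed to be a block and one of its successors:

  // Inside runOnFunction of a pass that addRequired<BranchProbabilityInfo>():
  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
  BranchProbability Prob = BPI.getEdgeProbability(Src, Dst);
  if (BPI.isEdgeHot(Src, Dst))
    BPI.printEdgeProbability(errs(), Src, Dst);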
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 1a975bf..ab846a2 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMAnalysis
AliasSetTracker.cpp
Analysis.cpp
BasicAliasAnalysis.cpp
+ BlockFrequency.cpp
BranchProbabilityInfo.cpp
CFGPrinter.cpp
CaptureTracking.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 08a6065..7fca17e 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -771,12 +771,12 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
return ConstantExpr::getInsertValue(
cast<Constant>(IVI->getAggregateOperand()),
cast<Constant>(IVI->getInsertedValueOperand()),
- IVI->idx_begin(), IVI->getNumIndices());
+ IVI->getIndices());
if (ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(I))
return ConstantExpr::getExtractValue(
cast<Constant>(EVI->getAggregateOperand()),
- EVI->idx_begin(), EVI->getNumIndices());
+ EVI->getIndices());
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
Ops.data(), Ops.size(), TD);
@@ -1399,7 +1399,7 @@ llvm::ConstantFoldCall(Function *F,
ConstantInt::get(F->getContext(), Res),
ConstantInt::get(Type::getInt1Ty(F->getContext()), Overflow)
};
- return ConstantStruct::get(F->getContext(), Ops, 2, false);
+ return ConstantStruct::get(cast<StructType>(F->getReturnType()), Ops);
}
}
}
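
Both call sites above follow the ArrayRef migration: index lists and struct fields are passed as a whole instead of as pointer/count pairs. A sketch of the new ConstantStruct::get form, where Ctx and STy (a matching two-field StructType*) are assumptions:

  Constant *Fields[] = {
    ConstantInt::get(Type::getInt32Ty(Ctx), 42),
    ConstantInt::get(Type::getInt1Ty(Ctx), 0)
  };
  Constant *CS = ConstantStruct::get(STy, Fields);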
diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp
index ef5d03a..ac5eeeb 100644
--- a/lib/Analysis/DIBuilder.cpp
+++ b/lib/Analysis/DIBuilder.cpp
@@ -219,7 +219,7 @@ DIType DIBuilder::createInheritance(DIType Ty, DIType BaseTy,
}
/// createMemberType - Create debugging information entry for a member.
-DIType DIBuilder::createMemberType(StringRef Name,
+DIType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name,
DIFile File, unsigned LineNumber,
uint64_t SizeInBits, uint64_t AlignInBits,
uint64_t OffsetInBits, unsigned Flags,
@@ -227,7 +227,7 @@ DIType DIBuilder::createMemberType(StringRef Name,
// TAG_member is encoded in DIDerivedType format.
Value *Elts[] = {
GetTagConstant(VMContext, dwarf::DW_TAG_member),
- File, // Or TheCU ? Ty ?
+ Scope,
MDString::get(VMContext, Name),
File,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber),
@@ -786,7 +786,7 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
Value *Args[] = { MDNode::get(Storage->getContext(), Storage), VarInfo };
- return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+ return CallInst::Create(DeclareFn, Args, "", InsertBefore);
}
/// insertDeclare - Insert a new llvm.dbg.declare intrinsic call.
@@ -802,9 +802,9 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo,
// If this block already has a terminator then insert this intrinsic
// before the terminator.
if (TerminatorInst *T = InsertAtEnd->getTerminator())
- return CallInst::Create(DeclareFn, Args, Args+2, "", T);
+ return CallInst::Create(DeclareFn, Args, "", T);
else
- return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+ return CallInst::Create(DeclareFn, Args, "", InsertAtEnd);
}
/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
@@ -819,7 +819,7 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
Value *Args[] = { MDNode::get(V->getContext(), V),
ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
VarInfo };
- return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+ return CallInst::Create(ValueFn, Args, "", InsertBefore);
}
/// insertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
@@ -834,6 +834,6 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset,
Value *Args[] = { MDNode::get(V->getContext(), V),
ConstantInt::get(Type::getInt64Ty(V->getContext()), Offset),
VarInfo };
- return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+ return CallInst::Create(ValueFn, Args, "", InsertAtEnd);
}
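
The same ArrayRef pattern applies to the intrinsic calls above: the (Args, Args+N) iterator pair becomes a single array argument. A sketch with assumed Fn, A, B, and insertion point IP:

  Value *Args[] = { A, B };
  // old form: CallInst::Create(Fn, Args, Args + 2, "", IP);
  CallInst *CI = CallInst::Create(Fn, Args, "", IP);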
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 67f8147..b42e946 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -727,37 +727,37 @@ void DIVariable::dump() const {
/// fixupObjcLikeName - Replace the special characters used in typical
/// Objective-C names with '.' in the given string.
-static void fixupObjcLikeName(std::string &Str) {
+static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+ bool isObjCLike = false;
for (size_t i = 0, e = Str.size(); i < e; ++i) {
char C = Str[i];
- if (C == '[' || C == ']' || C == ' ' || C == ':' || C == '+' ||
- C == '(' || C == ')')
- Str[i] = '.';
+ if (C == '[')
+ isObjCLike = true;
+
+ if (isObjCLike && (C == '[' || C == ']' || C == ' ' || C == ':' ||
+ C == '+' || C == '(' || C == ')'))
+ Out.push_back('.');
+ else
+ Out.push_back(C);
}
}
/// getFnSpecificMDNode - Return a NameMDNode, if available, that is
/// suitable to hold function specific information.
NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, StringRef FuncName) {
- if (FuncName.find('[') == StringRef::npos)
- return M.getNamedMetadata(Twine("llvm.dbg.lv.", FuncName));
- std::string Name = FuncName;
- fixupObjcLikeName(Name);
- return M.getNamedMetadata(Twine("llvm.dbg.lv.", Name));
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ fixupObjcLikeName(FuncName, Name);
+
+ return M.getNamedMetadata(Name.str());
}
/// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
/// to hold function specific information.
NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, StringRef FuncName) {
- SmallString<32> Out;
- if (FuncName.find('[') == StringRef::npos)
- return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", FuncName)
- .toStringRef(Out));
-
- std::string Name = FuncName;
- fixupObjcLikeName(Name);
- return M.getOrInsertNamedMetadata(Twine("llvm.dbg.lv.", Name)
- .toStringRef(Out));
+ SmallString<32> Name = StringRef("llvm.dbg.lv.");
+ fixupObjcLikeName(FuncName, Name);
+
+ return M.getOrInsertNamedMetadata(Name.str());
}
diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp
index dde2556..6535786 100644
--- a/lib/Analysis/IPA/FindUsedTypes.cpp
+++ b/lib/Analysis/IPA/FindUsedTypes.cpp
@@ -96,8 +96,6 @@ void FindUsedTypes::print(raw_ostream &OS, const Module *M) const {
OS << "Types in use by this module:\n";
for (SetVector<const Type *>::const_iterator I = UsedTypes.begin(),
E = UsedTypes.end(); I != E; ++I) {
- OS << " ";
- WriteTypeSymbolic(OS, *I, M);
- OS << '\n';
+ OS << " " << **I << '\n';
}
}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index a0c42f0..e5f0a77 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -21,7 +21,6 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/STLExtras.h"
@@ -39,15 +38,6 @@ INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(IVUsers, "iv-users",
"Induction Variable Users", false, true)
-// IVUsers behavior currently depends on this temporary indvars mode. The
-// option must be defined upstream from its uses.
-namespace llvm {
- bool DisableIVRewrite = false;
-}
-cl::opt<bool, true> DisableIVRewriteOpt(
- "disable-iv-rewrite", cl::Hidden, cl::location(llvm::DisableIVRewrite),
- cl::desc("Disable canonical induction variable rewriting"));
-
Pass *llvm::createIVUsersPass() {
return new IVUsers();
}
@@ -56,17 +46,20 @@ Pass *llvm::createIVUsersPass() {
/// used by the given expression, within the context of analyzing the
/// given loop.
static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
- ScalarEvolution *SE) {
+ ScalarEvolution *SE, LoopInfo *LI) {
// An addrec is interesting if it's affine or if it has an interesting start.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
- // Keep things simple. Don't touch loop-variant strides.
+ // Keep things simple. Don't touch loop-variant strides unless they're
+ // only used outside the loop and we can simplify them.
if (AR->getLoop() == L)
- return AR->isAffine() || !L->contains(I);
+ return AR->isAffine() ||
+ (!L->contains(I) &&
+ SE->getSCEVAtScope(AR, LI->getLoopFor(I->getParent())) != AR);
// Otherwise recurse to see if the start value is interesting, and that
// the step value is not interesting, since we don't yet know how to
// do effective SCEV expansions for addrecs with interesting steps.
- return isInteresting(AR->getStart(), I, L, SE) &&
- !isInteresting(AR->getStepRecurrence(*SE), I, L, SE);
+ return isInteresting(AR->getStart(), I, L, SE, LI) &&
+ !isInteresting(AR->getStepRecurrence(*SE), I, L, SE, LI);
}
// An add is interesting if exactly one of its operands is interesting.
@@ -74,7 +67,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
bool AnyInterestingYet = false;
for (SCEVAddExpr::op_iterator OI = Add->op_begin(), OE = Add->op_end();
OI != OE; ++OI)
- if (isInteresting(*OI, I, L, SE)) {
+ if (isInteresting(*OI, I, L, SE, LI)) {
if (AnyInterestingYet)
return false;
AnyInterestingYet = true;
@@ -89,7 +82,7 @@ static bool isInteresting(const SCEV *S, const Instruction *I, const Loop *L,
/// AddUsersIfInteresting - Inspect the specified instruction. If it is a
/// reducible SCEV, recursively add its users to the IVUsesByStride set and
/// return true. Otherwise, return false.
-bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
+bool IVUsers::AddUsersIfInteresting(Instruction *I) {
if (!SE->isSCEVable(I->getType()))
return false; // Void and FP expressions cannot be reduced.
@@ -100,11 +93,6 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
if (Width > 64 || (TD && !TD->isLegalInteger(Width)))
return false;
- // We expect Sign/Zero extension to be eliminated from the IR before analyzing
- // any downstream uses.
- if (DisableIVRewrite && (isa<SExtInst>(I) || isa<ZExtInst>(I)))
- return false;
-
if (!Processed.insert(I))
return true; // Instruction already handled.
@@ -113,7 +101,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
// If we've come to an uninteresting expression, stop the traversal and
// call this a user.
- if (!isInteresting(ISE, I, L, SE))
+ if (!isInteresting(ISE, I, L, SE, LI))
return false;
SmallPtrSet<Instruction *, 4> UniqueUsers;
@@ -136,13 +124,12 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
bool AddUserToIVUsers = false;
if (LI->getLoopFor(User->getParent()) != L) {
if (isa<PHINode>(User) || Processed.count(User) ||
- !AddUsersIfInteresting(User, Phi)) {
+ !AddUsersIfInteresting(User)) {
DEBUG(dbgs() << "FOUND USER in other loop: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
}
- } else if (Processed.count(User) ||
- !AddUsersIfInteresting(User, Phi)) {
+ } else if (Processed.count(User) || !AddUsersIfInteresting(User)) {
DEBUG(dbgs() << "FOUND USER: " << *User << '\n'
<< " OF SCEV: " << *ISE << '\n');
AddUserToIVUsers = true;
@@ -150,7 +137,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
if (AddUserToIVUsers) {
// Okay, we found a user that we cannot reduce.
- IVUses.push_back(new IVStrideUse(this, User, I, Phi));
+ IVUses.push_back(new IVStrideUse(this, User, I));
IVStrideUse &NewUse = IVUses.back();
// Autodetect the post-inc loop set, populating NewUse.PostIncLoops.
// The regular return value here is discarded; instead of recording
@@ -165,8 +152,8 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I, PHINode *Phi) {
return true;
}
-IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand, PHINode *Phi) {
- IVUses.push_back(new IVStrideUse(this, User, Operand, Phi));
+IVStrideUse &IVUsers::AddUser(Instruction *User, Value *Operand) {
+ IVUses.push_back(new IVStrideUse(this, User, Operand));
return IVUses.back();
}
@@ -194,7 +181,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
// them by stride. Start by finding all of the PHI nodes in the header for
// this loop. If they are induction variables, inspect their uses.
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
- (void)AddUsersIfInteresting(I, cast<PHINode>(I));
+ (void)AddUsersIfInteresting(I);
return false;
}
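The extra LoopInfo parameter lets isInteresting ask a sharper question: a stride that varies in L is still worth tracking when its user sits outside L and ScalarEvolution can evaluate the recurrence at the user's scope. A sketch of that test in isolation, with hypothetical names (the authoritative form is the isInteresting change above); it relies on getSCEVAtScope returning its input unchanged when no simplification is possible:

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    #include "llvm/Analysis/ScalarEvolutionExpressions.h"
    using namespace llvm;

    static bool variantStrideStillInteresting(const SCEVAddRecExpr *AR,
                                              const Instruction *User,
                                              const Loop *L,
                                              ScalarEvolution *SE,
                                              LoopInfo *LI) {
      if (L->contains(User))
        return false;  // in-loop users of a loop-variant stride stay out
      const Loop *UserScope = LI->getLoopFor(User->getParent());
      // Inequality means SCEV found a simpler form at the user's scope.
      return SE->getSCEVAtScope(AR, UserScope) != AR;
    }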
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 9d78f8b..8709f6b 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -2204,15 +2204,15 @@ Value *llvm::SimplifySelectInst(Value *CondVal, Value *TrueVal, Value *FalseVal,
if (TrueVal == FalseVal)
return TrueVal;
- if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
- return FalseVal;
- if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
- return TrueVal;
if (isa<UndefValue>(CondVal)) { // select undef, X, Y -> X or Y
if (isa<Constant>(TrueVal))
return TrueVal;
return FalseVal;
}
+ if (isa<UndefValue>(TrueVal)) // select C, undef, X -> X
+ return FalseVal;
+ if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
+ return TrueVal;
return 0;
}
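The reorder is not cosmetic: when the condition itself is undef, the simplifier now gets to pick a constant arm before the per-arm undef rules run. A hypothetical helper mirroring the new priority:

    #include "llvm/Constants.h"
    using namespace llvm;

    // Sketch of the check order SimplifySelectInst uses after this change.
    static Value *foldSelectSketch(Value *Cond, Value *TV, Value *FV) {
      if (isa<UndefValue>(Cond))             // select undef, X, Y
        return isa<Constant>(TV) ? TV : FV;  // prefer a constant arm
      if (isa<UndefValue>(TV)) return FV;    // select C, undef, X -> X
      if (isa<UndefValue>(FV)) return TV;    // select C, X, undef -> X
      return 0;                              // no fold applies
    }

For select i1 undef, i32 undef, i32 %x this yields undef (UndefValue is a Constant), where the old ordering produced %x, a strictly weaker result.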
diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp
index f130f30..89755da 100644
--- a/lib/Analysis/Lint.cpp
+++ b/lib/Analysis/Lint.cpp
@@ -592,8 +592,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
return findValueImpl(CI->getOperand(0), OffsetOk, Visited);
} else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {
if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),
- Ex->idx_begin(),
- Ex->idx_end()))
+ Ex->getIndices()))
if (W != V)
return findValueImpl(W, OffsetOk, Visited);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
@@ -607,9 +606,7 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
return findValueImpl(CE->getOperand(0), OffsetOk, Visited);
} else if (CE->getOpcode() == Instruction::ExtractValue) {
ArrayRef<unsigned> Indices = CE->getIndices();
- if (Value *W = FindInsertedValue(CE->getOperand(0),
- Indices.begin(),
- Indices.end()))
+ if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))
if (W != V)
return findValueImpl(W, OffsetOk, Visited);
}
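Both hunks are part of the commit-wide migration of FindInsertedValue from (idx_begin, idx_end) pointer pairs to a single ArrayRef<unsigned>. A usage sketch under that assumption (hypothetical caller):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static Value *peekThroughExtract(ExtractValueInst *EV) {
      // getIndices() hands back an ArrayRef, so no begin/end plumbing.
      ArrayRef<unsigned> Idxs = EV->getIndices();
      return FindInsertedValue(EV->getAggregateOperand(), Idxs);
    }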
diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp
index 64d215c..2283db0 100644
--- a/lib/Analysis/MemDepPrinter.cpp
+++ b/lib/Analysis/MemDepPrinter.cpp
@@ -79,8 +79,8 @@ bool MemDepPrinter::runOnFunction(Function &F) {
MemDepResult Res = MDA.getDependency(Inst);
if (!Res.isNonLocal()) {
- assert(Res.isClobber() != Res.isDef() &&
- "Local dep should be def or clobber!");
+ assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+ "Local dep should be unknown, def or clobber!");
Deps[Inst].insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
Res.isClobber()),
static_cast<BasicBlock *>(0)));
@@ -92,8 +92,9 @@ bool MemDepPrinter::runOnFunction(Function &F) {
for (MemoryDependenceAnalysis::NonLocalDepInfo::const_iterator
I = NLDI.begin(), E = NLDI.end(); I != E; ++I) {
const MemDepResult &Res = I->getResult();
- assert(Res.isClobber() != Res.isDef() &&
- "Resolved non-local call dep should be def or clobber!");
+ assert((Res.isUnknown() || Res.isClobber() || Res.isDef()) &&
+ "Resolved non-local call dep should be unknown, def or "
+ "clobber!");
InstDeps.insert(std::make_pair(InstAndClobberFlag(Res.getInst(),
Res.isClobber()),
I->getBB()));
@@ -148,16 +149,24 @@ void MemDepPrinter::print(raw_ostream &OS, const Module *M) const {
bool isClobber = I->first.getInt();
const BasicBlock *DepBB = I->second;
- OS << " " << (isClobber ? "Clobber" : " Def");
+ OS << " ";
+ if (!DepInst)
+ OS << "Unknown";
+ else if (isClobber)
+ OS << "Clobber";
+ else
+ OS << " Def";
if (DepBB) {
OS << " in block ";
WriteAsOperand(OS, DepBB, /*PrintType=*/false, M);
}
- OS << " from: ";
- if (DepInst == Inst)
- OS << "<unspecified>";
- else
- DepInst->print(OS);
+ if (DepInst) {
+ OS << " from: ";
+ if (DepInst == Inst)
+ OS << "<unspecified>";
+ else
+ DepInst->print(OS);
+ }
OS << "\n";
}
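With MemDepResult::getUnknown() in the analysis (see the MemoryDependenceAnalysis changes below), a local result can now be Def, Clobber, or Unknown, and Unknown carries a null instruction, which is what the !DepInst test keys on. A small classification sketch using only the predicates the asserts above rely on:

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    using namespace llvm;

    static const char *classifyDep(const MemDepResult &R) {
      if (R.isUnknown()) return "Unknown";    // R.getInst() is null here
      return R.isClobber() ? "Clobber" : "Def";
    }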
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index 769c68c..53d4304 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -50,13 +50,8 @@ static bool isMallocCall(const CallInst *CI) {
const FunctionType *FTy = Callee->getFunctionType();
if (FTy->getNumParams() != 1)
return false;
- if (IntegerType *ITy = dyn_cast<IntegerType>(FTy->param_begin()->get())) {
- if (ITy->getBitWidth() != 32 && ITy->getBitWidth() != 64)
- return false;
- return true;
- }
-
- return false;
+ return FTy->getParamType(0)->isIntegerTy(32) ||
+ FTy->getParamType(0)->isIntegerTy(64);
}
/// extractMallocCall - Returns the corresponding CallInst if the instruction
@@ -211,7 +206,7 @@ const CallInst *llvm::isFreeCall(const Value *I) {
return 0;
if (FTy->getNumParams() != 1)
return 0;
- if (FTy->param_begin()->get() != Type::getInt8PtrTy(Callee->getContext()))
+ if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext()))
return 0;
return CI;
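The first rewrite folds the IntegerType dance into Type::isIntegerTy(unsigned), which tests the kind and the exact bit width in one call, and both swap param_begin()->get() for the clearer getParamType(0). A hedged equivalence sketch:

    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Accepts exactly the i32 and i64 size-argument prototypes that
    // isMallocCall wants.
    static bool isSizeTLike(const Type *T) {
      return T->isIntegerTy(32) || T->isIntegerTy(64);
      // same as: isa<IntegerType>(T) with a bit width of 32 or 64
    }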
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 5f640c0..bba4482 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -47,6 +47,11 @@ STATISTIC(NumUncacheNonLocalPtr,
STATISTIC(NumCacheCompleteNonLocalPtr,
"Number of block queries that were completely cached");
+// Limit for the number of instructions to scan in a block.
+// FIXME: Figure out what a sane value is for this.
+// (500 is relatively insane.)
+static const int BlockScanLimit = 500;
+
char MemoryDependenceAnalysis::ID = 0;
// Register this pass...
@@ -180,8 +185,16 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst,
MemDepResult MemoryDependenceAnalysis::
getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
BasicBlock::iterator ScanIt, BasicBlock *BB) {
+ unsigned Limit = BlockScanLimit;
+
// Walk backwards through the block, looking for dependencies
while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
Instruction *Inst = --ScanIt;
// If this inst is a memory op, get the pointer it accessed
@@ -215,11 +228,11 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,
}
}
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getClobber(ScanIt);
+ return MemDepResult::getUnknown();
}
/// isLoadLoadClobberIfExtendedToFullWidth - Return true if LI is a load that
@@ -322,9 +335,17 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
const Value *MemLocBase = 0;
int64_t MemLocOffset = 0;
-
+
+ unsigned Limit = BlockScanLimit;
+
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
+ // Limit the amount of scanning we do so we don't end up with quadratic
+ // running time on extreme testcases.
+ --Limit;
+ if (!Limit)
+ return MemDepResult::getUnknown();
+
Instruction *Inst = --ScanIt;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -458,11 +479,11 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad,
}
}
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (BB != &BB->getParent()->getEntryBlock())
return MemDepResult::getNonLocal();
- return MemDepResult::getClobber(ScanIt);
+ return MemDepResult::getUnknown();
}
/// getDependency - Return the instruction on which a memory operation
@@ -490,12 +511,12 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
- // No dependence found. If this is the entry block of the function, it is a
- // clobber, otherwise it is non-local.
+ // No dependence found. If this is the entry block of the function, it is
+ // unknown, otherwise it is non-local.
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
else
- LocalCache = MemDepResult::getClobber(QueryInst);
+ LocalCache = MemDepResult::getUnknown();
} else {
AliasAnalysis::Location MemLoc;
AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA);
@@ -514,7 +535,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
QueryParent);
} else
// Non-memory instruction.
- LocalCache = MemDepResult::getClobber(--BasicBlock::iterator(ScanPos));
+ LocalCache = MemDepResult::getUnknown();
}
// Remember the result!
@@ -648,10 +669,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB);
} else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) {
// No dependence found. If this is the entry block of the function, it is
- // a clobber, otherwise it is non-local.
+ // unknown, otherwise it is non-local.
Dep = MemDepResult::getNonLocal();
} else {
- Dep = MemDepResult::getClobber(ScanPos);
+ Dep = MemDepResult::getUnknown();
}
// If we had a dirty entry for the block, update it. Otherwise, just add
@@ -707,7 +728,7 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad,
return;
Result.clear();
Result.push_back(NonLocalDepResult(FromBB,
- MemDepResult::getClobber(FromBB->begin()),
+ MemDepResult::getUnknown(),
const_cast<Value *>(Loc.Ptr)));
}
@@ -769,7 +790,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc,
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
// to Cache!
- if (Dep.isNonLocal())
+ if (Dep.isNonLocal() || Dep.isUnknown())
return Dep;
// Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently
@@ -1091,16 +1112,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// If getNonLocalPointerDepFromBB fails here, that means the cached
// result conflicted with the Visited list; we have to conservatively
- // assume a clobber, but this also does not block PRE of the load.
+ // assume it is unknown, but this also does not block PRE of the load.
if (!CanTranslate ||
getNonLocalPointerDepFromBB(PredPointer,
Loc.getWithNewPtr(PredPtrVal),
isLoad, Pred,
Result, Visited)) {
// Add the entry to the Result list.
- NonLocalDepResult Entry(Pred,
- MemDepResult::getClobber(Pred->getTerminator()),
- PredPtrVal);
+ NonLocalDepResult Entry(Pred, MemDepResult::getUnknown(), PredPtrVal);
Result.push_back(Entry);
// Since we had a phi translation failure, the cache for CacheKey won't
@@ -1145,8 +1164,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
// results from the set". Clear out the indicator for this.
CacheInfo->Pair = BBSkipFirstBlockPair();
- // If *nothing* works, mark the pointer as being clobbered by the first
- // instruction in this block.
+ // If *nothing* works, mark the pointer as unknown.
//
// If this is the magic first block, return this as a clobber of the whole
// incoming value. Since we can't phi translate to one of the predecessors,
@@ -1161,8 +1179,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer,
assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
- I->setResult(MemDepResult::getClobber(BB->getTerminator()));
- ReverseNonLocalPtrDeps[BB->getTerminator()].insert(CacheKey);
+ I->setResult(MemDepResult::getUnknown());
Result.push_back(NonLocalDepResult(I->getBB(), I->getResult(),
Pointer.getAddr()));
break;
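Two themes run through this file. Every backwards block walk now decrements a budget of BlockScanLimit instructions and bails out with getUnknown() once it is spent, trading precision for a hard bound on work; and every "no dependence and we fell off the entry block" case becomes Unknown instead of a synthetic clobber pinned to some instruction. A condensed sketch of the loop shape (hypothetical wrapper; the real scans also classify each instruction):

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    #include "llvm/Function.h"
    using namespace llvm;

    static MemDepResult scanForDep(BasicBlock *BB, BasicBlock::iterator ScanIt) {
      unsigned Limit = 500;                    // mirrors BlockScanLimit
      while (ScanIt != BB->begin()) {
        if (--Limit == 0)
          return MemDepResult::getUnknown();   // budget spent: give up safely
        Instruction *Inst = --ScanIt;
        (void)Inst;                            // ... test Inst for a dependency ...
      }
      // Fell off the top of the block: non-local unless this is the entry.
      return BB != &BB->getParent()->getEntryBlock()
                 ? MemDepResult::getNonLocal() : MemDepResult::getUnknown();
    }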
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index 8e5a400..befe6d2 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -19,6 +19,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/STLExtras.h"
+
using namespace llvm;
/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
@@ -159,7 +160,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// If we haven't found this binop, insert it.
- Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
+ Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS, "tmp"));
+ BO->setDebugLoc(SaveInsertPt->getDebugLoc());
rememberInstruction(BO);
// Restore the original insert point.
@@ -847,6 +849,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
const Loop *L,
const Type *ExpandTy,
const Type *IntTy) {
+ assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
+
// Reuse a previously-inserted PHI, if present.
for (BasicBlock::iterator I = L->getHeader()->begin();
PHINode *PN = dyn_cast<PHINode>(I); ++I)
@@ -871,13 +875,15 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// If any of the operands don't dominate the insert position, bail.
// Addrec operands are always loop-invariant, so this can only happen
// if there are instructions which haven't been hoisted.
- for (User::op_iterator OI = IncV->op_begin()+1,
- OE = IncV->op_end(); OI != OE; ++OI)
- if (Instruction *OInst = dyn_cast<Instruction>(OI))
- if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
- IncV = 0;
- break;
- }
+ if (L == IVIncInsertLoop) {
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
+ IncV = 0;
+ break;
+ }
+ }
if (!IncV)
break;
// Advance to the next instruction.
@@ -919,6 +925,11 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
L->getHeader()->begin());
+ // StartV must be hoisted into L's preheader to dominate the new phi.
+ assert(!isa<Instruction>(StartV) ||
+ SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
+ L->getHeader()));
+
// Expand code for the step value. Insert instructions right before the
// terminator corresponding to the back-edge. Do this before creating the PHI
// so that PHI reuse code doesn't see an incomplete PHI. If the stride is
@@ -935,7 +946,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
BasicBlock *Header = L->getHeader();
Builder.SetInsertPoint(Header, Header->begin());
pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
- PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), "lsr.iv");
+ PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
+ Twine(IVName) + ".iv");
rememberInstruction(PN);
// Create the step instructions and populate the PHI.
@@ -953,7 +965,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// at IVIncInsertPos.
Instruction *InsertPos = L == IVIncInsertLoop ?
IVIncInsertPos : Pred->getTerminator();
- Builder.SetInsertPoint(InsertPos->getParent(), InsertPos);
+ Builder.SetInsertPoint(InsertPos);
Value *IncV;
// If the PHI is a pointer, use a GEP, otherwise use an add or sub.
if (isPointer) {
@@ -971,8 +983,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
}
} else {
IncV = isNegative ?
- Builder.CreateSub(PN, StepV, "lsr.iv.next") :
- Builder.CreateAdd(PN, StepV, "lsr.iv.next");
+ Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
+ Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
rememberInstruction(IncV);
}
PN->addIncoming(IncV, Pred);
@@ -1155,6 +1167,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
"indvar.next",
HP->getTerminator());
+ Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
rememberInstruction(Add);
CanonicalIV->addIncoming(Add, HP);
} else {
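Several of these hunks are about IR quality rather than semantics: freshly created binops and the canonical IV increment inherit a debug location from a nearby anchor instruction, and induction variables are named from the expander's IVName instead of a hardcoded "lsr.iv". A sketch of the debug-location idiom, assuming operands that do not constant-fold (otherwise the cast<Instruction> would assert):

    #include "llvm/Instructions.h"
    #include "llvm/Support/IRBuilder.h"
    using namespace llvm;

    static Instruction *createAddLikeOriginal(IRBuilder<> &B, Value *L, Value *R,
                                              Instruction *LocAnchor) {
      Instruction *I = cast<Instruction>(B.CreateAdd(L, R, "tmp"));
      I->setDebugLoc(LocAnchor->getDebugLoc());  // keep the source position
      return I;
    }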
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index dab5aeb..455c910 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -1352,14 +1352,15 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
// we might be able to find the complete struct somewhere.
// Find the value that is at that particular spot
- Value *V = FindInsertedValue(From, Idxs.begin(), Idxs.end());
+ Value *V = FindInsertedValue(From, Idxs);
if (!V)
return NULL;
// Insert the value in the new (sub) aggregate
- return llvm::InsertValueInst::Create(To, V, Idxs.begin() + IdxSkip,
- Idxs.end(), "tmp", InsertBefore);
+ return llvm::InsertValueInst::Create(To, V,
+ ArrayRef<unsigned>(Idxs).slice(IdxSkip),
+ "tmp", InsertBefore);
}
// This helper takes a nested struct and extracts a part of it (which is again a
@@ -1374,15 +1375,13 @@ static Value *BuildSubAggregate(Value *From, Value* To, const Type *IndexedType,
// insertvalue instruction somewhere).
//
// All inserted insertvalue instructions are inserted before InsertBefore
-static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
- const unsigned *idx_end,
+static Value *BuildSubAggregate(Value *From, ArrayRef<unsigned> idx_range,
Instruction *InsertBefore) {
assert(InsertBefore && "Must have someplace to insert!");
const Type *IndexedType = ExtractValueInst::getIndexedType(From->getType(),
- idx_begin,
- idx_end);
+ idx_range);
Value *To = UndefValue::get(IndexedType);
- SmallVector<unsigned, 10> Idxs(idx_begin, idx_end);
+ SmallVector<unsigned, 10> Idxs(idx_range.begin(), idx_range.end());
unsigned IdxSkip = Idxs.size();
return BuildSubAggregate(From, To, IndexedType, Idxs, IdxSkip, InsertBefore);
@@ -1394,39 +1393,37 @@ static Value *BuildSubAggregate(Value *From, const unsigned *idx_begin,
///
/// If InsertBefore is not null, this function will duplicate (modified)
/// insertvalues when a part of a nested struct is extracted.
-Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
- const unsigned *idx_end, Instruction *InsertBefore) {
+Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,
+ Instruction *InsertBefore) {
// Nothing to index? Just return V then (this is useful at the end of our
// recursion)
- if (idx_begin == idx_end)
+ if (idx_range.empty())
return V;
// We have indices, so V should have an indexable type
assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
&& "Not looking at a struct or array?");
- assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
+ assert(ExtractValueInst::getIndexedType(V->getType(), idx_range)
&& "Invalid indices for type?");
const CompositeType *PTy = cast<CompositeType>(V->getType());
if (isa<UndefValue>(V))
return UndefValue::get(ExtractValueInst::getIndexedType(PTy,
- idx_begin,
- idx_end));
+ idx_range));
else if (isa<ConstantAggregateZero>(V))
return Constant::getNullValue(ExtractValueInst::getIndexedType(PTy,
- idx_begin,
- idx_end));
+ idx_range));
else if (Constant *C = dyn_cast<Constant>(V)) {
if (isa<ConstantArray>(C) || isa<ConstantStruct>(C))
// Recursively process this constant
- return FindInsertedValue(C->getOperand(*idx_begin), idx_begin + 1,
- idx_end, InsertBefore);
+ return FindInsertedValue(C->getOperand(idx_range[0]), idx_range.slice(1),
+ InsertBefore);
} else if (InsertValueInst *I = dyn_cast<InsertValueInst>(V)) {
// Loop the indices for the insertvalue instruction in parallel with the
// requested indices
- const unsigned *req_idx = idx_begin;
+ const unsigned *req_idx = idx_range.begin();
for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
i != e; ++i, ++req_idx) {
- if (req_idx == idx_end) {
+ if (req_idx == idx_range.end()) {
if (InsertBefore)
// The requested index identifies a part of a nested aggregate. Handle
// this specially. For example,
@@ -1438,7 +1435,10 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// %C = insertvalue {i32, i32 } %A, i32 11, 1
// which allows the unused 0,0 element from the nested struct to be
// removed.
- return BuildSubAggregate(V, idx_begin, req_idx, InsertBefore);
+ return BuildSubAggregate(V,
+ ArrayRef<unsigned>(idx_range.begin(),
+ req_idx),
+ InsertBefore);
else
// We can't handle this without inserting insertvalues
return 0;
@@ -1448,13 +1448,14 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// See if the (aggregate) value inserted into has the value we are
// looking for, then.
if (*req_idx != *i)
- return FindInsertedValue(I->getAggregateOperand(), idx_begin, idx_end,
+ return FindInsertedValue(I->getAggregateOperand(), idx_range,
InsertBefore);
}
// If we end up here, the indices of the insertvalue match with those
// requested (though possibly only partially). Now we recursively look at
// the inserted value, passing any remaining indices.
- return FindInsertedValue(I->getInsertedValueOperand(), req_idx, idx_end,
+ return FindInsertedValue(I->getInsertedValueOperand(),
+ ArrayRef<unsigned>(req_idx, idx_range.end()),
InsertBefore);
} else if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) {
// If we're extracting a value from an aggregate that was extracted from
@@ -1462,24 +1463,20 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
// However, we will need to chain I's indices with the requested indices.
// Calculate the number of indices required
- unsigned size = I->getNumIndices() + (idx_end - idx_begin);
+ unsigned size = I->getNumIndices() + idx_range.size();
// Allocate some space to put the new indices in
SmallVector<unsigned, 5> Idxs;
Idxs.reserve(size);
// Add indices from the extract value instruction
- for (const unsigned *i = I->idx_begin(), *e = I->idx_end();
- i != e; ++i)
- Idxs.push_back(*i);
+ Idxs.append(I->idx_begin(), I->idx_end());
// Add requested indices
- for (const unsigned *i = idx_begin, *e = idx_end; i != e; ++i)
- Idxs.push_back(*i);
+ Idxs.append(idx_range.begin(), idx_range.end());
assert(Idxs.size() == size
&& "Number of indices added not correct?");
- return FindInsertedValue(I->getAggregateOperand(), Idxs.begin(), Idxs.end(),
- InsertBefore);
+ return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore);
}
// Otherwise, we don't know (such as, extracting from a function return value
// or load instruction)
@@ -1783,3 +1780,19 @@ llvm::GetUnderlyingObject(Value *V, const TargetData *TD, unsigned MaxLookup) {
}
return V;
}
+
+/// onlyUsedByLifetimeMarkers - Return true if the only users of this pointer
+/// are lifetime markers.
+///
+bool llvm::onlyUsedByLifetimeMarkers(const Value *V) {
+ for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
+ UI != UE; ++UI) {
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(*UI);
+ if (!II) return false;
+
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ }
+ return true;
+}
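The new helper gives passes a one-call way to ask whether a pointer is dead apart from its lifetime annotations. A hedged usage sketch (hypothetical caller):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    static bool allocaIsRemovable(const AllocaInst *AI) {
      // If only llvm.lifetime.start/end touch it, the alloca holds nothing
      // observable and can be deleted together with its markers.
      return onlyUsedByLifetimeMarkers(AI);
    }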
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 014e816..3c63106 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -406,29 +406,20 @@ lltok::Kind LLLexer::LexQuote() {
return kind;
}
-static bool JustWhitespaceNewLine(const char *&Ptr) {
- const char *ThisPtr = Ptr;
- while (*ThisPtr == ' ' || *ThisPtr == '\t')
- ++ThisPtr;
- if (*ThisPtr == '\n' || *ThisPtr == '\r') {
- Ptr = ThisPtr;
- return true;
- }
- return false;
-}
-
/// LexExclaim:
/// !foo
/// !
lltok::Kind LLLexer::LexExclaim() {
// Lex a metadata name as a MetadataVar.
- if (isalpha(CurPtr[0])) {
+ if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
++CurPtr;
while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
- CurPtr[0] == '.' || CurPtr[0] == '_')
+ CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
++CurPtr;
StrVal.assign(TokStart+1, CurPtr); // Skip !
+ UnEscapeLexed(StrVal);
return lltok::MetadataVar;
}
return lltok::exclaim;
@@ -480,7 +471,6 @@ lltok::Kind LLLexer::LexIdentifier() {
if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
return lltok::kw_##STR;
- KEYWORD(begin); KEYWORD(end);
KEYWORD(true); KEYWORD(false);
KEYWORD(declare); KEYWORD(define);
KEYWORD(global); KEYWORD(constant);
@@ -570,6 +560,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(noimplicitfloat);
KEYWORD(naked);
KEYWORD(hotpatch);
+ KEYWORD(nonlazybind);
KEYWORD(type);
KEYWORD(opaque);
@@ -598,26 +589,6 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
#undef TYPEKEYWORD
- // Handle special forms for autoupgrading. Drop these in LLVM 3.0. This is
- // to avoid conflicting with the sext/zext instructions, below.
- if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
- // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
- if (JustWhitespaceNewLine(CurPtr))
- return lltok::kw_signext;
- } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
- // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
- if (JustWhitespaceNewLine(CurPtr))
- return lltok::kw_zeroext;
- } else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) {
- // FIXME: Remove in LLVM 3.0.
- // Autoupgrade malloc instruction.
- return lltok::kw_malloc;
- } else if (Len == 4 && !memcmp(StartChar, "free", 4)) {
- // FIXME: Remove in LLVM 3.0.
- // Autoupgrade malloc instruction.
- return lltok::kw_free;
- }
-
// Keywords for instructions.
#define INSTKEYWORD(STR, Enum) \
if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
@@ -664,7 +635,6 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(extractelement, ExtractElement);
INSTKEYWORD(insertelement, InsertElement);
INSTKEYWORD(shufflevector, ShuffleVector);
- INSTKEYWORD(getresult, ExtractValue);
INSTKEYWORD(extractvalue, ExtractValue);
INSTKEYWORD(insertvalue, InsertValue);
#undef INSTKEYWORD
@@ -689,14 +659,6 @@ lltok::Kind LLLexer::LexIdentifier() {
return lltok::kw_cc;
}
- // If this starts with "call", return it as CALL. This is to support old
- // broken .ll files. FIXME: remove this with LLVM 3.0.
- if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
- CurPtr = TokStart+4;
- UIntVal = Instruction::Call;
- return lltok::kw_call;
- }
-
// Finally, if this isn't known, return an error.
CurPtr = TokStart+1;
return lltok::Error;
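The MetadataVar change widens the accepted character set and, because '\\' is now legal, runs the token through UnEscapeLexed so escape sequences decode into the stored name. A sketch of the resulting character predicate (hypothetical helper; the real loop is inline above, and the leading character additionally excludes digits):

    #include <cctype>

    // Characters permitted inside a metadata name after the '!'.
    static bool isMetadataNameChar(char C) {
      return isalnum((unsigned char)C) || C == '-' || C == '$' ||
             C == '.' || C == '_' || C == '\\';
    }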
diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h
index 4fe705e..33b9135 100644
--- a/lib/AsmParser/LLLexer.h
+++ b/lib/AsmParser/LLLexer.h
@@ -38,7 +38,7 @@ namespace llvm {
lltok::Kind CurKind;
std::string StrVal;
unsigned UIntVal;
- const Type *TyVal;
+ Type *TyVal;
APFloat APFloatVal;
APSInt APSIntVal;
@@ -56,7 +56,7 @@ namespace llvm {
LocTy getLoc() const { return SMLoc::getFromPointer(TokStart); }
lltok::Kind getKind() const { return CurKind; }
const std::string &getStrVal() const { return StrVal; }
- const Type *getTyVal() const { return TyVal; }
+ Type *getTyVal() const { return TyVal; }
unsigned getUIntVal() const { return UIntVal; }
const APSInt &getAPSIntVal() const { return APSIntVal; }
const APFloat &getAPFloatVal() const { return APFloatVal; }
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 81e0747..cfc31f3 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -26,6 +26,13 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static std::string getTypeString(const Type *T) {
+ std::string Result;
+ raw_string_ostream Tmp(Result);
+ Tmp << *T;
+ return Tmp.str();
+}
+
/// Run: module ::= toplevelentity*
bool LLParser::Run() {
// Prime the lexer.
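With Type::getDescription() removed by the type-system rewrite, diagnostics now stream the type into a std::string through the getTypeString helper added above; every "defined with type '...'" error later in the patch is rewritten to call it. The same raw_string_ostream pattern works for any entity raw_ostream can print; a generalized sketch:

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Hypothetical generalization of getTypeString above.
    template <typename T> static std::string toDiagString(const T &V) {
      std::string S;
      llvm::raw_string_ostream OS(S);
      OS << V;
      return OS.str();  // flushes and returns the accumulated string
    }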
@@ -59,24 +66,6 @@ bool LLParser::ValidateEndOfModule() {
}
- // Update auto-upgraded malloc calls to "malloc".
- // FIXME: Remove in LLVM 3.0.
- if (MallocF) {
- MallocF->setName("malloc");
- // If setName() does not set the name to "malloc", then there is already a
- // declaration of "malloc". In that case, iterate over all calls to MallocF
- // and get them to call the declared "malloc" instead.
- if (MallocF->getName() != "malloc") {
- Constant *RealMallocF = M->getFunction("malloc");
- if (RealMallocF->getType() != MallocF->getType())
- RealMallocF = ConstantExpr::getBitCast(RealMallocF, MallocF->getType());
- MallocF->replaceAllUsesWith(RealMallocF);
- MallocF->eraseFromParent();
- MallocF = NULL;
- }
- }
-
-
// If there are entries in ForwardRefBlockAddresses at this point, they are
// references after the function was defined. Resolve those now.
while (!ForwardRefBlockAddresses.empty()) {
@@ -100,15 +89,16 @@ bool LLParser::ValidateEndOfModule() {
ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin());
}
-
- if (!ForwardRefTypes.empty())
- return Error(ForwardRefTypes.begin()->second.second,
- "use of undefined type named '" +
- ForwardRefTypes.begin()->first + "'");
- if (!ForwardRefTypeIDs.empty())
- return Error(ForwardRefTypeIDs.begin()->second.second,
- "use of undefined type '%" +
- Twine(ForwardRefTypeIDs.begin()->first) + "'");
+ for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i)
+ if (NumberedTypes[i].second.isValid())
+ return Error(NumberedTypes[i].second,
+ "use of undefined type '%" + Twine(i) + "'");
+
+ for (StringMap<std::pair<Type*, LocTy> >::iterator I =
+ NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I)
+ if (I->second.second.isValid())
+ return Error(I->second.second,
+ "use of undefined type named '" + I->getKey() + "'");
if (!ForwardRefVals.empty())
return Error(ForwardRefVals.begin()->second.second,
@@ -176,15 +166,12 @@ bool LLParser::ParseTopLevelEntities() {
switch (Lex.getKind()) {
default: return TokError("expected top-level entity");
case lltok::Eof: return false;
- //case lltok::kw_define:
case lltok::kw_declare: if (ParseDeclare()) return true; break;
case lltok::kw_define: if (ParseDefine()) return true; break;
case lltok::kw_module: if (ParseModuleAsm()) return true; break;
case lltok::kw_target: if (ParseTargetDefinition()) return true; break;
case lltok::kw_deplibs: if (ParseDepLibs()) return true; break;
- case lltok::kw_type: if (ParseUnnamedType()) return true; break;
case lltok::LocalVarID: if (ParseUnnamedType()) return true; break;
- case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0
case lltok::LocalVar: if (ParseNamedType()) return true; break;
case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break;
case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break;
@@ -304,45 +291,35 @@ bool LLParser::ParseDepLibs() {
}
/// ParseUnnamedType:
-/// ::= 'type' type
/// ::= LocalVarID '=' 'type' type
bool LLParser::ParseUnnamedType() {
- unsigned TypeID = NumberedTypes.size();
-
- // Handle the LocalVarID form.
- if (Lex.getKind() == lltok::LocalVarID) {
- if (Lex.getUIntVal() != TypeID)
- return Error(Lex.getLoc(), "type expected to be numbered '%" +
- Twine(TypeID) + "'");
- Lex.Lex(); // eat LocalVarID;
-
- if (ParseToken(lltok::equal, "expected '=' after name"))
- return true;
- }
-
LocTy TypeLoc = Lex.getLoc();
- if (ParseToken(lltok::kw_type, "expected 'type' after '='")) return true;
+ unsigned TypeID = Lex.getUIntVal();
+ Lex.Lex(); // eat LocalVarID;
- PATypeHolder Ty(Type::getVoidTy(Context));
- if (ParseType(Ty)) return true;
-
- // See if this type was previously referenced.
- std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
- FI = ForwardRefTypeIDs.find(TypeID);
- if (FI != ForwardRefTypeIDs.end()) {
- if (FI->second.first.get() == Ty)
- return Error(TypeLoc, "self referential type is invalid");
+ if (ParseToken(lltok::equal, "expected '=' after name") ||
+ ParseToken(lltok::kw_type, "expected 'type' after '='"))
+ return true;
- cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
- Ty = FI->second.first.get();
- ForwardRefTypeIDs.erase(FI);
+ if (TypeID >= NumberedTypes.size())
+ NumberedTypes.resize(TypeID+1);
+
+ Type *Result = 0;
+ if (ParseStructDefinition(TypeLoc, "",
+ NumberedTypes[TypeID], Result)) return true;
+
+ if (!isa<StructType>(Result)) {
+ std::pair<Type*, LocTy> &Entry = NumberedTypes[TypeID];
+ if (Entry.first)
+ return Error(TypeLoc, "non-struct types may not be recursive");
+ Entry.first = Result;
+ Entry.second = SMLoc();
}
- NumberedTypes.push_back(Ty);
-
return false;
}
+
/// toplevelentity
/// ::= LocalVar '=' 'type' type
bool LLParser::ParseNamedType() {
@@ -350,42 +327,23 @@ bool LLParser::ParseNamedType() {
LocTy NameLoc = Lex.getLoc();
Lex.Lex(); // eat LocalVar.
- PATypeHolder Ty(Type::getVoidTy(Context));
-
if (ParseToken(lltok::equal, "expected '=' after name") ||
- ParseToken(lltok::kw_type, "expected 'type' after name") ||
- ParseType(Ty))
+ ParseToken(lltok::kw_type, "expected 'type' after name"))
return true;
-
- // Set the type name, checking for conflicts as we do so.
- bool AlreadyExists = M->addTypeName(Name, Ty);
- if (!AlreadyExists) return false;
-
- // See if this type is a forward reference. We need to eagerly resolve
- // types to allow recursive type redefinitions below.
- std::map<std::string, std::pair<PATypeHolder, LocTy> >::iterator
- FI = ForwardRefTypes.find(Name);
- if (FI != ForwardRefTypes.end()) {
- if (FI->second.first.get() == Ty)
- return Error(NameLoc, "self referential type is invalid");
-
- cast<DerivedType>(FI->second.first.get())->refineAbstractTypeTo(Ty);
- Ty = FI->second.first.get();
- ForwardRefTypes.erase(FI);
+
+ Type *Result = 0;
+ if (ParseStructDefinition(NameLoc, Name,
+ NamedTypes[Name], Result)) return true;
+
+ if (!isa<StructType>(Result)) {
+ std::pair<Type*, LocTy> &Entry = NamedTypes[Name];
+ if (Entry.first)
+ return Error(NameLoc, "non-struct types may not be recursive");
+ Entry.first = Result;
+ Entry.second = SMLoc();
}
-
- // Inserting a name that is already defined, get the existing name.
- const Type *Existing = M->getTypeByName(Name);
- assert(Existing && "Conflict but no matching type?!");
-
- // Otherwise, this is an attempt to redefine a type. That's okay if
- // the redefinition is identical to the original.
- // FIXME: REMOVE REDEFINITIONS IN LLVM 3.0
- if (Existing == Ty) return false;
-
- // Any other kind of (non-equivalent) redefinition is an error.
- return Error(NameLoc, "redefinition of type named '" + Name + "' of type '" +
- Ty->getDescription() + "'");
+
+ return false;
}
@@ -561,7 +519,7 @@ bool LLParser::ParseStandaloneMetadata() {
unsigned MetadataID = 0;
LocTy TyLoc;
- PATypeHolder Ty(Type::getVoidTy(Context));
+ Type *Ty = 0;
SmallVector<Value *, 16> Elts;
if (ParseUInt32(MetadataID) ||
ParseToken(lltok::equal, "expected '=' here") ||
@@ -693,7 +651,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
LocTy UnnamedAddrLoc;
LocTy TyLoc;
- PATypeHolder Ty(Type::getVoidTy(Context));
+ Type *Ty = 0;
if (ParseOptionalToken(lltok::kw_thread_local, ThreadLocal) ||
ParseOptionalAddrSpace(AddrSpace) ||
ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
@@ -811,24 +769,17 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
if (Val) {
if (Val->getType() == Ty) return Val;
Error(Loc, "'@" + Name + "' defined with type '" +
- Val->getType()->getDescription() + "'");
+ getTypeString(Val->getType()) + "'");
return 0;
}
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal;
- if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
- // Function types can return opaque but functions can't.
- if (FT->getReturnType()->isOpaqueTy()) {
- Error(Loc, "function may not return opaque type");
- return 0;
- }
-
+ if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, Name, M);
- } else {
+ else
FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
GlobalValue::ExternalWeakLinkage, 0, Name);
- }
ForwardRefVals[Name] = std::make_pair(FwdVal, Loc);
return FwdVal;
@@ -856,23 +807,17 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
if (Val) {
if (Val->getType() == Ty) return Val;
Error(Loc, "'@" + Twine(ID) + "' defined with type '" +
- Val->getType()->getDescription() + "'");
+ getTypeString(Val->getType()) + "'");
return 0;
}
// Otherwise, create a new forward reference for this value and remember it.
GlobalValue *FwdVal;
- if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
- // Function types can return opaque but functions can't.
- if (FT->getReturnType()->isOpaqueTy()) {
- Error(Loc, "function may not return opaque type");
- return 0;
- }
+ if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType()))
FwdVal = Function::Create(FT, GlobalValue::ExternalWeakLinkage, "", M);
- } else {
+ else
FwdVal = new GlobalVariable(*M, PTy->getElementType(), false,
GlobalValue::ExternalWeakLinkage, 0, "");
- }
ForwardRefValIDs[ID] = std::make_pair(FwdVal, Loc);
return FwdVal;
@@ -931,33 +876,23 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) {
/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind
/// indicates what kind of attribute list this is: 0: function arg, 1: result,
/// 2: function attr.
-/// 3: function arg after value: FIXME: REMOVE IN LLVM 3.0
bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
Attrs = Attribute::None;
LocTy AttrLoc = Lex.getLoc();
while (1) {
switch (Lex.getKind()) {
- case lltok::kw_sext:
- case lltok::kw_zext:
- // Treat these as signext/zeroext if they occur in the argument list after
- // the value, as in "call i8 @foo(i8 10 sext)". If they occur before the
- // value, as in "call i8 @foo(i8 sext (" then it is part of a constant
- // expr.
- // FIXME: REMOVE THIS IN LLVM 3.0
- if (AttrKind == 3) {
- if (Lex.getKind() == lltok::kw_sext)
- Attrs |= Attribute::SExt;
- else
- Attrs |= Attribute::ZExt;
- break;
- }
- // FALL THROUGH.
default: // End of attributes.
if (AttrKind != 2 && (Attrs & Attribute::FunctionOnly))
return Error(AttrLoc, "invalid use of function-only attribute");
- if (AttrKind != 0 && AttrKind != 3 && (Attrs & Attribute::ParameterOnly))
+ // As a hack, we allow "align 2" on functions as a synonym for
+ // "alignstack 2".
+ if (AttrKind == 2 &&
+ (Attrs & ~(Attribute::FunctionOnly | Attribute::Alignment)))
+ return Error(AttrLoc, "invalid use of attribute on a function");
+
+ if (AttrKind != 0 && (Attrs & Attribute::ParameterOnly))
return Error(AttrLoc, "invalid use of parameter-only attribute");
return false;
@@ -985,6 +920,7 @@ bool LLParser::ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind) {
case lltok::kw_noimplicitfloat: Attrs |= Attribute::NoImplicitFloat; break;
case lltok::kw_naked: Attrs |= Attribute::Naked; break;
case lltok::kw_hotpatch: Attrs |= Attribute::Hotpatch; break;
+ case lltok::kw_nonlazybind: Attrs |= Attribute::NonLazyBind; break;
case lltok::kw_alignstack: {
unsigned Alignment;
@@ -1262,166 +1198,68 @@ bool LLParser::ParseIndexList(SmallVectorImpl<unsigned> &Indices,
// Type Parsing.
//===----------------------------------------------------------------------===//
-/// ParseType - Parse and resolve a full type.
-bool LLParser::ParseType(PATypeHolder &Result, bool AllowVoid) {
- LocTy TypeLoc = Lex.getLoc();
- if (ParseTypeRec(Result)) return true;
-
- // Verify no unresolved uprefs.
- if (!UpRefs.empty())
- return Error(UpRefs.back().Loc, "invalid unresolved type up reference");
-
- if (!AllowVoid && Result.get()->isVoidTy())
- return Error(TypeLoc, "void type only allowed for function results");
-
- return false;
-}
-
-/// HandleUpRefs - Every time we finish a new layer of types, this function is
-/// called. It loops through the UpRefs vector, which is a list of the
-/// currently active types. For each type, if the up-reference is contained in
-/// the newly completed type, we decrement the level count. When the level
-/// count reaches zero, the up-referenced type is the type that is passed in:
-/// thus we can complete the cycle.
-///
-PATypeHolder LLParser::HandleUpRefs(const Type *ty) {
- // If Ty isn't abstract, or if there are no up-references in it, then there is
- // nothing to resolve here.
- if (!ty->isAbstract() || UpRefs.empty()) return ty;
-
- PATypeHolder Ty(ty);
-#if 0
- dbgs() << "Type '" << Ty->getDescription()
- << "' newly formed. Resolving upreferences.\n"
- << UpRefs.size() << " upreferences active!\n";
-#endif
-
- // If we find any resolvable upreferences (i.e., those whose NestingLevel goes
- // to zero), we resolve them all together before we resolve them to Ty. At
- // the end of the loop, if there is anything to resolve to Ty, it will be in
- // this variable.
- OpaqueType *TypeToResolve = 0;
-
- for (unsigned i = 0; i != UpRefs.size(); ++i) {
- // Determine if 'Ty' directly contains this up-references 'LastContainedTy'.
- bool ContainsType =
- std::find(Ty->subtype_begin(), Ty->subtype_end(),
- UpRefs[i].LastContainedTy) != Ty->subtype_end();
-
-#if 0
- dbgs() << " UR#" << i << " - TypeContains(" << Ty->getDescription() << ", "
- << UpRefs[i].LastContainedTy->getDescription() << ") = "
- << (ContainsType ? "true" : "false")
- << " level=" << UpRefs[i].NestingLevel << "\n";
-#endif
- if (!ContainsType)
- continue;
-
- // Decrement level of upreference
- unsigned Level = --UpRefs[i].NestingLevel;
- UpRefs[i].LastContainedTy = Ty;
-
- // If the Up-reference has a non-zero level, it shouldn't be resolved yet.
- if (Level != 0)
- continue;
-
-#if 0
- dbgs() << " * Resolving upreference for " << UpRefs[i].UpRefTy << "\n";
-#endif
- if (!TypeToResolve)
- TypeToResolve = UpRefs[i].UpRefTy;
- else
- UpRefs[i].UpRefTy->refineAbstractTypeTo(TypeToResolve);
- UpRefs.erase(UpRefs.begin()+i); // Remove from upreference list.
- --i; // Do not skip the next element.
- }
-
- if (TypeToResolve)
- TypeToResolve->refineAbstractTypeTo(Ty);
-
- return Ty;
-}
-
-
-/// ParseTypeRec - The recursive function used to process the internal
-/// implementation details of types.
-bool LLParser::ParseTypeRec(PATypeHolder &Result) {
+/// ParseType - Parse a type.
+bool LLParser::ParseType(Type *&Result, bool AllowVoid) {
+ SMLoc TypeLoc = Lex.getLoc();
switch (Lex.getKind()) {
default:
return TokError("expected type");
case lltok::Type:
- // TypeRec ::= 'float' | 'void' (etc)
+ // Type ::= 'float' | 'void' (etc)
Result = Lex.getTyVal();
Lex.Lex();
break;
- case lltok::kw_opaque:
- // TypeRec ::= 'opaque'
- Result = OpaqueType::get(Context);
- Lex.Lex();
- break;
case lltok::lbrace:
- // TypeRec ::= '{' ... '}'
- if (ParseStructType(Result, false))
+ // Type ::= StructType
+ if (ParseAnonStructType(Result, false))
return true;
break;
case lltok::lsquare:
- // TypeRec ::= '[' ... ']'
+ // Type ::= '[' ... ']'
Lex.Lex(); // eat the lsquare.
if (ParseArrayVectorType(Result, false))
return true;
break;
case lltok::less: // Either vector or packed struct.
- // TypeRec ::= '<' ... '>'
+ // Type ::= '<' ... '>'
Lex.Lex();
if (Lex.getKind() == lltok::lbrace) {
- if (ParseStructType(Result, true) ||
+ if (ParseAnonStructType(Result, true) ||
ParseToken(lltok::greater, "expected '>' at end of packed struct"))
return true;
} else if (ParseArrayVectorType(Result, true))
return true;
break;
- case lltok::LocalVar:
- case lltok::StringConstant: // FIXME: REMOVE IN LLVM 3.0
- // TypeRec ::= %foo
- if (const Type *T = M->getTypeByName(Lex.getStrVal())) {
- Result = T;
- } else {
- Result = OpaqueType::get(Context);
- ForwardRefTypes.insert(std::make_pair(Lex.getStrVal(),
- std::make_pair(Result,
- Lex.getLoc())));
- M->addTypeName(Lex.getStrVal(), Result.get());
+ case lltok::LocalVar: {
+ // Type ::= %foo
+ std::pair<Type*, LocTy> &Entry = NamedTypes[Lex.getStrVal()];
+
+ // If the type hasn't been defined yet, create a forward definition and
+ // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) {
+ Entry.first = StructType::createNamed(Context, Lex.getStrVal());
+ Entry.second = Lex.getLoc();
}
+ Result = Entry.first;
Lex.Lex();
break;
+ }
- case lltok::LocalVarID:
- // TypeRec ::= %4
- if (Lex.getUIntVal() < NumberedTypes.size())
- Result = NumberedTypes[Lex.getUIntVal()];
- else {
- std::map<unsigned, std::pair<PATypeHolder, LocTy> >::iterator
- I = ForwardRefTypeIDs.find(Lex.getUIntVal());
- if (I != ForwardRefTypeIDs.end())
- Result = I->second.first;
- else {
- Result = OpaqueType::get(Context);
- ForwardRefTypeIDs.insert(std::make_pair(Lex.getUIntVal(),
- std::make_pair(Result,
- Lex.getLoc())));
- }
+ case lltok::LocalVarID: {
+ // Type ::= %4
+ if (Lex.getUIntVal() >= NumberedTypes.size())
+ NumberedTypes.resize(Lex.getUIntVal()+1);
+ std::pair<Type*, LocTy> &Entry = NumberedTypes[Lex.getUIntVal()];
+
+ // If the type hasn't been defined yet, create a forward definition and
+ // remember where that forward def'n was seen (in case it never is defined).
+ if (Entry.first == 0) {
+ Entry.first = StructType::createNamed(Context, "");
+ Entry.second = Lex.getLoc();
}
+ Result = Entry.first;
Lex.Lex();
break;
- case lltok::backslash: {
- // TypeRec ::= '\' 4
- Lex.Lex();
- unsigned Val;
- if (ParseUInt32(Val)) return true;
- OpaqueType *OT = OpaqueType::get(Context); //Use temporary placeholder.
- UpRefs.push_back(UpRefRecord(Lex.getLoc(), Val, OT));
- Result = OT;
- break;
}
}
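This hunk is the heart of the new type parsing: opaque placeholders and up-references are gone, and a forward-referenced %name or %N becomes an empty named struct created on first mention, with the use location kept until a definition clears it (ValidateEndOfModule turns any still-valid location into a "use of undefined type" error). A condensed sketch of that bookkeeping, assuming the StructType::createNamed API used above:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/DerivedTypes.h"
    #include "llvm/Support/SMLoc.h"
    #include <utility>
    using namespace llvm;

    typedef std::pair<Type*, SMLoc> TypeEntry;

    static Type *getForwardRefType(LLVMContext &Ctx, StringRef Name,
                                   TypeEntry &Entry, SMLoc UseLoc) {
      if (Entry.first == 0) {
        Entry.first = StructType::createNamed(Ctx, Name);
        Entry.second = UseLoc;  // still valid later => used but never defined
      }
      return Entry.first;
    }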
@@ -1429,34 +1267,37 @@ bool LLParser::ParseTypeRec(PATypeHolder &Result) {
while (1) {
switch (Lex.getKind()) {
// End of type.
- default: return false;
+ default:
+ if (!AllowVoid && Result->isVoidTy())
+ return Error(TypeLoc, "void type only allowed for function results");
+ return false;
- // TypeRec ::= TypeRec '*'
+ // Type ::= Type '*'
case lltok::star:
- if (Result.get()->isLabelTy())
+ if (Result->isLabelTy())
return TokError("basic block pointers are invalid");
- if (Result.get()->isVoidTy())
- return TokError("pointers to void are invalid; use i8* instead");
- if (!PointerType::isValidElementType(Result.get()))
+ if (Result->isVoidTy())
+ return TokError("pointers to void are invalid - use i8* instead");
+ if (!PointerType::isValidElementType(Result))
return TokError("pointer to this type is invalid");
- Result = HandleUpRefs(PointerType::getUnqual(Result.get()));
+ Result = PointerType::getUnqual(Result);
Lex.Lex();
break;
- // TypeRec ::= TypeRec 'addrspace' '(' uint32 ')' '*'
+ // Type ::= Type 'addrspace' '(' uint32 ')' '*'
case lltok::kw_addrspace: {
- if (Result.get()->isLabelTy())
+ if (Result->isLabelTy())
return TokError("basic block pointers are invalid");
- if (Result.get()->isVoidTy())
+ if (Result->isVoidTy())
return TokError("pointers to void are invalid; use i8* instead");
- if (!PointerType::isValidElementType(Result.get()))
+ if (!PointerType::isValidElementType(Result))
return TokError("pointer to this type is invalid");
unsigned AddrSpace;
if (ParseOptionalAddrSpace(AddrSpace) ||
ParseToken(lltok::star, "expected '*' in address space"))
return true;
- Result = HandleUpRefs(PointerType::get(Result.get(), AddrSpace));
+ Result = PointerType::get(Result, AddrSpace);
break;
}
@@ -1487,7 +1328,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Parse the argument.
LocTy ArgLoc;
- PATypeHolder ArgTy(Type::getVoidTy(Context));
+ Type *ArgTy = 0;
unsigned ArgAttrs1 = Attribute::None;
unsigned ArgAttrs2 = Attribute::None;
Value *V;
@@ -1495,11 +1336,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
return true;
// Otherwise, handle normal operands.
- if (ParseOptionalAttrs(ArgAttrs1, 0) ||
- ParseValue(ArgTy, V, PFS) ||
- // FIXME: Should not allow attributes after the argument, remove this
- // in LLVM 3.0.
- ParseOptionalAttrs(ArgAttrs2, 3))
+ if (ParseOptionalAttrs(ArgAttrs1, 0) || ParseValue(ArgTy, V, PFS))
return true;
ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2));
}
@@ -1511,7 +1348,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
/// ParseArgumentList - Parse the argument list for a function type or function
-/// prototype. If 'inType' is true then we are parsing a FunctionType.
+/// prototype.
/// ::= '(' ArgTypeListI ')'
/// ArgTypeListI
/// ::= /*empty*/
@@ -1519,8 +1356,8 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
/// ::= ArgTypeList ',' '...'
/// ::= ArgType (',' ArgType)*
///
-bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
- bool &isVarArg, bool inType) {
+bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
+ bool &isVarArg){
isVarArg = false;
assert(Lex.getKind() == lltok::lparen);
Lex.Lex(); // eat the (.
@@ -1532,21 +1369,17 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
Lex.Lex();
} else {
LocTy TypeLoc = Lex.getLoc();
- PATypeHolder ArgTy(Type::getVoidTy(Context));
+ Type *ArgTy = 0;
unsigned Attrs;
std::string Name;
- // If we're parsing a type, use ParseTypeRec, because we allow recursive
- // types (such as a function returning a pointer to itself). If parsing a
- // function prototype, we require fully resolved types.
- if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
+ if (ParseType(ArgTy) ||
ParseOptionalAttrs(Attrs, 0)) return true;
if (ArgTy->isVoidTy())
return Error(TypeLoc, "argument can not have void type");
- if (Lex.getKind() == lltok::LocalVar ||
- Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+ if (Lex.getKind() == lltok::LocalVar) {
Name = Lex.getStrVal();
Lex.Lex();
}
@@ -1565,21 +1398,19 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
// Otherwise must be an argument type.
TypeLoc = Lex.getLoc();
- if ((inType ? ParseTypeRec(ArgTy) : ParseType(ArgTy)) ||
- ParseOptionalAttrs(Attrs, 0)) return true;
+ if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true;
if (ArgTy->isVoidTy())
return Error(TypeLoc, "argument can not have void type");
- if (Lex.getKind() == lltok::LocalVar ||
- Lex.getKind() == lltok::StringConstant) { // FIXME: REMOVE IN LLVM 3.0
+ if (Lex.getKind() == lltok::LocalVar) {
Name = Lex.getStrVal();
Lex.Lex();
} else {
Name = "";
}
- if (!ArgTy->isFirstClassType() && !ArgTy->isOpaqueTy())
+ if (!ArgTy->isFirstClassType())
return Error(TypeLoc, "invalid type for function argument");
ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
@@ -1591,94 +1422,142 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
/// ParseFunctionType
/// ::= Type ArgumentList OptionalAttrs
-bool LLParser::ParseFunctionType(PATypeHolder &Result) {
+bool LLParser::ParseFunctionType(Type *&Result) {
assert(Lex.getKind() == lltok::lparen);
if (!FunctionType::isValidReturnType(Result))
return TokError("invalid function return type");
- std::vector<ArgInfo> ArgList;
+ SmallVector<ArgInfo, 8> ArgList;
bool isVarArg;
- unsigned Attrs;
- if (ParseArgumentList(ArgList, isVarArg, true) ||
- // FIXME: Allow, but ignore attributes on function types!
- // FIXME: Remove in LLVM 3.0
- ParseOptionalAttrs(Attrs, 2))
+ if (ParseArgumentList(ArgList, isVarArg))
return true;
// Reject names on the arguments lists.
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
if (!ArgList[i].Name.empty())
return Error(ArgList[i].Loc, "argument name invalid in function type");
- if (!ArgList[i].Attrs != 0) {
- // Allow but ignore attributes on function types; this permits
- // auto-upgrade.
- // FIXME: REJECT ATTRIBUTES ON FUNCTION TYPES in LLVM 3.0
- }
+ if (ArgList[i].Attrs != 0)
+ return Error(ArgList[i].Loc,
+ "argument attributes invalid in function type");
}
- std::vector<const Type*> ArgListTy;
+ SmallVector<Type*, 16> ArgListTy;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
- ArgListTy.push_back(ArgList[i].Type);
+ ArgListTy.push_back(ArgList[i].Ty);
+
+ Result = FunctionType::get(Result, ArgListTy, isVarArg);
+ return false;
+}
- Result = HandleUpRefs(FunctionType::get(Result.get(),
- ArgListTy, isVarArg));
+/// ParseAnonStructType - Parse an anonymous struct type, which is inlined into
+/// other structs.
+bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) {
+ SmallVector<Type*, 8> Elts;
+ if (ParseStructBody(Elts)) return true;
+
+ Result = StructType::get(Context, Elts, Packed);
return false;
}
+/// ParseStructDefinition - Parse a struct in a 'type' definition.
+bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type*, LocTy> &Entry,
+ Type *&ResultTy) {
+ // If the type was already defined, diagnose the redefinition.
+ if (Entry.first && !Entry.second.isValid())
+ return Error(TypeLoc, "redefinition of type");
+
+ // If we have opaque, just return without filling in the definition for the
+ // struct. This counts as a definition as far as the .ll file goes.
+ if (EatIfPresent(lltok::kw_opaque)) {
+ // This type is being defined, so clear the location to indicate this.
+ Entry.second = SMLoc();
+
+ // If this type number has never been uttered, create it.
+ if (Entry.first == 0)
+ Entry.first = StructType::createNamed(Context, Name);
+ ResultTy = Entry.first;
+ return false;
+ }
+
+ // If the type starts with '<', then it is either a packed struct or a vector.
+ bool isPacked = EatIfPresent(lltok::less);
+
+ // If we don't have a struct, then we have a random type alias, which we
+ // accept for compatibility with old files. These types are not allowed to be
+ // forward referenced and not allowed to be recursive.
+ if (Lex.getKind() != lltok::lbrace) {
+ if (Entry.first)
+ return Error(TypeLoc, "forward references to non-struct type");
+
+ ResultTy = 0;
+ if (isPacked)
+ return ParseArrayVectorType(ResultTy, true);
+ return ParseType(ResultTy);
+ }
+
+ // This type is being defined, so clear the location to indicate this.
+ Entry.second = SMLoc();
+
+ // If this type number has never been uttered, create it.
+ if (Entry.first == 0)
+ Entry.first = StructType::createNamed(Context, Name);
+
+ StructType *STy = cast<StructType>(Entry.first);
+
+ SmallVector<Type*, 8> Body;
+ if (ParseStructBody(Body) ||
+ (isPacked && ParseToken(lltok::greater, "expected '>' in packed struct")))
+ return true;
+
+ STy->setBody(Body, isPacked);
+ ResultTy = STy;
+ return false;
+}
+
+
/// ParseStructType: Handles packed and unpacked types. </> parsed elsewhere.
-/// TypeRec
+/// StructType
/// ::= '{' '}'
-/// ::= '{' TypeRec (',' TypeRec)* '}'
+/// ::= '{' Type (',' Type)* '}'
/// ::= '<' '{' '}' '>'
-/// ::= '<' '{' TypeRec (',' TypeRec)* '}' '>'
-bool LLParser::ParseStructType(PATypeHolder &Result, bool Packed) {
+/// ::= '<' '{' Type (',' Type)* '}' '>'
+bool LLParser::ParseStructBody(SmallVectorImpl<Type*> &Body) {
assert(Lex.getKind() == lltok::lbrace);
Lex.Lex(); // Consume the '{'
- if (EatIfPresent(lltok::rbrace)) {
- Result = StructType::get(Context, Packed);
+ // Handle the empty struct.
+ if (EatIfPresent(lltok::rbrace))
return false;
- }
- std::vector<PATypeHolder> ParamsList;
LocTy EltTyLoc = Lex.getLoc();
- if (ParseTypeRec(Result)) return true;
- ParamsList.push_back(Result);
+ Type *Ty = 0;
+ if (ParseType(Ty)) return true;
+ Body.push_back(Ty);
- if (Result->isVoidTy())
- return Error(EltTyLoc, "struct element can not have void type");
- if (!StructType::isValidElementType(Result))
+ if (!StructType::isValidElementType(Ty))
return Error(EltTyLoc, "invalid element type for struct");
while (EatIfPresent(lltok::comma)) {
EltTyLoc = Lex.getLoc();
- if (ParseTypeRec(Result)) return true;
+ if (ParseType(Ty)) return true;
- if (Result->isVoidTy())
- return Error(EltTyLoc, "struct element can not have void type");
- if (!StructType::isValidElementType(Result))
+ if (!StructType::isValidElementType(Ty))
return Error(EltTyLoc, "invalid element type for struct");
- ParamsList.push_back(Result);
+ Body.push_back(Ty);
}
- if (ParseToken(lltok::rbrace, "expected '}' at end of struct"))
- return true;
-
- std::vector<const Type*> ParamsListTy;
- for (unsigned i = 0, e = ParamsList.size(); i != e; ++i)
- ParamsListTy.push_back(ParamsList[i].get());
- Result = HandleUpRefs(StructType::get(Context, ParamsListTy, Packed));
- return false;
+ return ParseToken(lltok::rbrace, "expected '}' at end of struct");
}
/// ParseArrayVectorType - Parse an array or vector type, assuming the first
/// token has already been consumed.
-/// TypeRec
+/// Type
/// ::= '[' APSINTVAL 'x' Types ']'
/// ::= '<' APSINTVAL 'x' Types '>'
-bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
+bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned() ||
Lex.getAPSIntVal().getBitWidth() > 64)
return TokError("expected number in array or vector size");
@@ -1691,11 +1570,8 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
return true;
LocTy TypeLoc = Lex.getLoc();
- PATypeHolder EltTy(Type::getVoidTy(Context));
- if (ParseTypeRec(EltTy)) return true;
-
- if (EltTy->isVoidTy())
- return Error(TypeLoc, "array and vector element type cannot be void");
+ Type *EltTy = 0;
+ if (ParseType(EltTy)) return true;
if (ParseToken(isVector ? lltok::greater : lltok::rsquare,
"expected end of sequential type"))
@@ -1712,7 +1588,7 @@ bool LLParser::ParseArrayVectorType(PATypeHolder &Result, bool isVector) {
} else {
if (!ArrayType::isValidElementType(EltTy))
return Error(TypeLoc, "invalid array element type");
- Result = HandleUpRefs(ArrayType::get(EltTy, Size));
+ Result = ArrayType::get(EltTy, Size);
}
return false;
}
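
The dedicated void checks could go because ParseType already rejects 'void'
unless explicitly allowed, and the isValidElementType checks catch the other
invalid element types. The sequential-type factories now take plain Type*;
for reference, a sketch (names illustrative):

  #include "llvm/DerivedTypes.h"
  using namespace llvm;

  void makeSequentialTypes(LLVMContext &C) {
    Type *Elt = Type::getFloatTy(C);
    ArrayType  *AT = ArrayType::get(Elt, 16);  // [16 x float]
    VectorType *VT = VectorType::get(Elt, 4);  // <4 x float>
    (void)AT; (void)VT;
  }
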
@@ -1812,12 +1688,12 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
P.Error(Loc, "'%" + Name + "' is not a basic block");
else
P.Error(Loc, "'%" + Name + "' defined with type '" +
- Val->getType()->getDescription() + "'");
+ getTypeString(Val->getType()) + "'");
return 0;
}
// Don't make placeholders with invalid type.
- if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
+ if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
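
Throughout the patch, Type::getDescription() gives way to a getTypeString
helper whose definition is not in these hunks; presumably it is the usual
raw_string_ostream pattern, roughly:

  #include "llvm/Type.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>

  static std::string getTypeString(const llvm::Type *T) {
    std::string S;
    llvm::raw_string_ostream OS(S);
    OS << *T;          // Type prints via raw_ostream now
    return OS.str();
  }
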
@@ -1854,11 +1730,11 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
P.Error(Loc, "'%" + Twine(ID) + "' is not a basic block");
else
P.Error(Loc, "'%" + Twine(ID) + "' defined with type '" +
- Val->getType()->getDescription() + "'");
+ getTypeString(Val->getType()) + "'");
return 0;
}
- if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
+ if (!Ty->isFirstClassType() && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
@@ -1902,7 +1778,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
if (FI != ForwardRefValIDs.end()) {
if (FI->second.first->getType() != Inst->getType())
return P.Error(NameLoc, "instruction forward referenced with type '" +
- FI->second.first->getType()->getDescription() + "'");
+ getTypeString(FI->second.first->getType()) + "'");
FI->second.first->replaceAllUsesWith(Inst);
delete FI->second.first;
ForwardRefValIDs.erase(FI);
@@ -1918,7 +1794,7 @@ bool LLParser::PerFunctionState::SetInstName(int NameID,
if (FI != ForwardRefVals.end()) {
if (FI->second.first->getType() != Inst->getType())
return P.Error(NameLoc, "instruction forward referenced with type '" +
- FI->second.first->getType()->getDescription() + "'");
+ getTypeString(FI->second.first->getType()) + "'");
FI->second.first->replaceAllUsesWith(Inst);
delete FI->second.first;
ForwardRefVals.erase(FI);
@@ -2001,7 +1877,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ID.Kind = ValID::t_LocalID;
break;
case lltok::LocalVar: // %foo
- case lltok::StringConstant: // "foo" - FIXME: REMOVE IN LLVM 3.0
ID.StrVal = Lex.getStrVal();
ID.Kind = ValID::t_LocalName;
break;
@@ -2035,9 +1910,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ParseToken(lltok::rbrace, "expected end of struct constant"))
return true;
- ID.ConstantVal = ConstantStruct::get(Context, Elts.data(),
- Elts.size(), false);
- ID.Kind = ValID::t_Constant;
+ ID.ConstantStructElts = new Constant*[Elts.size()];
+ ID.UIntVal = Elts.size();
+ memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.Kind = ValID::t_ConstantStruct;
return false;
}
case lltok::less: {
@@ -2055,9 +1931,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (isPackedStruct) {
- ID.ConstantVal =
- ConstantStruct::get(Context, Elts.data(), Elts.size(), true);
- ID.Kind = ValID::t_Constant;
+ ID.ConstantStructElts = new Constant*[Elts.size()];
+ memcpy(ID.ConstantStructElts, Elts.data(), Elts.size()*sizeof(Elts[0]));
+ ID.UIntVal = Elts.size();
+ ID.Kind = ValID::t_PackedConstantStruct;
return false;
}
@@ -2074,7 +1951,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (Elts[i]->getType() != Elts[0]->getType())
return Error(FirstEltLoc,
"vector element #" + Twine(i) +
- " is not of type '" + Elts[0]->getType()->getDescription());
+ " is not of type '" + getTypeString(Elts[0]->getType()));
ID.ConstantVal = ConstantVector::get(Elts);
ID.Kind = ValID::t_Constant;
@@ -2098,7 +1975,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (!Elts[0]->getType()->isFirstClassType())
return Error(FirstEltLoc, "invalid array element type: " +
- Elts[0]->getType()->getDescription());
+ getTypeString(Elts[0]->getType()));
ArrayType *ATy = ArrayType::get(Elts[0]->getType(), Elts.size());
@@ -2107,10 +1984,10 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (Elts[i]->getType() != Elts[0]->getType())
return Error(FirstEltLoc,
"array element #" + Twine(i) +
- " is not of type '" +Elts[0]->getType()->getDescription());
+ " is not of type '" + getTypeString(Elts[0]->getType()));
}
- ID.ConstantVal = ConstantArray::get(ATy, Elts.data(), Elts.size());
+ ID.ConstantVal = ConstantArray::get(ATy, Elts);
ID.Kind = ValID::t_Constant;
return false;
}
@@ -2179,7 +2056,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
case lltok::kw_inttoptr:
case lltok::kw_ptrtoint: {
unsigned Opc = Lex.getUIntVal();
- PATypeHolder DestTy(Type::getVoidTy(Context));
+ Type *DestTy = 0;
Constant *SrcVal;
Lex.Lex();
if (ParseToken(lltok::lparen, "expected '(' after constantexpr cast") ||
@@ -2190,8 +2067,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (!CastInst::castIsValid((Instruction::CastOps)Opc, SrcVal, DestTy))
return Error(ID.Loc, "invalid cast opcode for cast from '" +
- SrcVal->getType()->getDescription() + "' to '" +
- DestTy->getDescription() + "'");
+ getTypeString(SrcVal->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
ID.ConstantVal = ConstantExpr::getCast((Instruction::CastOps)Opc,
SrcVal, DestTy);
ID.Kind = ValID::t_Constant;
@@ -2209,11 +2086,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (!Val->getType()->isAggregateType())
return Error(ID.Loc, "extractvalue operand must be aggregate type");
- if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
- Indices.end()))
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
return Error(ID.Loc, "invalid indices for extractvalue");
- ID.ConstantVal =
- ConstantExpr::getExtractValue(Val, Indices.data(), Indices.size());
+ ID.ConstantVal = ConstantExpr::getExtractValue(Val, Indices);
ID.Kind = ValID::t_Constant;
return false;
}
@@ -2230,11 +2105,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (!Val0->getType()->isAggregateType())
return Error(ID.Loc, "insertvalue operand must be aggregate type");
- if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
- Indices.end()))
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
return Error(ID.Loc, "invalid indices for insertvalue");
- ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1,
- Indices.data(), Indices.size());
+ ID.ConstantVal = ConstantExpr::getInsertValue(Val0, Val1, Indices);
ID.Kind = ValID::t_Constant;
return false;
}
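
Both aggregate constant-expression folders now take an ArrayRef<unsigned>
index path instead of a begin/end iterator pair. A minimal sketch of the new
call shape (replaceField and the index path are illustrative):

  #include "llvm/Constants.h"
  using namespace llvm;

  Constant *replaceField(Constant *Agg, Constant *Val) {
    unsigned Idxs[] = { 1 };  // index path into the aggregate
    Constant *Old = ConstantExpr::getExtractValue(Agg, Idxs);
    (void)Old;                // previous value, if a caller wants it
    return ConstantExpr::getInsertValue(Agg, Val, Idxs);
  }
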
@@ -2462,9 +2335,9 @@ bool LLParser::ParseGlobalValue(const Type *Ty, Constant *&C) {
}
bool LLParser::ParseGlobalTypeAndValue(Constant *&V) {
- PATypeHolder Type(Type::getVoidTy(Context));
- return ParseType(Type) ||
- ParseGlobalValue(Type, V);
+ Type *Ty = 0;
+ return ParseType(Ty) ||
+ ParseGlobalValue(Ty, V);
}
/// ParseGlobalValueVector
@@ -2600,7 +2473,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
if (V->getType() != Ty)
return Error(ID.Loc, "floating point constant does not have type '" +
- Ty->getDescription() + "'");
+ getTypeString(Ty) + "'");
return false;
case ValID::t_Null:
@@ -2610,8 +2483,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
return false;
case ValID::t_Undef:
// FIXME: LabelTy should not be a first-class type.
- if ((!Ty->isFirstClassType() || Ty->isLabelTy()) &&
- !Ty->isOpaqueTy())
+ if (!Ty->isFirstClassType() || Ty->isLabelTy())
return Error(ID.Loc, "invalid type for undef constant");
V = UndefValue::get(Ty);
return false;
@@ -2632,20 +2504,40 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
V = ID.ConstantVal;
return false;
+ case ValID::t_ConstantStruct:
+ case ValID::t_PackedConstantStruct:
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
+ if (ST->getNumElements() != ID.UIntVal)
+ return Error(ID.Loc,
+ "initializer with struct type has wrong # elements");
+ if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct))
+        return Error(ID.Loc, "packedness of initializer and type don't match");
+
+ // Verify that the elements are compatible with the structtype.
+ for (unsigned i = 0, e = ID.UIntVal; i != e; ++i)
+ if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i))
+ return Error(ID.Loc, "element " + Twine(i) +
+ " of struct initializer doesn't match struct element type");
+
+ V = ConstantStruct::get(ST, ArrayRef<Constant*>(ID.ConstantStructElts,
+ ID.UIntVal));
+ } else
+ return Error(ID.Loc, "constant expression type mismatch");
+ return false;
}
}
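
Struct initializers now come out of ParseValID as the untyped
t_ConstantStruct/t_PackedConstantStruct kinds and are only materialized once
the declared type is known, so an initializer such as '{ i32 1, i16 2 }' can
be checked field by field against a named struct type. The final step is the
new typed factory; as a sketch (makePair is illustrative, and A/B must
already have PairTy's field types):

  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
  using namespace llvm;

  Constant *makePair(StructType *PairTy, Constant *A, Constant *B) {
    Constant *Elts[] = { A, B };
    return ConstantStruct::get(PairTy, Elts);
  }
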
-bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+bool LLParser::ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS) {
V = 0;
ValID ID;
- return ParseValID(ID, &PFS) ||
- ConvertValIDToValue(Ty, ID, V, &PFS);
+ return ParseValID(ID, PFS) ||
+ ConvertValIDToValue(Ty, ID, V, PFS);
}
-bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
- PATypeHolder T(Type::getVoidTy(Context));
- return ParseType(T) ||
- ParseValue(T, V, PFS);
+bool LLParser::ParseTypeAndValue(Value *&V, PerFunctionState *PFS) {
+ Type *Ty = 0;
+ return ParseType(Ty) ||
+ ParseValue(Ty, V, PFS);
}
bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
@@ -2671,7 +2563,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
unsigned Visibility, RetAttrs;
CallingConv::ID CC;
- PATypeHolder RetType(Type::getVoidTy(Context));
+ Type *RetType = 0;
LocTy RetTypeLoc = Lex.getLoc();
if (ParseOptionalLinkage(Linkage) ||
ParseOptionalVisibility(Visibility) ||
@@ -2708,8 +2600,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
return Error(LinkageLoc, "invalid function linkage type");
}
- if (!FunctionType::isValidReturnType(RetType) ||
- RetType->isOpaqueTy())
+ if (!FunctionType::isValidReturnType(RetType))
return Error(RetTypeLoc, "invalid function return type");
LocTy NameLoc = Lex.getLoc();
@@ -2732,7 +2623,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
if (Lex.getKind() != lltok::lparen)
return TokError("expected '(' in function argument list");
- std::vector<ArgInfo> ArgList;
+ SmallVector<ArgInfo, 8> ArgList;
bool isVarArg;
unsigned FuncAttrs;
std::string Section;
@@ -2741,7 +2632,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
bool UnnamedAddr;
LocTy UnnamedAddrLoc;
- if (ParseArgumentList(ArgList, isVarArg, false) ||
+ if (ParseArgumentList(ArgList, isVarArg) ||
ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr,
&UnnamedAddrLoc) ||
ParseOptionalAttrs(FuncAttrs, 2) ||
@@ -2760,21 +2651,14 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Okay, if we got here, the function is syntactically valid. Convert types
// and do semantic checks.
- std::vector<const Type*> ParamTypeList;
+ std::vector<Type*> ParamTypeList;
SmallVector<AttributeWithIndex, 8> Attrs;
- // FIXME : In 3.0, stop accepting zext, sext and inreg as optional function
- // attributes.
- unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
- if (FuncAttrs & ObsoleteFuncAttrs) {
- RetAttrs |= FuncAttrs & ObsoleteFuncAttrs;
- FuncAttrs &= ~ObsoleteFuncAttrs;
- }
if (RetAttrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(0, RetAttrs));
for (unsigned i = 0, e = ArgList.size(); i != e; ++i) {
- ParamTypeList.push_back(ArgList[i].Type);
+ ParamTypeList.push_back(ArgList[i].Ty);
if (ArgList[i].Attrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
}
@@ -2805,21 +2689,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
ForwardRefVals.erase(FRVI);
} else if ((Fn = M->getFunction(FunctionName))) {
- // If this function already exists in the symbol table, then it is
- // multiply defined. We accept a few cases for old backwards compat.
- // FIXME: Remove this stuff for LLVM 3.0.
- if (Fn->getType() != PFT || Fn->getAttributes() != PAL ||
- (!Fn->isDeclaration() && isDefine)) {
- // If the redefinition has different type or different attributes,
- // reject it. If both have bodies, reject it.
- return Error(NameLoc, "invalid redefinition of function '" +
- FunctionName + "'");
- } else if (Fn->isDeclaration()) {
- // Make sure to strip off any argument names so we can't get conflicts.
- for (Function::arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
- AI != AE; ++AI)
- AI->setName("");
- }
+ // Reject redefinitions.
+ return Error(NameLoc, "invalid redefinition of function '" +
+ FunctionName + "'");
} else if (M->getNamedValue(FunctionName)) {
return Error(NameLoc, "redefinition of function '@" + FunctionName + "'");
}
@@ -2858,10 +2730,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
// Add all of the arguments we parsed to the function.
Function::arg_iterator ArgIt = Fn->arg_begin();
for (unsigned i = 0, e = ArgList.size(); i != e; ++i, ++ArgIt) {
- // If we run out of arguments in the Function prototype, exit early.
- // FIXME: REMOVE THIS IN LLVM 3.0, this is just for the mismatch case above.
- if (ArgIt == Fn->arg_end()) break;
-
// If the argument has a name, insert it into the argument symbol table.
if (ArgList[i].Name.empty()) continue;
@@ -2879,10 +2747,9 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
/// ParseFunctionBody
/// ::= '{' BasicBlock+ '}'
-/// ::= 'begin' BasicBlock+ 'end' // FIXME: remove in LLVM 3.0
///
bool LLParser::ParseFunctionBody(Function &Fn) {
- if (Lex.getKind() != lltok::lbrace && Lex.getKind() != lltok::kw_begin)
+ if (Lex.getKind() != lltok::lbrace)
return TokError("expected '{' in function body");
Lex.Lex(); // eat the {.
@@ -2892,10 +2759,10 @@ bool LLParser::ParseFunctionBody(Function &Fn) {
PerFunctionState PFS(*this, Fn, FunctionNumber);
// We need at least one basic block.
- if (Lex.getKind() == lltok::rbrace || Lex.getKind() == lltok::kw_end)
+ if (Lex.getKind() == lltok::rbrace)
return TokError("function body requires at least one basic block");
- while (Lex.getKind() != lltok::rbrace && Lex.getKind() != lltok::kw_end)
+ while (Lex.getKind() != lltok::rbrace)
if (ParseBasicBlock(PFS)) return true;
// Eat the }.
@@ -2936,9 +2803,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) {
Lex.Lex();
if (ParseToken(lltok::equal, "expected '=' after instruction id"))
return true;
- } else if (Lex.getKind() == lltok::LocalVar ||
- // FIXME: REMOVE IN LLVM 3.0
- Lex.getKind() == lltok::StringConstant) {
+ } else if (Lex.getKind() == lltok::LocalVar) {
NameStr = Lex.getStrVal();
Lex.Lex();
if (ParseToken(lltok::equal, "expected '=' after instruction name"))
@@ -3062,8 +2927,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
case lltok::kw_tail: return ParseCall(Inst, PFS, true);
// Memory.
case lltok::kw_alloca: return ParseAlloc(Inst, PFS);
- case lltok::kw_malloc: return ParseAlloc(Inst, PFS, BB, false);
- case lltok::kw_free: return ParseFree(Inst, PFS, BB);
case lltok::kw_load: return ParseLoad(Inst, PFS, false);
case lltok::kw_store: return ParseStore(Inst, PFS, false);
case lltok::kw_volatile:
@@ -3073,7 +2936,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB,
return ParseStore(Inst, PFS, true);
else
return TokError("expected 'load' or 'store'");
- case lltok::kw_getresult: return ParseGetResult(Inst, PFS);
case lltok::kw_getelementptr: return ParseGetElementPtr(Inst, PFS);
case lltok::kw_extractvalue: return ParseExtractValue(Inst, PFS);
case lltok::kw_insertvalue: return ParseInsertValue(Inst, PFS);
@@ -3128,14 +2990,19 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) {
/// ParseRet - Parse a return instruction.
/// ::= 'ret' void (',' !dbg, !1)*
/// ::= 'ret' TypeAndValue (',' !dbg, !1)*
-/// ::= 'ret' TypeAndValue (',' TypeAndValue)+ (',' !dbg, !1)*
-/// [[obsolete: LLVM 3.0]]
-int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
- PerFunctionState &PFS) {
- PATypeHolder Ty(Type::getVoidTy(Context));
+bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
+ PerFunctionState &PFS) {
+ SMLoc TypeLoc = Lex.getLoc();
+ Type *Ty = 0;
if (ParseType(Ty, true /*void allowed*/)) return true;
+ Type *ResType = PFS.getFunction().getReturnType();
+
if (Ty->isVoidTy()) {
+ if (!ResType->isVoidTy())
+ return Error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
+
Inst = ReturnInst::Create(Context);
return false;
}
@@ -3143,38 +3010,12 @@ int LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
Value *RV;
if (ParseValue(Ty, RV, PFS)) return true;
- bool ExtraComma = false;
- if (EatIfPresent(lltok::comma)) {
- // Parse optional custom metadata, e.g. !dbg
- if (Lex.getKind() == lltok::MetadataVar) {
- ExtraComma = true;
- } else {
- // The normal case is one return value.
- // FIXME: LLVM 3.0 remove MRV support for 'ret i32 1, i32 2', requiring
- // use of 'ret {i32,i32} {i32 1, i32 2}'
- SmallVector<Value*, 8> RVs;
- RVs.push_back(RV);
-
- do {
- // If optional custom metadata, e.g. !dbg is seen then this is the
- // end of MRV.
- if (Lex.getKind() == lltok::MetadataVar)
- break;
- if (ParseTypeAndValue(RV, PFS)) return true;
- RVs.push_back(RV);
- } while (EatIfPresent(lltok::comma));
-
- RV = UndefValue::get(PFS.getFunction().getReturnType());
- for (unsigned i = 0, e = RVs.size(); i != e; ++i) {
- Instruction *I = InsertValueInst::Create(RV, RVs[i], i, "mrv");
- BB->getInstList().push_back(I);
- RV = I;
- }
- }
- }
-
+ if (ResType != RV->getType())
+ return Error(TypeLoc, "value doesn't match function result type '" +
+ getTypeString(ResType) + "'");
+
Inst = ReturnInst::Create(Context, RV);
- return ExtraComma ? InstExtraComma : InstNormal;
+ return false;
}
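
The multiple-return-value sugar is gone: 'ret i32 1, i32 2' must now be
written as a single aggregate return, 'ret { i32, i32 } { i32 1, i32 2 }'.
The deleted code expanded the old form with insertvalue chains; a front end
wanting the same effect can do the expansion itself, sketched here
(emitPairReturn is illustrative):

  #include "llvm/BasicBlock.h"
  #include "llvm/Constants.h"
  #include "llvm/Instructions.h"
  using namespace llvm;

  ReturnInst *emitPairReturn(BasicBlock *BB, Value *A, Value *B,
                             StructType *RetTy) {
    Value *RV = UndefValue::get(RetTy);          // start from undef
    unsigned FieldA[] = { 0 }, FieldB[] = { 1 };
    RV = InsertValueInst::Create(RV, A, FieldA, "mrv", BB);
    RV = InsertValueInst::Create(RV, B, FieldB, "mrv", BB);
    return ReturnInst::Create(BB->getContext(), RV, BB);
  }
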
@@ -3300,7 +3141,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
LocTy CallLoc = Lex.getLoc();
unsigned RetAttrs, FnAttrs;
CallingConv::ID CC;
- PATypeHolder RetType(Type::getVoidTy(Context));
+ Type *RetType = 0;
LocTy RetTypeLoc;
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
@@ -3326,7 +3167,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
!(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
// Pull out the types of all of the arguments...
- std::vector<const Type*> ParamTypes;
+ std::vector<Type*> ParamTypes;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
ParamTypes.push_back(ArgList[i].V->getType());
@@ -3341,14 +3182,6 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
Value *Callee;
if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
- // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
- // function attributes.
- unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
- if (FnAttrs & ObsoleteFuncAttrs) {
- RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
- FnAttrs &= ~ObsoleteFuncAttrs;
- }
-
// Set up the Attributes for the function.
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs != Attribute::None)
@@ -3370,7 +3203,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
return Error(ArgList[i].Loc, "argument is not of expected type '" +
- ExpectedTy->getDescription() + "'");
+ getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
if (ArgList[i].Attrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
@@ -3385,8 +3218,7 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) {
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
- InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB,
- Args.begin(), Args.end());
+ InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args);
II->setCallingConv(CC);
II->setAttributes(PAL);
Inst = II;
@@ -3486,8 +3318,9 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
/// ::= CastOpc TypeAndValue 'to' Type
bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
unsigned Opc) {
- LocTy Loc; Value *Op;
- PATypeHolder DestTy(Type::getVoidTy(Context));
+ LocTy Loc;
+ Value *Op;
+ Type *DestTy = 0;
if (ParseTypeAndValue(Op, Loc, PFS) ||
ParseToken(lltok::kw_to, "expected 'to' after cast value") ||
ParseType(DestTy))
@@ -3496,8 +3329,8 @@ bool LLParser::ParseCast(Instruction *&Inst, PerFunctionState &PFS,
if (!CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy)) {
CastInst::castIsValid((Instruction::CastOps)Opc, Op, DestTy);
return Error(Loc, "invalid cast opcode for cast from '" +
- Op->getType()->getDescription() + "' to '" +
- DestTy->getDescription() + "'");
+ getTypeString(Op->getType()) + "' to '" +
+ getTypeString(DestTy) + "'");
}
Inst = CastInst::Create((Instruction::CastOps)Opc, Op, DestTy);
return false;
@@ -3526,7 +3359,7 @@ bool LLParser::ParseSelect(Instruction *&Inst, PerFunctionState &PFS) {
/// ::= 'va_arg' TypeAndValue ',' Type
bool LLParser::ParseVA_Arg(Instruction *&Inst, PerFunctionState &PFS) {
Value *Op;
- PATypeHolder EltTy(Type::getVoidTy(Context));
+ Type *EltTy = 0;
LocTy TypeLoc;
if (ParseTypeAndValue(Op, PFS) ||
ParseToken(lltok::comma, "expected ',' after vaarg operand") ||
@@ -3598,11 +3431,10 @@ bool LLParser::ParseShuffleVector(Instruction *&Inst, PerFunctionState &PFS) {
/// ParsePHI
/// ::= 'phi' Type '[' Value ',' Value ']' (',' '[' Value ',' Value ']')*
int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
- PATypeHolder Ty(Type::getVoidTy(Context));
+ Type *Ty = 0; LocTy TypeLoc;
Value *Op0, *Op1;
- LocTy TypeLoc = Lex.getLoc();
- if (ParseType(Ty) ||
+ if (ParseType(Ty, TypeLoc) ||
ParseToken(lltok::lsquare, "expected '[' in phi value list") ||
ParseValue(Ty, Op0, PFS) ||
ParseToken(lltok::comma, "expected ',' after insertelement value") ||
@@ -3648,7 +3480,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
bool isTail) {
unsigned RetAttrs, FnAttrs;
CallingConv::ID CC;
- PATypeHolder RetType(Type::getVoidTy(Context));
+ Type *RetType = 0;
LocTy RetTypeLoc;
ValID CalleeID;
SmallVector<ParamInfo, 16> ArgList;
@@ -3671,7 +3503,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
if (!(PFTy = dyn_cast<PointerType>(RetType)) ||
!(Ty = dyn_cast<FunctionType>(PFTy->getElementType()))) {
// Pull out the types of all of the arguments...
- std::vector<const Type*> ParamTypes;
+ std::vector<Type*> ParamTypes;
for (unsigned i = 0, e = ArgList.size(); i != e; ++i)
ParamTypes.push_back(ArgList[i].V->getType());
@@ -3686,14 +3518,6 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
Value *Callee;
if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true;
- // FIXME: In LLVM 3.0, stop accepting zext, sext and inreg as optional
- // function attributes.
- unsigned ObsoleteFuncAttrs = Attribute::ZExt|Attribute::SExt|Attribute::InReg;
- if (FnAttrs & ObsoleteFuncAttrs) {
- RetAttrs |= FnAttrs & ObsoleteFuncAttrs;
- FnAttrs &= ~ObsoleteFuncAttrs;
- }
-
// Set up the Attributes for the function.
SmallVector<AttributeWithIndex, 8> Attrs;
if (RetAttrs != Attribute::None)
@@ -3715,7 +3539,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
if (ExpectedTy && ExpectedTy != ArgList[i].V->getType())
return Error(ArgList[i].Loc, "argument is not of expected type '" +
- ExpectedTy->getDescription() + "'");
+ getTypeString(ExpectedTy) + "'");
Args.push_back(ArgList[i].V);
if (ArgList[i].Attrs != Attribute::None)
Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs));
@@ -3730,7 +3554,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
// Finish off the Attributes and check them
AttrListPtr PAL = AttrListPtr::get(Attrs.begin(), Attrs.end());
- CallInst *CI = CallInst::Create(Callee, Args.begin(), Args.end());
+ CallInst *CI = CallInst::Create(Callee, Args);
CI->setTailCall(isTail);
CI->setCallingConv(CC);
CI->setAttributes(PAL);
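
CallInst::Create (and InvokeInst::Create earlier) now take ArrayRef<Value*>
instead of begin/end iterators. The call-site shape, as a sketch (emitCall is
illustrative):

  #include "llvm/Instructions.h"
  #include "llvm/ADT/SmallVector.h"
  using namespace llvm;

  CallInst *emitCall(Value *Callee, Value *A, Value *B, BasicBlock *BB) {
    SmallVector<Value*, 8> Args;
    Args.push_back(A);
    Args.push_back(B);
    return CallInst::Create(Callee, Args, "", BB); // ArrayRef overload
  }
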
@@ -3743,14 +3567,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS,
//===----------------------------------------------------------------------===//
/// ParseAlloc
-/// ::= 'malloc' Type (',' TypeAndValue)? (',' OptionalInfo)?
/// ::= 'alloca' Type (',' TypeAndValue)? (',' OptionalInfo)?
-int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
- BasicBlock* BB, bool isAlloca) {
- PATypeHolder Ty(Type::getVoidTy(Context));
+int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) {
Value *Size = 0;
LocTy SizeLoc;
unsigned Alignment = 0;
+ Type *Ty = 0;
if (ParseType(Ty)) return true;
bool AteExtraComma = false;
@@ -3769,37 +3591,8 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS,
if (Size && !Size->getType()->isIntegerTy())
return Error(SizeLoc, "element count must have integer type");
- if (isAlloca) {
- Inst = new AllocaInst(Ty, Size, Alignment);
- return AteExtraComma ? InstExtraComma : InstNormal;
- }
-
- // Autoupgrade old malloc instruction to malloc call.
- // FIXME: Remove in LLVM 3.0.
- if (Size && !Size->getType()->isIntegerTy(32))
- return Error(SizeLoc, "element count must be i32");
- const Type *IntPtrTy = Type::getInt32Ty(Context);
- Constant *AllocSize = ConstantExpr::getSizeOf(Ty);
- AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, IntPtrTy);
- if (!MallocF)
- // Prototype malloc as "void *(int32)".
- // This function is renamed as "malloc" in ValidateEndOfModule().
- MallocF = cast<Function>(
- M->getOrInsertFunction("", Type::getInt8PtrTy(Context), IntPtrTy, NULL));
- Inst = CallInst::CreateMalloc(BB, IntPtrTy, Ty, AllocSize, Size, MallocF);
-return AteExtraComma ? InstExtraComma : InstNormal;
-}
-
-/// ParseFree
-/// ::= 'free' TypeAndValue
-bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS,
- BasicBlock* BB) {
- Value *Val; LocTy Loc;
- if (ParseTypeAndValue(Val, Loc, PFS)) return true;
- if (!Val->getType()->isPointerTy())
- return Error(Loc, "operand to free must be a pointer");
- Inst = CallInst::CreateFree(Val, BB);
- return false;
+ Inst = new AllocaInst(Ty, Size, Alignment);
+ return AteExtraComma ? InstExtraComma : InstNormal;
}
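
With the 'malloc' and 'free' instructions (and the parser's
autoupgrade-to-call path) removed, old .ll files must spell these as plain
calls. The CallInst helper the deleted code used still exists; a hedged
sketch, assuming a 64-bit intptr type instead of consulting TargetData:

  #include "llvm/BasicBlock.h"
  #include "llvm/Constants.h"
  #include "llvm/Instructions.h"
  using namespace llvm;

  Instruction *emitMalloc(BasicBlock *BB, Type *AllocTy) {
    Type *IntPtrTy = Type::getInt64Ty(BB->getContext()); // assumption
    Constant *Size = ConstantExpr::getSizeOf(AllocTy);
    Size = ConstantExpr::getTruncOrBitCast(Size, IntPtrTy);
    // Per the API docs, a trailing bitcast (if one is needed) is returned
    // but not inserted into BB; the caller must insert it, as the deleted
    // parser code did.
    return CallInst::CreateMalloc(BB, IntPtrTy, AllocTy, Size);
  }
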
/// ParseLoad
@@ -3845,25 +3638,6 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
return AteExtraComma ? InstExtraComma : InstNormal;
}
-/// ParseGetResult
-/// ::= 'getresult' TypeAndValue ',' i32
-/// FIXME: Remove support for getresult in LLVM 3.0
-bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
- Value *Val; LocTy ValLoc, EltLoc;
- unsigned Element;
- if (ParseTypeAndValue(Val, ValLoc, PFS) ||
- ParseToken(lltok::comma, "expected ',' after getresult operand") ||
- ParseUInt32(Element, EltLoc))
- return true;
-
- if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy())
- return Error(ValLoc, "getresult inst requires an aggregate operand");
- if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
- return Error(EltLoc, "invalid getresult index for value");
- Inst = ExtractValueInst::Create(Val, Element);
- return false;
-}
-
/// ParseGetElementPtr
/// ::= 'getelementptr' 'inbounds'? TypeAndValue (',' TypeAndValue)*
int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
@@ -3911,10 +3685,9 @@ int LLParser::ParseExtractValue(Instruction *&Inst, PerFunctionState &PFS) {
if (!Val->getType()->isAggregateType())
return Error(Loc, "extractvalue operand must be aggregate type");
- if (!ExtractValueInst::getIndexedType(Val->getType(), Indices.begin(),
- Indices.end()))
+ if (!ExtractValueInst::getIndexedType(Val->getType(), Indices))
return Error(Loc, "invalid indices for extractvalue");
- Inst = ExtractValueInst::Create(Val, Indices.begin(), Indices.end());
+ Inst = ExtractValueInst::Create(Val, Indices);
return AteExtraComma ? InstExtraComma : InstNormal;
}
@@ -3933,10 +3706,9 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) {
if (!Val0->getType()->isAggregateType())
return Error(Loc0, "insertvalue operand must be aggregate type");
- if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices.begin(),
- Indices.end()))
+ if (!ExtractValueInst::getIndexedType(Val0->getType(), Indices))
return Error(Loc0, "invalid indices for insertvalue");
- Inst = InsertValueInst::Create(Val0, Val1, Indices.begin(), Indices.end());
+ Inst = InsertValueInst::Create(Val0, Val1, Indices);
return AteExtraComma ? InstExtraComma : InstNormal;
}
@@ -3962,12 +3734,7 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl<Value*> &Elts,
}
Value *V = 0;
- PATypeHolder Ty(Type::getVoidTy(Context));
- ValID ID;
- if (ParseType(Ty) || ParseValID(ID, PFS) ||
- ConvertValIDToValue(Ty, ID, V, PFS))
- return true;
-
+ if (ParseTypeAndValue(V, PFS)) return true;
Elts.push_back(V);
} while (EatIfPresent(lltok::comma));
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 93e7f77..9630657 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
#include "llvm/Support/ValueHandle.h"
#include <map>
@@ -32,6 +33,7 @@ namespace llvm {
class GlobalValue;
class MDString;
class MDNode;
+ class StructType;
/// ValID - Represents a reference of a definition of some sort with no type.
/// There are several cases where we have to parse the value but where the
@@ -47,7 +49,9 @@ namespace llvm {
t_Constant, // Value in ConstantVal.
t_InlineAsm, // Value in StrVal/StrVal2/UIntVal.
t_MDNode, // Value in MDNodeVal.
- t_MDString // Value in MDStringVal.
+ t_MDString, // Value in MDStringVal.
+ t_ConstantStruct, // Value in ConstantStructElts.
+ t_PackedConstantStruct // Value in ConstantStructElts.
} Kind;
LLLexer::LocTy Loc;
@@ -58,12 +62,19 @@ namespace llvm {
Constant *ConstantVal;
MDNode *MDNodeVal;
MDString *MDStringVal;
- ValID() : APFloatVal(0.0) {}
+ Constant **ConstantStructElts;
+
+ ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
+ ~ValID() {
+ if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
+ delete [] ConstantStructElts;
+ }
bool operator<(const ValID &RHS) const {
if (Kind == t_LocalID || Kind == t_GlobalID)
return UIntVal < RHS.UIntVal;
- assert((Kind == t_LocalName || Kind == t_GlobalName) &&
+ assert((Kind == t_LocalName || Kind == t_GlobalName ||
+ Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
"Ordering not defined for this ValID kind yet");
return StrVal < RHS.StrVal;
}
@@ -93,33 +104,13 @@ namespace llvm {
};
DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
- // Type resolution handling data structures.
- std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes;
- std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
- std::vector<PATypeHolder> NumberedTypes;
+ // Type resolution handling data structures. The location is set when we
+ // have processed a use of the type but not a definition yet.
+ StringMap<std::pair<Type*, LocTy> > NamedTypes;
+ std::vector<std::pair<Type*, LocTy> > NumberedTypes;
+
std::vector<TrackingVH<MDNode> > NumberedMetadata;
std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
- struct UpRefRecord {
- /// Loc - This is the location of the upref.
- LocTy Loc;
-
- /// NestingLevel - The number of nesting levels that need to be popped
- /// before this type is resolved.
- unsigned NestingLevel;
-
- /// LastContainedTy - This is the type at the current binding level for
- /// the type. Every time we reduce the nesting level, this gets updated.
- const Type *LastContainedTy;
-
- /// UpRefTy - This is the actual opaque type that the upreference is
- /// represented with.
- OpaqueType *UpRefTy;
-
- UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy)
- : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy),
- UpRefTy(URTy) {}
- };
- std::vector<UpRefRecord> UpRefs;
// Global Value reference information.
std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
@@ -131,14 +122,13 @@ namespace llvm {
std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
ForwardRefBlockAddresses;
- Function *MallocF;
public:
LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
- M(m), MallocF(NULL) {}
+ M(m) {}
bool Run();
- LLVMContext& getContext() { return Context; }
+ LLVMContext &getContext() { return Context; }
private:
@@ -223,16 +213,19 @@ namespace llvm {
bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
// Type Parsing.
- bool ParseType(PATypeHolder &Result, bool AllowVoid = false);
- bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) {
+ bool ParseType(Type *&Result, bool AllowVoid = false);
+ bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
Loc = Lex.getLoc();
return ParseType(Result, AllowVoid);
}
- bool ParseTypeRec(PATypeHolder &H);
- bool ParseStructType(PATypeHolder &H, bool Packed);
- bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
- bool ParseFunctionType(PATypeHolder &Result);
- PATypeHolder HandleUpRefs(const Type *Ty);
+ bool ParseAnonStructType(Type *&Result, bool Packed);
+ bool ParseStructBody(SmallVectorImpl<Type*> &Body);
+ bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
+ std::pair<Type*, LocTy> &Entry,
+ Type *&ResultTy);
+
+ bool ParseArrayVectorType(Type *&Result, bool isVector);
+ bool ParseFunctionType(Type *&Result);
// Function Semantic Analysis.
class PerFunctionState {
@@ -279,14 +272,20 @@ namespace llvm {
bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS);
- bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS);
+ bool ParseValue(const Type *Ty, Value *&V, PerFunctionState *PFS);
+ bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS) {
+ return ParseValue(Ty, V, &PFS);
+ }
bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
PerFunctionState &PFS) {
Loc = Lex.getLoc();
- return ParseValue(Ty, V, PFS);
+ return ParseValue(Ty, V, &PFS);
}
- bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS);
+ bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
+ bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
+ return ParseTypeAndValue(V, &PFS);
+ }
bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
Loc = Lex.getLoc();
return ParseTypeAndValue(V, PFS);
@@ -322,14 +321,13 @@ namespace llvm {
// Function Parsing.
struct ArgInfo {
LocTy Loc;
- PATypeHolder Type;
+ Type *Ty;
unsigned Attrs;
std::string Name;
- ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N)
- : Loc(L), Type(Ty), Attrs(Attr), Name(N) {}
+ ArgInfo(LocTy L, Type *ty, unsigned Attr, const std::string &N)
+ : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
};
- bool ParseArgumentList(std::vector<ArgInfo> &ArgList,
- bool &isVarArg, bool inType);
+ bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
bool ParseFunctionHeader(Function *&Fn, bool isDefine);
bool ParseFunctionBody(Function &Fn);
bool ParseBasicBlock(PerFunctionState &PFS);
@@ -341,7 +339,7 @@ namespace llvm {
PerFunctionState &PFS);
bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
- int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
+ bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
@@ -359,12 +357,9 @@ namespace llvm {
bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
int ParsePHI(Instruction *&I, PerFunctionState &PFS);
bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
- int ParseAlloc(Instruction *&I, PerFunctionState &PFS,
- BasicBlock *BB = 0, bool isAlloca = true);
- bool ParseFree(Instruction *&I, PerFunctionState &PFS, BasicBlock *BB);
+ int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
- bool ParseGetResult(Instruction *&I, PerFunctionState &PFS);
int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 02f97a3..a5f89fc 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -32,7 +32,6 @@ namespace lltok {
exclaim, // !
kw_x,
- kw_begin, kw_end,
kw_true, kw_false,
kw_declare, kw_define,
kw_global, kw_constant,
@@ -99,6 +98,7 @@ namespace lltok {
kw_noimplicitfloat,
kw_naked,
kw_hotpatch,
+ kw_nonlazybind,
kw_type,
kw_opaque,
@@ -121,9 +121,9 @@ namespace lltok {
kw_ret, kw_br, kw_switch, kw_indirectbr, kw_invoke, kw_unwind,
kw_unreachable,
- kw_malloc, kw_alloca, kw_free, kw_load, kw_store, kw_getelementptr,
+ kw_alloca, kw_load, kw_store, kw_getelementptr,
- kw_extractelement, kw_insertelement, kw_shufflevector, kw_getresult,
+ kw_extractelement, kw_insertelement, kw_shufflevector,
kw_extractvalue, kw_insertvalue, kw_blockaddress,
// Unsigned Valued tokens (UIntVal).
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index bc995ae..24c2994 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -31,7 +31,7 @@ void BitcodeReader::FreeState() {
if (BufferOwned)
delete Buffer;
Buffer = 0;
- std::vector<PATypeHolder>().swap(TypeList);
+ std::vector<Type*>().swap(TypeList);
ValueList.clear();
MDValueList.clear();
@@ -292,11 +292,9 @@ void BitcodeReaderValueList::ResolveConstantForwardRefs() {
// Make the new constant.
Constant *NewC;
if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
- NewC = ConstantArray::get(UserCA->getType(), &NewOps[0],
- NewOps.size());
+ NewC = ConstantArray::get(UserCA->getType(), NewOps);
} else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
- NewC = ConstantStruct::get(Context, &NewOps[0], NewOps.size(),
- UserCS->getType()->isPacked());
+ NewC = ConstantStruct::get(UserCS->getType(), NewOps);
} else if (isa<ConstantVector>(UserC)) {
NewC = ConstantVector::get(NewOps);
} else {
@@ -354,19 +352,28 @@ Value *BitcodeReaderMDValueList::getValueFwdRef(unsigned Idx) {
return V;
}
-const Type *BitcodeReader::getTypeByID(unsigned ID, bool isTypeTable) {
- // If the TypeID is in range, return it.
- if (ID < TypeList.size())
- return TypeList[ID].get();
- if (!isTypeTable) return 0;
-
- // The type table allows forward references. Push as many Opaque types as
- // needed to get up to ID.
- while (TypeList.size() <= ID)
- TypeList.push_back(OpaqueType::get(Context));
- return TypeList.back().get();
+Type *BitcodeReader::getTypeByID(unsigned ID) {
+ // The type table size is always specified correctly.
+ if (ID >= TypeList.size())
+ return 0;
+
+ if (Type *Ty = TypeList[ID])
+ return Ty;
+
+ // If we have a forward reference, the only possible case is when it is to a
+ // named struct. Just create a placeholder for now.
+ return TypeList[ID] = StructType::createNamed(Context, "");
}
+/// FIXME: Remove in LLVM 3.1, only used by ParseOldTypeTable.
+Type *BitcodeReader::getTypeByIDOrNull(unsigned ID) {
+ if (ID >= TypeList.size())
+ TypeList.resize(ID+1);
+
+ return TypeList[ID];
+}
+
+
//===----------------------------------------------------------------------===//
// Functions for parsing blocks from the bitcode file
//===----------------------------------------------------------------------===//
@@ -473,17 +480,22 @@ bool BitcodeReader::ParseAttributeBlock() {
}
}
-
bool BitcodeReader::ParseTypeTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID))
+ if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
return Error("Malformed block record");
+
+ return ParseTypeTableBody();
+}
+bool BitcodeReader::ParseTypeTableBody() {
if (!TypeList.empty())
return Error("Multiple TYPE_BLOCKs found!");
SmallVector<uint64_t, 64> Record;
unsigned NumRecords = 0;
+ SmallString<64> TypeName;
+
// Read all the records for this type table.
while (1) {
unsigned Code = Stream.ReadCode();
@@ -510,17 +522,15 @@ bool BitcodeReader::ParseTypeTable() {
// Read a record.
Record.clear();
- const Type *ResultTy = 0;
+ Type *ResultTy = 0;
switch (Stream.ReadRecord(Code, Record)) {
- default: // Default behavior: unknown type.
- ResultTy = 0;
- break;
+ default: return Error("unknown type in type table");
case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
// TYPE_CODE_NUMENTRY contains a count of the number of types in the
// type list. This allows us to reserve space.
if (Record.size() < 1)
return Error("Invalid TYPE_CODE_NUMENTRY record");
- TypeList.reserve(Record[0]);
+ TypeList.resize(Record[0]);
continue;
case bitc::TYPE_CODE_VOID: // VOID
ResultTy = Type::getVoidTy(Context);
@@ -543,9 +553,6 @@ bool BitcodeReader::ParseTypeTable() {
case bitc::TYPE_CODE_LABEL: // LABEL
ResultTy = Type::getLabelTy(Context);
break;
- case bitc::TYPE_CODE_OPAQUE: // OPAQUE
- ResultTy = 0;
- break;
case bitc::TYPE_CODE_METADATA: // METADATA
ResultTy = Type::getMetadataTy(Context);
break;
@@ -565,8 +572,9 @@ bool BitcodeReader::ParseTypeTable() {
unsigned AddressSpace = 0;
if (Record.size() == 2)
AddressSpace = Record[1];
- ResultTy = PointerType::get(getTypeByID(Record[0], true),
- AddressSpace);
+ ResultTy = getTypeByID(Record[0]);
+ if (ResultTy == 0) return Error("invalid element type in pointer type");
+ ResultTy = PointerType::get(ResultTy, AddressSpace);
break;
}
case bitc::TYPE_CODE_FUNCTION: {
@@ -574,69 +582,306 @@ bool BitcodeReader::ParseTypeTable() {
// FUNCTION: [vararg, attrid, retty, paramty x N]
if (Record.size() < 3)
return Error("Invalid FUNCTION type record");
- std::vector<const Type*> ArgTys;
- for (unsigned i = 3, e = Record.size(); i != e; ++i)
- ArgTys.push_back(getTypeByID(Record[i], true));
+ std::vector<Type*> ArgTys;
+ for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ ArgTys.push_back(T);
+ else
+ break;
+ }
+
+ ResultTy = getTypeByID(Record[2]);
+ if (ResultTy == 0 || ArgTys.size() < Record.size()-3)
+ return Error("invalid type in function type");
- ResultTy = FunctionType::get(getTypeByID(Record[2], true), ArgTys,
- Record[0]);
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
break;
}
- case bitc::TYPE_CODE_STRUCT: { // STRUCT: [ispacked, eltty x N]
+ case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N]
if (Record.size() < 1)
return Error("Invalid STRUCT type record");
- std::vector<const Type*> EltTys;
- for (unsigned i = 1, e = Record.size(); i != e; ++i)
- EltTys.push_back(getTypeByID(Record[i], true));
+ std::vector<Type*> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid type in struct type");
ResultTy = StructType::get(Context, EltTys, Record[0]);
break;
}
+ case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N]
+ if (ConvertToString(Record, 0, TypeName))
+ return Error("Invalid STRUCT_NAME record");
+ continue;
+
+ case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N]
+ if (Record.size() < 1)
+ return Error("Invalid STRUCT type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct.
+ Res = StructType::createNamed(Context, TypeName);
+ TypeName.clear();
+
+ SmallVector<Type*, 8> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *T = getTypeByID(Record[i]))
+ EltTys.push_back(T);
+ else
+ break;
+ }
+ if (EltTys.size() != Record.size()-1)
+ return Error("invalid STRUCT type record");
+ Res->setBody(EltTys, Record[0]);
+ ResultTy = Res;
+ break;
+ }
+ case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: []
+ if (Record.size() != 1)
+ return Error("Invalid OPAQUE type record");
+
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ // Check to see if this was forward referenced, if so fill in the temp.
+ StructType *Res = cast_or_null<StructType>(TypeList[NumRecords]);
+ if (Res) {
+ Res->setName(TypeName);
+ TypeList[NumRecords] = 0;
+ } else // Otherwise, create a new struct with no body.
+ Res = StructType::createNamed(Context, TypeName);
+ TypeName.clear();
+ ResultTy = Res;
+ break;
+ }
case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid ARRAY type record");
- ResultTy = ArrayType::get(getTypeByID(Record[1], true), Record[0]);
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
+ else
+ return Error("Invalid ARRAY type element");
break;
case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
if (Record.size() < 2)
return Error("Invalid VECTOR type record");
- ResultTy = VectorType::get(getTypeByID(Record[1], true), Record[0]);
+ if ((ResultTy = getTypeByID(Record[1])))
+ ResultTy = VectorType::get(ResultTy, Record[0]);
+ else
+ return Error("Invalid ARRAY type element");
break;
}
- if (NumRecords == TypeList.size()) {
- // If this is a new type slot, just append it.
- TypeList.push_back(ResultTy ? ResultTy : OpaqueType::get(Context));
- ++NumRecords;
- } else if (ResultTy == 0) {
- // Otherwise, this was forward referenced, so an opaque type was created,
- // but the result type is actually just an opaque. Leave the one we
- // created previously.
- ++NumRecords;
- } else {
- // Otherwise, this was forward referenced, so an opaque type was created.
- // Resolve the opaque type to the real type now.
- assert(NumRecords < TypeList.size() && "Typelist imbalance");
- const OpaqueType *OldTy = cast<OpaqueType>(TypeList[NumRecords++].get());
-
- // Don't directly push the new type on the Tab. Instead we want to replace
- // the opaque type we previously inserted with the new concrete value. The
- // refinement from the abstract (opaque) type to the new type causes all
- // uses of the abstract type to use the concrete type (NewTy). This will
- // also cause the opaque type to be deleted.
- const_cast<OpaqueType*>(OldTy)->refineAbstractTypeTo(ResultTy);
-
- // This should have replaced the old opaque type with the new type in the
- // value table... or with a preexisting type that was already in the
- // system. Let's just make sure it did.
- assert(TypeList[NumRecords-1].get() != OldTy &&
- "refineAbstractType didn't work!");
+ if (NumRecords >= TypeList.size())
+ return Error("invalid TYPE table");
+ assert(ResultTy && "Didn't read a type?");
+ assert(TypeList[NumRecords] == 0 && "Already read type?");
+ TypeList[NumRecords++] = ResultTy;
+ }
+}
+
+// FIXME: Remove in LLVM 3.1
+bool BitcodeReader::ParseOldTypeTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_OLD))
+ return Error("Malformed block record");
+
+ if (!TypeList.empty())
+ return Error("Multiple TYPE_BLOCKs found!");
+
+
+ // While horrible, we have no good ordering of types in the bc file. Just
+ // iteratively parse types out of the bc file in multiple passes until we get
+ // them all. Do this by saving a cursor for the start of the type block.
+ BitstreamCursor StartOfTypeBlockCursor(Stream);
+
+ unsigned NumTypesRead = 0;
+
+ SmallVector<uint64_t, 64> Record;
+RestartScan:
+ unsigned NextTypeID = 0;
+ bool ReadAnyTypes = false;
+
+ // Read all the records for this type table.
+ while (1) {
+ unsigned Code = Stream.ReadCode();
+ if (Code == bitc::END_BLOCK) {
+ if (NextTypeID != TypeList.size())
+ return Error("Invalid type forward reference in TYPE_BLOCK_ID_OLD");
+
+ // If we haven't read all of the types yet, iterate again.
+ if (NumTypesRead != TypeList.size()) {
+ // If we didn't successfully read any types in this pass, then we must
+ // have an unhandled forward reference.
+ if (!ReadAnyTypes)
+ return Error("Obsolete bitcode contains unhandled recursive type");
+
+ Stream = StartOfTypeBlockCursor;
+ goto RestartScan;
+ }
+
+ if (Stream.ReadBlockEnd())
+ return Error("Error at end of type table block");
+ return false;
+ }
+
+ if (Code == bitc::ENTER_SUBBLOCK) {
+ // No known subblocks, always skip them.
+ Stream.ReadSubBlockID();
+ if (Stream.SkipBlock())
+ return Error("Malformed block record");
+ continue;
+ }
+
+ if (Code == bitc::DEFINE_ABBREV) {
+ Stream.ReadAbbrevRecord();
+ continue;
}
+
+ // Read a record.
+ Record.clear();
+ Type *ResultTy = 0;
+ switch (Stream.ReadRecord(Code, Record)) {
+ default: return Error("unknown type in type table");
+ case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries]
+ // TYPE_CODE_NUMENTRY contains a count of the number of types in the
+ // type list. This allows us to reserve space.
+ if (Record.size() < 1)
+ return Error("Invalid TYPE_CODE_NUMENTRY record");
+ TypeList.resize(Record[0]);
+ continue;
+ case bitc::TYPE_CODE_VOID: // VOID
+ ResultTy = Type::getVoidTy(Context);
+ break;
+ case bitc::TYPE_CODE_FLOAT: // FLOAT
+ ResultTy = Type::getFloatTy(Context);
+ break;
+ case bitc::TYPE_CODE_DOUBLE: // DOUBLE
+ ResultTy = Type::getDoubleTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_FP80: // X86_FP80
+ ResultTy = Type::getX86_FP80Ty(Context);
+ break;
+ case bitc::TYPE_CODE_FP128: // FP128
+ ResultTy = Type::getFP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_PPC_FP128: // PPC_FP128
+ ResultTy = Type::getPPC_FP128Ty(Context);
+ break;
+ case bitc::TYPE_CODE_LABEL: // LABEL
+ ResultTy = Type::getLabelTy(Context);
+ break;
+ case bitc::TYPE_CODE_METADATA: // METADATA
+ ResultTy = Type::getMetadataTy(Context);
+ break;
+ case bitc::TYPE_CODE_X86_MMX: // X86_MMX
+ ResultTy = Type::getX86_MMXTy(Context);
+ break;
+ case bitc::TYPE_CODE_INTEGER: // INTEGER: [width]
+ if (Record.size() < 1)
+ return Error("Invalid Integer type record");
+ ResultTy = IntegerType::get(Context, Record[0]);
+ break;
+ case bitc::TYPE_CODE_OPAQUE: // OPAQUE
+ if (NextTypeID < TypeList.size() && TypeList[NextTypeID] == 0)
+ ResultTy = StructType::createNamed(Context, "");
+ break;
+ case bitc::TYPE_CODE_STRUCT_OLD: {// STRUCT_OLD
+ if (NextTypeID >= TypeList.size()) break;
+ // If we already read it, don't reprocess.
+ if (TypeList[NextTypeID] &&
+ !cast<StructType>(TypeList[NextTypeID])->isOpaque())
+ break;
+
+ // Set a type.
+ if (TypeList[NextTypeID] == 0)
+ TypeList[NextTypeID] = StructType::createNamed(Context, "");
+
+ std::vector<Type*> EltTys;
+ for (unsigned i = 1, e = Record.size(); i != e; ++i) {
+ if (Type *Elt = getTypeByIDOrNull(Record[i]))
+ EltTys.push_back(Elt);
+ else
+ break;
+ }
+
+ if (EltTys.size() != Record.size()-1)
+ break; // Not all elements are ready.
+
+ cast<StructType>(TypeList[NextTypeID])->setBody(EltTys, Record[0]);
+ ResultTy = TypeList[NextTypeID];
+ TypeList[NextTypeID] = 0;
+ break;
+ }
+ case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or
+ // [pointee type, address space]
+ if (Record.size() < 1)
+ return Error("Invalid POINTER type record");
+ unsigned AddressSpace = 0;
+ if (Record.size() == 2)
+ AddressSpace = Record[1];
+ if ((ResultTy = getTypeByIDOrNull(Record[0])))
+ ResultTy = PointerType::get(ResultTy, AddressSpace);
+ break;
+ }
+ case bitc::TYPE_CODE_FUNCTION: {
+ // FIXME: attrid is dead, remove it in LLVM 3.0
+ // FUNCTION: [vararg, attrid, retty, paramty x N]
+ if (Record.size() < 3)
+ return Error("Invalid FUNCTION type record");
+ std::vector<Type*> ArgTys;
+ for (unsigned i = 3, e = Record.size(); i != e; ++i) {
+ if (Type *Elt = getTypeByIDOrNull(Record[i]))
+ ArgTys.push_back(Elt);
+ else
+ break;
+ }
+ if (ArgTys.size()+3 != Record.size())
+ break; // Something was null.
+ if ((ResultTy = getTypeByIDOrNull(Record[2])))
+ ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]);
+ break;
+ }
+ case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid ARRAY type record");
+ if ((ResultTy = getTypeByIDOrNull(Record[1])))
+ ResultTy = ArrayType::get(ResultTy, Record[0]);
+ break;
+ case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty]
+ if (Record.size() < 2)
+ return Error("Invalid VECTOR type record");
+ if ((ResultTy = getTypeByIDOrNull(Record[1])))
+ ResultTy = VectorType::get(ResultTy, Record[0]);
+ break;
+ }
+
+ if (NextTypeID >= TypeList.size())
+ return Error("invalid TYPE table");
+
+ if (ResultTy && TypeList[NextTypeID] == 0) {
+ ++NumTypesRead;
+ ReadAnyTypes = true;
+
+ TypeList[NextTypeID] = ResultTy;
+ }
+
+ ++NextTypeID;
}
}
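
Because the old-format table has no dependency ordering, ParseOldTypeTable
saves a cursor at the start of the block and rescans until a pass resolves
nothing new, at which point any remaining hole is a genuinely unhandled
recursive type. The strategy in isolation, as a self-contained sketch
(fixpointResolve is illustrative):

  #include <cstddef>
  #include <vector>

  // Repeatedly try to fill empty slots; stop when a whole pass makes no
  // progress. Returns false if some slot could never be resolved.
  template <typename T, typename ResolveFn>
  bool fixpointResolve(std::vector<T*> &Slots, ResolveFn TryResolve) {
    for (bool Progress = true; Progress; ) {
      Progress = false;
      bool Done = true;
      for (std::size_t i = 0, e = Slots.size(); i != e; ++i) {
        if (Slots[i]) continue;
        if ((Slots[i] = TryResolve(i)))
          Progress = true;
        else
          Done = false;
      }
      if (Done) return true;
    }
    return false;  // an unresolved forward reference remains
  }
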
-bool BitcodeReader::ParseTypeSymbolTable() {
- if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID))
+bool BitcodeReader::ParseOldTypeSymbolTable() {
+ if (Stream.EnterSubBlock(bitc::TYPE_SYMTAB_BLOCK_ID_OLD))
return Error("Malformed block record");
SmallVector<uint64_t, 64> Record;
@@ -676,7 +921,10 @@ bool BitcodeReader::ParseTypeSymbolTable() {
if (TypeID >= TypeList.size())
return Error("Invalid Type ID in TST_ENTRY record");
- TheModule->addTypeName(TypeName, TypeList[TypeID].get());
+ // Only apply the type name to a struct type with no name.
+ if (StructType *STy = dyn_cast<StructType>(TypeList[TypeID]))
+ if (!STy->isAnonymous() && !STy->hasName())
+ STy->setName(TypeName);
TypeName.clear();
break;
}
@@ -790,13 +1038,9 @@ bool BitcodeReader::ParseMetadata() {
Record.clear();
Code = Stream.ReadCode();
- // METADATA_NAME is always followed by METADATA_NAMED_NODE2.
- // Or METADATA_NAMED_NODE in LLVM 2.7. FIXME: Remove this in LLVM 3.0.
+ // METADATA_NAME is always followed by METADATA_NAMED_NODE.
unsigned NextBitCode = Stream.ReadRecord(Code, Record);
- if (NextBitCode == bitc::METADATA_NAMED_NODE) {
- LLVM2_7MetadataDetected = true;
- } else if (NextBitCode != bitc::METADATA_NAMED_NODE2)
- assert ( 0 && "Invalid Named Metadata record");
+ assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode;
// Read named metadata elements.
unsigned Size = Record.size();
@@ -807,35 +1051,20 @@ bool BitcodeReader::ParseMetadata() {
return Error("Malformed metadata record");
NMD->addOperand(MD);
}
- // Backwards compatibility hack: NamedMDValues used to be Values,
- // and they got their own slots in the value numbering. They are no
- // longer Values, however we still need to account for them in the
- // numbering in order to be able to read old bitcode files.
- // FIXME: Remove this in LLVM 3.0.
- if (LLVM2_7MetadataDetected)
- MDValueList.AssignValue(0, NextMDValueNo++);
break;
}
- case bitc::METADATA_FN_NODE: // FIXME: Remove in LLVM 3.0.
- case bitc::METADATA_FN_NODE2:
+ case bitc::METADATA_FN_NODE:
IsFunctionLocal = true;
// fall-through
- case bitc::METADATA_NODE: // FIXME: Remove in LLVM 3.0.
- case bitc::METADATA_NODE2: {
-
- // Detect 2.7-era metadata.
- // FIXME: Remove in LLVM 3.0.
- if (Code == bitc::METADATA_FN_NODE || Code == bitc::METADATA_NODE)
- LLVM2_7MetadataDetected = true;
-
+ case bitc::METADATA_NODE: {
if (Record.size() % 2 == 1)
- return Error("Invalid METADATA_NODE2 record");
+ return Error("Invalid METADATA_NODE record");
unsigned Size = Record.size();
SmallVector<Value*, 8> Elts;
for (unsigned i = 0; i != Size; i += 2) {
const Type *Ty = getTypeByID(Record[i]);
- if (!Ty) return Error("Invalid METADATA_NODE2 record");
+ if (!Ty) return Error("Invalid METADATA_NODE record");
if (Ty->isMetadataTy())
Elts.push_back(MDValueList.getValueFwdRef(Record[i+1]));
else if (!Ty->isVoidTy())
@@ -1331,12 +1560,16 @@ bool BitcodeReader::ParseModule() {
if (ParseAttributeBlock())
return true;
break;
- case bitc::TYPE_BLOCK_ID:
+ case bitc::TYPE_BLOCK_ID_NEW:
if (ParseTypeTable())
return true;
break;
- case bitc::TYPE_SYMTAB_BLOCK_ID:
- if (ParseTypeSymbolTable())
+ case bitc::TYPE_BLOCK_ID_OLD:
+ if (ParseOldTypeTable())
+ return true;
+ break;
+ case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
+ if (ParseOldTypeSymbolTable())
return true;
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
@@ -1755,10 +1988,7 @@ bool BitcodeReader::ParseMetadataAttachment() {
switch (Stream.ReadRecord(Code, Record)) {
default: // Default behavior: ignore.
break;
- // FIXME: Remove in LLVM 3.0.
- case bitc::METADATA_ATTACHMENT:
- LLVM2_7MetadataDetected = true;
- case bitc::METADATA_ATTACHMENT2: {
+ case bitc::METADATA_ATTACHMENT: {
unsigned RecordLength = Record.size();
if (Record.empty() || (RecordLength - 1) % 2 == 1)
return Error ("Invalid METADATA_ATTACHMENT reader!");
@@ -1870,10 +2100,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
I = 0;
continue;
- // FIXME: Remove this in LLVM 3.0.
- case bitc::FUNC_CODE_DEBUG_LOC:
- LLVM2_7MetadataDetected = true;
- case bitc::FUNC_CODE_DEBUG_LOC2: { // DEBUG_LOC: [line, col, scope, ia]
+ case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia]
I = 0; // Get the last instruction emitted.
if (CurBB && !CurBB->empty())
I = &CurBB->back();
@@ -1979,8 +2206,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
EXTRACTVALIdx.push_back((unsigned)Index);
}
- I = ExtractValueInst::Create(Agg,
- EXTRACTVALIdx.begin(), EXTRACTVALIdx.end());
+ I = ExtractValueInst::Create(Agg, EXTRACTVALIdx);
InstructionList.push_back(I);
break;
}
@@ -2004,8 +2230,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
INSERTVALIdx.push_back((unsigned)Index);
}
- I = InsertValueInst::Create(Agg, Val,
- INSERTVALIdx.begin(), INSERTVALIdx.end());
+ I = InsertValueInst::Create(Agg, Val, INSERTVALIdx);
InstructionList.push_back(I);
break;
}
@@ -2112,18 +2337,6 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
break;
}
- case bitc::FUNC_CODE_INST_GETRESULT: { // GETRESULT: [ty, val, n]
- if (Record.size() != 2)
- return Error("Invalid GETRESULT record");
- unsigned OpNum = 0;
- Value *Op;
- getValueTypePair(Record, OpNum, NextValueNo, Op);
- unsigned Index = Record[1];
- I = ExtractValueInst::Create(Op, Index);
- InstructionList.push_back(I);
- break;
- }
-
case bitc::FUNC_CODE_INST_RET: // RET: [opty,opval<optional>]
{
unsigned Size = Record.size();
@@ -2134,33 +2347,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
unsigned OpNum = 0;
- SmallVector<Value *,4> Vs;
- do {
- Value *Op = NULL;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op))
- return Error("Invalid RET record");
- Vs.push_back(Op);
- } while(OpNum != Record.size());
-
- const Type *ReturnType = F->getReturnType();
- // Handle multiple return values. FIXME: Remove in LLVM 3.0.
- if (Vs.size() > 1 ||
- (ReturnType->isStructTy() &&
- (Vs.empty() || Vs[0]->getType() != ReturnType))) {
- Value *RV = UndefValue::get(ReturnType);
- for (unsigned i = 0, e = Vs.size(); i != e; ++i) {
- I = InsertValueInst::Create(RV, Vs[i], i, "mrv");
- InstructionList.push_back(I);
- CurBB->getInstList().push_back(I);
- ValueList.AssignValue(I, NextValueNo++);
- RV = I;
- }
- I = ReturnInst::Create(Context, RV);
- InstructionList.push_back(I);
- break;
- }
+ Value *Op = NULL;
+ if (getValueTypePair(Record, OpNum, NextValueNo, Op))
+ return Error("Invalid RET record");
+ if (OpNum != Record.size())
+ return Error("Invalid RET record");
- I = ReturnInst::Create(Context, Vs[0]);
+ I = ReturnInst::Create(Context, Op);
InstructionList.push_back(I);
break;
}
@@ -2272,8 +2465,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
}
- I = InvokeInst::Create(Callee, NormalBB, UnwindBB,
- Ops.begin(), Ops.end());
+ I = InvokeInst::Create(Callee, NormalBB, UnwindBB, Ops);
InstructionList.push_back(I);
cast<InvokeInst>(I)->setCallingConv(
static_cast<CallingConv::ID>(CCInfo));
@@ -2307,47 +2499,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
break;
}
- case bitc::FUNC_CODE_INST_MALLOC: { // MALLOC: [instty, op, align]
- // Autoupgrade malloc instruction to malloc call.
- // FIXME: Remove in LLVM 3.0.
- if (Record.size() < 3)
- return Error("Invalid MALLOC record");
- const PointerType *Ty =
- dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
- Value *Size = getFnValueByID(Record[1], Type::getInt32Ty(Context));
- if (!Ty || !Size) return Error("Invalid MALLOC record");
- if (!CurBB) return Error("Invalid malloc instruction with no BB");
- const Type *Int32Ty = IntegerType::getInt32Ty(CurBB->getContext());
- Constant *AllocSize = ConstantExpr::getSizeOf(Ty->getElementType());
- AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, Int32Ty);
- I = CallInst::CreateMalloc(CurBB, Int32Ty, Ty->getElementType(),
- AllocSize, Size, NULL);
- InstructionList.push_back(I);
- break;
- }
- case bitc::FUNC_CODE_INST_FREE: { // FREE: [op, opty]
- unsigned OpNum = 0;
- Value *Op;
- if (getValueTypePair(Record, OpNum, NextValueNo, Op) ||
- OpNum != Record.size())
- return Error("Invalid FREE record");
- if (!CurBB) return Error("Invalid free instruction with no BB");
- I = CallInst::CreateFree(Op, CurBB);
- InstructionList.push_back(I);
- break;
- }
case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align]
- // For backward compatibility, tolerate a lack of an opty, and use i32.
- // Remove this in LLVM 3.0.
- if (Record.size() < 3 || Record.size() > 4)
+ if (Record.size() != 4)
return Error("Invalid ALLOCA record");
- unsigned OpNum = 0;
const PointerType *Ty =
- dyn_cast_or_null<PointerType>(getTypeByID(Record[OpNum++]));
- const Type *OpTy = Record.size() == 4 ? getTypeByID(Record[OpNum++]) :
- Type::getInt32Ty(Context);
- Value *Size = getFnValueByID(Record[OpNum++], OpTy);
- unsigned Align = Record[OpNum++];
+ dyn_cast_or_null<PointerType>(getTypeByID(Record[0]));
+ const Type *OpTy = getTypeByID(Record[1]);
+ Value *Size = getFnValueByID(Record[2], OpTy);
+ unsigned Align = Record[3];
if (!Ty || !Size) return Error("Invalid ALLOCA record");
I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1);
InstructionList.push_back(I);
@@ -2364,7 +2523,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_STORE2: { // STORE2:[ptrty, ptr, val, align, vol]
+ case bitc::FUNC_CODE_INST_STORE: { // STORE2:[ptrty, ptr, val, align, vol]
unsigned OpNum = 0;
Value *Val, *Ptr;
if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) ||
@@ -2377,24 +2536,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
InstructionList.push_back(I);
break;
}
- case bitc::FUNC_CODE_INST_STORE: { // STORE:[val, valty, ptr, align, vol]
- // FIXME: Legacy form of store instruction. Should be removed in LLVM 3.0.
- unsigned OpNum = 0;
- Value *Val, *Ptr;
- if (getValueTypePair(Record, OpNum, NextValueNo, Val) ||
- getValue(Record, OpNum,
- PointerType::getUnqual(Val->getType()), Ptr)||
- OpNum+2 != Record.size())
- return Error("Invalid STORE record");
-
- I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1);
- InstructionList.push_back(I);
- break;
- }
- // FIXME: Remove this in LLVM 3.0.
- case bitc::FUNC_CODE_INST_CALL:
- LLVM2_7MetadataDetected = true;
- case bitc::FUNC_CODE_INST_CALL2: {
+ case bitc::FUNC_CODE_INST_CALL: {
// CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...]
if (Record.size() < 3)
return Error("Invalid CALL record");
@@ -2416,7 +2558,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
SmallVector<Value*, 16> Args;
// Read the fixed params.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) {
- if (FTy->getParamType(i)->getTypeID()==Type::LabelTyID)
+ if (FTy->getParamType(i)->isLabelTy())
Args.push_back(getBasicBlock(Record[OpNum]));
else
Args.push_back(getFnValueByID(Record[OpNum], FTy->getParamType(i)));
@@ -2436,7 +2578,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
}
}
- I = CallInst::Create(Callee, Args.begin(), Args.end());
+ I = CallInst::Create(Callee, Args);
InstructionList.push_back(I);
cast<CallInst>(I)->setCallingConv(
static_cast<CallingConv::ID>(CCInfo>>1));
@@ -2513,23 +2655,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
BlockAddrFwdRefs.erase(BAFRI);
}
- // FIXME: Remove this in LLVM 3.0.
- unsigned NewMDValueListSize = MDValueList.size();
-
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
MDValueList.shrinkTo(ModuleMDValueListSize);
-
- // Backwards compatibility hack: Function-local metadata numbers
- // were previously not reset between functions. This is now fixed,
- // however we still need to understand the old numbering in order
- // to be able to read old bitcode files.
- // FIXME: Remove this in LLVM 3.0.
- if (LLVM2_7MetadataDetected)
- MDValueList.resize(NewMDValueListSize);
-
std::vector<BasicBlock*>().swap(FunctionBBs);
-
return false;
}
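The reader-side changes lean on the new two-phase struct construction: a named struct can be created without a body (so forward and self references work) and completed later with setBody, which is what the TYPE_CODE_STRUCT_NAMED path does. A minimal sketch under the LLVM 3.0-era API (entry points were still shifting during this transition, e.g. StructType::create vs. createNamed):

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    Type *makeRecursiveList(LLVMContext &C) {
      // Phase 1: a named struct with no body yet, so it can be referenced
      // before it is complete (this is what lets the reader forward-ref it).
      StructType *Node = StructType::create(C, "list_node");
      // Phase 2: fill in the body once every element type exists --
      // including a pointer back to the struct itself.
      Type *Elts[] = { Type::getInt32Ty(C), PointerType::getUnqual(Node) };
      Node->setBody(Elts, /*isPacked=*/false);
      return Node;
    }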
diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h
index f8fc079..1b3bf1a 100644
--- a/lib/Bitcode/Reader/BitcodeReader.h
+++ b/lib/Bitcode/Reader/BitcodeReader.h
@@ -44,9 +44,9 @@ class BitcodeReaderValueList {
/// number that holds the resolved value.
typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
ResolveConstantsTy ResolveConstants;
- LLVMContext& Context;
+ LLVMContext &Context;
public:
- BitcodeReaderValueList(LLVMContext& C) : Context(C) {}
+ BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
~BitcodeReaderValueList() {
assert(ResolveConstants.empty() && "Constants not resolved?");
}
@@ -131,7 +131,7 @@ class BitcodeReader : public GVMaterializer {
const char *ErrorString;
- std::vector<PATypeHolder> TypeList;
+ std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
BitcodeReaderMDValueList MDValueList;
SmallVector<Instruction *, 64> InstructionList;
@@ -174,17 +174,10 @@ class BitcodeReader : public GVMaterializer {
typedef std::pair<unsigned, GlobalVariable*> BlockAddrRefTy;
DenseMap<Function*, std::vector<BlockAddrRefTy> > BlockAddrFwdRefs;
- /// LLVM2_7MetadataDetected - True if metadata produced by LLVM 2.7 or
- /// earlier was detected, in which case we behave slightly differently,
- /// for compatibility.
- /// FIXME: Remove in LLVM 3.0.
- bool LLVM2_7MetadataDetected;
-
public:
explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C)
: Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
- ErrorString(0), ValueList(C), MDValueList(C),
- LLVM2_7MetadataDetected(false) {
+ ErrorString(0), ValueList(C), MDValueList(C) {
HasReversedFunctionsWithBodies = false;
}
~BitcodeReader() {
@@ -217,12 +210,12 @@ public:
/// @returns true if an error occurred.
bool ParseTriple(std::string &Triple);
private:
- const Type *getTypeByID(unsigned ID, bool isTypeTable = false);
+ Type *getTypeByID(unsigned ID);
+ Type *getTypeByIDOrNull(unsigned ID);
Value *getFnValueByID(unsigned ID, const Type *Ty) {
- if (Ty == Type::getMetadataTy(Context))
+ if (Ty && Ty->isMetadataTy())
return MDValueList.getValueFwdRef(ID);
- else
- return ValueList.getValueFwdRef(ID, Ty);
+ return ValueList.getValueFwdRef(ID, Ty);
}
BasicBlock *getBasicBlock(unsigned ID) const {
if (ID >= FunctionBBs.size()) return 0; // Invalid ID
@@ -266,7 +259,10 @@ private:
bool ParseModule();
bool ParseAttributeBlock();
bool ParseTypeTable();
- bool ParseTypeSymbolTable();
+ bool ParseOldTypeTable(); // FIXME: Remove in LLVM 3.1
+ bool ParseTypeTableBody();
+
+ bool ParseOldTypeSymbolTable(); // FIXME: Remove in LLVM 3.1
bool ParseValueSymbolTable();
bool ParseConstants();
bool RememberAndSkipFunctionBody();
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index 6972a45..85d67ce 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -21,13 +21,14 @@
#include "llvm/Instructions.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
-#include "llvm/TypeSymbolTable.h"
#include "llvm/ValueSymbolTable.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Program.h"
#include <cctype>
+#include <map>
using namespace llvm;
/// These are manifest constants used by the bitcode writer. They do not need to
@@ -100,13 +101,16 @@ static unsigned GetEncodedBinaryOpcode(unsigned Opcode) {
}
}
-static void WriteStringRecord(unsigned Code, const std::string &Str,
+static void WriteStringRecord(unsigned Code, StringRef Str,
unsigned AbbrevToUse, BitstreamWriter &Stream) {
SmallVector<unsigned, 64> Vals;
// Code: [strchar x N]
- for (unsigned i = 0, e = Str.size(); i != e; ++i)
+ for (unsigned i = 0, e = Str.size(); i != e; ++i) {
+ if (AbbrevToUse && !BitCodeAbbrevOp::isChar6(Str[i]))
+ AbbrevToUse = 0;
Vals.push_back(Str[i]);
+ }
// Emit the finished record.
Stream.EmitRecord(Code, Vals, AbbrevToUse);
@@ -150,7 +154,7 @@ static void WriteAttributeTable(const ValueEnumerator &VE,
static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
const ValueEnumerator::TypeList &TypeList = VE.getTypes();
- Stream.EnterSubblock(bitc::TYPE_BLOCK_ID, 4 /*count from # abbrevs */);
+ Stream.EnterSubblock(bitc::TYPE_BLOCK_ID_NEW, 4 /*count from # abbrevs */);
SmallVector<uint64_t, 64> TypeVals;
// Abbrev for TYPE_CODE_POINTER.
@@ -171,15 +175,32 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
Log2_32_Ceil(VE.getTypes().size()+1)));
unsigned FunctionAbbrev = Stream.EmitAbbrev(Abbv);
- // Abbrev for TYPE_CODE_STRUCT.
+ // Abbrev for TYPE_CODE_STRUCT_ANON.
Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT));
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_ANON));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
Log2_32_Ceil(VE.getTypes().size()+1)));
- unsigned StructAbbrev = Stream.EmitAbbrev(Abbv);
+ unsigned StructAnonAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Abbrev for TYPE_CODE_STRUCT_NAME.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAME));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
+ unsigned StructNameAbbrev = Stream.EmitAbbrev(Abbv);
+ // Abbrev for TYPE_CODE_STRUCT_NAMED.
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_STRUCT_NAMED));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // ispacked
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
+ Log2_32_Ceil(VE.getTypes().size()+1)));
+ unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv);
+
+
// Abbrev for TYPE_CODE_ARRAY.
Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY));
@@ -201,16 +222,15 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
switch (T->getTypeID()) {
default: llvm_unreachable("Unknown type!");
- case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
- case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
- case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
- case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
- case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
+ case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break;
+ case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break;
+ case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break;
+ case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break;
+ case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break;
case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break;
- case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
- case Type::OpaqueTyID: Code = bitc::TYPE_CODE_OPAQUE; break;
- case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
- case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
+ case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break;
+ case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break;
+ case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break;
case Type::IntegerTyID:
// INTEGER: [width]
Code = bitc::TYPE_CODE_INTEGER;
@@ -241,13 +261,28 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) {
case Type::StructTyID: {
const StructType *ST = cast<StructType>(T);
// STRUCT: [ispacked, eltty x N]
- Code = bitc::TYPE_CODE_STRUCT;
TypeVals.push_back(ST->isPacked());
// Output all of the element types.
for (StructType::element_iterator I = ST->element_begin(),
E = ST->element_end(); I != E; ++I)
TypeVals.push_back(VE.getTypeID(*I));
- AbbrevToUse = StructAbbrev;
+
+ if (ST->isAnonymous()) {
+ Code = bitc::TYPE_CODE_STRUCT_ANON;
+ AbbrevToUse = StructAnonAbbrev;
+ } else {
+ if (ST->isOpaque()) {
+ Code = bitc::TYPE_CODE_OPAQUE;
+ } else {
+ Code = bitc::TYPE_CODE_STRUCT_NAMED;
+ AbbrevToUse = StructNamedAbbrev;
+ }
+
+ // Emit the name if it is present.
+ if (!ST->getName().empty())
+ WriteStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+ StructNameAbbrev, Stream);
+ }
break;
}
case Type::ArrayTyID: {
@@ -489,8 +524,8 @@ static void WriteMDNode(const MDNode *N,
Record.push_back(0);
}
}
- unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE2 :
- bitc::METADATA_NODE2;
+ unsigned MDCode = N->isFunctionLocal() ? bitc::METADATA_FN_NODE :
+ bitc::METADATA_NODE;
Stream.EmitRecord(MDCode, Record, 0);
Record.clear();
}
@@ -553,7 +588,7 @@ static void WriteModuleMetadata(const Module *M,
// Write named metadata operands.
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
Record.push_back(VE.getValueID(NMD->getOperand(i)));
- Stream.EmitRecord(bitc::METADATA_NAMED_NODE2, Record, 0);
+ Stream.EmitRecord(bitc::METADATA_NAMED_NODE, Record, 0);
Record.clear();
}
@@ -589,7 +624,7 @@ static void WriteMetadataAttachment(const Function &F,
SmallVector<uint64_t, 64> Record;
// Write metadata attachments
- // METADATA_ATTACHMENT2 - [m x [value, [n x [id, mdnode]]]
+  // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]]]
SmallVector<std::pair<unsigned, MDNode*>, 4> MDs;
for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -607,7 +642,7 @@ static void WriteMetadataAttachment(const Function &F,
Record.push_back(MDs[i].first);
Record.push_back(VE.getValueID(MDs[i].second));
}
- Stream.EmitRecord(bitc::METADATA_ATTACHMENT2, Record, 0);
+ Stream.EmitRecord(bitc::METADATA_ATTACHMENT, Record, 0);
Record.clear();
}
@@ -1078,12 +1113,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV;
break;
- case Instruction::PHI:
+ case Instruction::PHI: {
+ const PHINode &PN = cast<PHINode>(I);
Code = bitc::FUNC_CODE_INST_PHI;
- Vals.push_back(VE.getTypeID(I.getType()));
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
- Vals.push_back(VE.getValueID(I.getOperand(i)));
+ Vals.push_back(VE.getTypeID(PN.getType()));
+ for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
+ Vals.push_back(VE.getValueID(PN.getIncomingValue(i)));
+ Vals.push_back(VE.getValueID(PN.getIncomingBlock(i)));
+ }
break;
+ }
case Instruction::Alloca:
Code = bitc::FUNC_CODE_INST_ALLOCA;
@@ -1102,7 +1141,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
Vals.push_back(cast<LoadInst>(I).isVolatile());
break;
case Instruction::Store:
- Code = bitc::FUNC_CODE_INST_STORE2;
+ Code = bitc::FUNC_CODE_INST_STORE;
PushValueAndType(I.getOperand(1), InstID, Vals, VE); // ptrty + ptr
Vals.push_back(VE.getValueID(I.getOperand(0))); // val.
Vals.push_back(Log2_32(cast<StoreInst>(I).getAlignment())+1);
@@ -1113,7 +1152,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID,
const PointerType *PTy = cast<PointerType>(CI.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- Code = bitc::FUNC_CODE_INST_CALL2;
+ Code = bitc::FUNC_CODE_INST_CALL;
Vals.push_back(VE.getAttributeID(CI.getAttributes()));
Vals.push_back((CI.getCallingConv() << 1) | unsigned(CI.isTailCall()));
@@ -1257,7 +1296,7 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
Vals.push_back(DL.getCol());
Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0);
Vals.push_back(IA ? VE.getValueID(IA)+1 : 0);
- Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC2, Vals);
+ Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals);
Vals.clear();
LastDL = DL;
@@ -1273,46 +1312,6 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE,
Stream.ExitBlock();
}
-/// WriteTypeSymbolTable - Emit a block for the specified type symtab.
-static void WriteTypeSymbolTable(const TypeSymbolTable &TST,
- const ValueEnumerator &VE,
- BitstreamWriter &Stream) {
- if (TST.empty()) return;
-
- Stream.EnterSubblock(bitc::TYPE_SYMTAB_BLOCK_ID, 3);
-
- // 7-bit fixed width VST_CODE_ENTRY strings.
- BitCodeAbbrev *Abbv = new BitCodeAbbrev();
- Abbv->Add(BitCodeAbbrevOp(bitc::VST_CODE_ENTRY));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed,
- Log2_32_Ceil(VE.getTypes().size()+1)));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
- Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7));
- unsigned V7Abbrev = Stream.EmitAbbrev(Abbv);
-
- SmallVector<unsigned, 64> NameVals;
-
- for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
- TI != TE; ++TI) {
- // TST_ENTRY: [typeid, namechar x N]
- NameVals.push_back(VE.getTypeID(TI->second));
-
- const std::string &Str = TI->first;
- bool is7Bit = true;
- for (unsigned i = 0, e = Str.size(); i != e; ++i) {
- NameVals.push_back((unsigned char)Str[i]);
- if (Str[i] & 128)
- is7Bit = false;
- }
-
- // Emit the finished record.
- Stream.EmitRecord(bitc::VST_CODE_ENTRY, NameVals, is7Bit ? V7Abbrev : 0);
- NameVals.clear();
- }
-
- Stream.ExitBlock();
-}
-
// Emit blockinfo, which defines the standard abbreviations etc.
static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) {
// We only want to emit block info records for blocks that have multiple
@@ -1516,9 +1515,6 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) {
// Emit metadata.
WriteModuleMetadataStore(M, Stream);
- // Emit the type symbol table information.
- WriteTypeSymbolTable(M->getTypeSymbolTable(), VE, Stream);
-
// Emit names for globals/functions etc.
WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream);
@@ -1543,40 +1539,7 @@ enum {
DarwinBCHeaderSize = 5*4
};
-/// isARMTriplet - Return true if the triplet looks like:
-/// arm-*, thumb-*, armv[0-9]-*, thumbv[0-9]-*, armv5te-*, or armv6t2-*.
-static bool isARMTriplet(const std::string &TT) {
- size_t Pos = 0;
- size_t Size = TT.size();
- if (Size >= 6 &&
- TT[0] == 't' && TT[1] == 'h' && TT[2] == 'u' &&
- TT[3] == 'm' && TT[4] == 'b')
- Pos = 5;
- else if (Size >= 4 && TT[0] == 'a' && TT[1] == 'r' && TT[2] == 'm')
- Pos = 3;
- else
- return false;
-
- if (TT[Pos] == '-')
- return true;
- else if (TT[Pos] == 'v') {
- if (Size >= Pos+4 &&
- TT[Pos+1] == '6' && TT[Pos+2] == 't' && TT[Pos+3] == '2')
- return true;
- else if (Size >= Pos+4 &&
- TT[Pos+1] == '5' && TT[Pos+2] == 't' && TT[Pos+3] == 'e')
- return true;
- } else
- return false;
- while (++Pos < Size && TT[Pos] != '-') {
- if (!isdigit(TT[Pos]))
- return false;
- }
- return true;
-}
-
-static void EmitDarwinBCHeader(BitstreamWriter &Stream,
- const std::string &TT) {
+static void EmitDarwinBCHeader(BitstreamWriter &Stream, const Triple &TT) {
unsigned CPUType = ~0U;
// Match x86_64-*, i[3-9]86-*, powerpc-*, powerpc64-*, arm-*, thumb-*,
@@ -1590,16 +1553,16 @@ static void EmitDarwinBCHeader(BitstreamWriter &Stream,
DARWIN_CPU_TYPE_POWERPC = 18
};
- if (TT.find("x86_64-") == 0)
+ Triple::ArchType Arch = TT.getArch();
+ if (Arch == Triple::x86_64)
CPUType = DARWIN_CPU_TYPE_X86 | DARWIN_CPU_ARCH_ABI64;
- else if (TT.size() >= 5 && TT[0] == 'i' && TT[2] == '8' && TT[3] == '6' &&
- TT[4] == '-' && TT[1] - '3' < 6)
+ else if (Arch == Triple::x86)
CPUType = DARWIN_CPU_TYPE_X86;
- else if (TT.find("powerpc-") == 0)
+ else if (Arch == Triple::ppc)
CPUType = DARWIN_CPU_TYPE_POWERPC;
- else if (TT.find("powerpc64-") == 0)
+ else if (Arch == Triple::ppc64)
CPUType = DARWIN_CPU_TYPE_POWERPC | DARWIN_CPU_ARCH_ABI64;
- else if (isARMTriplet(TT))
+ else if (Arch == Triple::arm || Arch == Triple::thumb)
CPUType = DARWIN_CPU_TYPE_ARM;
// Traditional Bitcode starts after header.
@@ -1645,11 +1608,9 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) {
void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
// If this is darwin or another generic macho target, emit a file header and
// trailer if needed.
- bool isMacho =
- M->getTargetTriple().find("-darwin") != std::string::npos ||
- M->getTargetTriple().find("-macho") != std::string::npos;
- if (isMacho)
- EmitDarwinBCHeader(Stream, M->getTargetTriple());
+ Triple TT(M->getTargetTriple());
+ if (TT.isOSDarwin())
+ EmitDarwinBCHeader(Stream, TT);
// Emit the file header.
Stream.Emit((unsigned)'B', 8);
@@ -1662,6 +1623,6 @@ void llvm::WriteBitcodeToStream(const Module *M, BitstreamWriter &Stream) {
// Emit the module.
WriteModule(M, Stream);
- if (isMacho)
+ if (TT.isOSDarwin())
EmitDarwinBCTrailer(Stream, Stream.getBuffer().size());
}
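The writer now derives the Darwin header from a parsed Triple instead of hand-rolled substring matching. The calls below all appear in the patch (Triple::getArch, Triple::isOSDarwin); the wrapper function itself is only an illustrative sketch:

    #include "llvm/ADT/Triple.h"
    #include <string>
    using namespace llvm;

    // True if the target is Darwin and its architecture maps to one of the
    // Mach-O CPU types the bitcode wrapper header knows about.
    bool hasKnownDarwinCPUType(const std::string &TargetTriple) {
      Triple TT(TargetTriple);
      if (!TT.isOSDarwin())
        return false;
      switch (TT.getArch()) {
      case Triple::x86: case Triple::x86_64:
      case Triple::ppc: case Triple::ppc64:
      case Triple::arm: case Triple::thumb:
        return true;
      default:
        return false;   // Header is still emitted, but CPUType stays ~0U.
      }
    }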
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 5138c3c..b68bf92 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -17,7 +17,6 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
-#include "llvm/TypeSymbolTable.h"
#include "llvm/ValueSymbolTable.h"
#include "llvm/Instructions.h"
#include <algorithm>
@@ -59,9 +58,6 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
I != E; ++I)
EnumerateValue(I->getAliasee());
- // Enumerate types used by the type symbol table.
- EnumerateTypeSymbolTable(M->getTypeSymbolTable());
-
// Insert constants and metadata that are named at module level into the slot
// pool so that the module symbol table can refer to them...
EnumerateValueSymbolTable(M->getValueSymbolTable());
@@ -109,78 +105,12 @@ ValueEnumerator::ValueEnumerator(const Module *M) {
// Optimize constant ordering.
OptimizeConstants(FirstConstant, Values.size());
-
- OptimizeTypes();
-
- // Now that we rearranged the type table, rebuild TypeMap.
- for (unsigned i = 0, e = Types.size(); i != e; ++i)
- TypeMap[Types[i]] = i+1;
-}
-
-struct TypeAndDeps {
- const Type *Ty;
- unsigned NumDeps;
-};
-
-static int CompareByDeps(const void *a, const void *b) {
- const TypeAndDeps &ta = *(const TypeAndDeps*) a;
- const TypeAndDeps &tb = *(const TypeAndDeps*) b;
- return ta.NumDeps - tb.NumDeps;
-}
-
-static void VisitType(const Type *Ty, SmallPtrSet<const Type*, 16> &Visited,
- std::vector<const Type*> &Out) {
- if (Visited.count(Ty))
- return;
-
- Visited.insert(Ty);
-
- for (Type::subtype_iterator I2 = Ty->subtype_begin(),
- E2 = Ty->subtype_end(); I2 != E2; ++I2) {
- const Type *InnerType = I2->get();
- VisitType(InnerType, Visited, Out);
- }
-
- Out.push_back(Ty);
}
-void ValueEnumerator::OptimizeTypes(void) {
- // If the types form a DAG, this will compute a topological sort and
- // no forward references will be needed when reading them in.
- // If there are cycles, this is a simple but reasonable heuristic for
- // the minimum feedback arc set problem.
- const unsigned NumTypes = Types.size();
- std::vector<TypeAndDeps> TypeDeps;
- TypeDeps.resize(NumTypes);
-
- for (unsigned I = 0; I < NumTypes; ++I) {
- const Type *Ty = Types[I];
- TypeDeps[I].Ty = Ty;
- TypeDeps[I].NumDeps = 0;
- }
-
- for (unsigned I = 0; I < NumTypes; ++I) {
- const Type *Ty = TypeDeps[I].Ty;
- for (Type::subtype_iterator I2 = Ty->subtype_begin(),
- E2 = Ty->subtype_end(); I2 != E2; ++I2) {
- const Type *InnerType = I2->get();
- unsigned InnerIndex = TypeMap.lookup(InnerType) - 1;
- TypeDeps[InnerIndex].NumDeps++;
- }
- }
- array_pod_sort(TypeDeps.begin(), TypeDeps.end(), CompareByDeps);
-
- SmallPtrSet<const Type*, 16> Visited;
- Types.clear();
- Types.reserve(NumTypes);
- for (unsigned I = 0; I < NumTypes; ++I) {
- VisitType(TypeDeps[I].Ty, Visited, Types);
- }
-}
unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const {
InstructionMapType::const_iterator I = InstructionMap.find(Inst);
- assert (I != InstructionMap.end() && "Instruction is not mapped!");
+ assert(I != InstructionMap.end() && "Instruction is not mapped!");
return I->second;
}
@@ -235,14 +165,6 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) {
}
-/// EnumerateTypeSymbolTable - Insert all of the types in the specified symbol
-/// table.
-void ValueEnumerator::EnumerateTypeSymbolTable(const TypeSymbolTable &TST) {
- for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
- TI != TE; ++TI)
- EnumerateType(TI->second);
-}
-
/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
/// table into the values table.
void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) {
@@ -394,20 +316,40 @@ void ValueEnumerator::EnumerateValue(const Value *V) {
void ValueEnumerator::EnumerateType(const Type *Ty) {
- unsigned &TypeID = TypeMap[Ty];
+ unsigned *TypeID = &TypeMap[Ty];
// We've already seen this type.
- if (TypeID)
+ if (*TypeID)
return;
- // First time we saw this type, add it.
- Types.push_back(Ty);
- TypeID = Types.size();
-
- // Enumerate subtypes.
+ // If it is a non-anonymous struct, mark the type as being visited so that we
+ // don't recursively visit it. This is safe because we allow forward
+ // references of these in the bitcode reader.
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (!STy->isAnonymous())
+ *TypeID = ~0U;
+
+ // Enumerate all of the subtypes before we enumerate this type. This ensures
+ // that the type will be enumerated in an order that can be directly built.
for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
I != E; ++I)
EnumerateType(*I);
+
+ // Refresh the TypeID pointer in case the table rehashed.
+ TypeID = &TypeMap[Ty];
+
+ // Check to see if we got the pointer another way. This can happen when
+ // enumerating recursive types that hit the base case deeper than they start.
+ //
+ // If this is actually a struct that we are treating as forward ref'able,
+ // then emit the definition now that all of its contents are available.
+ if (*TypeID && *TypeID != ~0U)
+ return;
+
+ // Add this type now that its contents are all happily enumerated.
+ Types.push_back(Ty);
+
+ *TypeID = Types.size();
}
// Enumerate the types for the specified value. If the value is a constant,
diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h
index 1e42a26..6617b60 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.h
+++ b/lib/Bitcode/Writer/ValueEnumerator.h
@@ -30,7 +30,6 @@ class Module;
class MDNode;
class NamedMDNode;
class AttrListPtr;
-class TypeSymbolTable;
class ValueSymbolTable;
class MDSymbolTable;
@@ -135,7 +134,6 @@ public:
private:
void OptimizeConstants(unsigned CstStart, unsigned CstEnd);
- void OptimizeTypes();
void EnumerateMDNodeOperands(const MDNode *N);
void EnumerateMetadata(const Value *MD);
@@ -146,7 +144,6 @@ private:
void EnumerateOperandType(const Value *V);
void EnumerateAttributes(const AttrListPtr &PAL);
- void EnumerateTypeSymbolTable(const TypeSymbolTable &ST);
void EnumerateValueSymbolTable(const ValueSymbolTable &ST);
void EnumerateNamedMetadata(const Module *M);
};
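EnumerateType's new shape is a post-order walk with a cycle-breaking sentinel: a named (non-anonymous) struct is pre-marked with ~0U so recursion through it terminates, and it only receives its real ID after all of its subtypes are numbered. A simplified, generic sketch of the same trick (Node is a hypothetical stand-in for Type):

    #include <cstddef>
    #include <map>
    #include <vector>

    struct Node {
      std::vector<Node *> Subtypes;
      bool BreaksCycles;      // Analogue of a non-anonymous StructType.
    };

    void enumerate(Node *N, std::map<Node *, unsigned> &IDMap,
                   std::vector<Node *> &Order) {
      unsigned &ID = IDMap[N];
      if (ID)
        return;               // Already numbered, or already in progress.
      if (N->BreaksCycles)
        ID = ~0U;             // Sentinel: safe because readers may forward-ref.
      for (std::size_t i = 0, e = N->Subtypes.size(); i != e; ++i)
        enumerate(N->Subtypes[i], IDMap, Order);
      unsigned &FinalID = IDMap[N];   // Re-lookup, mirroring the rehash guard.
      if (FinalID && FinalID != ~0U)
        return;               // Numbered via another path during recursion.
      Order.push_back(N);
      FinalID = Order.size(); // IDs are 1-based, as in the enumerator.
    }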
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index dca1d29..25842a7 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "post-RA-sched"
#include "AggressiveAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -114,12 +115,13 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
AggressiveAntiDepBreaker::
AggressiveAntiDepBreaker(MachineFunction& MFi,
- TargetSubtarget::RegClassVector& CriticalPathRCs) :
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
- AllocatableSet(TRI->getAllocatableSet(MF)),
+ RegClassInfo(RCI),
State(NULL) {
/* Collect a bitset of all registers that are only broken if they
are on the critical path. */
@@ -402,7 +404,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -477,7 +479,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI,
// Note register reference...
const TargetRegisterClass *RC = NULL;
if (i < MI->getDesc().getNumOperands())
- RC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ RC = TII->getRegClass(MI->getDesc(), i, TRI);
AggressiveAntiDepState::RegisterReference RR = { &MO, RC };
RegRefs.insert(std::make_pair(Reg, RR));
}
@@ -618,9 +620,8 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
const TargetRegisterClass *SuperRC =
TRI->getMinimalPhysRegClass(SuperReg, MVT::Other);
- const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF);
- const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF);
- if (RB == RE) {
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(SuperRC);
+ if (Order.empty()) {
DEBUG(dbgs() << "\tEmpty Super Regclass!!\n");
return false;
}
@@ -628,17 +629,17 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
DEBUG(dbgs() << "\tFind Registers:");
if (RenameOrder.count(SuperRC) == 0)
- RenameOrder.insert(RenameOrderType::value_type(SuperRC, RE));
+ RenameOrder.insert(RenameOrderType::value_type(SuperRC, Order.size()));
- const TargetRegisterClass::iterator OrigR = RenameOrder[SuperRC];
- const TargetRegisterClass::iterator EndR = ((OrigR == RE) ? RB : OrigR);
- TargetRegisterClass::iterator R = OrigR;
+ unsigned OrigR = RenameOrder[SuperRC];
+ unsigned EndR = ((OrigR == Order.size()) ? 0 : OrigR);
+ unsigned R = OrigR;
do {
- if (R == RB) R = RE;
+ if (R == 0) R = Order.size();
--R;
- const unsigned NewSuperReg = *R;
+ const unsigned NewSuperReg = Order[R];
// Don't consider non-allocatable registers
- if (!AllocatableSet.test(NewSuperReg)) continue;
+ if (!RegClassInfo.isAllocatable(NewSuperReg)) continue;
// Don't replace a register with itself.
if (NewSuperReg == SuperReg) continue;
@@ -819,7 +820,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
DEBUG(dbgs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!AllocatableSet.test(AntiDepReg)) {
+ if (!RegClassInfo.isAllocatable(AntiDepReg)) {
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(dbgs() << " (non-allocatable)\n");
continue;
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index b7ddafc..7067784 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -23,13 +23,15 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
#include <map>
namespace llvm {
+class RegisterClassInfo;
+
/// Class AggressiveAntiDepState
/// Contains all the state necessary for anti-dep breaking.
class AggressiveAntiDepState {
@@ -117,11 +119,7 @@ namespace llvm {
MachineRegisterInfo &MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
-
- /// AllocatableSet - The set of allocatable registers.
- /// We'll be ignoring anti-dependencies on non-allocatable registers,
- /// because they may not be safe to break.
- const BitVector AllocatableSet;
+ const RegisterClassInfo &RegClassInfo;
/// CriticalPathSet - The set of registers that should only be
/// renamed if they are on the critical path.
@@ -133,7 +131,8 @@ namespace llvm {
public:
AggressiveAntiDepBreaker(MachineFunction& MFi,
- TargetSubtarget::RegClassVector& CriticalPathRCs);
+ const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
/// Start - Initialize anti-dep breaking for a new basic block.
@@ -158,8 +157,8 @@ namespace llvm {
void FinishBlock();
private:
- typedef std::map<const TargetRegisterClass *,
- TargetRegisterClass::const_iterator> RenameOrderType;
+ /// Keep track of a position in the allocation order for each regclass.
+ typedef std::map<const TargetRegisterClass *, unsigned> RenameOrderType;
/// IsImplicitDefUse - Return true if MO represents a register
/// that is both implicitly used and defined in MI
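With allocation orders now exposed as ArrayRef&lt;unsigned&gt;, the breaker's rename scan becomes a plain circular walk that resumes from a per-class saved position. A hedged sketch of that walk, with the real allocatability and liveness checks abstracted into a predicate:

    #include <vector>

    // Returns the first register accepted by Pred, or 0 if none; Start is
    // updated so the next query resumes where this one stopped. Pred stands
    // in for the allocatability/liveness checks of the real code.
    template <typename PredFn>
    unsigned pickFromOrder(const std::vector<unsigned> &Order,
                           unsigned &Start, PredFn Pred) {
      unsigned Size = static_cast<unsigned>(Order.size());
      if (Size == 0)
        return 0;
      unsigned End = (Start == Size) ? 0 : Start;
      unsigned R = Start;
      do {
        if (R == 0) R = Size;
        --R;                        // Walk backwards, wrapping at the front.
        if (Pred(Order[R])) {
          Start = R;                // Resume here next time.
          return Order[R];
        }
      } while (R != End);
      return 0;
    }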
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index a8ee2b6..1005f10 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -41,21 +41,19 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
if (HintPair.first) {
const TargetRegisterInfo &TRI = VRM.getTargetRegInfo();
// The remaining allocation order may depend on the hint.
- const unsigned *B, *E;
- tie(B, E) = TRI.getAllocationOrder(RC, HintPair.first, Hint,
- VRM.getMachineFunction());
-
- // Empty allocation order?
- if (B == E)
+ ArrayRef<unsigned> Order =
+ TRI.getRawAllocationOrder(RC, HintPair.first, Hint,
+ VRM.getMachineFunction());
+ if (Order.empty())
return;
// Copy the allocation order with reserved registers removed.
OwnedBegin = true;
- unsigned *P = new unsigned[E - B];
+ unsigned *P = new unsigned[Order.size()];
Begin = P;
- for (; B != E; ++B)
- if (!RCI.isReserved(*B))
- *P++ = *B;
+ for (unsigned i = 0; i != Order.size(); ++i)
+ if (!RCI.isReserved(Order[i]))
+ *P++ = Order[i];
End = P;
// Target-dependent hints require resolution.
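The same ArrayRef migration shows up here: getRawAllocationOrder returns a single ArrayRef rather than a (begin, end) pointer pair, so emptiness and size checks are direct and the reserved-register filter is a straightforward copy loop. A small sketch of the shape (the getter below is an illustrative stand-in, not the TargetRegisterInfo API):

    #include "llvm/ADT/ArrayRef.h"
    #include <cstddef>
    #include <vector>
    using namespace llvm;

    static const unsigned RawOrder[] = { 1, 2, 3, 4 };

    // Illustrative stand-in for the new getter shape.
    ArrayRef<unsigned> getRawOrderSketch() { return RawOrder; }

    // Copy the order, dropping registers the caller says are reserved.
    std::vector<unsigned> filterReserved(ArrayRef<unsigned> Order,
                                         bool (*IsReserved)(unsigned)) {
      std::vector<unsigned> Out;
      Out.reserve(Order.size());
      for (std::size_t i = 0, e = Order.size(); i != e; ++i)
        if (!IsReserved(Order[i]))
          Out.push_back(Order[i]);
      return Out;
    }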
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 161afba..7f314ee 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -575,6 +575,8 @@ static bool EmitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
}
} else if (MI->getOperand(0).isImm()) {
OS << MI->getOperand(0).getImm();
+ } else if (MI->getOperand(0).isCImm()) {
+ MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/);
} else {
assert(MI->getOperand(0).isReg() && "Unknown operand type");
if (MI->getOperand(0).getReg() == 0) {
@@ -1211,9 +1213,9 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
/// global in the specified llvm.used list for which emitUsedDirectiveFor
/// is true, as being used with this directive.
-void AsmPrinter::EmitLLVMUsedList(Constant *List) {
+void AsmPrinter::EmitLLVMUsedList(const Constant *List) {
// Should be an array of 'i8*'.
- ConstantArray *InitList = dyn_cast<ConstantArray>(List);
+ const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (InitList == 0) return;
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
@@ -1226,11 +1228,11 @@ void AsmPrinter::EmitLLVMUsedList(Constant *List) {
/// EmitXXStructorList - Emit the ctor or dtor list. This just prints out the
/// function pointers, ignoring the init priority.
-void AsmPrinter::EmitXXStructorList(Constant *List) {
+void AsmPrinter::EmitXXStructorList(const Constant *List) {
// Should be an array of '{ int, void ()* }' structs. The first value is the
// init priority, which we ignore.
if (!isa<ConstantArray>(List)) return;
- ConstantArray *InitList = cast<ConstantArray>(List);
+ const ConstantArray *InitList = cast<ConstantArray>(List);
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i)
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
@@ -1516,6 +1518,13 @@ static void EmitGlobalConstantVector(const ConstantVector *CV,
unsigned AddrSpace, AsmPrinter &AP) {
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
EmitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP);
+
+ const TargetData &TD = *AP.TM.getTargetData();
+ unsigned Size = TD.getTypeAllocSize(CV->getType());
+ unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) *
+ CV->getType()->getNumElements();
+ if (unsigned Padding = Size - EmittedSize)
+ AP.OutStreamer.EmitZeros(Padding, AddrSpace);
}
static void EmitGlobalConstantStruct(const ConstantStruct *CS,
@@ -1925,7 +1934,7 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
return false;
// The predecessor has to be immediately before this block.
- const MachineBasicBlock *Pred = *PI;
+ MachineBasicBlock *Pred = *PI;
if (!Pred->isLayoutSuccessor(MBB))
return false;
@@ -1934,9 +1943,28 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
if (Pred->empty())
return true;
- // Otherwise, check the last instruction.
- const MachineInstr &LastInst = Pred->back();
- return !LastInst.getDesc().isBarrier();
+  // Check the terminators in the previous block.
+ for (MachineBasicBlock::iterator II = Pred->getFirstTerminator(),
+ IE = Pred->end(); II != IE; ++II) {
+ MachineInstr &MI = *II;
+
+ // If it is not a simple branch, we are in a table somewhere.
+ if (!MI.getDesc().isBranch() || MI.getDesc().isIndirectBranch())
+ return false;
+
+      // If this block is an operand of one of the branches, this is not
+      // a fall-through.
+ for (MachineInstr::mop_iterator OI = MI.operands_begin(),
+ OE = MI.operands_end(); OI != OE; ++OI) {
+ const MachineOperand& OP = *OI;
+ if (OP.isJTI())
+ return false;
+ if (OP.isMBB() && OP.getMBB() == MBB)
+ return false;
+ }
+ }
+
+ return true;
}
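The EmitGlobalConstantVector hunk above fixes a subtle layout bug: a vector's allocation size can exceed the sum of its element sizes, and the gap must be zero-filled or subsequent globals would shift. A worked sketch of the arithmetic (the sizes are illustrative; the real code queries TargetData):

    #include <cassert>

    // Padding needed after emitting NumElts elements of EltSize bytes into
    // a slot of AllocSize bytes. For <3 x float>: 16 - 4*3 = 4 zero bytes.
    unsigned vectorTailPadding(unsigned AllocSize, unsigned EltSize,
                               unsigned NumElts) {
      unsigned EmittedSize = EltSize * NumElts;
      assert(AllocSize >= EmittedSize && "alloc size smaller than elements?");
      return AllocSize - EmittedSize;
    }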
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index c6166e2..5ac455e 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
@@ -111,7 +112,16 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
OutContext, OutStreamer,
*MAI));
- OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*Parser, TM));
+
+  // FIXME: It would be nice if we could avoid creating a new instance of
+ // MCSubtargetInfo here given TargetSubtargetInfo is available. However,
+ // we have to watch out for asm directives which can change subtarget
+ // state. e.g. .code 16, .code 32.
+ OwningPtr<MCSubtargetInfo>
+ STI(TM.getTarget().createMCSubtargetInfo(TM.getTargetTriple(),
+ TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+ OwningPtr<TargetAsmParser> TAP(TM.getTarget().createAsmParser(*STI, *Parser));
if (!TAP)
report_fatal_error("Inline asm not supported by this streamer because"
" we don't have an asm parser for this target\n");
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index bff1a35..1fe035e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -491,7 +491,7 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
}
/// addConstantValue - Add constant value entry in variable DIE.
-bool CompileUnit::addConstantValue(DIE *Die, ConstantInt *CI,
+bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI,
bool Unsigned) {
unsigned CIBitWidth = CI->getBitWidth();
if (CIBitWidth <= 64) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 60a9b28..213c7fc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -181,7 +181,7 @@ public:
/// addConstantValue - Add constant value entry in variable DIE.
bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty);
- bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
+ bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned);
/// addConstantFPValue - Add constant value entry in variable DIE.
bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 8845bfa..125e1e8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -229,6 +229,7 @@ public:
void DbgScope::dump() const {
raw_ostream &err = dbgs();
err.indent(IndentLevel);
+ err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n";
const MDNode *N = Desc;
N->dump();
if (AbstractScope)
@@ -618,6 +619,21 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
return ScopeDIE;
}
+/// isUnsignedDIType - Return true if type encoding is unsigned.
+static bool isUnsignedDIType(DIType Ty) {
+ DIDerivedType DTy(Ty);
+ if (DTy.Verify())
+ return isUnsignedDIType(DTy.getTypeDerivedFrom());
+
+ DIBasicType BTy(Ty);
+ if (BTy.Verify()) {
+ unsigned Encoding = BTy.getEncoding();
+ if (Encoding == dwarf::DW_ATE_unsigned ||
+ Encoding == dwarf::DW_ATE_unsigned_char)
+ return true;
+ }
+ return false;
+}
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
@@ -718,6 +734,11 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
else if (DVInsn->getOperand(0).isFPImm())
updated =
VariableCU->addConstantFPValue(VariableDie, DVInsn->getOperand(0));
+ else if (DVInsn->getOperand(0).isCImm())
+ updated =
+ VariableCU->addConstantValue(VariableDie,
+ DVInsn->getOperand(0).getCImm(),
+ isUnsignedDIType(DV->getType()));
} else {
VariableCU->addVariableAddress(DV, VariableDie,
Asm->getDebugValueLocation(DVInsn));
@@ -913,22 +934,6 @@ CompileUnit *DwarfDebug::getCompileUnit(const MDNode *N) const {
return I->second;
}
-/// isUnsignedDIType - Return true if type encoding is unsigned.
-static bool isUnsignedDIType(DIType Ty) {
- DIDerivedType DTy(Ty);
- if (DTy.Verify())
- return isUnsignedDIType(DTy.getTypeDerivedFrom());
-
- DIBasicType BTy(Ty);
- if (BTy.Verify()) {
- unsigned Encoding = BTy.getEncoding();
- if (Encoding == dwarf::DW_ATE_unsigned ||
- Encoding == dwarf::DW_ATE_unsigned_char)
- return true;
- }
- return false;
-}
-
// Return const expression if value is a GEP to access merged global
// constant. e.g.
// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
@@ -1017,7 +1022,7 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
} else {
TheCU->addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
- } else if (ConstantInt *CI =
+ } else if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(GV.getConstant()))
TheCU->addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
@@ -1310,7 +1315,6 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF,
void
DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
SmallPtrSet<const MDNode *, 16> &Processed) {
- const LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
VE = VMap.end(); VI != VE; ++VI) {
@@ -1320,11 +1324,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
DIVariable DV(Var);
const std::pair<unsigned, DebugLoc> &VP = VI->second;
- DbgScope *Scope = 0;
- if (const MDNode *IA = VP.second.getInlinedAt(Ctx))
- Scope = ConcreteScopes.lookup(IA);
- if (Scope == 0)
- Scope = DbgScopeMap.lookup(VP.second.getScope(Ctx));
+ DbgScope *Scope = findDbgScope(VP.second);
// If variable scope is not found then skip this variable.
if (Scope == 0)
@@ -1351,6 +1351,34 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0;
}
+/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting
+/// at MI.
+static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm,
+ const MCSymbol *FLabel,
+ const MCSymbol *SLabel,
+ const MachineInstr *MI) {
+ const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata();
+
+ if (MI->getNumOperands() != 3) {
+ MachineLocation MLoc = Asm->getDebugValueLocation(MI);
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) {
+ MachineLocation MLoc;
+ MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm());
+ return DotDebugLocEntry(FLabel, SLabel, MLoc, Var);
+ }
+ if (MI->getOperand(0).isImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm());
+
+ assert (0 && "Unexpected 3 operand DBG_VALUE instruction!");
+ return DotDebugLocEntry();
+}
+
/// collectVariableInfo - Populate DbgScope entries with variables' info.
void
DwarfDebug::collectVariableInfo(const MachineFunction *MF,
@@ -1379,7 +1407,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
DISubprogram(DV.getContext()).describes(MF->getFunction()))
Scope = CurrentFnDbgScope;
else
- Scope = findDbgScope(MInsn);
+ Scope = findDbgScope(MInsn->getDebugLoc());
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
@@ -1424,6 +1452,8 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
SLabel = FunctionEndSym;
else {
const MachineInstr *End = HI[1];
+ DEBUG(dbgs() << "DotDebugLoc Pair:\n"
+ << "\t" << *Begin << "\t" << *End << "\n");
if (End->isDebugValue())
SLabel = getLabelBeforeInsn(End);
else {
@@ -1435,25 +1465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
}
// The value is valid until the next DBG_VALUE or clobber.
- MachineLocation MLoc;
- if (Begin->getNumOperands() == 3) {
- if (Begin->getOperand(0).isReg() && Begin->getOperand(1).isImm()) {
- MLoc.set(Begin->getOperand(0).getReg(),
- Begin->getOperand(1).getImm());
- DotDebugLocEntries.
- push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
- }
- // FIXME: Handle isFPImm also.
- else if (Begin->getOperand(0).isImm()) {
- DotDebugLocEntries.
- push_back(DotDebugLocEntry(FLabel, SLabel,
- Begin->getOperand(0).getImm()));
- }
- } else {
- MLoc = Asm->getDebugValueLocation(Begin);
- DotDebugLocEntries.
- push_back(DotDebugLocEntry(FLabel, SLabel, MLoc, Var));
- }
+ DotDebugLocEntries.push_back(getDebugLocEntry(Asm, FLabel, SLabel, Begin));
}
DotDebugLocEntries.push_back(DotDebugLocEntry());
}
@@ -1550,8 +1562,12 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) {
}
/// getOrCreateDbgScope - Create DbgScope for the scope.
-DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
- const MDNode *InlinedAt) {
+DbgScope *DwarfDebug::getOrCreateDbgScope(DebugLoc DL) {
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
+ DL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
+
if (!InlinedAt) {
DbgScope *WScope = DbgScopeMap.lookup(Scope);
if (WScope)
@@ -1560,22 +1576,12 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
DbgScopeMap.insert(std::make_pair(Scope, WScope));
if (DIDescriptor(Scope).isLexicalBlock()) {
DbgScope *Parent =
- getOrCreateDbgScope(DILexicalBlock(Scope).getContext(), NULL);
+ getOrCreateDbgScope(DebugLoc::getFromDILexicalBlock(Scope));
WScope->setParent(Parent);
Parent->addScope(WScope);
- }
-
- if (!WScope->getParent()) {
- StringRef SPName = DISubprogram(Scope).getLinkageName();
- // We used to check only for a linkage name, but that fails
- // since we began omitting the linkage name for private
- // functions. The new way is to check for the name in metadata,
- // but that's not supported in old .ll test cases. Ergo, we
- // check both.
- if (SPName == Asm->MF->getFunction()->getName() ||
- DISubprogram(Scope).getFunction() == Asm->MF->getFunction())
- CurrentFnDbgScope = WScope;
- }
+ } else if (DIDescriptor(Scope).isSubprogram()
+ && DISubprogram(Scope).describes(Asm->MF->getFunction()))
+ CurrentFnDbgScope = WScope;
return WScope;
}
@@ -1587,37 +1593,14 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope,
WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
- DILocation DL(InlinedAt);
+ InlinedDbgScopeMap[DebugLoc::getFromDILocation(InlinedAt)] = WScope;
DbgScope *Parent =
- getOrCreateDbgScope(DL.getScope(), DL.getOrigLocation());
+ getOrCreateDbgScope(DebugLoc::getFromDILocation(InlinedAt));
WScope->setParent(Parent);
Parent->addScope(WScope);
-
- ConcreteScopes[InlinedAt] = WScope;
-
return WScope;
}
-/// hasValidLocation - Return true if debug location entry attached with
-/// machine instruction encodes valid location info.
-static bool hasValidLocation(LLVMContext &Ctx,
- const MachineInstr *MInsn,
- const MDNode *&Scope, const MDNode *&InlinedAt) {
- DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown()) return false;
-
- const MDNode *S = DL.getScope(Ctx);
-
- // There is no need to create another DIE for compile unit. For all
- // other scopes, create one DbgScope now. This will be translated
- // into a scope DIE at the end.
- if (DIScope(S).isCompileUnit()) return false;
-
- Scope = S;
- InlinedAt = DL.getInlinedAt(Ctx);
- return true;
-}
-
/// calculateDominanceGraph - Calculate dominance graph for DbgScope
/// hierarchy.
static void calculateDominanceGraph(DbgScope *Scope) {
@@ -1648,21 +1631,24 @@ static void calculateDominanceGraph(DbgScope *Scope) {
/// printDbgScopeInfo - Print DbgScope info for each machine instruction.
static
-void printDbgScopeInfo(LLVMContext &Ctx, const MachineFunction *MF,
+void printDbgScopeInfo(const MachineFunction *MF,
DenseMap<const MachineInstr *, DbgScope *> &MI2ScopeMap)
{
#ifndef NDEBUG
+ LLVMContext &Ctx = MF->getFunction()->getContext();
unsigned PrevDFSIn = 0;
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- const MDNode *Scope = NULL;
- const MDNode *InlinedAt = NULL;
+ MDNode *Scope = NULL;
+ MDNode *InlinedAt = NULL;
// Check if instruction has valid location information.
- if (hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+ DebugLoc MIDL = MInsn->getDebugLoc();
+ if (!MIDL.isUnknown()) {
+ MIDL.getScopeAndInlinedAt(Scope, InlinedAt, Ctx);
dbgs() << " [ ";
if (InlinedAt)
dbgs() << "*";
@@ -1692,11 +1678,9 @@ bool DwarfDebug::extractScopeInformation() {
return false;
// Scan each instruction and create scopes. First build a working set of scopes.
- LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
SmallVector<DbgRange, 4> MIRanges;
DenseMap<const MachineInstr *, DbgScope *> MI2ScopeMap;
- const MDNode *PrevScope = NULL;
- const MDNode *PrevInlinedAt = NULL;
+ DebugLoc PrevDL;
const MachineInstr *RangeBeginMI = NULL;
const MachineInstr *PrevMI = NULL;
for (MachineFunction::const_iterator I = Asm->MF->begin(), E = Asm->MF->end();
@@ -1704,17 +1688,16 @@ bool DwarfDebug::extractScopeInformation() {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- const MDNode *Scope = NULL;
- const MDNode *InlinedAt = NULL;
// Check if instruction has valid location information.
- if (!hasValidLocation(Ctx, MInsn, Scope, InlinedAt)) {
+ const DebugLoc MIDL = MInsn->getDebugLoc();
+ if (MIDL.isUnknown()) {
PrevMI = MInsn;
continue;
}
// If scope has not changed then skip this instruction.
- if (Scope == PrevScope && PrevInlinedAt == InlinedAt) {
+ if (MIDL == PrevDL) {
PrevMI = MInsn;
continue;
}
@@ -1727,9 +1710,13 @@ bool DwarfDebug::extractScopeInformation() {
      // If we have already seen the beginning of an instruction range and
      // the current instruction's scope does not match the scope of the first
      // instruction in this range, then create a new instruction range.
+      DEBUG(dbgs() << "Creating new instruction range:\n");
+ DEBUG(dbgs() << "Begin Range at " << *RangeBeginMI);
+ DEBUG(dbgs() << "End Range at " << *PrevMI);
+ DEBUG(dbgs() << "Next Range starting at " << *MInsn);
+ DEBUG(dbgs() << "------------------------\n");
DbgRange R(RangeBeginMI, PrevMI);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope,
- PrevInlinedAt);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
MIRanges.push_back(R);
}
@@ -1738,16 +1725,15 @@ bool DwarfDebug::extractScopeInformation() {
// Reset previous markers.
PrevMI = MInsn;
- PrevScope = Scope;
- PrevInlinedAt = InlinedAt;
+ PrevDL = MIDL;
}
}
// Create last instruction range.
- if (RangeBeginMI && PrevMI && PrevScope) {
+ if (RangeBeginMI && PrevMI && !PrevDL.isUnknown()) {
DbgRange R(RangeBeginMI, PrevMI);
MIRanges.push_back(R);
- MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt);
+ MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevDL);
}
if (!CurrentFnDbgScope)
@@ -1755,7 +1741,7 @@ bool DwarfDebug::extractScopeInformation() {
calculateDominanceGraph(CurrentFnDbgScope);
if (PrintDbgScope)
- printDbgScopeInfo(Ctx, Asm->MF, MI2ScopeMap);
+ printDbgScopeInfo(Asm->MF, MI2ScopeMap);
// Find ranges of instructions covered by each DbgScope;
DbgScope *PrevDbgScope = NULL;
@@ -1842,8 +1828,6 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned");
- /// ProcessedArgs - Collection of arguments already processed.
- SmallPtrSet<const MDNode *, 8> ProcessedArgs;
const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo();
/// LiveUserVar - Map physreg numbers to the MDNode they contain.
std::vector<const MDNode*> LiveUserVar(TRI->getNumRegs());
@@ -1883,8 +1867,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (Prev->isDebugValue()) {
// Coalesce identical entries at the end of History.
if (History.size() >= 2 &&
- Prev->isIdenticalTo(History[History.size() - 2]))
+ Prev->isIdenticalTo(History[History.size() - 2])) {
+ DEBUG(dbgs() << "Coalesce identical DBG_VALUE entries:\n"
+ << "\t" << *Prev
+ << "\t" << *History[History.size() - 2] << "\n");
History.pop_back();
+ }
// Terminate old register assignments that don't reach MI;
MachineFunction::const_iterator PrevMBB = Prev->getParent();
@@ -1894,9 +1882,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
// its basic block.
MachineBasicBlock::const_iterator LastMI =
PrevMBB->getLastNonDebugInstr();
- if (LastMI == PrevMBB->end())
+ if (LastMI == PrevMBB->end()) {
// Drop DBG_VALUE for empty range.
+ DEBUG(dbgs() << "Drop DBG_VALUE for empty range:\n"
+ << "\t" << *Prev << "\n");
History.pop_back();
+ }
else {
// Terminate after LastMI.
History.push_back(LastMI);
@@ -2053,10 +2044,10 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DbgVariableToFrameIndexMap.clear();
VarToAbstractVarMap.clear();
DbgVariableToDbgInstMap.clear();
+ InlinedDbgScopeMap.clear();
DeleteContainerSeconds(DbgScopeMap);
UserVariables.clear();
DbgValues.clear();
- ConcreteScopes.clear();
DeleteContainerSeconds(AbstractScopes);
AbstractScopesList.clear();
AbstractVariables.clear();
@@ -2083,22 +2074,17 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
return true;
}
-/// findDbgScope - Find DbgScope for the debug loc attached with an
-/// instruction.
-DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
- DbgScope *Scope = NULL;
- LLVMContext &Ctx =
- MInsn->getParent()->getParent()->getFunction()->getContext();
- DebugLoc DL = MInsn->getDebugLoc();
-
+/// findDbgScope - Find DbgScope for the debug loc.
+DbgScope *DwarfDebug::findDbgScope(DebugLoc DL) {
if (DL.isUnknown())
- return Scope;
+ return NULL;
- if (const MDNode *IA = DL.getInlinedAt(Ctx))
- Scope = ConcreteScopes.lookup(IA);
- if (Scope == 0)
+ DbgScope *Scope = NULL;
+ LLVMContext &Ctx = Asm->MF->getFunction()->getContext();
+ if (MDNode *IA = DL.getInlinedAt(Ctx))
+ Scope = InlinedDbgScopeMap.lookup(DebugLoc::getFromDILocation(IA));
+ else
Scope = DbgScopeMap.lookup(DL.getScope(Ctx));
-
return Scope;
}
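
With ConcreteScopes gone, the lookup splits purely on whether the location is inlined. Callers that used to hand findDbgScope a MachineInstr now pass the DebugLoc themselves; a minimal usage sketch:

    // Usage sketch: resolve the scope for an instruction's location.
    DbgScope *S = findDbgScope(MI->getDebugLoc());
    // Inlined locations resolve through InlinedDbgScopeMap (keyed by a
    // DebugLoc built from the inlined-at node); all others resolve
    // through DbgScopeMap (keyed by the scope MDNode).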
@@ -2597,56 +2583,61 @@ void DwarfDebug::emitDebugLoc() {
MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol();
Asm->EmitLabelDifference(end, begin, 2);
Asm->OutStreamer.EmitLabel(begin);
- if (Entry.isConstant()) {
+ if (Entry.isInt()) {
DIBasicType BTy(DV.getType());
if (BTy.Verify() &&
(BTy.getEncoding() == dwarf::DW_ATE_signed
|| BTy.getEncoding() == dwarf::DW_ATE_signed_char)) {
Asm->OutStreamer.AddComment("DW_OP_consts");
Asm->EmitInt8(dwarf::DW_OP_consts);
- Asm->EmitSLEB128(Entry.getConstant());
+ Asm->EmitSLEB128(Entry.getInt());
} else {
Asm->OutStreamer.AddComment("DW_OP_constu");
Asm->EmitInt8(dwarf::DW_OP_constu);
- Asm->EmitULEB128(Entry.getConstant());
+ Asm->EmitULEB128(Entry.getInt());
}
- } else if (DV.hasComplexAddress()) {
- unsigned N = DV.getNumAddrElements();
- unsigned i = 0;
- if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
- if (Entry.Loc.getOffset()) {
- i = 2;
- Asm->EmitDwarfRegOp(Entry.Loc);
- Asm->OutStreamer.AddComment("DW_OP_deref");
- Asm->EmitInt8(dwarf::DW_OP_deref);
- Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
- Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
- Asm->EmitSLEB128(DV.getAddrElement(1));
+ } else if (Entry.isLocation()) {
+ if (!DV.hasComplexAddress())
+ // Regular entry.
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ else {
+ // Complex address entry.
+ unsigned N = DV.getNumAddrElements();
+ unsigned i = 0;
+ if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) {
+ if (Entry.Loc.getOffset()) {
+ i = 2;
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ Asm->OutStreamer.AddComment("DW_OP_deref");
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ Asm->OutStreamer.AddComment("DW_OP_plus_uconst");
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitSLEB128(DV.getAddrElement(1));
+ } else {
+          // If the first address element is OpPlus, emit
+          // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
+ MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
+ Asm->EmitDwarfRegOp(Loc);
+ i = 2;
+ }
} else {
- // If first address element is OpPlus then emit
- // DW_OP_breg + Offset instead of DW_OP_reg + Offset.
- MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1));
- Asm->EmitDwarfRegOp(Loc);
- i = 2;
+ Asm->EmitDwarfRegOp(Entry.Loc);
+ }
+
+ // Emit remaining complex address elements.
+ for (; i < N; ++i) {
+ uint64_t Element = DV.getAddrElement(i);
+ if (Element == DIBuilder::OpPlus) {
+ Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
+ Asm->EmitULEB128(DV.getAddrElement(++i));
+ } else if (Element == DIBuilder::OpDeref)
+ Asm->EmitInt8(dwarf::DW_OP_deref);
+ else llvm_unreachable("unknown Opcode found in complex address");
}
- } else {
- Asm->EmitDwarfRegOp(Entry.Loc);
- }
-
- // Emit remaining complex address elements.
- for (; i < N; ++i) {
- uint64_t Element = DV.getAddrElement(i);
- if (Element == DIBuilder::OpPlus) {
- Asm->EmitInt8(dwarf::DW_OP_plus_uconst);
- Asm->EmitULEB128(DV.getAddrElement(++i));
- } else if (Element == DIBuilder::OpDeref)
- Asm->EmitInt8(dwarf::DW_OP_deref);
- else llvm_unreachable("unknown Opcode found in complex address");
}
- } else {
- // Regular entry.
- Asm->EmitDwarfRegOp(Entry.Loc);
}
+      // else ... ignore constant FP values. There is no good way to
+      // represent them in DWARF here.
Asm->OutStreamer.EmitLabel(end);
}
}
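
A hand-worked example of the complex-address path may help (DWARF opcode values are from the standard; the register number is illustrative). For address elements [OpPlus, 8] with no offset in Entry.Loc, the else-branch builds MachineLocation(Reg, 8), so a location in DWARF register 6 emits DW_OP_breg6 followed by SLEB128(8), i.e. the bytes 0x76 0x08. If Entry.Loc does carry an offset, the first branch instead emits the register op, then DW_OP_deref (0x06), DW_OP_plus_uconst (0x23), and SLEB128(8).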
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index abda2e6..b245006 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -69,17 +69,35 @@ typedef struct DotDebugLocEntry {
const MDNode *Variable;
bool Merged;
bool Constant;
- int64_t iConstant;
+ enum EntryType {
+ E_Location,
+ E_Integer,
+ E_ConstantFP,
+ E_ConstantInt
+ };
+ enum EntryType EntryKind;
+
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constants;
DotDebugLocEntry()
: Begin(0), End(0), Variable(0), Merged(false),
- Constant(false), iConstant(0) {}
+ Constant(false) { Constants.Int = 0;}
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L,
const MDNode *V)
: Begin(B), End(E), Loc(L), Variable(V), Merged(false),
- Constant(false), iConstant(0) {}
+ Constant(false) { Constants.Int = 0; EntryKind = E_Location; }
DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i)
: Begin(B), End(E), Variable(0), Merged(false),
- Constant(true), iConstant(i) {}
+ Constant(true) { Constants.Int = i; EntryKind = E_Integer; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; }
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr)
+ : Begin(B), End(E), Variable(0), Merged(false),
+ Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; }
  /// Empty entries are also used as a trigger to emit a temp label. Such
  /// labels are referenced to find the debug_loc offset for a given DIE.
@@ -91,8 +109,13 @@ typedef struct DotDebugLocEntry {
Next->Begin = Begin;
Merged = true;
}
- bool isConstant() { return Constant; }
- int64_t getConstant() { return iConstant; }
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() { return Constants.Int; }
+ const ConstantFP *getConstantFP() { return Constants.CFP; }
+ const ConstantInt *getConstantInt() { return Constants.CIP; }
} DotDebugLocEntry;
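
DotDebugLocEntry is now a small discriminated union: EntryKind selects which Constants member is live, and each get* accessor is only meaningful when the matching is* predicate holds. A self-contained sketch of the same pattern, with the DWARF-specific fields stripped out and an assert added for illustration:

    // Minimal tagged-union sketch (simplified from DotDebugLocEntry).
    #include <cassert>
    #include <cstdint>

    struct Entry {
      enum Kind { Location, Integer } EntryKind;
      union {
        int64_t Int;      // valid iff EntryKind == Integer
        const void *Ptr;  // stand-in for the ConstantFP*/ConstantInt* members
      } U;
      bool isInt() const { return EntryKind == Integer; }
      int64_t getInt() const {
        assert(isInt() && "wrong discriminant");
        return U.Int;
      }
    };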
//===----------------------------------------------------------------------===//
@@ -178,12 +201,10 @@ class DwarfDebug {
/// DbgScopeMap - Tracks the scopes in the current function. Owns the
/// contained DbgScope*s.
- ///
DenseMap<const MDNode *, DbgScope *> DbgScopeMap;
- /// ConcreteScopes - Tracks the concrete scopees in the current function.
- /// These scopes are also included in DbgScopeMap.
- DenseMap<const MDNode *, DbgScope *> ConcreteScopes;
+ /// InlinedDbgScopeMap - Tracks inlined function scopes in current function.
+ DenseMap<DebugLoc, DbgScope *> InlinedDbgScopeMap;
  /// AbstractScopes - Tracks the abstract scopes in a module. These scopes
  /// are not included in DbgScopeMap. AbstractScopes owns its DbgScope*s.
@@ -296,7 +317,7 @@ private:
void assignAbbrevNumber(DIEAbbrev &Abbrev);
/// getOrCreateDbgScope - Create DbgScope for the scope.
- DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
+ DbgScope *getOrCreateDbgScope(DebugLoc DL);
DbgScope *getOrCreateAbstractScope(const MDNode *N);
@@ -427,9 +448,8 @@ private:
/// is found. Update FI to hold value of the index.
bool findVariableFrameIndex(const DbgVariable *V, int *FI);
- /// findDbgScope - Find DbgScope for the debug loc attached with an
- /// instruction.
- DbgScope *findDbgScope(const MachineInstr *MI);
+ /// findDbgScope - Find DbgScope for the debug loc.
+ DbgScope *findDbgScope(DebugLoc DL);
  /// identifyScopeMarkers() - Identify instructions that mark the
  /// beginning or end of a scope.
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 967a278..1f992fa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -512,6 +512,8 @@ void DwarfException::EmitExceptionTable() {
SizeAlign = 0;
}
+ bool VerboseAsm = Asm->OutStreamer.isVerboseAsm();
+
// SjLj Exception handling
if (IsSJLJ) {
Asm->EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
@@ -525,14 +527,30 @@ void DwarfException::EmitExceptionTable() {
I = CallSites.begin(), E = CallSites.end(); I != E; ++I, ++idx) {
const CallSiteEntry &S = *I;
+ if (VerboseAsm) {
+ // Emit comments that decode the call site.
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(idx) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" On exception at call site ") +
+ llvm::utostr(idx));
+
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" Action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" Action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
- Asm->EmitULEB128(idx, "Landing pad");
+ Asm->EmitULEB128(idx);
// Offset of the first associated action record, relative to the start of
// the action table. This value is biased by 1 (1 indicates the start of
// the action table), and 0 indicates that there are no actions.
- Asm->EmitULEB128(S.Action, "Action");
+ Asm->EmitULEB128(S.Action);
}
} else {
// DWARF Exception handling
@@ -562,6 +580,7 @@ void DwarfException::EmitExceptionTable() {
// Add extra padding if it wasn't added to the TType base offset.
Asm->EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
+ unsigned Entry = 0;
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
const CallSiteEntry &S = *I;
@@ -576,19 +595,38 @@ void DwarfException::EmitExceptionTable() {
if (EndLabel == 0)
EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
+ if (VerboseAsm) {
+ // Emit comments that decode the call site.
+ Asm->OutStreamer.AddComment(Twine(">> Call Site ") +
+ llvm::utostr(++Entry) + " <<");
+ Asm->OutStreamer.AddComment(Twine(" Call between ") +
+ BeginLabel->getName() + " and " +
+ EndLabel->getName());
+
+ if (!S.PadLabel) {
+ Asm->OutStreamer.AddComment(" has no landing pad");
+ } else {
+ Asm->OutStreamer.AddComment(Twine(" jumps to ") +
+ S.PadLabel->getName());
+
+ if (S.Action == 0)
+ Asm->OutStreamer.AddComment(" On action: cleanup");
+ else
+ Asm->OutStreamer.AddComment(Twine(" On action: ") +
+ llvm::utostr((S.Action - 1) / 2 + 1));
+ }
+
+ Asm->OutStreamer.AddBlankLine();
+ }
+
// Offset of the call site relative to the previous call site, counted in
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
- Asm->OutStreamer.AddComment("Region start");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
-
- Asm->OutStreamer.AddComment("Region length");
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
-
// Offset of the landing pad, counted in 16-byte bundles relative to the
// @LPStart address.
- Asm->OutStreamer.AddComment("Landing pad");
if (!S.PadLabel)
Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
else
@@ -597,45 +635,63 @@ void DwarfException::EmitExceptionTable() {
// Offset of the first associated action record, relative to the start of
// the action table. This value is biased by 1 (1 indicates the start of
// the action table), and 0 indicates that there are no actions.
- Asm->EmitULEB128(S.Action, "Action");
+ Asm->EmitULEB128(S.Action);
}
}
// Emit the Action Table.
- if (Actions.size() != 0) {
- Asm->OutStreamer.AddComment("-- Action Record Table --");
- Asm->OutStreamer.AddBlankLine();
- }
-
+ int Entry = 0;
for (SmallVectorImpl<ActionEntry>::const_iterator
I = Actions.begin(), E = Actions.end(); I != E; ++I) {
const ActionEntry &Action = *I;
- Asm->OutStreamer.AddComment("Action Record");
- Asm->OutStreamer.AddBlankLine();
+
+ if (VerboseAsm) {
+ // Emit comments that decode the action table.
+ Asm->OutStreamer.AddComment(Twine(">> Action Record ") +
+ llvm::utostr(++Entry) + " <<");
+ if (Action.ValueForTypeID >= 0)
+ Asm->OutStreamer.AddComment(Twine(" Catch TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+ else
+ Asm->OutStreamer.AddComment(Twine(" Filter TypeInfo ") +
+ llvm::itostr(Action.ValueForTypeID));
+
+ if (Action.NextAction == 0) {
+ Asm->OutStreamer.AddComment(" No further actions");
+ } else {
+ unsigned NextAction = Entry + (Action.NextAction + 1) / 2;
+ Asm->OutStreamer.AddComment(Twine(" Continue to action ") +
+ llvm::utostr(NextAction));
+ }
+
+ Asm->OutStreamer.AddBlankLine();
+ }
// Type Filter
//
// Used by the runtime to match the type of the thrown exception to the
// type of the catch clauses or the types in the exception specification.
- Asm->EmitSLEB128(Action.ValueForTypeID, " TypeInfo index");
+ Asm->EmitSLEB128(Action.ValueForTypeID);
// Action Record
//
// Self-relative signed displacement in bytes of the next action record,
// or 0 if there is no next action record.
- Asm->EmitSLEB128(Action.NextAction, " Next action");
+ Asm->EmitSLEB128(Action.NextAction);
}
// Emit the Catch TypeInfos.
- if (!TypeInfos.empty()) {
- Asm->OutStreamer.AddComment("-- Catch TypeInfos --");
+ if (VerboseAsm && !TypeInfos.empty()) {
+ Asm->OutStreamer.AddComment(">> Catch TypeInfos <<");
Asm->OutStreamer.AddBlankLine();
+ Entry = TypeInfos.size();
}
+
for (std::vector<const GlobalVariable *>::const_reverse_iterator
I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) {
const GlobalVariable *GV = *I;
-
- Asm->OutStreamer.AddComment("TypeInfo");
+ if (VerboseAsm)
+ Asm->OutStreamer.AddComment(Twine("TypeInfo ") + llvm::utostr(Entry--));
if (GV)
Asm->EmitReference(GV, TTypeEncoding);
else
@@ -644,14 +700,21 @@ void DwarfException::EmitExceptionTable() {
}
// Emit the Exception Specifications.
- if (!FilterIds.empty()) {
- Asm->OutStreamer.AddComment("-- Filter IDs --");
+ if (VerboseAsm && !FilterIds.empty()) {
+ Asm->OutStreamer.AddComment(">> Filter TypeInfos <<");
Asm->OutStreamer.AddBlankLine();
+ Entry = 0;
}
for (std::vector<unsigned>::const_iterator
I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) {
unsigned TypeID = *I;
- Asm->EmitULEB128(TypeID, TypeID != 0 ? "Exception specification" : 0);
+ if (VerboseAsm) {
+ --Entry;
+ if (TypeID != 0)
+ Asm->OutStreamer.AddComment(Twine("FilterInfo ") + llvm::itostr(Entry));
+ }
+
+ Asm->EmitULEB128(TypeID);
}
Asm->EmitAlignment(2);
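
The bias arithmetic in the verbose comments deserves a worked example; it assumes, as the code implicitly does, that each action record encodes as two one-byte SLEB128 fields. A call site with S.Action == 3 refers to action record (3 - 1) / 2 + 1 == 2. If that record has NextAction == 2, the displacement is two bytes forward, so the "Continue to action" comment prints 2 + (2 + 1) / 2 == 3. S.Action == 0 still means cleanup, and S.Action == 1 means the first record in the table.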
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 719cd26..99090a8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -108,6 +108,9 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end()-1);
+  // Avoid a stale TriedMerging match if this block's pointer gets reused.
+ TriedMerging.erase(MBB);
+
// Remove the block.
MF->erase(MBB);
}
@@ -171,6 +174,8 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MachineModuleInfo *mmi) {
if (!tii) return false;
+ TriedMerging.clear();
+
TII = tii;
TRI = tri;
MMI = mmi;
@@ -361,11 +366,31 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
+void BranchFolder::MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB) {
+ if (RS) {
+ RS->enterBasicBlock(CurMBB);
+ if (!CurMBB->empty())
+ RS->forward(prior(CurMBB->end()));
+ BitVector RegsLiveAtExit(TRI->getNumRegs());
+ RS->getRegsUsed(RegsLiveAtExit, false);
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
+ if (RegsLiveAtExit[i])
+ NewMBB->addLiveIn(i);
+ }
+}
+
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
+ MachineBasicBlock *CurMBB = OldInst->getParent();
+
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
+
+ // For targets that use the register scavenger, we must maintain LiveIns.
+ MaintainLiveIns(CurMBB, NewDest);
+
++NumTailMerge;
}
@@ -394,16 +419,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
// For targets that use the register scavenger, we must maintain LiveIns.
- if (RS) {
- RS->enterBasicBlock(&CurMBB);
- if (!CurMBB.empty())
- RS->forward(prior(CurMBB.end()));
- BitVector RegsLiveAtExit(TRI->getNumRegs());
- RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
- if (RegsLiveAtExit[i])
- NewMBB->addLiveIn(i);
- }
+ MaintainLiveIns(&CurMBB, NewMBB);
return NewMBB;
}
@@ -416,10 +432,10 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
for (; I != E; ++I) {
if (I->isDebugValue())
continue;
- const TargetInstrDesc &TID = I->getDesc();
- if (TID.isCall())
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall())
Time += 10;
- else if (TID.mayLoad() || TID.mayStore())
+ else if (MCID.mayLoad() || MCID.mayStore())
Time += 2;
else
++Time;
@@ -799,14 +815,21 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// First find blocks with no successors.
MergePotentials.clear();
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E && MergePotentials.size() < TailMergeThreshold; ++I) {
+ if (TriedMerging.count(I))
+ continue;
if (I->succ_empty())
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I), I));
}
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
// See if we can do any tail merging on those.
- if (MergePotentials.size() < TailMergeThreshold &&
- MergePotentials.size() >= 2)
+ if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(NULL, NULL);
// Look at blocks (IBB) with multiple predecessors (PBB).
@@ -830,15 +853,17 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I) {
- if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
+ if (I->pred_size() >= 2) {
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
MachineBasicBlock *IBB = I;
MachineBasicBlock *PredBB = prior(I);
MergePotentials.clear();
for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
E2 = I->pred_end();
- P != E2; ++P) {
+ P != E2 && MergePotentials.size() < TailMergeThreshold; ++P) {
MachineBasicBlock *PBB = *P;
+ if (TriedMerging.count(PBB))
+ continue;
      // Skip blocks that loop to themselves; we can't tail merge these.
if (PBB == IBB)
continue;
@@ -891,6 +916,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P));
}
}
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
if (MergePotentials.size() >= 2)
MadeChange |= TryTailMergeBlocks(IBB, PredBB);
// Reinsert an unconditional branch if needed.
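
The TriedMerging bookkeeping implements a simple cap-and-blacklist scheme: stop collecting candidates at TailMergeThreshold, and if the cap was reached, remember the collected blocks so later passes over the function do not revisit them. A generic sketch of the pattern, with container and helper names invented for illustration:

    // Sketch: cap a worklist and blacklist what was tried (names invented).
    std::vector<Block*> Work;
    SmallPtrSet<const Block*, 2> Tried;
    for (unsigned i = 0;
         i != AllBlocks.size() && Work.size() < Threshold; ++i) {
      if (Tried.count(AllBlocks[i]))
        continue;                       // already handled on a prior pass
      Work.push_back(AllBlocks[i]);
    }
    if (Work.size() == Threshold)       // cap hit: avoid quadratic rework
      for (unsigned i = 0, e = Work.size(); i != e; ++i)
        Tried.insert(Work[i]);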
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 4daf4ec..df795df 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -10,6 +10,7 @@
#ifndef LLVM_CODEGEN_BRANCHFOLDING_HPP
#define LLVM_CODEGEN_BRANCHFOLDING_HPP
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include <vector>
@@ -47,6 +48,7 @@ namespace llvm {
};
typedef std::vector<MergePotentialsElt>::iterator MPIterator;
std::vector<MergePotentialsElt> MergePotentials;
+ SmallPtrSet<const MachineBasicBlock*, 2> TriedMerging;
class SameTailElt {
MPIterator MPIter;
@@ -93,6 +95,8 @@ namespace llvm {
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
+ void MaintainLiveIns(MachineBasicBlock *CurMBB,
+ MachineBasicBlock *NewMBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index c726d92..06d2a95 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -33,6 +33,8 @@ add_llvm_library(LLVMCodeGen
LocalStackSlotAllocation.cpp
LowerSubregs.cpp
MachineBasicBlock.cpp
+ MachineBlockFrequency.cpp
+ MachineBranchProbabilityInfo.cpp
MachineCSE.cpp
MachineDominators.cpp
MachineFunction.cpp
@@ -58,7 +60,6 @@ add_llvm_library(LLVMCodeGen
Passes.cpp
PeepholeOptimizer.cpp
PostRASchedulerList.cpp
- PreAllocSplitting.cpp
ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
@@ -78,7 +79,6 @@ add_llvm_library(LLVMCodeGen
ScoreboardHazardRecognizer.cpp
ShadowStackGC.cpp
ShrinkWrapping.cpp
- SimpleRegisterCoalescing.cpp
SjLjEHPrepare.cpp
SlotIndexes.cpp
Spiller.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 5d722ee..e6b3bbc 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -188,6 +188,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
const TargetRegisterClass *OldRC = MRI.getRegClass(reg);
const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC);
@@ -202,8 +203,11 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
// TRI doesn't have accurate enough information to model this yet.
if (I.getOperand().getSubReg())
return;
+    // Inline asm instructions don't remember their constraints.
+ if (I->isInlineAsm())
+ return;
const TargetRegisterClass *OpRC =
- I->getDesc().getRegClass(I.getOperandNo(), TRI);
+ TII->getRegClass(I->getDesc(), I.getOperandNo(), TRI);
if (OpRC)
NewRC = getCommonSubClass(NewRC, OpRC);
if (!NewRC || NewRC == OldRC)
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 515e6f9..489746c 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -37,13 +37,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeOptimizePHIsPass(Registry);
initializePHIEliminationPass(Registry);
initializePeepholeOptimizerPass(Registry);
- initializePreAllocSplittingPass(Registry);
initializeProcessImplicitDefsPass(Registry);
initializePEIPass(Registry);
initializeRALinScanPass(Registry);
- initializeRegisterCoalescerAnalysisGroup(Registry);
+ initializeRegisterCoalescerPass(Registry);
initializeRenderMachineFunctionPass(Registry);
- initializeSimpleRegisterCoalescingPass(Registry);
initializeSlotIndexesPass(Registry);
initializeLoopSplitterPass(Registry);
initializeStackProtectorPass(Registry);
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 51d984f..84c4d59 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -27,12 +27,12 @@
using namespace llvm;
CriticalAntiDepBreaker::
-CriticalAntiDepBreaker(MachineFunction& MFi) :
+CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo &RCI) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TII(MF.getTarget().getInstrInfo()),
TRI(MF.getTarget().getRegisterInfo()),
- AllocatableSet(TRI->getAllocatableSet(MF)),
+ RegClassInfo(RCI),
Classes(TRI->getNumRegs(), static_cast<const TargetRegisterClass *>(0)),
KillIndices(TRI->getNumRegs(), 0),
DefIndices(TRI->getNumRegs(), 0) {}
@@ -207,7 +207,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -295,7 +295,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
const TargetRegisterClass *NewRC = 0;
if (i < MI->getDesc().getNumOperands())
- NewRC = MI->getDesc().OpInfo[i].getRegClass(TRI);
+ NewRC = TII->getRegClass(MI->getDesc(), i, TRI);
// For now, only allow the register to be changed if its register
// class is consistent across all uses.
@@ -385,11 +385,9 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
unsigned LastNewReg,
const TargetRegisterClass *RC)
{
- for (TargetRegisterClass::iterator R = RC->allocation_order_begin(MF),
- RE = RC->allocation_order_end(MF); R != RE; ++R) {
- unsigned NewReg = *R;
- // Don't consider non-allocatable registers
- if (!AllocatableSet.test(NewReg)) continue;
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned NewReg = Order[i];
// Don't replace a register with itself.
if (NewReg == AntiDepReg) continue;
// Don't replace a register with one that was recently used to repair
@@ -534,7 +532,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
if (Edge->getKind() == SDep::Anti) {
AntiDepReg = Edge->getReg();
assert(AntiDepReg != 0 && "Anti-dependence on reg0?");
- if (!AllocatableSet.test(AntiDepReg))
+ if (!RegClassInfo.isAllocatable(AntiDepReg))
// Don't break anti-dependencies on non-allocatable registers.
AntiDepReg = 0;
else if (KeepRegs.count(AntiDepReg))
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 5bbb8f5..0710780 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -17,6 +17,7 @@
#define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H
#include "AntiDepBreaker.h"
+#include "RegisterClassInfo.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -27,6 +28,7 @@
#include <map>
namespace llvm {
+class RegisterClassInfo;
class TargetInstrInfo;
class TargetRegisterInfo;
@@ -35,6 +37,7 @@ class TargetRegisterInfo;
MachineRegisterInfo &MRI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const RegisterClassInfo &RegClassInfo;
/// AllocatableSet - The set of allocatable registers.
/// We'll be ignoring anti-dependencies on non-allocatable registers,
@@ -66,7 +69,7 @@ class TargetRegisterInfo;
SmallSet<unsigned, 4> KeepRegs;
public:
- CriticalAntiDepBreaker(MachineFunction& MFi);
+ CriticalAntiDepBreaker(MachineFunction& MFi, const RegisterClassInfo&);
~CriticalAntiDepBreaker();
/// Start - Initialize anti-dep breaking for a new basic block.
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index fdc1d91..6de6c0c 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -110,9 +110,14 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
LivePhysRegs.set(Reg);
}
- // FIXME: Add live-ins from sucessors to LivePhysRegs. Normally, physregs
- // are not live across blocks, but some targets (x86) can have flags live
- // out of a block.
+  // Add live-ins from successors to LivePhysRegs. Normally, physregs are not
+ // live across blocks, but some targets (x86) can have flags live out of a
+ // block.
+ for (MachineBasicBlock::succ_iterator S = MBB->succ_begin(),
+ E = MBB->succ_end(); S != E; S++)
+ for (MachineBasicBlock::livein_iterator LI = (*S)->livein_begin();
+ LI != (*S)->livein_end(); LI++)
+ LivePhysRegs.set(*LI);
// Now scan the instructions and delete dead ones, tracking physreg
// liveness as we go.
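
A concrete case behind the old FIXME: on x86, a compare in one block can feed a conditional jump in a successor, leaving EFLAGS live across the edge. Without seeding LivePhysRegs from successor live-ins, the compare would be misjudged as dead and deleted.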
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 22c5465..03604b0 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -336,8 +336,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator.
CallInst *NewSelector =
- CallInst::Create(SelectorIntrinsic, Args.begin(), Args.end(),
- "eh.sel.catch.all", II);
+ CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II);
NewSelector->setTailCall(II->isTailCall());
NewSelector->setAttributes(II->getAttributes());
@@ -497,10 +496,8 @@ bool DwarfEHPrepare::LowerUnwindsAndResumes() {
// Find the rewind function if we didn't already.
if (!RewindFunction) {
LLVMContext &Ctx = ResumeInsts[0]->getContext();
- std::vector<const Type*>
- Params(1, Type::getInt8PtrTy(Ctx));
FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx),
- Params, false);
+ Type::getInt8PtrTy(Ctx), false);
const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME);
RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy);
}
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index fa2319b..d977651 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -659,11 +659,11 @@ bool ELFWriter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
/// EmitXXStructorList - Emit the ctor or dtor list. This just emits out the
/// function pointers, ignoring the init priority.
-void ELFWriter::EmitXXStructorList(Constant *List, ELFSection &Xtor) {
+void ELFWriter::EmitXXStructorList(const Constant *List, ELFSection &Xtor) {
// Should be an array of '{ i32, void ()* }' structs. The first value is the
// init priority, which we ignore.
if (List->isNullValue()) return;
- ConstantArray *InitList = cast<ConstantArray>(List);
+ const ConstantArray *InitList = cast<ConstantArray>(List);
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
if (InitList->getOperand(i)->isNullValue())
continue;
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index b8bac55..6f7fbac 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -232,7 +232,7 @@ namespace llvm {
void EmitGlobalDataRelocation(const GlobalValue *GV, unsigned Size,
ELFSection &GblS, int64_t Offset = 0);
bool EmitSpecialLLVMGlobal(const GlobalVariable *GV);
- void EmitXXStructorList(Constant *List, ELFSection &Xtor);
+ void EmitXXStructorList(const Constant *List, ELFSection &Xtor);
void EmitRelocations();
void EmitRelocation(BinaryObject &RelSec, ELFRelocation &Rel, bool HasRelA);
void EmitSectionHeader(BinaryObject &SHdrTab, const ELFSection &SHdr);
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index 646e014..a7aba89 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -39,7 +39,7 @@ void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
EC.clear();
- EC.grow(2 * MF->size());
+ EC.grow(2 * MF->getNumBlockIDs());
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
++I) {
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
index ebc2fc9..a67140e 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -62,8 +62,8 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
MachineInstr *MI = MBBI++;
// If MI is a pseudo, expand it.
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.usesCustomInsertionHook()) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.usesCustomInsertionHook()) {
Changed = true;
MachineBasicBlock *NewMBB =
TLI->EmitInstrWithCustomInserter(MI, MBB);
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 8b2c981..6cb2277 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -18,11 +18,12 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -173,10 +174,10 @@ namespace {
private:
bool ReverseBranchCondition(BBInfo &BBI);
bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
- float Prediction, float Confidence) const;
+ const BranchProbability &Prediction) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
- float Prediction, float Confidence) const;
+ const BranchProbability &Prediction) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
unsigned &Dups1, unsigned &Dups2) const;
void ScanInstructions(BBInfo &BBI);
@@ -203,19 +204,19 @@ namespace {
bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
unsigned Cycle, unsigned Extra,
- float Prediction, float Confidence) const {
+ const BranchProbability &Prediction) const {
return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
- Prediction, Confidence);
+ Prediction);
}
bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
unsigned TCycle, unsigned TExtra,
MachineBasicBlock &FBB,
unsigned FCycle, unsigned FExtra,
- float Prediction, float Confidence) const {
+ const BranchProbability &Prediction) const {
return TCycle > 0 && FCycle > 0 &&
TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
- Prediction, Confidence);
+ Prediction);
}
// blockAlwaysFallThrough - Block ends without a terminator.
@@ -450,7 +451,7 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
/// number of instructions that the ifcvt would need to duplicate if performed
/// in Dups.
bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
- float Prediction, float Confidence) const {
+ const BranchProbability &Prediction) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -461,7 +462,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
if (TrueBBI.BB->pred_size() > 1) {
if (TrueBBI.CannotBeCopied ||
!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
- Prediction, Confidence))
+ Prediction))
return false;
Dups = TrueBBI.NonPredSize;
}
@@ -477,7 +478,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
/// if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
- float Prediction, float Confidence) const {
+ const BranchProbability &Prediction) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -499,8 +500,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
++Size;
}
}
- if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size,
- Prediction, Confidence))
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size, Prediction))
return false;
Dups = Size;
}
@@ -651,12 +651,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (I->isDebugValue())
continue;
- const TargetInstrDesc &TID = I->getDesc();
- if (TID.isNotDuplicable())
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isNotDuplicable())
BBI.CannotBeCopied = true;
bool isPredicated = TII->isPredicated(I);
- bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
+ bool isCondBr = BBI.IsBrAnalyzable && MCID.isConditionalBranch();
if (!isCondBr) {
if (!isPredicated) {
@@ -751,8 +751,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
ScanInstructions(BBI);
- // Unanalyzable or ends with fallthrough or unconditional branch.
- if (!BBI.IsBrAnalyzable || BBI.BrCond.empty()) {
+  // Unanalyzable, ends with a fallthrough or unconditional branch, or is no
+  // longer being considered for ifcvt.
+ if (!BBI.IsBrAnalyzable || BBI.BrCond.empty() || BBI.IsDone) {
BBI.IsBeingAnalyzed = false;
BBI.IsAnalyzed = true;
return BBI;
@@ -795,21 +796,20 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
// - backedge -> 90% taken
// - early exit -> 20% taken
// - branch predictor confidence -> 90%
- float Prediction = 0.5f;
- float Confidence = 0.9f;
+ BranchProbability Prediction(5, 10);
MachineLoop *Loop = MLI->getLoopFor(BB);
if (Loop) {
if (TrueBBI.BB == Loop->getHeader())
- Prediction = 0.9f;
+ Prediction = BranchProbability(9, 10);
else if (FalseBBI.BB == Loop->getHeader())
- Prediction = 0.1f;
+ Prediction = BranchProbability(1, 10);
MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
- Prediction = 0.2f;
+ Prediction = BranchProbability(2, 10);
else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
- Prediction = 0.8f;
+ Prediction = BranchProbability(8, 10);
}
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
@@ -817,7 +817,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
*FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
- Prediction, Confidence) &&
+ Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
// Diamond:
@@ -833,9 +833,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
Enqueued = true;
}
- if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
- TrueBBI.ExtraCost2, Prediction, Confidence) &&
+ TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
// Triangle:
// EBB
@@ -848,17 +848,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
Enqueued = true;
}
- if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction) &&
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
- TrueBBI.ExtraCost2, Prediction, Confidence) &&
+ TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
- if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
+ if (ValidSimple(TrueBBI, Dups, Prediction) &&
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
- TrueBBI.ExtraCost2, Prediction, Confidence) &&
+ TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
// Simple (split, no rejoin):
// EBB
@@ -874,29 +874,29 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
if (CanRevCond) {
// Try the other path...
if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
- 1.0-Prediction, Confidence) &&
+ Prediction.getCompl()) &&
MeetIfcvtSizeLimit(*FalseBBI.BB,
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
- FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
Enqueued = true;
}
if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
- 1.0-Prediction, Confidence) &&
+ Prediction.getCompl()) &&
MeetIfcvtSizeLimit(*FalseBBI.BB,
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
- FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
- if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
+ if (ValidSimple(FalseBBI, Dups, Prediction.getCompl()) &&
MeetIfcvtSizeLimit(*FalseBBI.BB,
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
- FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
+ FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
@@ -1414,9 +1414,9 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
for (MachineBasicBlock::iterator I = FromBBI.BB->begin(),
E = FromBBI.BB->end(); I != E; ++I) {
- const TargetInstrDesc &TID = I->getDesc();
+ const MCInstrDesc &MCID = I->getDesc();
// Do not copy the end of the block branches.
- if (IgnoreBr && TID.isBranch())
+ if (IgnoreBr && MCID.isBranch())
break;
MachineInstr *MI = MF.CloneMachineInstr(I);
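
BranchProbability carries an explicit numerator/denominator pair, so the complement used on the false side is exact rather than the old 1.0f minus a float. A short usage sketch built from the calls above:

    BranchProbability Taken(9, 10);                // replaces 0.9f
    BranchProbability NotTaken = Taken.getCompl(); // exactly 1/10
    // Both feed TII->isProfitableToIfCvt(...) directly; the separate
    // Confidence parameter is gone.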
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 19ae333..5547f73 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -180,11 +180,7 @@ Spiller *createInlineSpiller(MachineFunctionPass &pass,
/// isFullCopyOf - If MI is a COPY to or from Reg, return the other register,
/// otherwise return 0.
static unsigned isFullCopyOf(const MachineInstr *MI, unsigned Reg) {
- if (!MI->isCopy())
- return 0;
- if (MI->getOperand(0).getSubReg() != 0)
- return 0;
- if (MI->getOperand(1).getSubReg() != 0)
+ if (!MI->isFullCopy())
return 0;
if (MI->getOperand(0).getReg() == Reg)
return MI->getOperand(1).getReg();
@@ -307,7 +303,8 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
// Best spill candidate seen so far. This must dominate UseVNI.
SibValueInfo SVI(UseReg, UseVNI);
MachineBasicBlock *UseMBB = LIS.getMBBFromIndex(UseVNI->def);
- unsigned SpillDepth = Loops.getLoopDepth(UseMBB);
+ MachineBasicBlock *SpillMBB = UseMBB;
+ unsigned SpillDepth = Loops.getLoopDepth(SpillMBB);
bool SeenOrigPHI = false; // Original PHI met.
do {
@@ -320,7 +317,30 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
// Is this value a better spill candidate?
if (!isRegToSpill(Reg)) {
MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def);
- if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
+ if (MBB == SpillMBB) {
+ // This is an alternative def earlier in the same MBB.
+ // Hoist the spill as far as possible in SpillMBB. This can ease
+ // register pressure:
+ //
+ // x = def
+ // y = use x
+ // s = copy x
+ //
+ // Hoisting the spill of s to immediately after the def removes the
+ // interference between x and y:
+ //
+ // x = def
+ // spill x
+ // y = use x<kill>
+ //
+ if (VNI->def < SVI.SpillVNI->def) {
+ DEBUG(dbgs() << " hoist in BB#" << MBB->getNumber() << ": "
+ << PrintReg(Reg) << ':' << VNI->id << '@' << VNI->def
+ << '\n');
+ SVI.SpillReg = Reg;
+ SVI.SpillVNI = VNI;
+ }
+ } else if (MBB != UseMBB && MDT.dominates(MBB, UseMBB)) {
// This is a valid spill location dominating UseVNI.
// Prefer to spill at a smaller loop depth.
unsigned Depth = Loops.getLoopDepth(MBB);
@@ -329,6 +349,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI,
<< ':' << VNI->id << '@' << VNI->def << '\n');
SVI.SpillReg = Reg;
SVI.SpillVNI = VNI;
+ SpillMBB = MBB;
SpillDepth = Depth;
}
}
@@ -429,6 +450,7 @@ void InlineSpiller::analyzeSiblingValues() {
// Check possible sibling copies.
if (VNI->isPHIDef() || VNI->getCopy()) {
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ assert(OrigVNI && "Def outside original live range");
if (OrigVNI->def != VNI->def)
DefMI = traceSiblingValue(Reg, VNI, OrigVNI);
}
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index b1014a9..a09bb39 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -14,6 +14,7 @@
#define DEBUG_TYPE "regalloc"
#include "InterferenceCache.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -40,9 +41,18 @@ InterferenceCache::Entry *InterferenceCache::get(unsigned PhysReg) {
E = RoundRobin;
if (++RoundRobin == CacheEntries)
RoundRobin = 0;
- Entries[E].reset(PhysReg, LIUArray, TRI, MF);
- PhysRegEntries[PhysReg] = E;
- return &Entries[E];
+ for (unsigned i = 0; i != CacheEntries; ++i) {
+ // Skip entries that are in use.
+ if (Entries[E].hasRefs()) {
+ if (++E == CacheEntries)
+ E = 0;
+ continue;
+ }
+ Entries[E].reset(PhysReg, LIUArray, TRI, MF);
+ PhysRegEntries[PhysReg] = E;
+ return &Entries[E];
+ }
+ llvm_unreachable("Ran out of interference cache entries.");
}
/// revalidate - LIU contents have changed, update tags.
@@ -59,6 +69,7 @@ void InterferenceCache::Entry::reset(unsigned physReg,
LiveIntervalUnion *LIUArray,
const TargetRegisterInfo *TRI,
const MachineFunction *MF) {
+ assert(!hasRefs() && "Cannot reset cache entry with references");
// LIU's changed, invalidate cache.
++Tag;
PhysReg = physReg;
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 6c36fa4..7f0a27a 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -43,6 +43,9 @@ class InterferenceCache {
/// change.
unsigned Tag;
+ /// RefCount - The total number of Cursor instances referring to this Entry.
+ unsigned RefCount;
+
/// MF - The current function.
MachineFunction *MF;
@@ -68,9 +71,10 @@ class InterferenceCache {
void update(unsigned MBBNum);
public:
- Entry() : PhysReg(0), Tag(0), Indexes(0) {}
+ Entry() : PhysReg(0), Tag(0), RefCount(0), Indexes(0) {}
void clear(MachineFunction *mf, SlotIndexes *indexes) {
+ assert(!hasRefs() && "Cannot clear cache entry with references");
PhysReg = 0;
MF = mf;
Indexes = indexes;
@@ -78,6 +82,10 @@ class InterferenceCache {
unsigned getPhysReg() const { return PhysReg; }
+ void addRef(int Delta) { RefCount += Delta; }
+
+ bool hasRefs() const { return RefCount > 0; }
+
void revalidate();
/// valid - Return true if this is a valid entry for physReg.
@@ -122,15 +130,48 @@ public:
void init(MachineFunction*, LiveIntervalUnion*, SlotIndexes*,
const TargetRegisterInfo *);
+ /// getMaxCursors - Return the maximum number of concurrent cursors that can
+ /// be supported.
+ unsigned getMaxCursors() const { return CacheEntries; }
+
/// Cursor - The primary query interface for the block interference cache.
class Cursor {
Entry *CacheEntry;
BlockInterference *Current;
+
+ void setEntry(Entry *E) {
+ Current = 0;
+ // Update reference counts. Nothing happens when RefCount reaches 0, so
+ // we don't have to check for E == CacheEntry etc.
+ if (CacheEntry)
+ CacheEntry->addRef(-1);
+ CacheEntry = E;
+ if (CacheEntry)
+ CacheEntry->addRef(+1);
+ }
+
public:
- /// Cursor - Create a cursor for the interference allocated to PhysReg and
- /// all its aliases.
- Cursor(InterferenceCache &Cache, unsigned PhysReg)
- : CacheEntry(Cache.get(PhysReg)), Current(0) {}
+ /// Cursor - Create a dangling cursor.
+ Cursor() : CacheEntry(0), Current(0) {}
+ ~Cursor() { setEntry(0); }
+
+ Cursor(const Cursor &O) : CacheEntry(0), Current(0) {
+ setEntry(O.CacheEntry);
+ }
+
+ Cursor &operator=(const Cursor &O) {
+ setEntry(O.CacheEntry);
+ return *this;
+ }
+
+ /// setPhysReg - Point this cursor to PhysReg's interference.
+ void setPhysReg(InterferenceCache &Cache, unsigned PhysReg) {
+      // Release the old reference before getting a new one. That guarantees
+      // we can actually support CacheEntries live cursors at once.
+ setEntry(0);
+ if (PhysReg)
+ setEntry(Cache.get(PhysReg));
+ }
/// moveTo - Move cursor to basic block MBBNum.
void moveToBlock(unsigned MBBNum) {
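
With the PhysReg constructor removed, cursor clients follow a two-step pattern; a brief usage sketch:

    InterferenceCache::Cursor Cur;    // dangling: holds no entry, no reference
    Cur.setPhysReg(Cache, PhysReg);   // drops any old ref, then pins the entry
    Cur.moveToBlock(MBBNum);          // query as before
    // ~Cursor() releases the reference; since get() refuses to evict a
    // pinned entry, at most getMaxCursors() cursors can hold entries.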
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 3861dda..611886f 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -29,7 +29,7 @@ static void EnsureFunctionExists(Module &M, const char *Name,
ArgIt ArgBegin, ArgIt ArgEnd,
const Type *RetTy) {
// Insert a correctly-typed definition now.
- std::vector<const Type *> ParamTys;
+ std::vector<Type *> ParamTys;
for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
ParamTys.push_back(I->getType());
M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
@@ -69,7 +69,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
// program already contains a function with this name.
Module *M = CI->getParent()->getParent()->getParent();
// Get or insert the definition now.
- std::vector<const Type *> ParamTys;
+ std::vector<Type *> ParamTys;
for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
ParamTys.push_back((*I)->getType());
Constant* FCache = M->getOrInsertFunction(NewFn,
@@ -77,7 +77,7 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
IRBuilder<> Builder(CI->getParent(), CI);
SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
- CallInst *NewCI = Builder.CreateCall(FCache, Args.begin(), Args.end());
+ CallInst *NewCI = Builder.CreateCall(FCache, Args);
NewCI->setName(CI->getName());
if (!CI->use_empty())
CI->replaceAllUsesWith(NewCI);
@@ -353,6 +353,13 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
report_fatal_error("Code generator does not support intrinsic function '"+
Callee->getName()+"'!");
+ case Intrinsic::expect: {
+    // Just replace __builtin_expect(exp, c) with 'exp'.
+ Value *V = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(V);
+ break;
+ }
+
// The setjmp/longjmp intrinsics should only exist in the code if it was
  // never optimized (i.e., right out of the CFE), or if it has been hacked on
// by the lowerinvoke pass. In both cases, the right thing to do is to
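
For a concrete picture of the new expect lowering (the IR shape here is assumed, not taken from this patch): given %e = call i64 @llvm.expect.i64(i64 %x, i64 1), replaceAllUsesWith rewires every user of %e to %x, and the constant hint operand is simply dropped; the now-dead call itself is removed by the surrounding lowering machinery.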
@@ -546,14 +553,13 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
!CI->getType()->isIntegerTy())
return false;
- const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
if (!Ty)
return false;
// Okay, we can do this xform, do so now.
- const Type *Tys[] = { Ty };
Module *M = CI->getParent()->getParent()->getParent();
- Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
Value *Op = CI->getArgOperand(0);
Op = CallInst::Create(Int, Op, CI->getName(), CI);
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 589d0a9..f985af8 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -24,10 +24,14 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/CommandLine.h"
@@ -98,10 +102,10 @@ static cl::opt<cl::boolOrDefault>
EnableFastISelOption("fast-isel", cl::Hidden,
cl::desc("Enable the \"fast\" instruction selector"));
-LLVMTargetMachine::LLVMTargetMachine(const Target &T,
- const std::string &Triple)
- : TargetMachine(T), TargetTriple(Triple) {
- AsmInfo = T.createAsmInfo(TargetTriple);
+LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple,
+ StringRef CPU, StringRef FS)
+ : TargetMachine(T, Triple, CPU, FS) {
+ AsmInfo = T.createMCAsmInfo(Triple);
}
// Set the default code model for the JIT for a generic target.
@@ -136,14 +140,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
default: return true;
case CGFT_AssemblyFile: {
MCInstPrinter *InstPrinter =
- getTarget().createMCInstPrinter(*this, MAI.getAssemblerDialect(), MAI);
+ getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI);
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = 0;
TargetAsmBackend *TAB = 0;
if (ShowMCEncoding) {
- MCE = getTarget().createCodeEmitter(*this, *Context);
- TAB = getTarget().createAsmBackend(TargetTriple);
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI, *Context);
+ TAB = getTarget().createAsmBackend(getTargetTriple());
}
MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
@@ -159,13 +164,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
case CGFT_ObjectFile: {
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Context);
- TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(), STI,
+ *Context);
+ TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
if (MCE == 0 || TAB == 0)
return true;
- AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context,
- *TAB, Out, MCE,
+ AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(),
+ *Context, *TAB, Out, MCE,
hasMCRelaxAll(),
hasMCNoExecStack()));
AsmStreamer.get()->InitSections();
@@ -240,13 +247,14 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM,
// Create the code emitter for the target if it exists. If not, .o file
// emission fails.
- MCCodeEmitter *MCE = getTarget().createCodeEmitter(*this, *Ctx);
- TargetAsmBackend *TAB = getTarget().createAsmBackend(TargetTriple);
+ const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
+ MCCodeEmitter *MCE = getTarget().createCodeEmitter(*getInstrInfo(),STI, *Ctx);
+ TargetAsmBackend *TAB = getTarget().createAsmBackend(getTargetTriple());
if (MCE == 0 || TAB == 0)
return true;
OwningPtr<MCStreamer> AsmStreamer;
- AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Ctx,
+ AsmStreamer.reset(getTarget().createObjectStreamer(getTargetTriple(), *Ctx,
*TAB, Out, MCE,
hasMCRelaxAll(),
hasMCNoExecStack()));
@@ -384,6 +392,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Expand pseudo-instructions emitted by ISel.
PM.add(createExpandISelPseudosPass());
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
if (OptLevel != CodeGenOpt::None)
@@ -412,12 +426,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
printAndVerify(PM, "After codegen peephole optimization pass");
}
- // Pre-ra tail duplication.
- if (OptLevel != CodeGenOpt::None && !DisableEarlyTailDup) {
- PM.add(createTailDuplicatePass(true));
- printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
- }
-
// Run pre-ra passes.
if (addPreRegAlloc(PM, OptLevel))
printAndVerify(PM, "After PreRegAlloc passes");
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 292928f..5d38c83 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -123,7 +123,7 @@ public:
/// getNext - Return the next UserValue in the equivalence class.
UserValue *getNext() const { return next; }
- /// match - Does this UserValue match the aprameters?
+ /// match - Does this UserValue match the parameters?
bool match(const MDNode *Var, unsigned Offset) const {
return Var == variable && Offset == offset;
}
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index b67f966..70003e7 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -244,7 +244,7 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
//
// For comments on how to speed it up, see Query::findIntersection().
unsigned LiveIntervalUnion::Query::
-collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) {
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
InterferenceResult IR = firstInterference();
LiveInterval::iterator VirtRegEnd = VirtReg->end();
LiveInterval *RecentInterferingVReg = NULL;
@@ -287,10 +287,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs, float MaxWeight) {
RecentInterferingVReg = IR.LiveUnionI.value();
++IR.LiveUnionI;
- // Stop collecting when the max weight is exceeded.
- if (RecentInterferingVReg->weight >= MaxWeight)
- return InterferingVRegs.size();
-
continue;
}
// VirtRegI may have advanced far beyond LiveUnionI,
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
index c83578e..5e78d5e 100644
--- a/lib/CodeGen/LiveIntervalUnion.h
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -229,8 +229,7 @@ public:
// Count the virtual registers in this union that interfere with this
// query's live virtual register, up to maxInterferingRegs.
- unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX,
- float MaxWeight = HUGE_VALF);
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
// Was this virtual register visited during collectInterferingVRegs?
bool isSeenInterference(LiveInterval *VReg) const;
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 052abad..b385fb3 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -298,10 +298,16 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl<MachineInstr*> &Dead,
if (NumComp <= 1)
continue;
++NumFracRanges;
+ bool IsOriginal = VRM.getOriginal(LI->reg) == LI->reg;
DEBUG(dbgs() << NumComp << " components: " << *LI << '\n');
SmallVector<LiveInterval*, 8> Dups(1, LI);
for (unsigned i = 1; i != NumComp; ++i) {
Dups.push_back(&createFrom(LI->reg, LIS, VRM));
+ // If LI is an original interval that hasn't been split yet, make the new
+ // intervals their own originals instead of referring to LI. The original
+ // interval must contain all the split products, and LI doesn't.
+ if (IsOriginal)
+ VRM.setIsSplitFromReg(Dups.back()->reg, 0);
if (delegate_)
delegate_->LRE_DidCloneVirtReg(Dups.back()->reg, LI->reg);
}
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 68946a2..8f0fb46 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -22,7 +22,6 @@
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Assembly/Writer.h"
@@ -61,7 +60,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
return OS;
}
-/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
+/// addNodeToList (MBB) - When an MBB is added to an MF, we need to update the
/// parent pointer of the MBB, the MBB numbering, and any instructions in the
/// MBB to be on the right operand list for registers.
///
@@ -93,7 +92,7 @@ void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
void ilist_traits<MachineInstr>::addNodeToList(MachineInstr *N) {
assert(N->getParent() == 0 && "machine instruction already in a basic block");
N->setParent(Parent);
-
+
// Add the instruction's register operands to their corresponding
// use/def lists.
MachineFunction *MF = Parent->getParent();
@@ -110,7 +109,7 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
// Remove from the use/def lists.
N->RemoveRegOperandsFromUseLists();
-
+
N->setParent(0);
LeakDetector::addGarbageObject(N);
@@ -339,25 +338,64 @@ void MachineBasicBlock::updateTerminator() {
}
}
-void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
- Successors.push_back(succ);
- succ->addPredecessor(this);
-}
+void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ, uint32_t weight) {
+
+ // If we see a non-zero value for the first time, it means we actually use
+ // the Weights list, so we fill all Weights with 0's.
+ if (weight != 0 && Weights.empty())
+ Weights.resize(Successors.size());
+
+ if (weight != 0 || !Weights.empty())
+ Weights.push_back(weight);
+
+ Successors.push_back(succ);
+ succ->addPredecessor(this);
+}
void MachineBasicBlock::removeSuccessor(MachineBasicBlock *succ) {
succ->removePredecessor(this);
succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
assert(I != Successors.end() && "Not a current successor!");
+
+ // If the Weights list is empty, it means we don't use it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
Successors.erase(I);
}
-MachineBasicBlock::succ_iterator
+MachineBasicBlock::succ_iterator
MachineBasicBlock::removeSuccessor(succ_iterator I) {
assert(I != Successors.end() && "Not a current successor!");
+
+ // If the Weights list is empty, it means we don't use it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(I);
+ Weights.erase(WI);
+ }
+
(*I)->removePredecessor(this);
return Successors.erase(I);
}
+void MachineBasicBlock::replaceSuccessor(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ uint32_t weight = 0;
+ succ_iterator SI = std::find(Successors.begin(), Successors.end(), Old);
+
+ // If the Weights list is empty, it means we don't use it (disabled optimization).
+ if (!Weights.empty()) {
+ weight_iterator WI = getWeightIterator(SI);
+ weight = *WI;
+ }
+
+ // Update the successor information.
+ removeSuccessor(SI);
+ addSuccessor(New, weight);
+}
+
void MachineBasicBlock::addPredecessor(MachineBasicBlock *pred) {
Predecessors.push_back(pred);
}
@@ -371,10 +409,17 @@ void MachineBasicBlock::removePredecessor(MachineBasicBlock *pred) {
void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) {
if (this == fromMBB)
return;
-
+
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
- addSuccessor(Succ);
+ uint32_t weight = 0;
+
+
+ // If the Weights list is empty, it means we don't use it (disabled optimization).
+ if (!fromMBB->Weights.empty())
+ weight = *fromMBB->Weights.begin();
+
+ addSuccessor(Succ, weight);
fromMBB->removeSuccessor(Succ);
}
}
@@ -383,7 +428,7 @@ void
MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) {
if (this == fromMBB)
return;
-
+
while (!fromMBB->succ_empty()) {
MachineBasicBlock *Succ = *fromMBB->succ_begin();
addSuccessor(Succ);
@@ -637,15 +682,14 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
}
// Update the successor information.
- removeSuccessor(Old);
- addSuccessor(New);
+ replaceSuccessor(Old, New);
}
/// CorrectExtraCFGEdges - Various pieces of code can cause excess edges in the
/// CFG to be inserted. If we have proven that MBB can only branch to DestA and
/// DestB, remove any other MBB successors from the CFG. DestA and DestB can be
/// null.
-///
+///
/// Besides DestA and DestB, retain other edges leading to LandingPads
/// (currently there can be only one; we don't check or require that here).
/// Note it is possible that DestA and/or DestB are LandingPads.
@@ -720,6 +764,26 @@ MachineBasicBlock::findDebugLoc(MachineBasicBlock::iterator &MBBI) {
return DL;
}
+/// getSuccWeight - Return the weight of the edge from this block to succ.
+///
+uint32_t MachineBasicBlock::getSuccWeight(MachineBasicBlock *succ) {
+ if (Weights.empty())
+ return 0;
+
+ succ_iterator I = std::find(Successors.begin(), Successors.end(), succ);
+ return *getWeightIterator(I);
+}
+
+/// getWeightIterator - Return the weight iterator corresponding to the
+/// successor iterator I.
+MachineBasicBlock::weight_iterator MachineBasicBlock::
+getWeightIterator(MachineBasicBlock::succ_iterator I) {
+ assert(Weights.size() == Successors.size() && "Weight list out of sync!");
+ size_t index = std::distance(Successors.begin(), I);
+ assert(index < Weights.size() && "Not a current successor!");
+ return Weights.begin() + index;
+}
+
void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
bool t) {
OS << "BB#" << MBB->getNumber();
diff --git a/lib/CodeGen/MachineBlockFrequency.cpp b/lib/CodeGen/MachineBlockFrequency.cpp
new file mode 100644
index 0000000..893a320
--- /dev/null
+++ b/lib/CodeGen/MachineBlockFrequency.cpp
@@ -0,0 +1,59 @@
+//===----- MachineBlockFrequency.cpp - Machine Block Frequency Analysis ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Loops should be simplified before this analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm/Analysis/BlockFrequencyImpl.h"
+#include "llvm/CodeGen/MachineBlockFrequency.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBlockFrequency, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
+INITIALIZE_PASS_END(MachineBlockFrequency, "machine-block-freq",
+ "Machine Block Frequency Analysis", true, true)
+
+char MachineBlockFrequency::ID = 0;
+
+
+MachineBlockFrequency::MachineBlockFrequency() : MachineFunctionPass(ID) {
+ initializeMachineBlockFrequencyPass(*PassRegistry::getPassRegistry());
+ MBFI = new BlockFrequencyImpl<MachineBasicBlock, MachineFunction,
+ MachineBranchProbabilityInfo>();
+}
+
+MachineBlockFrequency::~MachineBlockFrequency() {
+ delete MBFI;
+}
+
+void MachineBlockFrequency::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineBranchProbabilityInfo>();
+ AU.setPreservesAll();
+}
+
+bool MachineBlockFrequency::runOnMachineFunction(MachineFunction &F) {
+ MachineBranchProbabilityInfo &MBPI = getAnalysis<MachineBranchProbabilityInfo>();
+ MBFI->doFunction(&F, &MBPI);
+ return false;
+}
+
+/// getBlockFreq - Return the block frequency. The returned value is always
+/// positive, never 0. Note that the initial frequency is equal to 1024, so
+/// the value is only meaningful relative to other block frequencies, not in
+/// absolute terms. We do this to avoid using floating point.
+///
+uint32_t MachineBlockFrequency::getBlockFreq(MachineBasicBlock *MBB) {
+ return MBFI->getBlockFreq(MBB);
+}
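
A sketch of how a machine pass might consume the new analysis; MyPass is hypothetical, and the 8 * 1024 cutoff is an arbitrary illustration of comparing against the 1024 entry frequency, not a real heuristic:

    void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<MachineBlockFrequency>(); // pull in the new analysis
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    bool MyPass::runOnMachineFunction(MachineFunction &MF) {
      MachineBlockFrequency &MBF = getAnalysis<MachineBlockFrequency>();
      for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I)
        if (MBF.getBlockFreq(I) > 8 * 1024) // only relative values matter
          ; // placeholder: treat *I as hot
      return false;
    }
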
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
new file mode 100644
index 0000000..c13fa6b
--- /dev/null
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -0,0 +1,113 @@
+//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis uses probability info stored in Machine Basic Blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Instructions.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS_BEGIN(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob",
+ "Machine Branch Probability Analysis", false, true)
+
+char MachineBranchProbabilityInfo::ID = 0;
+
+uint32_t MachineBranchProbabilityInfo::
+getSumForBlock(MachineBasicBlock *MBB) const {
+ uint32_t Sum = 0;
+
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(MBB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+ }
+
+ return Sum;
+}
+
+uint32_t
+MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t Weight = Src->getSuccWeight(Dst);
+ if (!Weight)
+ return DEFAULT_WEIGHT;
+ return Weight;
+}
+
+bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ // Hot probability is at least 4/5 = 80%
+ uint32_t Weight = getEdgeWeight(Src, Dst);
+ uint32_t Sum = getSumForBlock(Src);
+
+ // FIXME: Implement BranchProbability::compare then change this code to
+ // compare this BranchProbability against a static "hot" BranchProbability.
+ return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
+}
+
+MachineBasicBlock *
+MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const {
+ uint32_t Sum = 0;
+ uint32_t MaxWeight = 0;
+ MachineBasicBlock *MaxSucc = 0;
+
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ MachineBasicBlock *Succ = *I;
+ uint32_t Weight = getEdgeWeight(MBB, Succ);
+ uint32_t PrevSum = Sum;
+
+ Sum += Weight;
+ assert(Sum > PrevSum); (void) PrevSum;
+
+ if (Weight > MaxWeight) {
+ MaxWeight = Weight;
+ MaxSucc = Succ;
+ }
+ }
+
+ // FIXME: Use BranchProbability::compare.
+ if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4)
+ return MaxSucc;
+
+ return 0;
+}
+
+BranchProbability
+MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+ uint32_t N = getEdgeWeight(Src, Dst);
+ uint32_t D = getSumForBlock(Src);
+
+ return BranchProbability(N, D);
+}
+
+raw_ostream &MachineBranchProbabilityInfo::
+printEdgeProbability(raw_ostream &OS, MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) const {
+
+ const BranchProbability Prob = getEdgeProbability(Src, Dst);
+ OS << "edge MBB#" << Src->getNumber() << " -> MBB#" << Dst->getNumber()
+ << " probability is " << Prob
+ << (isEdgeHot(Src, Dst) ? " [HOT edge]\n" : "\n");
+
+ return OS;
+}
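
The 4/5 test above says an edge is hot when it takes strictly more than 80% of the block's outgoing weight; the products are widened to 64 bits so two operands near UINT32_MAX cannot overflow. The comparison in isolation, with a worked example:

    static bool isHot(uint32_t Weight, uint32_t Sum) {
      // Weight = 92, Sum = 100: 92*5 = 460 > 400 = 100*4 => hot (92% > 80%)
      // Weight = 80, Sum = 100: 80*5 = 400, not > 400    => not hot (exactly 80%)
      return (uint64_t)Weight * 5 > (uint64_t)Sum * 4;
    }
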
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index f97ccf6..3a60a37 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -260,12 +260,12 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
return false;
// Ignore stuff that we obviously can't move.
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayStore() || MCID.isCall() || MCID.isTerminator() ||
MI->hasUnmodeledSideEffects())
return false;
- if (TID.mayLoad()) {
+ if (MCID.mayLoad()) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 50750a5..cd25156 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -152,10 +152,10 @@ void MachineFunction::RenumberBlocks(MachineBasicBlock *MBB) {
/// of `new MachineInstr'.
///
MachineInstr *
-MachineFunction::CreateMachineInstr(const TargetInstrDesc &TID,
+MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID,
DebugLoc DL, bool NoImp) {
return new (InstructionRecycler.Allocate<MachineInstr>(Allocator))
- MachineInstr(TID, DL, NoImp);
+ MachineInstr(MCID, DL, NoImp);
}
/// CloneMachineInstr - Create a new MachineInstr which is a copy of the
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 36b0b83..143a29b 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -15,19 +15,22 @@
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/InlineAsm.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"
+#include "llvm/Module.h"
#include "llvm/Type.h"
#include "llvm/Value.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrDesc.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/DebugInfo.h"
@@ -194,6 +197,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
getSubReg() == Other.getSubReg();
case MachineOperand::MO_Immediate:
return getImm() == Other.getImm();
+ case MachineOperand::MO_CImmediate:
+ return getCImm() == Other.getCImm();
case MachineOperand::MO_FPImmediate:
return getFPImm() == Other.getFPImm();
case MachineOperand::MO_MachineBasicBlock:
@@ -267,6 +272,9 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
case MachineOperand::MO_Immediate:
OS << getImm();
break;
+ case MachineOperand::MO_CImmediate:
+ getCImm()->getValue().print(OS, false);
+ break;
case MachineOperand::MO_FPImmediate:
if (getFPImm()->getType()->isFloatTy())
OS << getFPImm()->getValueAPF().convertToFloat();
@@ -454,9 +462,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
//===----------------------------------------------------------------------===//
/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
-/// TID NULL and no operands.
+/// MCID NULL and no operands.
MachineInstr::MachineInstr()
- : TID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(0), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0),
Parent(0) {
// Make sure that we get added to a machine basicblock
@@ -464,23 +472,23 @@ MachineInstr::MachineInstr()
}
void MachineInstr::addImplicitDefUseOperands() {
- if (TID->ImplicitDefs)
- for (const unsigned *ImpDefs = TID->ImplicitDefs; *ImpDefs; ++ImpDefs)
+ if (MCID->ImplicitDefs)
+ for (const unsigned *ImpDefs = MCID->ImplicitDefs; *ImpDefs; ++ImpDefs)
addOperand(MachineOperand::CreateReg(*ImpDefs, true, true));
- if (TID->ImplicitUses)
- for (const unsigned *ImpUses = TID->ImplicitUses; *ImpUses; ++ImpUses)
+ if (MCID->ImplicitUses)
+ for (const unsigned *ImpUses = MCID->ImplicitUses; *ImpUses; ++ImpUses)
addOperand(MachineOperand::CreateReg(*ImpUses, false, true));
}
/// MachineInstr ctor - This constructor creates a MachineInstr and adds the
/// implicit operands. It reserves space for the number of operands specified by
-/// the TargetInstrDesc.
-MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
- : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+/// the MCInstrDesc.
+MachineInstr::MachineInstr(const MCInstrDesc &tid, bool NoImp)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0) {
if (!NoImp)
- NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
if (!NoImp)
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
@@ -488,13 +496,13 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
}
/// MachineInstr ctor - As above, but with a DebugLoc.
-MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
+MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl,
bool NoImp)
- : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
if (!NoImp)
- NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
if (!NoImp)
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
@@ -504,12 +512,12 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
/// MachineInstr ctor - Works exactly the same as the ctor two above, except
/// that the MachineInstr is created and added to the end of the specified
/// basic block.
-MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+MachineInstr::MachineInstr(MachineBasicBlock *MBB, const MCInstrDesc &tid)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
- NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -519,12 +527,12 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
/// MachineInstr ctor - As above, but with a DebugLoc.
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
- const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ const MCInstrDesc &tid)
+ : MCID(&tid), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
MemRefs(0), MemRefsEnd(0), Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
- NumImplicitOps = TID->getNumImplicitDefs() + TID->getNumImplicitUses();
- Operands.reserve(NumImplicitOps + TID->getNumOperands());
+ NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses();
+ Operands.reserve(NumImplicitOps + MCID->getNumOperands());
addImplicitDefUseOperands();
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -534,7 +542,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : TID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
+ : MCID(&MI.getDesc()), NumImplicitOps(0), Flags(0), AsmPrinterFlags(0),
MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -621,7 +629,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
Operands.back().AddRegOperandToRegInfo(RegInfo);
// If the register operand is flagged as early, mark the operand as such
unsigned OpNo = Operands.size() - 1;
- if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
Operands[OpNo].setIsEarlyClobber(true);
}
return;
@@ -643,7 +651,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Operands[OpNo].isReg()) {
Operands[OpNo].AddRegOperandToRegInfo(0);
// If the register operand is flagged as early, mark the operand as such
- if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
Operands[OpNo].setIsEarlyClobber(true);
}
@@ -668,7 +676,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
if (Operands[OpNo].isReg()) {
Operands[OpNo].AddRegOperandToRegInfo(RegInfo);
// If the register operand is flagged as early, mark the operand as such
- if (TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+ if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
Operands[OpNo].setIsEarlyClobber(true);
}
@@ -691,7 +699,7 @@ void MachineInstr::addOperand(const MachineOperand &Op) {
// If the register operand is flagged as early, mark the operand as such
if (Operands[OpNo].isReg()
- && TID->getOperandConstraint(OpNo, TOI::EARLY_CLOBBER) != -1)
+ && MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1)
Operands[OpNo].setIsEarlyClobber(true);
}
}
@@ -794,6 +802,11 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
return false;
}
}
+ // If DebugLoc does not match then two dbg.values are not identical.
+ if (isDebugValue())
+ if (!getDebugLoc().isUnknown() && !Other->getDebugLoc().isUnknown()
+ && getDebugLoc() != Other->getDebugLoc())
+ return false;
return true;
}
@@ -817,8 +830,8 @@ void MachineInstr::eraseFromParent() {
/// OperandsComplete - Return true if it's illegal to add a new operand
///
bool MachineInstr::OperandsComplete() const {
- unsigned short NumOperands = TID->getNumOperands();
- if (!TID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
+ unsigned short NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic() && getNumOperands()-NumImplicitOps >= NumOperands)
return true; // Broken: we have all the operands of this instruction!
return false;
}
@@ -826,8 +839,8 @@ bool MachineInstr::OperandsComplete() const {
/// getNumExplicitOperands - Returns the number of non-implicit operands.
///
unsigned MachineInstr::getNumExplicitOperands() const {
- unsigned NumOperands = TID->getNumOperands();
- if (!TID->isVariadic())
+ unsigned NumOperands = MCID->getNumOperands();
+ if (!MCID->isVariadic())
return NumOperands;
for (unsigned i = NumOperands, e = getNumOperands(); i != e; ++i) {
@@ -928,10 +941,10 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
/// operand list that is used to represent the predicate. It returns -1 if
/// none is found.
int MachineInstr::findFirstPredOperandIdx() const {
- const TargetInstrDesc &TID = getDesc();
- if (TID.isPredicable()) {
+ const MCInstrDesc &MCID = getDesc();
+ if (MCID.isPredicable()) {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (TID.OpInfo[i].isPredicate())
+ if (MCID.OpInfo[i].isPredicate())
return i;
}
@@ -987,11 +1000,11 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
}
assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
- const TargetInstrDesc &TID = getDesc();
- for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ const MCInstrDesc &MCID = getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (MO.isReg() && MO.isUse() &&
- TID.getOperandConstraint(i, TOI::TIED_TO) == (int)DefOpIdx) {
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) == (int)DefOpIdx) {
if (UseOpIdx)
*UseOpIdx = (unsigned)i;
return true;
@@ -1047,13 +1060,13 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
return false;
}
- const TargetInstrDesc &TID = getDesc();
- if (UseOpIdx >= TID.getNumOperands())
+ const MCInstrDesc &MCID = getDesc();
+ if (UseOpIdx >= MCID.getNumOperands())
return false;
const MachineOperand &MO = getOperand(UseOpIdx);
if (!MO.isReg() || !MO.isUse())
return false;
- int DefIdx = TID.getOperandConstraint(UseOpIdx, TOI::TIED_TO);
+ int DefIdx = MCID.getOperandConstraint(UseOpIdx, MCOI::TIED_TO);
if (DefIdx == -1)
return false;
if (DefOpIdx)
@@ -1093,11 +1106,11 @@ void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) {
/// copyPredicates - Copies predicate operand(s) from MI.
void MachineInstr::copyPredicates(const MachineInstr *MI) {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isPredicable())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
return;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (TID.OpInfo[i].isPredicate()) {
+ if (MCID.OpInfo[i].isPredicate()) {
// Predicated operands must be last operands.
addOperand(MI->getOperand(i));
}
@@ -1134,13 +1147,13 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
AliasAnalysis *AA,
bool &SawStore) const {
// Ignore stuff that we obviously can't move.
- if (TID->mayStore() || TID->isCall()) {
+ if (MCID->mayStore() || MCID->isCall()) {
SawStore = true;
return false;
}
if (isLabel() || isDebugValue() ||
- TID->isTerminator() || hasUnmodeledSideEffects())
+ MCID->isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1148,7 +1161,7 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
// destination. The check for isInvariantLoad gives the target the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (TID->mayLoad() && !isInvariantLoad(AA))
+ if (MCID->mayLoad() && !isInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, or if the load is volatile, we can't move it.
return !SawStore && !hasVolatileMemoryRef();
@@ -1188,9 +1201,9 @@ bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
/// have no volatile memory references.
bool MachineInstr::hasVolatileMemoryRef() const {
// An instruction known never to access memory won't have a volatile access.
- if (!TID->mayStore() &&
- !TID->mayLoad() &&
- !TID->isCall() &&
+ if (!MCID->mayStore() &&
+ !MCID->mayLoad() &&
+ !MCID->isCall() &&
!hasUnmodeledSideEffects())
return false;
@@ -1214,7 +1227,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
/// *all* loads the instruction does are invariant (if it does multiple loads).
bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
- if (!TID->mayLoad())
+ if (!MCID->mayLoad())
return false;
// If the instruction has lost its memoperands, conservatively assume that
@@ -1364,6 +1377,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
bool FirstOp = true;
+ unsigned AsmDescOp = ~0u;
+ unsigned AsmOpCount = 0;
if (isInlineAsm()) {
// Print asm string.
@@ -1377,7 +1392,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
OS << " [alignstack]";
- StartOp = InlineAsm::MIOp_FirstOperand;
+ StartOp = AsmDescOp = InlineAsm::MIOp_FirstOperand;
FirstOp = false;
}
@@ -1416,10 +1431,10 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
if (i < getDesc().NumOperands) {
- const TargetOperandInfo &TOI = getDesc().OpInfo[i];
- if (TOI.isPredicate())
+ const MCOperandInfo &MCOI = getDesc().OpInfo[i];
+ if (MCOI.isPredicate())
OS << "pred:";
- if (TOI.isOptionalDef())
+ if (MCOI.isOptionalDef())
OS << "opt:";
}
if (isDebugValue() && MO.isMetadata()) {
@@ -1431,6 +1446,26 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
MO.print(OS, TM);
} else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) {
OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm());
+ } else if (i == AsmDescOp && MO.isImm()) {
+ // Pretty print the inline asm operand descriptor.
+ OS << '$' << AsmOpCount++;
+ unsigned Flag = MO.getImm();
+ switch (InlineAsm::getKind(Flag)) {
+ case InlineAsm::Kind_RegUse: OS << ":[reguse]"; break;
+ case InlineAsm::Kind_RegDef: OS << ":[regdef]"; break;
+ case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec]"; break;
+ case InlineAsm::Kind_Clobber: OS << ":[clobber]"; break;
+ case InlineAsm::Kind_Imm: OS << ":[imm]"; break;
+ case InlineAsm::Kind_Mem: OS << ":[mem]"; break;
+ default: OS << ":[??" << InlineAsm::getKind(Flag) << ']'; break;
+ }
+
+ unsigned TiedTo = 0;
+ if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
+ OS << " [tiedto:$" << TiedTo << ']';
+
+ // Compute the index of the next operand descriptor.
+ AsmDescOp += 1 + InlineAsm::getNumOperandRegisters(Flag);
} else
MO.print(OS, TM);
}
@@ -1685,3 +1720,24 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
}
return Hash;
}
+
+void MachineInstr::emitError(StringRef Msg) const {
+ // Find the source location cookie.
+ unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
+ for (unsigned i = getNumOperands(); i != 0; --i) {
+ if (getOperand(i-1).isMetadata() &&
+ (LocMD = getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
+ }
+
+ if (const MachineBasicBlock *MBB = getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ return MF->getMMI().getModule()->getContext().emitError(LocCookie, Msg);
+ report_fatal_error(Msg);
+}
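
MachineInstr::emitError above prefers the LLVMContext error handler, locating an inline-asm !srcloc cookie so the front end can point at the offending source line, and falls back to report_fatal_error only for detached instructions. A hedged sketch of a backend check built on it; checkNoInlineAsm and its policy are purely illustrative:

    static void checkNoInlineAsm(const MachineInstr *MI) {
      if (MI->isInlineAsm()) // hypothetical policy, for illustration only
        MI->emitError("inline assembly is not supported by this backend");
    }
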
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index b315702..722ceb2 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -28,10 +28,10 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/DenseMap.h"
@@ -1018,9 +1018,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
/*UnfoldStore=*/false,
&LoadRegIndex);
if (NewOpc == 0) return 0;
- const TargetInstrDesc &TID = TII->get(NewOpc);
- if (TID.getNumDefs() != 1) return 0;
- const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
+ const MCInstrDesc &MID = TII->get(NewOpc);
+ if (MID.getNumDefs() != 1) return 0;
+ const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI);
// Ok, we're unfolding. Create a temporary register and do the unfold.
unsigned Reg = MRI->createVirtualRegister(RC);
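
MCInstrDesc lives in the MC layer and cannot name a TargetRegisterClass, so the class lookup now goes through the TargetInstrInfo hook, as in the MachineLICM change above. The same lookup in isolation (classOf is a hypothetical helper):

    static const TargetRegisterClass *
    classOf(const TargetInstrInfo *TII, const TargetRegisterInfo *TRI,
            unsigned Opcode, unsigned OpNum) {
      const MCInstrDesc &Desc = TII->get(Opcode); // target-independent desc
      return TII->getRegClass(Desc, OpNum, TRI);  // target resolves the class
    }
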
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 08ff5bb..4b3e64c 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -20,7 +20,6 @@ using namespace llvm;
MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
- RegClass2VRegMap = new std::vector<unsigned>[TRI.getNumRegClasses()];
UsedPhysRegs.resize(TRI.getNumRegs());
// Create the physreg use/def lists.
@@ -38,25 +37,13 @@ MachineRegisterInfo::~MachineRegisterInfo() {
"PhysRegUseDefLists has entries after all instructions are deleted");
#endif
delete [] PhysRegUseDefLists;
- delete [] RegClass2VRegMap;
}
/// setRegClass - Set the register class of the specified virtual register.
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
- const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
VRegInfo[Reg].first = RC;
-
- // Remove from old register class's vregs list. This may be slow but
- // fortunately this operation is rarely needed.
- std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
- std::vector<unsigned>::iterator I =
- std::find(VRegs.begin(), VRegs.end(), Reg);
- VRegs.erase(I);
-
- // Add to new register class's vregs list.
- RegClass2VRegMap[RC->getID()].push_back(Reg);
}
const TargetRegisterClass *
@@ -95,7 +82,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
// The vector reallocated, handle this now.
HandleVRegListReallocation();
- RegClass2VRegMap[RegClass->getID()].push_back(Reg);
return Reg;
}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 471463b..7a55852 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -62,6 +62,7 @@ namespace {
raw_ostream *OS;
const MachineFunction *MF;
const TargetMachine *TM;
+ const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
@@ -255,6 +256,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
this->MF = &MF;
TM = &MF.getTarget();
+ TII = TM->getInstrInfo();
TRI = TM->getRegisterInfo();
MRI = &MF.getRegInfo();
@@ -387,8 +389,6 @@ static bool matchPair(MachineBasicBlock::const_succ_iterator i,
void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
// Count the number of landing pad successors.
SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
@@ -541,19 +541,19 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
- const TargetInstrDesc &TI = MI->getDesc();
- if (MI->getNumOperands() < TI.getNumOperands()) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
report("Too few operands", MI);
- *OS << TI.getNumOperands() << " operands expected, but "
+ *OS << MCID.getNumOperands() << " operands expected, but "
<< MI->getNumExplicitOperands() << " given.\n";
}
// Check the MachineMemOperands for basic consistency.
for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
E = MI->memoperands_end(); I != E; ++I) {
- if ((*I)->isLoad() && !TI.mayLoad())
+ if ((*I)->isLoad() && !MCID.mayLoad())
report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !TI.mayStore())
+ if ((*I)->isStore() && !MCID.mayStore())
report("Missing mayStore flag", MI);
}
@@ -575,29 +575,30 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
- const TargetInstrDesc &TI = MI->getDesc();
- const TargetOperandInfo &TOI = TI.OpInfo[MONum];
+ const MCInstrDesc &MCID = MI->getDesc();
+ const MCOperandInfo &MCOI = MCID.OpInfo[MONum];
- // The first TI.NumDefs operands must be explicit register defines
- if (MONum < TI.getNumDefs()) {
+ // The first MCID.NumDefs operands must be explicit register defines
+ if (MONum < MCID.getNumDefs()) {
if (!MO->isReg())
report("Explicit definition must be a register", MO, MONum);
else if (!MO->isDef())
report("Explicit definition marked as use", MO, MONum);
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
- } else if (MONum < TI.getNumOperands()) {
+ } else if (MONum < MCID.getNumOperands()) {
// Don't check if it's the last operand in a variadic instruction. See,
// e.g., LDM_RET in the arm back end.
- if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) {
- if (MO->isDef() && !TOI.isOptionalDef())
+ if (MO->isReg() &&
+ !(MCID.isVariadic() && MONum == MCID.getNumOperands()-1)) {
+ if (MO->isDef() && !MCOI.isOptionalDef())
report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
report("Explicit operand marked as implicit", MO, MONum);
}
} else {
// ARM adds %reg0 operands to indicate predicates. We'll allow that.
- if (MO->isReg() && !MO->isImplicit() && !TI.isVariadic() && MO->getReg())
+ if (MO->isReg() && !MO->isImplicit() && !MCID.isVariadic() && MO->getReg())
report("Extra explicit operand on non-variadic instruction", MO, MONum);
}
@@ -709,7 +710,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
// Check register classes.
- if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
+ if (MONum < MCID.getNumOperands() && !MO->isImplicit()) {
unsigned SubIdx = MO->getSubReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -723,7 +724,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
sr = s;
}
- if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
+ if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
if (!DRC->contains(sr)) {
report("Illegal physical register for instruction", MO, MONum);
*OS << TRI->getName(sr) << " is not a "
@@ -743,7 +744,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
RC = SRC;
}
- if (const TargetRegisterClass *DRC = TOI.getRegClass(TRI)) {
+ if (const TargetRegisterClass *DRC = TII->getRegClass(MCID,MONum,TRI)) {
if (!RC->hasSuperClassEq(DRC)) {
report("Illegal virtual register for instruction", MO, MONum);
*OS << "Expected a " << DRC->getName() << " register, but got a "
@@ -765,11 +766,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
LiveInts && !LiveInts->isNotInMIMap(MI)) {
LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
SlotIndex Idx = LiveInts->getInstructionIndex(MI);
- if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
report("Instruction loads from dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
- if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
report("Instruction stores to dead spill slot", MO, MONum);
*OS << "Live stack: " << LI << '\n';
}
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index c105bb0..c523e39 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -353,10 +353,10 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
SmallSet<unsigned, 4> &ImmDefRegs,
DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isMoveImmediate())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isMoveImmediate())
return false;
- if (TID.getNumDefs() != 1)
+ if (MCID.getNumDefs() != 1)
return false;
unsigned Reg = MI->getOperand(0).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
@@ -429,16 +429,16 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- const TargetInstrDesc &TID = MI->getDesc();
+ const MCInstrDesc &MCID = MI->getDesc();
- if (TID.isBitcast()) {
+ if (MCID.isBitcast()) {
if (OptimizeBitcastInstr(MI, MBB)) {
// MI is deleted.
Changed = true;
MII = First ? I->begin() : llvm::next(PMII);
continue;
}
- } else if (TID.isCompare()) {
+ } else if (MCID.isCompare()) {
if (OptimizeCmpInstr(MI, MBB)) {
// MI is deleted.
Changed = true;
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index ba8501f..c73e877 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -22,6 +22,7 @@
#include "AntiDepBreaker.h"
#include "AggressiveAntiDepBreaker.h"
#include "CriticalAntiDepBreaker.h"
+#include "RegisterClassInfo.h"
#include "ScheduleDAGInstrs.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
@@ -37,7 +38,7 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -52,7 +53,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls");
STATISTIC(NumFixedAnti, "Number of fixed anti-dependencies");
// Post-RA scheduling is enabled with
-// TargetSubtarget.enablePostRAScheduler(). This flag can be used to
+// TargetSubtargetInfo.enablePostRAScheduler(). This flag can be used to
// override the target.
static cl::opt<bool>
EnablePostRAScheduler("post-RA-scheduler",
@@ -80,6 +81,7 @@ namespace {
class PostRAScheduler : public MachineFunctionPass {
AliasAnalysis *AA;
const TargetInstrInfo *TII;
+ RegisterClassInfo RegClassInfo;
CodeGenOpt::Level OptLevel;
public:
@@ -135,7 +137,8 @@ namespace {
public:
SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+ AliasAnalysis *AA, const RegisterClassInfo&,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
~SchedulePostRATDList();
@@ -179,7 +182,8 @@ namespace {
SchedulePostRATDList::SchedulePostRATDList(
MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
- AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+ AliasAnalysis *AA, const RegisterClassInfo &RCI,
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode,
SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
: ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
KillIndices(TRI->getNumRegs())
@@ -189,10 +193,10 @@ SchedulePostRATDList::SchedulePostRATDList(
HazardRec =
TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
AntiDepBreak =
- ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
- (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) :
- ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
- (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL));
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, RCI, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtargetInfo::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF, RCI) : NULL));
}
SchedulePostRATDList::~SchedulePostRATDList() {
@@ -205,9 +209,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
+ RegClassInfo.runOnMachineFunction(Fn);
// Check for explicit enable/disable of post-ra scheduling.
- TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+ TargetSubtargetInfo::AntiDepBreakMode AntiDepMode = TargetSubtargetInfo::ANTIDEP_NONE;
SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
@@ -215,22 +220,23 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
} else {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
- const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
+ const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
return false;
}
// Check for antidep breaking override...
if (EnableAntiDepBreaking.getPosition() > 0) {
- AntiDepMode = (EnableAntiDepBreaking == "all") ?
- TargetSubtarget::ANTIDEP_ALL :
- (EnableAntiDepBreaking == "critical")
- ? TargetSubtarget::ANTIDEP_CRITICAL : TargetSubtarget::ANTIDEP_NONE;
+ AntiDepMode = (EnableAntiDepBreaking == "all")
+ ? TargetSubtargetInfo::ANTIDEP_ALL
+ : ((EnableAntiDepBreaking == "critical")
+ ? TargetSubtargetInfo::ANTIDEP_CRITICAL
+ : TargetSubtargetInfo::ANTIDEP_NONE);
}
DEBUG(dbgs() << "PostRAScheduler\n");
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode,
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, RegClassInfo, AntiDepMode,
CriticalPathRCs);
// Loop over all of the basic blocks
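
The rename to TargetSubtargetInfo keeps the three anti-dependency breaking modes intact; the restructured string-to-mode mapping above is equivalent to this small sketch (pickMode is a hypothetical helper):

    static TargetSubtargetInfo::AntiDepBreakMode pickMode(StringRef Opt) {
      if (Opt == "all")      return TargetSubtargetInfo::ANTIDEP_ALL;
      if (Opt == "critical") return TargetSubtargetInfo::ANTIDEP_CRITICAL;
      return TargetSubtargetInfo::ANTIDEP_NONE; // default: no breaking
    }
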
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
deleted file mode 100644
index d6e31da..0000000
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ /dev/null
@@ -1,1430 +0,0 @@
-//===-- PreAllocSplitting.cpp - Pre-allocation Interval Spltting Pass. ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the machine instruction level pre-register allocation
-// live interval splitting pass. It finds live interval barriers, i.e.
-// instructions which will kill all physical registers in certain register
-// classes, and split all live intervals which cross the barrier.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "pre-alloc-split"
-#include "VirtRegMap.h"
-#include "llvm/CodeGen/CalcSpillWeights.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
-static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1),
- cl::Hidden);
-static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1),
- cl::Hidden);
-
-STATISTIC(NumSplits, "Number of intervals split");
-STATISTIC(NumRemats, "Number of intervals split by rematerialization");
-STATISTIC(NumFolds, "Number of intervals split with spill folding");
-STATISTIC(NumRestoreFolds, "Number of intervals split with restore folding");
-STATISTIC(NumRenumbers, "Number of intervals renumbered into new registers");
-STATISTIC(NumDeadSpills, "Number of dead spills removed");
-
-namespace {
- class PreAllocSplitting : public MachineFunctionPass {
- MachineFunction *CurrMF;
- const TargetMachine *TM;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo* TRI;
- MachineFrameInfo *MFI;
- MachineRegisterInfo *MRI;
- SlotIndexes *SIs;
- LiveIntervals *LIs;
- LiveStacks *LSs;
- VirtRegMap *VRM;
-
- // Barrier - Current barrier being processed.
- MachineInstr *Barrier;
-
- // BarrierMBB - Basic block where the barrier resides in.
- MachineBasicBlock *BarrierMBB;
-
- // Barrier - Current barrier index.
- SlotIndex BarrierIdx;
-
- // CurrLI - Current live interval being split.
- LiveInterval *CurrLI;
-
- // CurrSLI - Current stack slot live interval.
- LiveInterval *CurrSLI;
-
- // CurrSValNo - Current val# for the stack slot live interval.
- VNInfo *CurrSValNo;
-
- // IntervalSSMap - A map from live interval to spill slots.
- DenseMap<unsigned, int> IntervalSSMap;
-
- // Def2SpillMap - A map from a def instruction index to spill index.
- DenseMap<SlotIndex, SlotIndex> Def2SpillMap;
-
- public:
- static char ID;
- PreAllocSplitting() : MachineFunctionPass(ID) {
- initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
- }
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addRequired<LiveStacks>();
- AU.addPreserved<LiveStacks>();
- AU.addPreserved<RegisterCoalescer>();
- AU.addPreserved<CalculateSpillWeights>();
- AU.addPreservedID(StrongPHIEliminationID);
- AU.addPreservedID(PHIEliminationID);
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<MachineLoopInfo>();
- AU.addRequired<VirtRegMap>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<VirtRegMap>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual void releaseMemory() {
- IntervalSSMap.clear();
- Def2SpillMap.clear();
- }
-
- virtual const char *getPassName() const {
- return "Pre-Register Allocaton Live Interval Splitting";
- }
-
- /// print - Implement the dump method.
- virtual void print(raw_ostream &O, const Module* M = 0) const {
- LIs->print(O, M);
- }
-
-
- private:
-
- MachineBasicBlock::iterator
- findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
- SmallPtrSet<MachineInstr*, 4>&);
-
- MachineBasicBlock::iterator
- findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
- SmallPtrSet<MachineInstr*, 4>&);
-
- int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
-
- bool IsAvailableInStack(MachineBasicBlock*, unsigned,
- SlotIndex, SlotIndex,
- SlotIndex&, int&) const;
-
- void UpdateSpillSlotInterval(VNInfo*, SlotIndex, SlotIndex);
-
- bool SplitRegLiveInterval(LiveInterval*);
-
- bool SplitRegLiveIntervals(const TargetRegisterClass **,
- SmallPtrSet<LiveInterval*, 8>&);
-
- bool createsNewJoin(LiveRange* LR, MachineBasicBlock* DefMBB,
- MachineBasicBlock* BarrierMBB);
- bool Rematerialize(unsigned vreg, VNInfo* ValNo,
- MachineInstr* DefMI,
- MachineBasicBlock::iterator RestorePt,
- SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
- MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
- MachineInstr* DefMI,
- MachineInstr* Barrier,
- MachineBasicBlock* MBB,
- int& SS,
- SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
- MachineInstr* FoldRestore(unsigned vreg,
- const TargetRegisterClass* RC,
- MachineInstr* Barrier,
- MachineBasicBlock* MBB,
- int SS,
- SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
- void RenumberValno(VNInfo* VN);
- void ReconstructLiveInterval(LiveInterval* LI);
- bool removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split);
- unsigned getNumberOfNonSpills(SmallPtrSet<MachineInstr*, 4>& MIs,
- unsigned Reg, int FrameIndex, bool& TwoAddr);
- VNInfo* PerformPHIConstruction(MachineBasicBlock::iterator Use,
- MachineBasicBlock* MBB, LiveInterval* LI,
- SmallPtrSet<MachineInstr*, 4>& Visited,
- DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
- DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
- DenseMap<MachineInstr*, VNInfo*>& NewVNs,
- DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
- DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
- bool IsTopLevel, bool IsIntraBlock);
- VNInfo* PerformPHIConstructionFallBack(MachineBasicBlock::iterator Use,
- MachineBasicBlock* MBB, LiveInterval* LI,
- SmallPtrSet<MachineInstr*, 4>& Visited,
- DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
- DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
- DenseMap<MachineInstr*, VNInfo*>& NewVNs,
- DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
- DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
- bool IsTopLevel, bool IsIntraBlock);
-};
-} // end anonymous namespace
-
-char PreAllocSplitting::ID = 0;
-
-INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting",
- "Pre-Register Allocation Live Interval Splitting",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveStacks)
-INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting",
- "Pre-Register Allocation Live Interval Splitting",
- false, false)
-
-char &llvm::PreAllocSplittingID = PreAllocSplitting::ID;
-
-/// findSpillPoint - Find a gap, as far away from the given MI as possible,
-/// that's suitable for spilling the current live interval. The index must be
-/// before any defs and uses of the live interval register in the mbb. Return
-/// begin() if none is found.
-MachineBasicBlock::iterator
-PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
- MachineInstr *DefMI,
- SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
- MachineBasicBlock::iterator Pt = MBB->begin();
-
- MachineBasicBlock::iterator MII = MI;
- MachineBasicBlock::iterator EndPt = DefMI
- ? MachineBasicBlock::iterator(DefMI) : MBB->begin();
-
- while (MII != EndPt && !RefsInMBB.count(MII) &&
- MII->getOpcode() != TRI->getCallFrameSetupOpcode())
- --MII;
- if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
-
- while (MII != EndPt && !RefsInMBB.count(MII)) {
- // We can't insert the spill between the barrier (a call) and its
- // corresponding call frame setup.
- if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
- while (MII->getOpcode() != TRI->getCallFrameSetupOpcode()) {
- --MII;
- if (MII == EndPt) {
- return Pt;
- }
- }
- continue;
- } else {
- Pt = MII;
- }
-
- if (RefsInMBB.count(MII))
- return Pt;
-
- --MII;
- }
-
- return Pt;
-}
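// A standalone sketch of the backward gap search implemented above, using a
// plain vector of hypothetical instruction kinds in place of LLVM's
// MachineBasicBlock; Kind and findGap are illustrative names, not LLVM API.
#include <cstddef>
#include <vector>

enum Kind { Ordinary, FrameSetup, FrameDestroy, RegRef };

// Walk backward from Barrier toward Stop and return the lowest legal
// insertion point: never inside a FrameSetup/FrameDestroy region, and never
// above a reference (RegRef) to the register being spilled.
std::size_t findGap(const std::vector<Kind> &MBB, std::size_t Barrier,
                    std::size_t Stop) {
  std::size_t Pt = Stop; // Fallback when no usable gap exists.
  for (std::size_t I = Barrier; I > Stop; --I) {
    if (MBB[I] == RegRef)
      return Pt;                      // Can't move past a use or def.
    if (MBB[I] == FrameDestroy) {     // Skip the whole call-frame region.
      while (I > Stop && MBB[I] != FrameSetup)
        --I;
      continue;
    }
    Pt = I;                           // This slot is a legal gap.
  }
  return Pt;
}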
-
-/// findRestorePoint - Find a gap in the instruction index map that's suitable
-/// for restoring the current live interval value. The index must be before any
-/// uses of the live interval register in the mbb. Return end() if none is
-/// found.
-MachineBasicBlock::iterator
-PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
- SlotIndex LastIdx,
- SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
- // FIXME: Allow spill to be inserted at the beginning of the mbb. Update mbb
- // begin index accordingly.
- MachineBasicBlock::iterator Pt = MBB->end();
- MachineBasicBlock::iterator EndPt = MBB->getFirstTerminator();
-
- // We start at the call, so walk forward until we find the call frame teardown
- // since we can't insert restores before that. Bail if we encounter a use
- // during this time.
- MachineBasicBlock::iterator MII = MI;
- if (MII == EndPt) return Pt;
-
- while (MII != EndPt && !RefsInMBB.count(MII) &&
- MII->getOpcode() != TRI->getCallFrameDestroyOpcode())
- ++MII;
- if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
- ++MII;
-
- // FIXME: Limit the number of instructions to examine to reduce
- // compile time?
- while (MII != EndPt) {
- SlotIndex Index = LIs->getInstructionIndex(MII);
- if (Index > LastIdx)
- break;
-
- // We can't insert a restore between the barrier (a call) and its
- // corresponding call frame teardown.
- if (MII->getOpcode() == TRI->getCallFrameSetupOpcode()) {
- do {
- if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
- ++MII;
- } while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
- } else {
- Pt = MII;
- }
-
- if (RefsInMBB.count(MII))
- return Pt;
-
- ++MII;
- }
-
- return Pt;
-}
-
-/// CreateSpillStackSlot - Create a stack slot for the live interval being
-/// split. If the live interval was previously split, just reuse the same
-/// slot.
-int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
- const TargetRegisterClass *RC) {
- int SS;
- DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
- if (I != IntervalSSMap.end()) {
- SS = I->second;
- } else {
- SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
- IntervalSSMap[Reg] = SS;
- }
-
- // Create live interval for stack slot.
- CurrSLI = &LSs->getOrCreateInterval(SS, RC);
- if (CurrSLI->hasAtLeastOneValue())
- CurrSValNo = CurrSLI->getValNumInfo(0);
- else
- CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
- LSs->getVNInfoAllocator());
- return SS;
-}
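// The slot-reuse logic above is the usual get-or-create map pattern; a
// hedged sketch with a std::map standing in for IntervalSSMap (allocateSlot
// is a hypothetical stand-in for CreateSpillStackObject):
#include <map>

static int NextSlot = 0;
static int allocateSlot() { return NextSlot++; }

int getOrCreateSpillSlot(std::map<unsigned, int> &SlotMap, unsigned Reg) {
  std::map<unsigned, int>::iterator I = SlotMap.find(Reg);
  if (I != SlotMap.end())
    return I->second;          // The interval was split before: reuse.
  int SS = allocateSlot();     // First split: create and remember the slot.
  SlotMap[Reg] = SS;
  return SS;
}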
-
-/// IsAvailableInStack - Return true if register is available in a split stack
-/// slot at the specified index.
-bool
-PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
- unsigned Reg, SlotIndex DefIndex,
- SlotIndex RestoreIndex,
- SlotIndex &SpillIndex,
- int& SS) const {
- if (!DefMBB)
- return false;
-
- DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg);
- if (I == IntervalSSMap.end())
- return false;
- DenseMap<SlotIndex, SlotIndex>::const_iterator
- II = Def2SpillMap.find(DefIndex);
- if (II == Def2SpillMap.end())
- return false;
-
- // If the last spill of the def is in the same mbb as the barrier mbb (where
- // the restore will be), make sure it's not below the intended restore index.
- // FIXME: Undo the previous spill?
- assert(LIs->getMBBFromIndex(II->second) == DefMBB);
- if (DefMBB == BarrierMBB && II->second >= RestoreIndex)
- return false;
-
- SS = I->second;
- SpillIndex = II->second;
- return true;
-}
-
-/// UpdateSpillSlotInterval - Given the specified val# of the register live
-/// interval being split, and the spill and restore indices, update the live
-/// interval of the spill stack slot.
-void
-PreAllocSplitting::UpdateSpillSlotInterval(VNInfo *ValNo, SlotIndex SpillIndex,
- SlotIndex RestoreIndex) {
- assert(LIs->getMBBFromIndex(RestoreIndex) == BarrierMBB &&
- "Expect restore in the barrier mbb");
-
- MachineBasicBlock *MBB = LIs->getMBBFromIndex(SpillIndex);
- if (MBB == BarrierMBB) {
- // Intra-block spill + restore. We are done.
- LiveRange SLR(SpillIndex, RestoreIndex, CurrSValNo);
- CurrSLI->addRange(SLR);
- return;
- }
-
- SmallPtrSet<MachineBasicBlock*, 4> Processed;
- SlotIndex EndIdx = LIs->getMBBEndIdx(MBB);
- LiveRange SLR(SpillIndex, EndIdx, CurrSValNo);
- CurrSLI->addRange(SLR);
- Processed.insert(MBB);
-
- // Start from the spill mbb and figure out the extent of the spill slot's
- // live interval.
- SmallVector<MachineBasicBlock*, 4> WorkList;
- const LiveRange *LR = CurrLI->getLiveRangeContaining(SpillIndex);
- if (LR->end > EndIdx)
- // If the live range extends beyond the end of the mbb, add successors to the work list.
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- WorkList.push_back(*SI);
-
- while (!WorkList.empty()) {
- MachineBasicBlock *MBB = WorkList.back();
- WorkList.pop_back();
- if (Processed.count(MBB))
- continue;
- SlotIndex Idx = LIs->getMBBStartIdx(MBB);
- LR = CurrLI->getLiveRangeContaining(Idx);
- if (LR && LR->valno == ValNo) {
- EndIdx = LIs->getMBBEndIdx(MBB);
- if (Idx <= RestoreIndex && RestoreIndex < EndIdx) {
- // Spill slot live interval stops at the restore.
- LiveRange SLR(Idx, RestoreIndex, CurrSValNo);
- CurrSLI->addRange(SLR);
- } else if (LR->end > EndIdx) {
- // Live range extends beyond end of mbb, process successors.
- LiveRange SLR(Idx, EndIdx.getNextIndex(), CurrSValNo);
- CurrSLI->addRange(SLR);
- for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- WorkList.push_back(*SI);
- } else {
- LiveRange SLR(Idx, LR->end, CurrSValNo);
- CurrSLI->addRange(SLR);
- }
- Processed.insert(MBB);
- }
- }
-}
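// The interval extension above is a standard worklist walk over CFG
// successors with a visited set; a self-contained sketch of that shape,
// where Succs is a hypothetical adjacency list indexed by block number:
#include <cstddef>
#include <set>
#include <vector>

void propagate(const std::vector<std::vector<int> > &Succs, int Start) {
  std::set<int> Processed;
  std::vector<int> WorkList(1, Start);
  while (!WorkList.empty()) {
    int BB = WorkList.back();
    WorkList.pop_back();
    if (!Processed.insert(BB).second)
      continue;                        // Extend through each block once.
    // ... extend the stack slot's live range through BB here ...
    for (std::size_t i = 0; i != Succs[BB].size(); ++i)
      WorkList.push_back(Succs[BB][i]);
  }
}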
-
-/// PerformPHIConstruction - From properly set up use and def lists, use a PHI
-/// construction algorithm to compute the ranges and valnos for an interval.
-VNInfo*
-PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI,
- MachineBasicBlock* MBB, LiveInterval* LI,
- SmallPtrSet<MachineInstr*, 4>& Visited,
- DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
- DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
- DenseMap<MachineInstr*, VNInfo*>& NewVNs,
- DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
- DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
- bool IsTopLevel, bool IsIntraBlock) {
- // Return memoized result if it's available.
- if (IsTopLevel && Visited.count(UseI) && NewVNs.count(UseI))
- return NewVNs[UseI];
- else if (!IsTopLevel && IsIntraBlock && NewVNs.count(UseI))
- return NewVNs[UseI];
- else if (!IsIntraBlock && LiveOut.count(MBB))
- return LiveOut[MBB];
-
- // Check if our block contains any uses or defs.
- bool ContainsDefs = Defs.count(MBB);
- bool ContainsUses = Uses.count(MBB);
-
- VNInfo* RetVNI = 0;
-
- // Enumerate the cases of use/def containing blocks.
- if (!ContainsDefs && !ContainsUses) {
- return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs, Uses,
- NewVNs, LiveOut, Phis,
- IsTopLevel, IsIntraBlock);
- } else if (ContainsDefs && !ContainsUses) {
- SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
-
- // Search for the def in this block. If we don't find it before the
- // instruction we care about, go to the fallback case. Note that this
- // should never happen: this cannot be intrablock, so the use should
- // always be an end() iterator.
- assert(UseI == MBB->end() && "No use marked in intrablock");
-
- MachineBasicBlock::iterator Walker = UseI;
- --Walker;
- while (Walker != MBB->begin()) {
- if (BlockDefs.count(Walker))
- break;
- --Walker;
- }
-
- // Once we've found it, extend its VNInfo to our instruction.
- SlotIndex DefIndex = LIs->getInstructionIndex(Walker);
- DefIndex = DefIndex.getDefIndex();
- SlotIndex EndIndex = LIs->getMBBEndIdx(MBB);
-
- RetVNI = NewVNs[Walker];
- LI->addRange(LiveRange(DefIndex, EndIndex, RetVNI));
- } else if (!ContainsDefs && ContainsUses) {
- SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
-
- // Search for the use in this block that precedes the instruction we care
- // about, going to the fallback case if we don't find it.
- MachineBasicBlock::iterator Walker = UseI;
- bool found = false;
- while (Walker != MBB->begin()) {
- --Walker;
- if (BlockUses.count(Walker)) {
- found = true;
- break;
- }
- }
-
- if (!found)
- return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
- Uses, NewVNs, LiveOut, Phis,
- IsTopLevel, IsIntraBlock);
-
- SlotIndex UseIndex = LIs->getInstructionIndex(Walker);
- UseIndex = UseIndex.getUseIndex();
- SlotIndex EndIndex;
- if (IsIntraBlock) {
- EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
- } else
- EndIndex = LIs->getMBBEndIdx(MBB);
-
- // Now, recursively phi construct the VNInfo for the use we found,
- // and then extend it to include the instruction we care about.
- RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
- NewVNs, LiveOut, Phis, false, true);
-
- LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI));
-
- // FIXME: Need to set kills properly for inter-block stuff.
- } else if (ContainsDefs && ContainsUses) {
- SmallPtrSet<MachineInstr*, 2>& BlockDefs = Defs[MBB];
- SmallPtrSet<MachineInstr*, 2>& BlockUses = Uses[MBB];
-
- // This case is basically a merging of the two preceding cases, with the
- // special note that checking for defs must take precedence over checking
- // for uses, because of two-address instructions.
- MachineBasicBlock::iterator Walker = UseI;
- bool foundDef = false;
- bool foundUse = false;
- while (Walker != MBB->begin()) {
- --Walker;
- if (BlockDefs.count(Walker)) {
- foundDef = true;
- break;
- } else if (BlockUses.count(Walker)) {
- foundUse = true;
- break;
- }
- }
-
- if (!foundDef && !foundUse)
- return PerformPHIConstructionFallBack(UseI, MBB, LI, Visited, Defs,
- Uses, NewVNs, LiveOut, Phis,
- IsTopLevel, IsIntraBlock);
-
- SlotIndex StartIndex = LIs->getInstructionIndex(Walker);
- StartIndex = foundDef ? StartIndex.getDefIndex() : StartIndex.getUseIndex();
- SlotIndex EndIndex;
- if (IsIntraBlock) {
- EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
- } else
- EndIndex = LIs->getMBBEndIdx(MBB);
-
- if (foundDef)
- RetVNI = NewVNs[Walker];
- else
- RetVNI = PerformPHIConstruction(Walker, MBB, LI, Visited, Defs, Uses,
- NewVNs, LiveOut, Phis, false, true);
-
- LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
- }
-
- // Memoize results so we don't have to recompute them.
- if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
- else {
- if (!NewVNs.count(UseI))
- NewVNs[UseI] = RetVNI;
- Visited.insert(UseI);
- }
-
- return RetVNI;
-}
-
-/// PerformPHIConstructionFallBack - PerformPHIConstruction fallback path.
-///
-VNInfo*
-PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator UseI,
- MachineBasicBlock* MBB, LiveInterval* LI,
- SmallPtrSet<MachineInstr*, 4>& Visited,
- DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Defs,
- DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> >& Uses,
- DenseMap<MachineInstr*, VNInfo*>& NewVNs,
- DenseMap<MachineBasicBlock*, VNInfo*>& LiveOut,
- DenseMap<MachineBasicBlock*, VNInfo*>& Phis,
- bool IsTopLevel, bool IsIntraBlock) {
- // NOTE: Because this is the fallback case from other cases, we do NOT
- // assume that we are not intrablock here.
- if (Phis.count(MBB)) return Phis[MBB];
-
- SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
- VNInfo *RetVNI = Phis[MBB] =
- LI->getNextValue(SlotIndex(), /*FIXME*/ 0,
- LIs->getVNInfoAllocator());
-
- if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
-
- // If there are no uses or defs between our starting point and the
- // beginning of the block, then recursively perform phi construction
- // on our predecessors.
- DenseMap<MachineBasicBlock*, VNInfo*> IncomingVNs;
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- VNInfo* Incoming = PerformPHIConstruction((*PI)->end(), *PI, LI,
- Visited, Defs, Uses, NewVNs,
- LiveOut, Phis, false, false);
- if (Incoming != 0)
- IncomingVNs[*PI] = Incoming;
- }
-
- if (MBB->pred_size() == 1 && !RetVNI->hasPHIKill()) {
- VNInfo* OldVN = RetVNI;
- VNInfo* NewVN = IncomingVNs.begin()->second;
- VNInfo* MergedVN = LI->MergeValueNumberInto(OldVN, NewVN);
- if (MergedVN == OldVN) std::swap(OldVN, NewVN);
-
- for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator LOI = LiveOut.begin(),
- LOE = LiveOut.end(); LOI != LOE; ++LOI)
- if (LOI->second == OldVN)
- LOI->second = MergedVN;
- for (DenseMap<MachineInstr*, VNInfo*>::iterator NVI = NewVNs.begin(),
- NVE = NewVNs.end(); NVI != NVE; ++NVI)
- if (NVI->second == OldVN)
- NVI->second = MergedVN;
- for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator PI = Phis.begin(),
- PE = Phis.end(); PI != PE; ++PI)
- if (PI->second == OldVN)
- PI->second = MergedVN;
- RetVNI = MergedVN;
- } else {
- // Otherwise, merge the incoming VNInfos with a phi join. Create a new
- // VNInfo to represent the joined value.
- for (DenseMap<MachineBasicBlock*, VNInfo*>::iterator I =
- IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) {
- I->second->setHasPHIKill(true);
- }
- }
-
- SlotIndex EndIndex;
- if (IsIntraBlock) {
- EndIndex = LIs->getInstructionIndex(UseI).getDefIndex();
- } else
- EndIndex = LIs->getMBBEndIdx(MBB);
- LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI));
-
- // Memoize results so we don't have to recompute them.
- if (!IsIntraBlock)
- LiveOut[MBB] = RetVNI;
- else {
- if (!NewVNs.count(UseI))
- NewVNs[UseI] = RetVNI;
- Visited.insert(UseI);
- }
-
- return RetVNI;
-}
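// Two details above keep the recursion well-founded: results are memoized
// (LiveOut/NewVNs) and the fallback records Phis[MBB] before recursing into
// predecessors, so CFG cycles terminate. A toy sketch of that
// insert-before-recurse shape (computeLiveIn and Preds are made-up names):
#include <cstddef>
#include <map>
#include <vector>

int computeLiveIn(int BB, const std::vector<std::vector<int> > &Preds,
                  std::map<int, int> &Phis) {
  std::map<int, int>::iterator I = Phis.find(BB);
  if (I != Phis.end())
    return I->second;           // Memoized, or a cycle back to this block.
  Phis[BB] = BB;                // Record the phi *before* recursing.
  for (std::size_t i = 0; i != Preds[BB].size(); ++i)
    computeLiveIn(Preds[BB][i], Preds, Phis); // Resolve incoming values.
  return Phis[BB];
}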
-
-/// ReconstructLiveInterval - Recompute a live interval from scratch.
-void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
- VNInfo::Allocator& Alloc = LIs->getVNInfoAllocator();
-
- // Clear the old ranges and valnos.
- LI->clear();
-
- // Cache the uses and defs of the register
- typedef DenseMap<MachineBasicBlock*, SmallPtrSet<MachineInstr*, 2> > RegMap;
- RegMap Defs, Uses;
-
- // Keep track of the new VNs we're creating.
- DenseMap<MachineInstr*, VNInfo*> NewVNs;
- SmallPtrSet<VNInfo*, 2> PhiVNs;
-
- // Cache defs, and create a new VNInfo for each def.
- for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
- DE = MRI->def_end(); DI != DE; ++DI) {
- Defs[(*DI).getParent()].insert(&*DI);
-
- SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
- DefIdx = DefIdx.getDefIndex();
-
- assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting.");
- VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
-
- // If the def is a move, set the copy field.
- if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
- NewVN->setCopy(&*DI);
-
- NewVNs[&*DI] = NewVN;
- }
-
- // Cache uses as a separate pass from actually processing them.
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
- UE = MRI->use_end(); UI != UE; ++UI)
- Uses[(*UI).getParent()].insert(&*UI);
-
- // Now, actually process every use and use a phi construction algorithm
- // to walk from it to its reaching definitions, building VNInfos along
- // the way.
- DenseMap<MachineBasicBlock*, VNInfo*> LiveOut;
- DenseMap<MachineBasicBlock*, VNInfo*> Phis;
- SmallPtrSet<MachineInstr*, 4> Visited;
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(LI->reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- PerformPHIConstruction(&*UI, UI->getParent(), LI, Visited, Defs,
- Uses, NewVNs, LiveOut, Phis, true, true);
- }
-
- // Add ranges for dead defs
- for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(LI->reg),
- DE = MRI->def_end(); DI != DE; ++DI) {
- SlotIndex DefIdx = LIs->getInstructionIndex(&*DI);
- DefIdx = DefIdx.getDefIndex();
-
- if (LI->liveAt(DefIdx)) continue;
-
- VNInfo* DeadVN = NewVNs[&*DI];
- LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN));
- }
-}
-
-/// RenumberValno - Split the given valno out into a new vreg, allowing it to
-/// be allocated to a different register. This function creates a new vreg,
-/// copies the valno and its live ranges over to the new vreg's interval,
-/// removes them from the old interval, and rewrites all uses and defs of
-/// the original reg to the new vreg within those ranges.
-void PreAllocSplitting::RenumberValno(VNInfo* VN) {
- SmallVector<VNInfo*, 4> Stack;
- SmallVector<VNInfo*, 4> VNsToCopy;
- Stack.push_back(VN);
-
- // Walk through and copy the valno we care about, and any other valnos
- // that are two-address redefinitions of the one we care about. These
- // will need to be rewritten as well. We also check for safety of the
- // renumbering here, by making sure that none of the valnos involved has
- // PHI kills.
- while (!Stack.empty()) {
- VNInfo* OldVN = Stack.back();
- Stack.pop_back();
-
- // Bail out if we ever encounter a valno that has a PHI kill. We can't
- // renumber these.
- if (OldVN->hasPHIKill()) return;
-
- VNsToCopy.push_back(OldVN);
-
- // Locate two-address redefinitions
- for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg),
- DE = MRI->def_end(); DI != DE; ++DI) {
- if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue;
- SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex();
- VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx);
- if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) !=
- VNsToCopy.end())
- continue;
- Stack.push_back(NextVN);
- }
- }
-
- // Create the new vreg
- unsigned NewVReg = MRI->createVirtualRegister(MRI->getRegClass(CurrLI->reg));
-
- // Create the new live interval
- LiveInterval& NewLI = LIs->getOrCreateInterval(NewVReg);
-
- for (SmallVector<VNInfo*, 4>::iterator OI = VNsToCopy.begin(), OE =
- VNsToCopy.end(); OI != OE; ++OI) {
- VNInfo* OldVN = *OI;
-
- // Copy the valno over
- VNInfo* NewVN = NewLI.createValueCopy(OldVN, LIs->getVNInfoAllocator());
- NewLI.MergeValueInAsValue(*CurrLI, OldVN, NewVN);
-
- // Remove the valno from the old interval
- CurrLI->removeValNo(OldVN);
- }
-
- // Rewrite defs and uses. This is done in two stages to avoid invalidating
- // the reg_iterator.
- SmallVector<std::pair<MachineInstr*, unsigned>, 8> OpsToChange;
-
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
- E = MRI->reg_end(); I != E; ++I) {
- MachineOperand& MO = I.getOperand();
- SlotIndex InstrIdx = LIs->getInstructionIndex(&*I);
-
- if ((MO.isUse() && NewLI.liveAt(InstrIdx.getUseIndex())) ||
- (MO.isDef() && NewLI.liveAt(InstrIdx.getDefIndex())))
- OpsToChange.push_back(std::make_pair(&*I, I.getOperandNo()));
- }
-
- for (SmallVector<std::pair<MachineInstr*, unsigned>, 8>::iterator I =
- OpsToChange.begin(), E = OpsToChange.end(); I != E; ++I) {
- MachineInstr* Inst = I->first;
- unsigned OpIdx = I->second;
- MachineOperand& MO = Inst->getOperand(OpIdx);
- MO.setReg(NewVReg);
- }
-
- // Grow the VirtRegMap, since we've created a new vreg.
- VRM->grow();
-
- // The renumbered vreg shares a stack slot with the old register.
- if (IntervalSSMap.count(CurrLI->reg))
- IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg];
-
- ++NumRenumbers;
-}
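// RenumberValno rewrites operands in two stages because calling setReg()
// while walking MRI's reg_iterator would invalidate it. A minimal sketch of
// the collect-then-mutate pattern with plain containers (Operand is a toy
// type, not LLVM's MachineOperand):
#include <cstddef>
#include <vector>

struct Operand { unsigned Reg; };

void rewriteUses(std::vector<Operand> &Ops, unsigned OldReg, unsigned NewReg) {
  std::vector<std::size_t> ToChange;   // Stage 1: record while iterating.
  for (std::size_t i = 0; i != Ops.size(); ++i)
    if (Ops[i].Reg == OldReg)
      ToChange.push_back(i);
  for (std::size_t i = 0; i != ToChange.size(); ++i)
    Ops[ToChange[i]].Reg = NewReg;     // Stage 2: mutate afterwards.
}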
-
-bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
- MachineInstr* DefMI,
- MachineBasicBlock::iterator RestorePt,
- SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
- MachineBasicBlock& MBB = *RestorePt->getParent();
-
- MachineBasicBlock::iterator KillPt = BarrierMBB->end();
- if (!DefMI || DefMI->getParent() == BarrierMBB)
- KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
- else
- KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
-
- if (KillPt == DefMI->getParent()->end())
- return false;
-
- TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI);
- SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
-
- ReconstructLiveInterval(CurrLI);
- RematIdx = RematIdx.getDefIndex();
- RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
-
- ++NumSplits;
- ++NumRemats;
- return true;
-}
-
-MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
- const TargetRegisterClass* RC,
- MachineInstr* DefMI,
- MachineInstr* Barrier,
- MachineBasicBlock* MBB,
- int& SS,
- SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
- // Bail if there are no references in the MBB to bound the search.
- if (RefsInMBB.empty())
- return 0;
-
- MachineBasicBlock::iterator FoldPt = Barrier;
- while (&*FoldPt != DefMI && FoldPt != MBB->begin() &&
- !RefsInMBB.count(FoldPt))
- --FoldPt;
-
- int OpIdx = FoldPt->findRegisterDefOperandIdx(vreg);
- if (OpIdx == -1)
- return 0;
-
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(OpIdx);
-
- if (!TII->canFoldMemoryOperand(FoldPt, Ops))
- return 0;
-
- DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(vreg);
- if (I != IntervalSSMap.end()) {
- SS = I->second;
- } else {
- SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
- }
-
- MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
-
- if (FMI) {
- LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
- FoldPt->eraseFromParent();
- ++NumFolds;
-
- IntervalSSMap[vreg] = SS;
- CurrSLI = &LSs->getOrCreateInterval(SS, RC);
- if (CurrSLI->hasAtLeastOneValue())
- CurrSValNo = CurrSLI->getValNumInfo(0);
- else
- CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
- LSs->getVNInfoAllocator());
- }
-
- return FMI;
-}
-
-MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg,
- const TargetRegisterClass* RC,
- MachineInstr* Barrier,
- MachineBasicBlock* MBB,
- int SS,
- SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
- if ((int)RestoreFoldLimit != -1 && RestoreFoldLimit == (int)NumRestoreFolds)
- return 0;
-
- // Bail if there are no references in the MBB to bound the search.
- if (RefsInMBB.empty())
- return 0;
-
- // Can't fold a restore between a call stack setup and teardown.
- MachineBasicBlock::iterator FoldPt = Barrier;
-
- // Advance from barrier to call frame teardown.
- while (FoldPt != MBB->getFirstTerminator() &&
- FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
- if (RefsInMBB.count(FoldPt))
- return 0;
-
- ++FoldPt;
- }
-
- if (FoldPt == MBB->getFirstTerminator())
- return 0;
- else
- ++FoldPt;
-
- // Now find the restore point.
- while (FoldPt != MBB->getFirstTerminator() && !RefsInMBB.count(FoldPt)) {
- if (FoldPt->getOpcode() == TRI->getCallFrameSetupOpcode()) {
- while (FoldPt != MBB->getFirstTerminator() &&
- FoldPt->getOpcode() != TRI->getCallFrameDestroyOpcode()) {
- if (RefsInMBB.count(FoldPt))
- return 0;
-
- ++FoldPt;
- }
-
- if (FoldPt == MBB->getFirstTerminator())
- return 0;
- }
-
- ++FoldPt;
- }
-
- if (FoldPt == MBB->getFirstTerminator())
- return 0;
-
- int OpIdx = FoldPt->findRegisterUseOperandIdx(vreg, true);
- if (OpIdx == -1)
- return 0;
-
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(OpIdx);
-
- if (!TII->canFoldMemoryOperand(FoldPt, Ops))
- return 0;
-
- MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS);
-
- if (FMI) {
- LIs->ReplaceMachineInstrInMaps(FoldPt, FMI);
- FoldPt->eraseFromParent();
- ++NumRestoreFolds;
- }
-
- return FMI;
-}
-
-/// SplitRegLiveInterval - Split (spill and restore) the given live interval
-/// so that it does not cross the barrier that's being processed. Shrink wrap
-/// (minimize) the live interval to the last uses.
-bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
- DEBUG(dbgs() << "Pre-alloc splitting " << LI->reg << " for " << *Barrier
- << " result: ");
-
- CurrLI = LI;
-
- // Find the live range where the current interval crosses the barrier.
- LiveInterval::iterator LR =
- CurrLI->FindLiveRangeContaining(BarrierIdx.getUseIndex());
- VNInfo *ValNo = LR->valno;
-
- assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
-
- MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def);
-
- // If this would create a new join point, do not split.
- if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
- DEBUG(dbgs() << "FAILED (would create a new join point).\n");
- return false;
- }
-
- // Find all references in the barrier mbb.
- SmallPtrSet<MachineInstr*, 4> RefsInMBB;
- for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(CurrLI->reg),
- E = MRI->reg_end(); I != E; ++I) {
- MachineInstr *RefMI = &*I;
- if (RefMI->getParent() == BarrierMBB)
- RefsInMBB.insert(RefMI);
- }
-
- // Find a point to restore the value after the barrier.
- MachineBasicBlock::iterator RestorePt =
- findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
- if (RestorePt == BarrierMBB->end()) {
- DEBUG(dbgs() << "FAILED (could not find a suitable restore point).\n");
- return false;
- }
-
- if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
- if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
- DEBUG(dbgs() << "success (remat).\n");
- return true;
- }
-
- // Add a spill either before the barrier or after the definition.
- MachineBasicBlock *DefMBB = DefMI ? DefMI->getParent() : NULL;
- const TargetRegisterClass *RC = MRI->getRegClass(CurrLI->reg);
- SlotIndex SpillIndex;
- MachineInstr *SpillMI = NULL;
- int SS = -1;
- if (!DefMI) {
- // If we don't know where the def is, we must split just before the barrier.
- if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
- BarrierMBB, SS, RefsInMBB))) {
- SpillIndex = LIs->getInstructionIndex(SpillMI);
- } else {
- MachineBasicBlock::iterator SpillPt =
- findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
- if (SpillPt == BarrierMBB->begin()) {
- DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
- return false; // No gap to insert spill.
- }
- // Add spill.
- SS = CreateSpillStackSlot(CurrLI->reg, RC);
- TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC,
- TRI);
- SpillMI = prior(SpillPt);
- SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
- }
- } else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
- LIs->getZeroIndex(), SpillIndex, SS)) {
- // If it's already split, just restore the value. There is no need to spill
- // the def again.
- if (!DefMI) {
- DEBUG(dbgs() << "FAILED (def is dead).\n");
- return false; // Def is dead. Do nothing.
- }
-
- if ((SpillMI = FoldSpill(LI->reg, RC, DefMI, Barrier,
- BarrierMBB, SS, RefsInMBB))) {
- SpillIndex = LIs->getInstructionIndex(SpillMI);
- } else {
- // Check if it's possible to insert a spill after the def MI.
- MachineBasicBlock::iterator SpillPt;
- if (DefMBB == BarrierMBB) {
- // Add spill after the def and the last use before the barrier.
- SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
- RefsInMBB);
- if (SpillPt == DefMBB->begin()) {
- DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
- return false; // No gap to insert spill.
- }
- } else {
- SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
- if (SpillPt == DefMBB->end()) {
- DEBUG(dbgs() << "FAILED (could not find a suitable spill point).\n");
- return false; // No gap to insert spill.
- }
- }
- // Add spill.
- SS = CreateSpillStackSlot(CurrLI->reg, RC);
- TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC,
- TRI);
- SpillMI = prior(SpillPt);
- SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
- }
- }
-
- // Remember def instruction index to spill index mapping.
- if (DefMI && SpillMI)
- Def2SpillMap[ValNo->def] = SpillIndex;
-
- // Add restore.
- bool FoldedRestore = false;
- SlotIndex RestoreIndex;
- if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
- BarrierMBB, SS, RefsInMBB)) {
- RestorePt = LMI;
- RestoreIndex = LIs->getInstructionIndex(RestorePt);
- FoldedRestore = true;
- } else {
- TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC, TRI);
- MachineInstr *LoadMI = prior(RestorePt);
- RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
- }
-
- // Update spill stack slot live interval.
- UpdateSpillSlotInterval(ValNo, SpillIndex.getUseIndex().getNextSlot(),
- RestoreIndex.getDefIndex());
-
- ReconstructLiveInterval(CurrLI);
-
- if (!FoldedRestore) {
- SlotIndex RestoreIdx = LIs->getInstructionIndex(prior(RestorePt));
- RestoreIdx = RestoreIdx.getDefIndex();
- RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RestoreIdx));
- }
-
- ++NumSplits;
- DEBUG(dbgs() << "success.\n");
- return true;
-}
-
-/// SplitRegLiveIntervals - Split all register live intervals that cross the
-/// barrier that's being processed.
-bool
-PreAllocSplitting::SplitRegLiveIntervals(const TargetRegisterClass **RCs,
- SmallPtrSet<LiveInterval*, 8>& Split) {
- // First find all the virtual registers whose live intervals are intercepted
- // by the current barrier.
- SmallVector<LiveInterval*, 8> Intervals;
- for (const TargetRegisterClass **RC = RCs; *RC; ++RC) {
- // FIXME: If it's not safe to move any instruction that defines the barrier
- // register class, then it means there are some special dependencies which
- // codegen is not modelling. Ignore these barriers for now.
- if (!TII->isSafeToMoveRegClassDefs(*RC))
- continue;
- const std::vector<unsigned> &VRs = MRI->getRegClassVirtRegs(*RC);
- for (unsigned i = 0, e = VRs.size(); i != e; ++i) {
- unsigned Reg = VRs[i];
- if (!LIs->hasInterval(Reg))
- continue;
- LiveInterval *LI = &LIs->getInterval(Reg);
- if (LI->liveAt(BarrierIdx) && !Barrier->readsRegister(Reg))
- // Virtual register live interval is intercepted by the barrier. We
- // should split and shrink wrap its interval if possible.
- Intervals.push_back(LI);
- }
- }
-
- // Process the affected live intervals.
- bool Change = false;
- while (!Intervals.empty()) {
- if (PreSplitLimit != -1 && (int)NumSplits == PreSplitLimit)
- break;
- LiveInterval *LI = Intervals.back();
- Intervals.pop_back();
- bool result = SplitRegLiveInterval(LI);
- if (result) Split.insert(LI);
- Change |= result;
- }
-
- return Change;
-}
-
-unsigned PreAllocSplitting::getNumberOfNonSpills(
- SmallPtrSet<MachineInstr*, 4>& MIs,
- unsigned Reg, int FrameIndex,
- bool& FeedsTwoAddr) {
- unsigned NonSpills = 0;
- for (SmallPtrSet<MachineInstr*, 4>::iterator UI = MIs.begin(), UE = MIs.end();
- UI != UE; ++UI) {
- int StoreFrameIndex;
- unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
- if (StoreVReg != Reg || StoreFrameIndex != FrameIndex)
- ++NonSpills;
-
- int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg);
- if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx))
- FeedsTwoAddr = true;
- }
-
- return NonSpills;
-}
-
-/// removeDeadSpills - After doing splitting, filter through all intervals we've
-/// split, and see if any of the spills are unnecessary. If so, remove them.
-bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
- bool changed = false;
-
- // Walk over all of the live intervals that were touched by the splitter,
- // and see if we can do any DCE and/or folding.
- for (SmallPtrSet<LiveInterval*, 8>::iterator LI = split.begin(),
- LE = split.end(); LI != LE; ++LI) {
- DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> > VNUseCount;
-
- // First, collect all the uses of the vreg, and sort them by their
- // reaching definition (VNInfo).
- for (MachineRegisterInfo::use_iterator UI = MRI->use_begin((*LI)->reg),
- UE = MRI->use_end(); UI != UE; ++UI) {
- SlotIndex index = LIs->getInstructionIndex(&*UI);
- index = index.getUseIndex();
-
- const LiveRange* LR = (*LI)->getLiveRangeContaining(index);
- VNUseCount[LR->valno].insert(&*UI);
- }
-
- // Now, take the definitions (VNInfos) one at a time and try to DCE
- // and/or fold them away.
- for (LiveInterval::vni_iterator VI = (*LI)->vni_begin(),
- VE = (*LI)->vni_end(); VI != VE; ++VI) {
-
- if (DeadSplitLimit != -1 && (int)NumDeadSpills == DeadSplitLimit)
- return changed;
-
- VNInfo* CurrVN = *VI;
-
- // We don't currently try to handle definitions with PHI kills, because
- // it would involve processing more than one VNInfo at once.
- if (CurrVN->hasPHIKill()) continue;
-
- // We also don't try to handle the results of PHI joins, since there's
- // no defining instruction to analyze.
- MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
- if (!DefMI || CurrVN->isUnused()) continue;
-
- // We're only interested in eliminating cruft introduced by the splitter,
- // which is of the form load-use or load-use-store. First, check that the
- // definition is a load, and remember what stack slot we loaded it from.
- int FrameIndex;
- if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
-
- // If the definition has no uses at all, just DCE it.
- if (VNUseCount[CurrVN].size() == 0) {
- LIs->RemoveMachineInstrFromMaps(DefMI);
- (*LI)->removeValNo(CurrVN);
- DefMI->eraseFromParent();
- VNUseCount.erase(CurrVN);
- ++NumDeadSpills;
- changed = true;
- continue;
- }
-
- // Second, get the number of non-store uses of the definition, as well as
- // a flag indicating whether it feeds into a later two-address definition.
- bool FeedsTwoAddr = false;
- unsigned NonSpillCount = getNumberOfNonSpills(VNUseCount[CurrVN],
- (*LI)->reg, FrameIndex,
- FeedsTwoAddr);
-
- // If there's one non-store use and it doesn't feed a two-addr, then
- // this is a load-use-store case that we can try to fold.
- if (NonSpillCount == 1 && !FeedsTwoAddr) {
- // Start by finding the non-store use MachineInstr.
- SmallPtrSet<MachineInstr*, 4>::iterator UI = VNUseCount[CurrVN].begin();
- int StoreFrameIndex;
- unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
- while (UI != VNUseCount[CurrVN].end() &&
- (StoreVReg == (*LI)->reg && StoreFrameIndex == FrameIndex)) {
- ++UI;
- if (UI != VNUseCount[CurrVN].end())
- StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex);
- }
- if (UI == VNUseCount[CurrVN].end()) continue;
-
- MachineInstr* use = *UI;
-
- // Attempt to fold it away!
- int OpIdx = use->findRegisterUseOperandIdx((*LI)->reg, false);
- if (OpIdx == -1) continue;
- SmallVector<unsigned, 1> Ops;
- Ops.push_back(OpIdx);
- if (!TII->canFoldMemoryOperand(use, Ops)) continue;
-
- MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex);
-
- if (!NewMI) continue;
-
- // Update relevant analyses.
- LIs->RemoveMachineInstrFromMaps(DefMI);
- LIs->ReplaceMachineInstrInMaps(use, NewMI);
- (*LI)->removeValNo(CurrVN);
-
- DefMI->eraseFromParent();
- use->eraseFromParent();
- VNUseCount[CurrVN].erase(use);
-
- // Remove deleted instructions. Note that we need to remove them from
- // the VNInfo->use map as well, just to be safe.
- for (SmallPtrSet<MachineInstr*, 4>::iterator II =
- VNUseCount[CurrVN].begin(), IE = VNUseCount[CurrVN].end();
- II != IE; ++II) {
- for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
- VNI = VNUseCount.begin(), VNE = VNUseCount.end(); VNI != VNE;
- ++VNI)
- if (VNI->first != CurrVN)
- VNI->second.erase(*II);
- LIs->RemoveMachineInstrFromMaps(*II);
- (*II)->eraseFromParent();
- }
-
- VNUseCount.erase(CurrVN);
-
- for (DenseMap<VNInfo*, SmallPtrSet<MachineInstr*, 4> >::iterator
- VI = VNUseCount.begin(), VE = VNUseCount.end(); VI != VE; ++VI)
- if (VI->second.erase(use))
- VI->second.insert(NewMI);
-
- ++NumDeadSpills;
- changed = true;
- continue;
- }
-
- // If there's more than one non-store instruction, we can't profitably
- // fold it, so bail.
- if (NonSpillCount) continue;
-
- // Otherwise, this is a load-store case, so DCE them.
- for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
- VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
- UI != UE; ++UI) {
- LIs->RemoveMachineInstrFromMaps(*UI);
- (*UI)->eraseFromParent();
- }
-
- VNUseCount.erase(CurrVN);
-
- LIs->RemoveMachineInstrFromMaps(DefMI);
- (*LI)->removeValNo(CurrVN);
- DefMI->eraseFromParent();
- ++NumDeadSpills;
- changed = true;
- }
- }
-
- return changed;
-}
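// The DCE above hinges on bucketing every use under its reaching definition
// and erasing definitions whose bucket is empty (or all-spill). A hedged
// standalone sketch of the bucketing step with toy Def/Use handles:
#include <cstddef>
#include <map>
#include <set>
#include <vector>

typedef int Def;
typedef int Use;

std::vector<Def> deadDefs(const std::vector<Def> &Defs,
                          const std::map<Use, Def> &ReachingDef) {
  std::map<Def, std::set<Use> > UseCount;
  for (std::map<Use, Def>::const_iterator I = ReachingDef.begin(),
       E = ReachingDef.end(); I != E; ++I)
    UseCount[I->second].insert(I->first); // Bucket each use by its def.
  std::vector<Def> Dead;
  for (std::size_t i = 0; i != Defs.size(); ++i)
    if (UseCount[Defs[i]].empty())
      Dead.push_back(Defs[i]);            // No uses at all: safe to remove.
  return Dead;
}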
-
-bool PreAllocSplitting::createsNewJoin(LiveRange* LR,
- MachineBasicBlock* DefMBB,
- MachineBasicBlock* BarrierMBB) {
- if (DefMBB == BarrierMBB)
- return false;
-
- if (LR->valno->hasPHIKill())
- return false;
-
- SlotIndex MBBEnd = LIs->getMBBEndIdx(BarrierMBB);
- if (LR->end < MBBEnd)
- return false;
-
- MachineLoopInfo& MLI = getAnalysis<MachineLoopInfo>();
- if (MLI.getLoopFor(DefMBB) != MLI.getLoopFor(BarrierMBB))
- return true;
-
- MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
- SmallPtrSet<MachineBasicBlock*, 4> Visited;
- typedef std::pair<MachineBasicBlock*,
- MachineBasicBlock::succ_iterator> ItPair;
- SmallVector<ItPair, 4> Stack;
- Stack.push_back(std::make_pair(BarrierMBB, BarrierMBB->succ_begin()));
-
- while (!Stack.empty()) {
- ItPair P = Stack.back();
- Stack.pop_back();
-
- MachineBasicBlock* PredMBB = P.first;
- MachineBasicBlock::succ_iterator S = P.second;
-
- if (S == PredMBB->succ_end())
- continue;
- else if (Visited.count(*S)) {
- Stack.push_back(std::make_pair(PredMBB, ++S));
- continue;
- } else
- Stack.push_back(std::make_pair(PredMBB, S+1));
-
- MachineBasicBlock* MBB = *S;
- Visited.insert(MBB);
-
- if (MBB == BarrierMBB)
- return true;
-
- MachineDomTreeNode* DefMDTN = MDT.getNode(DefMBB);
- MachineDomTreeNode* BarrierMDTN = MDT.getNode(BarrierMBB);
- MachineDomTreeNode* MDTN = MDT.getNode(MBB)->getIDom();
- while (MDTN) {
- if (MDTN == DefMDTN)
- return true;
- else if (MDTN == BarrierMDTN)
- break;
- MDTN = MDTN->getIDom();
- }
-
- MBBEnd = LIs->getMBBEndIdx(MBB);
- if (LR->end > MBBEnd)
- Stack.push_back(std::make_pair(MBB, MBB->succ_begin()));
- }
-
- return false;
-}
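// createsNewJoin drives its DFS with an explicit (block, next-successor)
// stack instead of recursion; a self-contained reachability sketch of that
// traversal shape (Succs is a hypothetical adjacency list):
#include <cstddef>
#include <set>
#include <utility>
#include <vector>

bool reaches(const std::vector<std::vector<int> > &Succs, int From, int To) {
  std::set<int> Visited;
  std::vector<std::pair<int, std::size_t> > Stack;
  Stack.push_back(std::make_pair(From, 0));
  while (!Stack.empty()) {
    std::pair<int, std::size_t> P = Stack.back();
    Stack.pop_back();
    if (P.second >= Succs[P.first].size())
      continue;                               // This block is exhausted.
    Stack.push_back(std::make_pair(P.first, P.second + 1)); // Resume later.
    int Next = Succs[P.first][P.second];
    if (!Visited.insert(Next).second)
      continue;
    if (Next == To)
      return true;
    Stack.push_back(std::make_pair(Next, 0));
  }
  return false;
}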
-
-
-bool PreAllocSplitting::runOnMachineFunction(MachineFunction &MF) {
- CurrMF = &MF;
- TM = &MF.getTarget();
- TRI = TM->getRegisterInfo();
- TII = TM->getInstrInfo();
- MFI = MF.getFrameInfo();
- MRI = &MF.getRegInfo();
- SIs = &getAnalysis<SlotIndexes>();
- LIs = &getAnalysis<LiveIntervals>();
- LSs = &getAnalysis<LiveStacks>();
- VRM = &getAnalysis<VirtRegMap>();
-
- bool MadeChange = false;
-
- // Make sure blocks are numbered in order.
- MF.RenumberBlocks();
-
- MachineBasicBlock *Entry = MF.begin();
- SmallPtrSet<MachineBasicBlock*,16> Visited;
-
- SmallPtrSet<LiveInterval*, 8> Split;
-
- for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
- DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
- DFI != E; ++DFI) {
- BarrierMBB = *DFI;
- for (MachineBasicBlock::iterator I = BarrierMBB->begin(),
- E = BarrierMBB->end(); I != E; ++I) {
- Barrier = &*I;
- const TargetRegisterClass **BarrierRCs =
- Barrier->getDesc().getRegClassBarriers();
- if (!BarrierRCs)
- continue;
- BarrierIdx = LIs->getInstructionIndex(Barrier);
- MadeChange |= SplitRegLiveIntervals(BarrierRCs, Split);
- }
- }
-
- MadeChange |= removeDeadSpills(Split);
-
- return MadeChange;
-}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index f1f3c99..a901c5f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -145,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -152,8 +153,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
bool AdjustsStack = MFI->adjustsStack();
// Get the function call frame set-up and tear-down instruction opcode
- int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
- int FrameDestroyOpcode = RegInfo->getCallFrameDestroyOpcode();
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
// Early exit for targets which have no call frame setup/destroy pseudo
// instructions.
@@ -705,12 +706,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
+ const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
const TargetFrameLowering *TFI = TM.getFrameLowering();
bool StackGrowsDown =
TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
- int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
- int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
+ int FrameSetupOpcode = TII.getCallFrameSetupOpcode();
+ int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) {
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index 1d77b29..5ea26ad 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -20,6 +20,7 @@
#include "RenderMachineFunction.h"
#include "Spiller.h"
#include "VirtRegMap.h"
+#include "RegisterCoalescer.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -34,7 +35,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -141,7 +141,7 @@ RABasic::RABasic(): MachineFunctionPass(ID) {
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
@@ -324,19 +324,21 @@ void RegAllocBase::allocatePhysRegs() {
if (AvailablePhysReg == ~0u) {
// selectOrSplit failed to find a register!
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!"
- "\nCannot allocate: " << *VirtReg;
+ const char *Msg = "ran out of registers during register allocation";
+ // Probably caused by an inline asm.
+ MachineInstr *MI;
for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg);
- MachineInstr *MI = I.skipInstruction();) {
- if (!MI->isInlineAsm())
- continue;
- Msg << "\nPlease check your inline asm statement for "
- "invalid constraints:\n";
- MI->print(Msg, &VRM->getMachineFunction().getTarget());
- }
- report_fatal_error(Msg.str());
+ (MI = I.skipInstruction());)
+ if (MI->isInlineAsm())
+ break;
+ if (MI)
+ MI->emitError(Msg);
+ else
+ report_fatal_error(Msg);
+ // Keep going after reporting the error.
+ VRM->assignVirt2Phys(VirtReg->reg,
+ RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
+ continue;
}
if (AvailablePhysReg)
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 9765203..b36a445 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -86,7 +86,7 @@ namespace {
// that is currently available in a physical register.
LiveRegMap LiveVirtRegs;
- DenseMap<unsigned, MachineInstr *> LiveDbgValueMap;
+ DenseMap<unsigned, SmallVector<MachineInstr *, 4> > LiveDbgValueMap;
// RegState - Track the state of a physical register.
enum RegState {
@@ -118,7 +118,7 @@ namespace {
// SkippedInstrs - Descriptors of instructions whose clobber list was
// ignored because all registers were spilled. It is still necessary to
// mark all the clobbered registers as used by the function.
- SmallPtrSet<const TargetInstrDesc*, 4> SkippedInstrs;
+ SmallPtrSet<const MCInstrDesc*, 4> SkippedInstrs;
// isBulkSpilling - This flag is set when LiveRegMap will be cleared
// completely after spilling all live registers. LiveRegMap entries should
@@ -272,7 +272,9 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// If this register is used by DBG_VALUE then insert new DBG_VALUE to
// identify spilled location as the place to find corresponding variable's
// value.
- if (MachineInstr *DBG = LiveDbgValueMap.lookup(LRI->first)) {
+ SmallVector<MachineInstr *, 4> &LRIDbgValues = LiveDbgValueMap[LRI->first];
+ for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) {
+ MachineInstr *DBG = LRIDbgValues[li];
const MDNode *MDPtr =
DBG->getOperand(DBG->getNumOperands()-1).getMetadata();
int64_t Offset = 0;
@@ -291,9 +293,11 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
MachineBasicBlock *MBB = DBG->getParent();
MBB->insert(MI, NewDV);
DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV);
- LiveDbgValueMap[LRI->first] = NewDV;
}
}
+ // Now that this register is spilled, there should not be any DBG_VALUE
+ // pointing to this register, because they all refer to the spilled value now.
+ LRIDbgValues.clear();
if (SpillKill)
LR.LastUse = 0; // Don't kill register again
}
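// The hunk above turns the map one-to-many so several DBG_VALUEs can track
// a single vreg and all be rewritten at spill time; a sketch of the same
// change with std containers (the names here are illustrative):
#include <map>
#include <string>
#include <vector>

std::map<unsigned, std::vector<std::string> > LiveDbgValueMap;

void recordDbgValue(unsigned Reg, const std::string &DV) {
  LiveDbgValueMap[Reg].push_back(DV); // Keep every DBG_VALUE, not just the last.
}

void spillReg(unsigned Reg) {
  // ... rewrite each recorded DBG_VALUE to refer to the stack slot ...
  LiveDbgValueMap[Reg].clear();       // They all point at the slot now.
}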
@@ -419,7 +423,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg,
// Returns spillImpossible when PhysReg or an alias can't be spilled.
unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
if (UsedInInstr.test(PhysReg)) {
- DEBUG(dbgs() << "PhysReg: " << PhysReg << " is already used in instr.\n");
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n");
return spillImpossible;
}
switch (unsigned VirtReg = PhysRegState[PhysReg]) {
@@ -428,15 +432,15 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
case regFree:
return 0;
case regReserved:
- DEBUG(dbgs() << "VirtReg: " << VirtReg << " corresponding to PhysReg: "
- << PhysReg << " is reserved already.\n");
+ DEBUG(dbgs() << PrintReg(VirtReg, TRI) << " corresponding to "
+ << PrintReg(PhysReg, TRI) << " is reserved already.\n");
return spillImpossible;
default:
return LiveVirtRegs.lookup(VirtReg).Dirty ? spillDirty : spillClean;
}
// This is a disabled register, add up cost of aliases.
- DEBUG(dbgs() << "\tRegister: " << PhysReg << " is disabled.\n");
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is disabled.\n");
unsigned Cost = 0;
for (const unsigned *AS = TRI->getAliasSet(PhysReg);
unsigned Alias = *AS; ++AS) {
@@ -487,14 +491,12 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
// Take hint when possible.
if (Hint) {
- switch(calcSpillCost(Hint)) {
- default:
- definePhysReg(MI, Hint, regFree);
- // Fall through.
- case 0:
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint);
+ if (Cost < spillDirty) {
+ if (Cost)
+ definePhysReg(MI, Hint, regFree);
return assignVirtToPhysReg(LRE, Hint);
- case spillImpossible:
- break;
}
}
@@ -513,7 +515,7 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
unsigned BestReg = 0, BestCost = spillImpossible;
for (ArrayRef<unsigned>::iterator I = AO.begin(), E = AO.end(); I != E; ++I) {
unsigned Cost = calcSpillCost(*I);
- DEBUG(dbgs() << "\tRegister: " << *I << "\n");
+ DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n");
DEBUG(dbgs() << "\tCost: " << Cost << "\n");
DEBUG(dbgs() << "\tBestCost: " << BestCost << "\n");
// Cost is 0 when all aliases are already disabled.
@@ -528,16 +530,10 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
return assignVirtToPhysReg(LRE, BestReg);
}
- // Nothing we can do.
- std::string msg;
- raw_string_ostream Msg(msg);
- Msg << "Ran out of registers during register allocation!";
- if (MI->isInlineAsm()) {
- Msg << "\nPlease check your inline asm statement for "
- << "invalid constraints:\n";
- MI->print(Msg, TM);
- }
- report_fatal_error(Msg.str());
+ // Nothing we can do. Report an error and keep going with a bad allocation.
+ MI->emitError("ran out of registers during register allocation");
+ definePhysReg(MI, *AO.begin(), regFree);
+ assignVirtToPhysReg(LRE, *AO.begin());
}
/// defineVirtReg - Allocate a register for VirtReg and mark it as dirty.
@@ -724,7 +720,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
unsigned Reg = MO.getReg();
if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
- DEBUG(dbgs() << "\tSetting reg " << Reg << " as used in instr\n");
+ DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI)
+ << " as used in instr\n");
UsedInInstr.set(Reg);
}
@@ -774,7 +771,7 @@ void RAFast::AllocateBasicBlock() {
// Otherwise, sequentially allocate each instruction in the MBB.
while (MII != MBB->end()) {
MachineInstr *MI = MII++;
- const TargetInstrDesc &TID = MI->getDesc();
+ const MCInstrDesc &MCID = MI->getDesc();
DEBUG({
dbgs() << "\n>> " << *MI << "Regs:";
for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) {
@@ -818,7 +815,7 @@ void RAFast::AllocateBasicBlock() {
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
- LiveDbgValueMap[Reg] = MI;
+ LiveDbgValueMap[Reg].push_back(MI);
LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
if (LRI != LiveVirtRegs.end())
setPhysReg(MI, i, LRI->second.PhysReg);
@@ -887,7 +884,7 @@ void RAFast::AllocateBasicBlock() {
VirtOpEnd = i+1;
if (MO.isUse()) {
hasTiedOps = hasTiedOps ||
- TID.getOperandConstraint(i, TOI::TIED_TO) != -1;
+ MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1;
} else {
if (MO.isEarlyClobber())
hasEarlyClobbers = true;
@@ -917,7 +914,7 @@ void RAFast::AllocateBasicBlock() {
// We didn't detect inline asm tied operands above, so just make this extra
// pass for all inline asm.
if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs ||
- (hasTiedOps && (hasPhysDefs || TID.getNumDefs() > 1))) {
+ (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) {
handleThroughOperands(MI, VirtDead);
// Don't attempt coalescing when we have funny stuff going on.
CopyDst = 0;
@@ -962,7 +959,7 @@ void RAFast::AllocateBasicBlock() {
}
unsigned DefOpEnd = MI->getNumOperands();
- if (TID.isCall()) {
+ if (MCID.isCall()) {
// Spill all virtregs before a call. This serves two purposes: 1. If an
// exception is thrown, the landing pad is going to expect to find
// registers in their spill slots, and 2. we don't have to wade through
@@ -973,7 +970,7 @@ void RAFast::AllocateBasicBlock() {
// The imp-defs are skipped below, but we still need to mark those
// registers as used by the function.
- SkippedInstrs.insert(&TID);
+ SkippedInstrs.insert(&MCID);
}
// Third scan.
@@ -1059,7 +1056,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
MRI->closePhysRegsUsed(*TRI);
// Add the clobber lists for all the instructions we skipped earlier.
- for (SmallPtrSet<const TargetInstrDesc*, 4>::const_iterator
+ for (SmallPtrSet<const MCInstrDesc*, 4>::const_iterator
I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I)
if (const unsigned *Defs = (*I)->getImplicitDefs())
while (*Defs)
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 8d06325..e235e87 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -22,6 +22,7 @@
#include "SpillPlacement.h"
#include "SplitKit.h"
#include "VirtRegMap.h"
+#include "RegisterCoalescer.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
@@ -33,11 +34,9 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineLoopRanges.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -68,7 +67,6 @@ class RAGreedy : public MachineFunctionPass,
LiveStacks *LS;
MachineDominatorTree *DomTree;
MachineLoopInfo *Loops;
- MachineLoopRanges *LoopRanges;
EdgeBundles *Bundles;
SpillPlacement *SpillPlacer;
LiveDebugVariables *DebugVars;
@@ -76,6 +74,7 @@ class RAGreedy : public MachineFunctionPass,
// state
std::auto_ptr<Spiller> SpillerInstance;
std::priority_queue<std::pair<unsigned, unsigned> > Queue;
+ unsigned NextCascade;
// Live ranges pass through a number of stages as we try to allocate them.
// Some of the stages may also create new live ranges:
@@ -101,29 +100,49 @@ class RAGreedy : public MachineFunctionPass,
static const char *const StageName[];
- IndexedMap<unsigned char, VirtReg2IndexFunctor> LRStage;
+ // RegInfo - Keep additional information about each live range.
+ struct RegInfo {
+ LiveRangeStage Stage;
+
+ // Cascade - Eviction loop prevention. See canEvictInterference().
+ unsigned Cascade;
+
+ RegInfo() : Stage(RS_New), Cascade(0) {}
+ };
+
+ IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo;
LiveRangeStage getStage(const LiveInterval &VirtReg) const {
- return LiveRangeStage(LRStage[VirtReg.reg]);
+ return ExtraRegInfo[VirtReg.reg].Stage;
+ }
+
+ void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) {
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo[VirtReg.reg].Stage = Stage;
}
template<typename Iterator>
void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) {
- LRStage.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
for (;Begin != End; ++Begin) {
unsigned Reg = (*Begin)->reg;
- if (LRStage[Reg] == RS_New)
- LRStage[Reg] = NewStage;
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = NewStage;
}
}
- // Eviction. Sometimes an assigned live range can be evicted without
- // conditions, but other times it must be split after being evicted to avoid
- // infinite loops.
- enum CanEvict {
- CE_Never, ///< Can never evict.
- CE_Always, ///< Can always evict.
- CE_WithSplit ///< Can evict only if range is also split or spilled.
+ /// Cost of evicting interference.
+ struct EvictionCost {
+ unsigned BrokenHints; ///< Total number of broken hints.
+ float MaxWeight; ///< Maximum spill weight evicted.
+
+ EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {}
+
+ bool operator<(const EvictionCost &O) const {
+ if (BrokenHints != O.BrokenHints)
+ return BrokenHints < O.BrokenHints;
+ return MaxWeight < O.MaxWeight;
+ }
};
// splitting state.
@@ -139,11 +158,13 @@ class RAGreedy : public MachineFunctionPass,
/// Global live range splitting candidate info.
struct GlobalSplitCandidate {
unsigned PhysReg;
+ InterferenceCache::Cursor Intf;
BitVector LiveBundles;
SmallVector<unsigned, 8> ActiveBlocks;
- void reset(unsigned Reg) {
+ void reset(InterferenceCache &Cache, unsigned Reg) {
PhysReg = Reg;
+ Intf.setPhysReg(Cache, Reg);
LiveBundles.clear();
ActiveBlocks.clear();
}
@@ -185,13 +206,15 @@ private:
float calcSpillCost();
bool addSplitConstraints(InterferenceCache::Cursor, float&);
void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>);
- void growRegion(GlobalSplitCandidate &Cand, InterferenceCache::Cursor);
- float calcGlobalSplitCost(GlobalSplitCandidate&, InterferenceCache::Cursor);
+ void growRegion(GlobalSplitCandidate &Cand);
+ float calcGlobalSplitCost(GlobalSplitCandidate&);
void splitAroundRegion(LiveInterval&, GlobalSplitCandidate&,
SmallVectorImpl<LiveInterval*>&);
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
- CanEvict canEvict(LiveInterval &A, LiveInterval &B);
- bool canEvictInterference(LiveInterval&, unsigned, float&);
+ bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ void evictInterference(LiveInterval&, unsigned,
+ SmallVectorImpl<LiveInterval*>&);
unsigned tryAssign(LiveInterval&, AllocationOrder&,
SmallVectorImpl<LiveInterval*>&);
@@ -228,18 +251,17 @@ FunctionPass* llvm::createGreedyRegisterAllocator() {
return new RAGreedy();
}
-RAGreedy::RAGreedy(): MachineFunctionPass(ID), LRStage(RS_New) {
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry());
initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
@@ -264,8 +286,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
- AU.addRequired<MachineLoopRanges>();
- AU.addPreserved<MachineLoopRanges>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
AU.addRequired<EdgeBundles>();
@@ -308,13 +328,13 @@ void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) {
// LRE may clone a virtual register because dead code elimination causes it to
// be split into connected components. Ensure that the new register gets the
// same stage as the parent.
- LRStage.grow(New);
- LRStage[New] = LRStage[Old];
+ ExtraRegInfo.grow(New);
+ ExtraRegInfo[New] = ExtraRegInfo[Old];
}
void RAGreedy::releaseMemory() {
SpillerInstance.reset(0);
- LRStage.clear();
+ ExtraRegInfo.clear();
GlobalCand.clear();
RegAllocBase::releaseMemory();
}
@@ -328,11 +348,11 @@ void RAGreedy::enqueue(LiveInterval *LI) {
"Can only enqueue virtual registers");
unsigned Prio;
- LRStage.grow(Reg);
- if (LRStage[Reg] == RS_New)
- LRStage[Reg] = RS_First;
+ ExtraRegInfo.grow(Reg);
+ if (ExtraRegInfo[Reg].Stage == RS_New)
+ ExtraRegInfo[Reg].Stage = RS_First;
- if (LRStage[Reg] == RS_Second)
+ if (ExtraRegInfo[Reg].Stage == RS_Second)
// Unsplit ranges that couldn't be allocated immediately are deferred until
// everything else has been allocated. Long ranges are allocated last so
// they are split against realistic interference.
@@ -375,7 +395,21 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
if (!PhysReg || Order.isHint(PhysReg))
return PhysReg;
- // PhysReg is available. Try to evict interference from a cheaper alternative.
+ // PhysReg is available, but there may be a better choice.
+
+ // If we missed a simple hint, try to cheaply evict interference from the
+ // preferred register.
+ if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg))
+ if (Order.isHint(Hint)) {
+ DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n');
+ EvictionCost MaxCost(1);
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ evictInterference(VirtReg, Hint, NewVRegs);
+ return Hint;
+ }
+ }
+
+ // Try to evict interference from a cheaper alternative.
unsigned Cost = TRI->getCostPerUse(PhysReg);
// Most registers have 0 additional cost.
@@ -393,31 +427,58 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
// Interference eviction
//===----------------------------------------------------------------------===//
-/// canEvict - determine if A can evict the assigned live range B. The eviction
-/// policy defined by this function together with the allocation order defined
-/// by enqueue() decides which registers ultimately end up being split and
-/// spilled.
+/// shouldEvict - determine if A should evict the assigned live range B. The
+/// eviction policy defined by this function together with the allocation order
+/// defined by enqueue() decides which registers ultimately end up being split
+/// and spilled.
+///
+/// Cascade numbers are used to prevent infinite loops if this function is a
+/// cyclic relation.
///
-/// This function must define a non-circular relation when it returns CE_Always,
-/// otherwise infinite eviction loops are possible. When evicting a <= RS_Second
-/// range, it is possible to return CE_WithSplit which forces the evicted
-/// register to be split or spilled before it can evict anything again. That
-/// guarantees progress.
-RAGreedy::CanEvict RAGreedy::canEvict(LiveInterval &A, LiveInterval &B) {
- return A.weight > B.weight ? CE_Always : CE_Never;
+/// @param A The live range to be assigned.
+/// @param IsHint True when A is about to be assigned to its preferred
+/// register.
+/// @param B The live range to be evicted.
+/// @param BreaksHint True when B is already assigned to its preferred register.
+bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
+ LiveInterval &B, bool BreaksHint) {
+ bool CanSplit = getStage(B) <= RS_Second;
+
+ // Be fairly aggressive about following hints as long as the evictee can be
+ // split.
+ if (CanSplit && IsHint && !BreaksHint)
+ return true;
+
+ return A.weight > B.weight;
}
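
Stated as a predicate, the policy above is: when chasing a hint, evict freely
as long as the evictee can still be split and is not already sitting in its
own preferred register; otherwise fall back to a plain weight comparison. A
standalone restatement with plain values (illustrative, not the LLVM
signature):

  // WeightA/WeightB stand in for LiveInterval::weight; CanSplitB for
  // getStage(B) <= RS_Second.
  bool shouldEvictSketch(float WeightA, bool IsHint,
                         float WeightB, bool BreaksHint, bool CanSplitB) {
    if (CanSplitB && IsHint && !BreaksHint)
      return true;            // aggressive hint-following
    return WeightA > WeightB; // default rule: heavier range wins
  }
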
-/// canEvict - Return true if all interferences between VirtReg and PhysReg can
-/// be evicted.
-/// Return false if any interference is heavier than MaxWeight.
-/// On return, set MaxWeight to the maximal spill weight of an interference.
+/// canEvictInterference - Return true if all interferences between VirtReg and
+/// PhysReg can be evicted.
+///
+/// @param VirtReg Live range that is about to be assigned.
+/// @param PhysReg Desired register for assignment.
+/// @param IsHint True when PhysReg is VirtReg's preferred register.
+/// @param MaxCost Only look for cheaper candidates and update with new cost
+/// when returning true.
+/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
- float &MaxWeight) {
- float Weight = 0;
+ bool IsHint, EvictionCost &MaxCost) {
+ // Find VirtReg's cascade number. This will be unassigned if VirtReg was never
+ // involved in an eviction before. If a cascade number was assigned, deny
+ // evicting anything with the same or a newer cascade number. This prevents
+ // infinite eviction loops.
+ //
+ // This works out so a register without a cascade number is allowed to evict
+ // anything, and it can be evicted by anything.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = NextCascade;
+
+ EvictionCost Cost;
for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
// If there are 10 or more interferences, chances are one is heavier.
- if (Q.collectInterferingVRegs(10, MaxWeight) >= 10)
+ if (Q.collectInterferingVRegs(10) >= 10)
return false;
// Check if any interfering live range is heavier than MaxWeight.
@@ -425,25 +486,69 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
if (TargetRegisterInfo::isPhysicalRegister(Intf->reg))
return false;
- if (Intf->weight >= MaxWeight)
- return false;
- switch (canEvict(VirtReg, *Intf)) {
- case CE_Always:
- break;
- case CE_Never:
+ // Never evict spill products. They cannot split or spill.
+ if (getStage(*Intf) == RS_Spill)
return false;
- case CE_WithSplit:
- if (getStage(*Intf) > RS_Second)
+ // Once a live range becomes small enough, it is urgent that we find a
+ // register for it. This is indicated by an infinite spill weight. These
+ // urgent live ranges get to evict almost anything.
+ bool Urgent = !VirtReg.isSpillable() && Intf->isSpillable();
+ // Only evict older cascades or live ranges without a cascade.
+ unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade;
+ if (Cascade <= IntfCascade) {
+ if (!Urgent)
return false;
- break;
+ // We permit breaking cascades for urgent evictions. It should be the
+ // last resort, though, so make it really expensive.
+ Cost.BrokenHints += 10;
}
- Weight = std::max(Weight, Intf->weight);
+ // Would this break a satisfied hint?
+ bool BreaksHint = VRM->hasPreferredPhys(Intf->reg);
+ // Update eviction cost.
+ Cost.BrokenHints += BreaksHint;
+ Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight);
+ // Abort if this would be too expensive.
+ if (!(Cost < MaxCost))
+ return false;
+ // Finally, apply the eviction policy for non-urgent evictions.
+ if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint))
+ return false;
}
}
- MaxWeight = Weight;
+ MaxCost = Cost;
return true;
}
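
The cascade test above is what makes eviction terminate: an evictor may only
displace live ranges whose cascade number is strictly older, and a range with
no number yet is treated as holding the next number, so it can evict anything
and be evicted by anything. A toy model of the invariant (assumed encoding: 0
means no cascade assigned yet):

  #include <cassert>

  struct Range { unsigned Cascade = 0; }; // 0 = never evicted anything

  bool mayEvict(const Range &A, const Range &B, unsigned NextCascade) {
    unsigned CascadeA = A.Cascade ? A.Cascade : NextCascade;
    return CascadeA > B.Cascade; // deny equal or newer cascades
  }

  int main() {
    unsigned NextCascade = 1;
    Range X, Y;
    assert(mayEvict(X, Y, NextCascade));  // fresh ranges may evict each other
    X.Cascade = NextCascade++;            // X evicted Y and got cascade 1...
    Y.Cascade = X.Cascade;                // ...and Y is stamped with the same one
    assert(!mayEvict(Y, X, NextCascade)); // Y cannot evict X back: no loop
  }
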
+/// evictInterference - Evict any interfering registers that prevent VirtReg
+/// from being assigned to PhysReg. This assumes that canEvictInterference
+/// returned true.
+void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // Make sure that VirtReg has a cascade number, and assign that cascade
+ // number to every evicted register. These live ranges can then only be
+ // evicted by a newer cascade, preventing infinite loops.
+ unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade;
+ if (!Cascade)
+ Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++;
+
+ DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI)
+ << " interference: Cascade " << Cascade << '\n');
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ assert(Q.seenAllInterferences() && "Didn't check all interferences.");
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+ LiveInterval *Intf = Q.interferingVRegs()[i];
+ unassign(*Intf, VRM->getPhys(Intf->reg));
+ assert((ExtraRegInfo[Intf->reg].Cascade < Cascade ||
+ VirtReg.isSpillable() < Intf->isSpillable()) &&
+ "Cannot decrease cascade number, illegal eviction");
+ ExtraRegInfo[Intf->reg].Cascade = Cascade;
+ ++NumEvicted;
+ NewVRegs.push_back(Intf);
+ }
+ }
+}
+
/// tryEvict - Try to evict all interferences for a physreg.
/// @param VirtReg Currently unassigned virtual register.
/// @param Order Physregs to try.
@@ -454,31 +559,37 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
unsigned CostPerUseLimit) {
NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
- // Keep track of the lightest single interference seen so far.
- float BestWeight = HUGE_VALF;
+ // Keep track of the cheapest interference seen so far.
+ EvictionCost BestCost(~0u);
unsigned BestPhys = 0;
+ // When we are just looking for a reduced cost per use, don't break any
+ // hints, and only evict smaller spill weights.
+ if (CostPerUseLimit < ~0u) {
+ BestCost.BrokenHints = 0;
+ BestCost.MaxWeight = VirtReg.weight;
+ }
+
Order.rewind();
while (unsigned PhysReg = Order.next()) {
if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit)
continue;
- // The first use of a register in a function has cost 1.
- if (CostPerUseLimit == 1 && !MRI->isPhysRegUsed(PhysReg))
- continue;
-
- float Weight = BestWeight;
- if (!canEvictInterference(VirtReg, PhysReg, Weight))
- continue;
-
- // This is an eviction candidate.
- DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " interference = "
- << Weight << '\n');
- if (BestPhys && Weight >= BestWeight)
+ // The first use of a callee-saved register in a function has cost 1.
+ // Don't start using a CSR when the CostPerUseLimit is low.
+ if (CostPerUseLimit == 1)
+ if (unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg))
+ if (!MRI->isPhysRegUsed(CSR)) {
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR "
+ << PrintReg(CSR, TRI) << '\n');
+ continue;
+ }
+
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
continue;
// Best so far.
BestPhys = PhysReg;
- BestWeight = Weight;
+
// Stop if the hint can be used.
if (Order.isHint(PhysReg))
break;
@@ -487,22 +598,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
if (!BestPhys)
return 0;
- DEBUG(dbgs() << "evicting " << PrintReg(BestPhys, TRI) << " interference\n");
- for (const unsigned *AliasI = TRI->getOverlaps(BestPhys); *AliasI; ++AliasI) {
- LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
- assert(Q.seenAllInterferences() && "Didn't check all interfererences.");
- for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
- LiveInterval *Intf = Q.interferingVRegs()[i];
- unassign(*Intf, VRM->getPhys(Intf->reg));
- ++NumEvicted;
- NewVRegs.push_back(Intf);
- // Prevent looping by forcing the evicted ranges to be split before they
- // can evict anything else.
- if (getStage(*Intf) < RS_Second &&
- canEvict(VirtReg, *Intf) == CE_WithSplit)
- LRStage[Intf->reg] = RS_Second;
- }
- }
+ evictInterference(VirtReg, BestPhys, NewVRegs);
return BestPhys;
}
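
tryEvict is now an arg-min search over a shrinking budget: BestCost starts at
the caller's limit (unbounded, or clamped when only a cheap eviction is
acceptable), and every successful canEvictInterference call lowers it, so each
later candidate must be strictly cheaper than the best seen so far. The
pattern, sketched with assumed stand-in types:

  #include <utility>
  #include <vector>

  struct Cost { // same lexicographic ordering as EvictionCost above
    unsigned BrokenHints = 0;
    float MaxWeight = 0;
    bool operator<(const Cost &O) const {
      if (BrokenHints != O.BrokenHints)
        return BrokenHints < O.BrokenHints;
      return MaxWeight < O.MaxWeight;
    }
  };

  // Each candidate pairs a physreg with the cost of evicting its interference.
  unsigned pickCheapest(const std::vector<std::pair<unsigned, Cost> > &Cands,
                        Cost Budget) {
    unsigned Best = 0;
    for (unsigned i = 0; i != Cands.size(); ++i)
      if (Cands[i].second < Budget) { // canEvictInterference(...) analogue
        Budget = Cands[i].second;     // tighten: later entries must beat this
        Best = Cands[i].first;
      }
    return Best; // 0 when nothing fits the budget
  }
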
@@ -621,8 +717,7 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf,
SpillPlacer->addLinks(ArrayRef<unsigned>(TBS, T));
}
-void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
- InterferenceCache::Cursor Intf) {
+void RAGreedy::growRegion(GlobalSplitCandidate &Cand) {
// Keep track of through blocks that have not been added to SpillPlacer.
BitVector Todo = SA->getThroughBlocks();
SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks;
@@ -633,8 +728,6 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
for (;;) {
ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive();
- if (NewBundles.empty())
- break;
// Find new through blocks in the periphery of PrefRegBundles.
for (int i = 0, e = NewBundles.size(); i != e; ++i) {
unsigned Bundle = NewBundles[i];
@@ -654,12 +747,12 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand,
}
}
// Any new blocks to add?
- if (ActiveBlocks.size() > AddedTo) {
- ArrayRef<unsigned> Add(&ActiveBlocks[AddedTo],
- ActiveBlocks.size() - AddedTo);
- addThroughConstraints(Intf, Add);
- AddedTo = ActiveBlocks.size();
- }
+ if (ActiveBlocks.size() == AddedTo)
+ break;
+ addThroughConstraints(Cand.Intf,
+ ArrayRef<unsigned>(ActiveBlocks).slice(AddedTo));
+ AddedTo = ActiveBlocks.size();
+
// Perhaps iterating can enable more bundles?
SpillPlacer->iterate();
}
@@ -697,8 +790,7 @@ float RAGreedy::calcSpillCost() {
/// pattern in LiveBundles. This cost should be added to the local cost of the
/// interference pattern in SplitConstraints.
///
-float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
- InterferenceCache::Cursor Intf) {
+float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) {
float GlobalCost = 0;
const BitVector &LiveBundles = Cand.LiveBundles;
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
@@ -725,8 +817,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand,
continue;
if (RegIn && RegOut) {
// We need double spill code if this block has interference.
- Intf.moveToBlock(Number);
- if (Intf.hasInterference())
+ Cand.Intf.moveToBlock(Number);
+ if (Cand.Intf.hasInterference())
GlobalCost += 2*SpillPlacer->getBlockFrequency(Number);
continue;
}
@@ -756,188 +848,42 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
dbgs() << ".\n";
});
- InterferenceCache::Cursor Intf(IntfCache, Cand.PhysReg);
+ InterferenceCache::Cursor &Intf = Cand.Intf;
LiveRangeEdit LREdit(VirtReg, NewVRegs, this);
SE->reset(LREdit);
// Create the main cross-block interval.
const unsigned MainIntv = SE->openIntv();
- // First add all defs that are live out of a block.
+ // First handle all the blocks with uses.
ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks();
for (unsigned i = 0; i != UseBlocks.size(); ++i) {
const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
- bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+ bool RegIn = BI.LiveIn &&
+ LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+ bool RegOut = BI.LiveOut &&
+ LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
// Create separate intervals for isolated blocks with multiple uses.
- if (!RegIn && !RegOut && BI.FirstUse != BI.LastUse) {
+ if (!RegIn && !RegOut) {
DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n");
- SE->splitSingleBlock(BI);
- SE->selectIntv(MainIntv);
- continue;
- }
-
- // Should the register be live out?
- if (!BI.LiveOut || !RegOut)
- continue;
-
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
- Intf.moveToBlock(BI.MBB->getNumber());
- DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
- << Bundles->getBundle(BI.MBB->getNumber(), 1)
- << " [" << Start << ';'
- << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
- << ") intf [" << Intf.first() << ';' << Intf.last() << ')');
-
- // The interference interval should either be invalid or overlap MBB.
- assert((!Intf.hasInterference() || Intf.first() < Stop)
- && "Bad interference");
- assert((!Intf.hasInterference() || Intf.last() > Start)
- && "Bad interference");
-
- // Check interference leaving the block.
- if (!Intf.hasInterference()) {
- // Block is interference-free.
- DEBUG(dbgs() << ", no interference");
- if (!BI.LiveThrough) {
- DEBUG(dbgs() << ", not live-through.\n");
- SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop);
- continue;
+ if (!BI.isOneInstr()) {
+ SE->splitSingleBlock(BI);
+ SE->selectIntv(MainIntv);
}
- if (!RegIn) {
- // Block is live-through, but entry bundle is on the stack.
- // Reload just before the first use.
- DEBUG(dbgs() << ", not live-in, enter before first use.\n");
- SE->useIntv(SE->enterIntvBefore(BI.FirstUse), Stop);
- continue;
- }
- DEBUG(dbgs() << ", live-through.\n");
continue;
}
- // Block has interference.
- DEBUG(dbgs() << ", interference to " << Intf.last());
-
- if (!BI.LiveThrough && Intf.last() <= BI.FirstUse) {
- // The interference doesn't reach the outgoing segment.
- DEBUG(dbgs() << " doesn't affect def from " << BI.FirstUse << '\n');
- SE->useIntv(BI.FirstUse, Stop);
- continue;
- }
-
- SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
- if (Intf.last().getBoundaryIndex() < BI.LastUse) {
- // There are interference-free uses at the end of the block.
- // Find the first use that can get the live-out register.
- SmallVectorImpl<SlotIndex>::const_iterator UI =
- std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
- Intf.last().getBoundaryIndex());
- assert(UI != SA->UseSlots.end() && "Couldn't find last use");
- SlotIndex Use = *UI;
- assert(Use <= BI.LastUse && "Couldn't find last use");
- // Only attempt a split befroe the last split point.
- if (Use.getBaseIndex() <= LastSplitPoint) {
- DEBUG(dbgs() << ", free use at " << Use << ".\n");
- SlotIndex SegStart = SE->enterIntvBefore(Use);
- assert(SegStart >= Intf.last() && "Couldn't avoid interference");
- assert(SegStart < LastSplitPoint && "Impossible split point");
- SE->useIntv(SegStart, Stop);
- continue;
- }
- }
-
- // Interference is after the last use.
- DEBUG(dbgs() << " after last use.\n");
- SlotIndex SegStart = SE->enterIntvAtEnd(*BI.MBB);
- assert(SegStart >= Intf.last() && "Couldn't avoid interference");
- }
-
- // Now all defs leading to live bundles are handled, do everything else.
- for (unsigned i = 0; i != UseBlocks.size(); ++i) {
- const SplitAnalysis::BlockInfo &BI = UseBlocks[i];
- bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
- bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
-
- // Is the register live-in?
- if (!BI.LiveIn || !RegIn)
- continue;
-
- // We have an incoming register. Check for interference.
- SlotIndex Start, Stop;
- tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
Intf.moveToBlock(BI.MBB->getNumber());
- DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0)
- << " -> BB#" << BI.MBB->getNumber() << " [" << Start << ';'
- << SA->getLastSplitPoint(BI.MBB->getNumber()) << '-' << Stop
- << ')');
- // Check interference entering the block.
- if (!Intf.hasInterference()) {
- // Block is interference-free.
- DEBUG(dbgs() << ", no interference");
- if (!BI.LiveThrough) {
- DEBUG(dbgs() << ", killed in block.\n");
- SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
- continue;
- }
- if (!RegOut) {
- SlotIndex LastSplitPoint = SA->getLastSplitPoint(BI.MBB->getNumber());
- // Block is live-through, but exit bundle is on the stack.
- // Spill immediately after the last use.
- if (BI.LastUse < LastSplitPoint) {
- DEBUG(dbgs() << ", uses, stack-out.\n");
- SE->useIntv(Start, SE->leaveIntvAfter(BI.LastUse));
- continue;
- }
- // The last use is after the last split point, it is probably an
- // indirect jump.
- DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point "
- << LastSplitPoint << ", stack-out.\n");
- SlotIndex SegEnd = SE->leaveIntvBefore(LastSplitPoint);
- SE->useIntv(Start, SegEnd);
- // Run a double interval from the split to the last use.
- // This makes it possible to spill the complement without affecting the
- // indirect branch.
- SE->overlapIntv(SegEnd, BI.LastUse);
- continue;
- }
- // Register is live-through.
- DEBUG(dbgs() << ", uses, live-through.\n");
- SE->useIntv(Start, Stop);
- continue;
- }
-
- // Block has interference.
- DEBUG(dbgs() << ", interference from " << Intf.first());
-
- if (!BI.LiveThrough && Intf.first() >= BI.LastUse) {
- // The interference doesn't reach the outgoing segment.
- DEBUG(dbgs() << " doesn't affect kill at " << BI.LastUse << '\n');
- SE->useIntv(Start, BI.LastUse);
- continue;
- }
-
- if (Intf.first().getBaseIndex() > BI.FirstUse) {
- // There are interference-free uses at the beginning of the block.
- // Find the last use that can get the register.
- SmallVectorImpl<SlotIndex>::const_iterator UI =
- std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
- Intf.first().getBaseIndex());
- assert(UI != SA->UseSlots.begin() && "Couldn't find first use");
- SlotIndex Use = (--UI)->getBoundaryIndex();
- DEBUG(dbgs() << ", free use at " << *UI << ".\n");
- SlotIndex SegEnd = SE->leaveIntvAfter(Use);
- assert(SegEnd <= Intf.first() && "Couldn't avoid interference");
- SE->useIntv(Start, SegEnd);
- continue;
- }
-
- // Interference is before the first use.
- DEBUG(dbgs() << " before first use.\n");
- SlotIndex SegEnd = SE->leaveIntvAtTop(*BI.MBB);
- assert(SegEnd <= Intf.first() && "Couldn't avoid interference");
+ if (RegIn && RegOut)
+ SE->splitLiveThroughBlock(BI.MBB->getNumber(),
+ MainIntv, Intf.first(),
+ MainIntv, Intf.last());
+ else if (RegIn)
+ SE->splitRegInBlock(BI, MainIntv, Intf.first());
+ else
+ SE->splitRegOutBlock(BI, MainIntv, Intf.last());
}
// Handle live-through blocks.
@@ -945,20 +891,11 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
unsigned Number = Cand.ActiveBlocks[i];
bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)];
bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)];
- DEBUG(dbgs() << "Live through BB#" << Number << '\n');
- if (RegIn && RegOut) {
- Intf.moveToBlock(Number);
- if (!Intf.hasInterference()) {
- SE->useIntv(Indexes->getMBBStartIdx(Number),
- Indexes->getMBBEndIdx(Number));
- continue;
- }
- }
- MachineBasicBlock *MBB = MF->getBlockNumbered(Number);
- if (RegIn)
- SE->leaveIntvAtTop(*MBB);
- if (RegOut)
- SE->enterIntvAtEnd(*MBB);
+ if (!RegIn && !RegOut)
+ continue;
+ Intf.moveToBlock(Number);
+ SE->splitLiveThroughBlock(Number, RegIn ? MainIntv : 0, Intf.first(),
+ RegOut ? MainIntv : 0, Intf.last());
}
++NumGlobalSplits;
@@ -967,7 +904,7 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
SE->finish(&IntvMap);
DebugVars->splitRegister(VirtReg.reg, LREdit.regs());
- LRStage.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
unsigned OrigBlocks = SA->getNumLiveBlocks();
// Sort out the new intervals created by splitting. We get four kinds:
@@ -976,27 +913,27 @@ void RAGreedy::splitAroundRegion(LiveInterval &VirtReg,
// - Block-local splits are candidates for local splitting.
// - DCE leftovers should go back on the queue.
for (unsigned i = 0, e = LREdit.size(); i != e; ++i) {
- unsigned Reg = LREdit.get(i)->reg;
+ LiveInterval &Reg = *LREdit.get(i);
// Ignore old intervals from DCE.
- if (LRStage[Reg] != RS_New)
+ if (getStage(Reg) != RS_New)
continue;
// Remainder interval. Don't try splitting again, spill if it doesn't
// allocate.
if (IntvMap[i] == 0) {
- LRStage[Reg] = RS_Global;
+ setStage(Reg, RS_Global);
continue;
}
// Main interval. Allow repeated splitting as long as the number of live
// blocks is strictly decreasing.
if (IntvMap[i] == MainIntv) {
- if (SA->countLiveBlocks(LREdit.get(i)) >= OrigBlocks) {
+ if (SA->countLiveBlocks(&Reg) >= OrigBlocks) {
DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks
<< " blocks as original.\n");
// Don't allow repeated splitting as a safeguard against looping.
- LRStage[Reg] = RS_Global;
+ setStage(Reg, RS_Global);
}
continue;
}
@@ -1015,17 +952,34 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n');
const unsigned NoCand = ~0u;
unsigned BestCand = NoCand;
+ unsigned NumCands = 0;
Order.rewind();
- for (unsigned Cand = 0; unsigned PhysReg = Order.next(); ++Cand) {
- if (GlobalCand.size() <= Cand)
- GlobalCand.resize(Cand+1);
- GlobalCand[Cand].reset(PhysReg);
+ while (unsigned PhysReg = Order.next()) {
+ // Discard bad candidates before we run out of interference cache cursors.
+ // This will only affect register classes with a lot of registers (>32).
+ if (NumCands == IntfCache.getMaxCursors()) {
+ unsigned WorstCount = ~0u;
+ unsigned Worst = 0;
+ for (unsigned i = 0; i != NumCands; ++i) {
+ if (i == BestCand)
+ continue;
+ unsigned Count = GlobalCand[i].LiveBundles.count();
+ if (Count < WorstCount)
+ Worst = i, WorstCount = Count;
+ }
+ --NumCands;
+ GlobalCand[Worst] = GlobalCand[NumCands];
+ }
+
+ if (GlobalCand.size() <= NumCands)
+ GlobalCand.resize(NumCands+1);
+ GlobalSplitCandidate &Cand = GlobalCand[NumCands];
+ Cand.reset(IntfCache, PhysReg);
- SpillPlacer->prepare(GlobalCand[Cand].LiveBundles);
+ SpillPlacer->prepare(Cand.LiveBundles);
float Cost;
- InterferenceCache::Cursor Intf(IntfCache, PhysReg);
- if (!addSplitConstraints(Intf, Cost)) {
+ if (!addSplitConstraints(Cand.Intf, Cost)) {
DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n");
continue;
}
@@ -1040,28 +994,29 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
});
continue;
}
- growRegion(GlobalCand[Cand], Intf);
+ growRegion(Cand);
SpillPlacer->finish();
// No live bundles, defer to splitSingleBlocks().
- if (!GlobalCand[Cand].LiveBundles.any()) {
+ if (!Cand.LiveBundles.any()) {
DEBUG(dbgs() << " no bundles.\n");
continue;
}
- Cost += calcGlobalSplitCost(GlobalCand[Cand], Intf);
+ Cost += calcGlobalSplitCost(Cand);
DEBUG({
dbgs() << ", total = " << Cost << " with bundles";
- for (int i = GlobalCand[Cand].LiveBundles.find_first(); i>=0;
- i = GlobalCand[Cand].LiveBundles.find_next(i))
+ for (int i = Cand.LiveBundles.find_first(); i>=0;
+ i = Cand.LiveBundles.find_next(i))
dbgs() << " EB#" << i;
dbgs() << ".\n";
});
if (Cost < BestCost) {
- BestCand = Cand;
+ BestCand = NumCands;
BestCost = Hysteresis * Cost; // Prevent rounding effects.
}
+ ++NumCands;
}
if (BestCand == NoCand)
@@ -1302,10 +1257,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (NewGaps >= NumGaps) {
DEBUG(dbgs() << "Tagging non-progress ranges: ");
assert(!ProgressRequired && "Didn't make progress when it was required.");
- LRStage.resize(MRI->getNumVirtRegs());
for (unsigned i = 0, e = IntvMap.size(); i != e; ++i)
if (IntvMap[i] == 1) {
- LRStage[LREdit.get(i)->reg] = RS_Local;
+ setStage(*LREdit.get(i), RS_Local);
DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg));
}
DEBUG(dbgs() << '\n');
@@ -1384,7 +1338,8 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
return PhysReg;
LiveRangeStage Stage = getStage(VirtReg);
- DEBUG(dbgs() << StageName[Stage] << '\n');
+ DEBUG(dbgs() << StageName[Stage]
+ << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n');
// Try to evict a less worthy live range, but only for ranges from the primary
// queue. The RS_Second ranges already failed to do this, and they should not
@@ -1399,7 +1354,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// Wait until the second time, when all smaller ranges have been allocated.
// This gives a better picture of the interference to split around.
if (Stage == RS_First) {
- LRStage[VirtReg.reg] = RS_Second;
+ setStage(VirtReg, RS_Second);
DEBUG(dbgs() << "wait for second round\n");
NewVRegs.push_back(&VirtReg);
return 0;
@@ -1407,7 +1362,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class will report it.
- if (Stage >= RS_Spill)
+ if (Stage >= RS_Spill || !VirtReg.isSpillable())
return ~0u;
// Try splitting VirtReg or interferences.
@@ -1443,15 +1398,15 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
DomTree = &getAnalysis<MachineDominatorTree>();
SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
Loops = &getAnalysis<MachineLoopInfo>();
- LoopRanges = &getAnalysis<MachineLoopRanges>();
Bundles = &getAnalysis<EdgeBundles>();
SpillPlacer = &getAnalysis<SpillPlacement>();
DebugVars = &getAnalysis<LiveDebugVariables>();
SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree));
- LRStage.clear();
- LRStage.resize(MRI->getNumVirtRegs());
+ ExtraRegInfo.clear();
+ ExtraRegInfo.resize(MRI->getNumVirtRegs());
+ NextCascade = 1;
IntfCache.init(MF, &PhysReg2LiveUnion[0], Indexes, TRI);
allocatePhysRegs();
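
A recurring detail in the hunks above is keeping ExtraRegInfo in step with
virtual registers created by splitting: resize() before indexing, and
grow()-plus-copy when LiveRangeEdit clones a register. The pattern, sketched
with a std::vector standing in for llvm::IndexedMap:

  #include <vector>

  struct RegInfo {
    int Stage = 0;        // RS_New analogue
    unsigned Cascade = 0; // 0 = never involved in an eviction
  };

  struct ExtraRegInfoSketch {
    std::vector<RegInfo> Data;

    // grow()/resize() analogue: unseen registers get a default entry.
    RegInfo &operator[](unsigned VReg) {
      if (VReg >= Data.size())
        Data.resize(VReg + 1);
      return Data[VReg];
    }
  };

  // LRE_DidCloneVirtReg analogue: a clone inherits its parent's info. Copy
  // through a temporary; growing for New could invalidate a reference taken
  // into the vector for Old.
  void didClone(ExtraRegInfoSketch &M, unsigned New, unsigned Old) {
    RegInfo Tmp = M[Old];
    M[New] = Tmp;
  }
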
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 5ef88cb..0dd3c598 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -16,7 +16,9 @@
#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
+#include "RegisterClassInfo.h"
#include "Spiller.h"
+#include "RegisterCoalescer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
@@ -27,7 +29,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -57,11 +58,6 @@ NewHeuristic("new-spilling-heuristic",
cl::init(false), cl::Hidden);
static cl::opt<bool>
-PreSplitIntervals("pre-alloc-split",
- cl::desc("Pre-register allocation live interval splitting"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
TrivCoalesceEnds("trivial-coalesce-ends",
cl::desc("Attempt trivial coalescing of interval ends"),
cl::init(false), cl::Hidden);
@@ -100,10 +96,9 @@ namespace {
initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerAnalysisGroup(
+ initializeRegisterCoalescerPass(
*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
- initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -148,6 +143,7 @@ namespace {
BitVector reservedRegs_;
LiveIntervals* li_;
MachineLoopInfo *loopInfo;
+ RegisterClassInfo RegClassInfo;
/// handled_ - Intervals are added to the handled_ set in the order of their
/// start value. This is used for backtracking.
@@ -215,8 +211,6 @@ namespace {
// to coalescing and which analyses coalescing invalidates.
AU.addRequiredTransitive<RegisterCoalescer>();
AU.addRequired<CalculateSpillWeights>();
- if (PreSplitIntervals)
- AU.addRequiredID(PreAllocSplittingID);
AU.addRequiredID(LiveStacksID);
AU.addPreservedID(LiveStacksID);
AU.addRequired<MachineLoopInfo>();
@@ -366,13 +360,10 @@ namespace {
/// getFirstNonReservedPhysReg - return the first non-reserved physical
/// register in the register class.
unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) {
- TargetRegisterClass::iterator aoe = RC->allocation_order_end(*mf_);
- TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_);
- while (i != aoe && reservedRegs_.test(*i))
- ++i;
- assert(i != aoe && "All registers reserved?!");
- return *i;
- }
+ ArrayRef<unsigned> O = RegClassInfo.getOrder(RC);
+ assert(!O.empty() && "All registers reserved?!");
+ return O.front();
+ }
void ComputeRelatedRegClasses();
@@ -402,11 +393,10 @@ INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
-INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting)
INITIALIZE_PASS_DEPENDENCY(LiveStacks)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
-INITIALIZE_AG_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
"Linear Scan Register Allocator", false, false)
@@ -524,6 +514,7 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
reservedRegs_ = tri_->getReservedRegs(fn);
li_ = &getAnalysis<LiveIntervals>();
loopInfo = &getAnalysis<MachineLoopInfo>();
+ RegClassInfo.runOnMachineFunction(fn);
// We don't run the coalescer here because we have no reason to
// interact with it. If the coalescer requires interaction, it
@@ -1166,14 +1157,11 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
bool Found = false;
std::vector<std::pair<unsigned,float> > RegsWeights;
+ ArrayRef<unsigned> Order = RegClassInfo.getOrder(RC);
if (!minReg || SpillWeights[minReg] == HUGE_VALF)
- for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
- e = RC->allocation_order_end(*mf_); i != e; ++i) {
- unsigned reg = *i;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
float regWeight = SpillWeights[reg];
- // Don't even consider reserved regs.
- if (reservedRegs_.test(reg))
- continue;
// Skip recently allocated registers and reserved registers.
if (minWeight > regWeight && !isRecentlyUsed(reg))
Found = true;
@@ -1182,11 +1170,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// If we didn't find a register that is spillable, try aliases?
if (!Found) {
- for (TargetRegisterClass::iterator i = RC->allocation_order_begin(*mf_),
- e = RC->allocation_order_end(*mf_); i != e; ++i) {
- unsigned reg = *i;
- if (reservedRegs_.test(reg))
- continue;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned reg = Order[i];
// No need to worry about if the alias register size < regsize of RC.
// We are going to spill all registers that alias it anyway.
for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as)
@@ -1446,13 +1431,17 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
physReg = vrm_->getPhys(physReg);
- TargetRegisterClass::iterator I, E;
- tie(I, E) = tri_->getAllocationOrder(RC, Hint.first, physReg, *mf_);
- assert(I != E && "No allocatable register in this register class!");
+ ArrayRef<unsigned> Order;
+ if (Hint.first)
+ Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_);
+ else
+ Order = RegClassInfo.getOrder(RC);
+
+ assert(!Order.empty() && "No allocatable register in this register class!");
// Scan for the first available register.
- for (; I != E; ++I) {
- unsigned Reg = *I;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned Reg = Order[i];
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
@@ -1482,8 +1471,8 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
// inactive count. Alkis found that this reduced register pressure very
// slightly on X86 (in rev 1.94 of this file), though this should probably be
// reevaluated now.
- for (; I != E; ++I) {
- unsigned Reg = *I;
+ for (unsigned i = 0; i != Order.size(); ++i) {
+ unsigned Reg = Order[i];
// Ignore "downgraded" registers.
if (SkipDGRegs && DowngradedRegs.count(Reg))
continue;
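
Every change in this file is the same mechanical rewrite: the old
allocation_order_begin/end iterator pairs become an ArrayRef<unsigned> from
RegisterClassInfo::getOrder(), which is computed once with reserved registers
already filtered out, so the explicit reservedRegs_ tests disappear. A
self-contained sketch of why the front of the filtered order is immediately
usable:

  #include <cassert>
  #include <vector>

  // Toy RegisterClassInfo::compute(): cache the raw allocation order with
  // reserved registers dropped.
  struct RegClassInfoSketch {
    std::vector<unsigned> Order;
    RegClassInfoSketch(const std::vector<unsigned> &Raw,
                       const std::vector<bool> &Reserved) {
      for (unsigned i = 0; i != Raw.size(); ++i)
        if (!Reserved[Raw[i]])
          Order.push_back(Raw[i]);
    }
  };

  int main() {
    // Raw order {1, 2, 3}; register 1 is reserved (say, a frame pointer).
    RegClassInfoSketch RCI({1, 2, 3}, {false, true, false, false});
    assert(RCI.Order.front() == 2); // getFirstNonReservedPhysReg analogue
  }
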
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 1e1f1e0..72230d4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -35,6 +35,7 @@
#include "Splitter.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
+#include "RegisterCoalescer.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
@@ -46,7 +47,6 @@
#include "llvm/CodeGen/PBQP/Graph.h"
#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -84,11 +84,11 @@ public:
static char ID;
/// Construct a PBQP register allocator.
- RegAllocPBQP(std::auto_ptr<PBQPBuilder> b)
- : MachineFunctionPass(ID), builder(b) {
+ RegAllocPBQP(std::auto_ptr<PBQPBuilder> b, char *cPassID=0)
+ : MachineFunctionPass(ID), builder(b), customPassID(cPassID) {
initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
initializeLiveStacksPass(*PassRegistry::getPassRegistry());
initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
@@ -122,6 +122,8 @@ private:
std::auto_ptr<PBQPBuilder> builder;
+ char *customPassID;
+
MachineFunction *mf;
const TargetMachine *tm;
const TargetRegisterInfo *tri;
@@ -222,10 +224,9 @@ std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
// Compute an initial allowed set for the current vreg.
typedef std::vector<unsigned> VRAllowed;
VRAllowed vrAllowed;
- for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf),
- aoEnd = trc->allocation_order_end(*mf);
- aoItr != aoEnd; ++aoItr) {
- unsigned preg = *aoItr;
+ ArrayRef<unsigned> rawOrder = trc->getRawAllocationOrder(*mf);
+ for (unsigned i = 0; i != rawOrder.size(); ++i) {
+ unsigned preg = rawOrder[i];
if (!reservedRegs.test(preg)) {
vrAllowed.push_back(preg);
}
@@ -450,6 +451,8 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
au.addRequired<RegisterCoalescer>();
+ if (customPassID)
+ au.addRequiredID(*customPassID);
au.addRequired<CalculateSpillWeights>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
@@ -581,7 +584,7 @@ void RegAllocPBQP::finalizeAlloc() const {
if (physReg == 0) {
const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
- physReg = *liRC->allocation_order_begin(*mf);
+ physReg = liRC->getRawAllocationOrder(*mf).front();
}
vrm->assignVirt2Phys(li->reg, physReg);
@@ -703,8 +706,9 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
}
FunctionPass* llvm::createPBQPRegisterAllocator(
- std::auto_ptr<PBQPBuilder> builder) {
- return new RegAllocPBQP(builder);
+ std::auto_ptr<PBQPBuilder> builder,
+ char *customPassID) {
+ return new RegAllocPBQP(builder, customPassID);
}
FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
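
The new customPassID parameter lets a target require one extra pass of its own
before PBQP allocation runs; it is an ordinary LLVM pass ID pointer, threaded
into addRequiredID() above. A hypothetical target hook (the pass ID is
invented for illustration; the entry point and PBQPBuilder come from this
patch):

  #include "llvm/CodeGen/RegAllocPBQP.h"
  #include <memory>
  using namespace llvm;

  extern char MyTargetPassID; // hypothetical; would normally be SomePass::ID

  FunctionPass *createMyTargetPBQPAllocator() {
    std::auto_ptr<PBQPBuilder> B(new PBQPBuilder());
    return createPBQPRegisterAllocator(B, &MyTargetPassID);
  }
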
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index 75b0c90..5a77e47 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -81,11 +81,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
// FIXME: Once targets reserve registers instead of removing them from the
// allocation order, we can simply use begin/end here.
- TargetRegisterClass::iterator AOB = RC->allocation_order_begin(*MF);
- TargetRegisterClass::iterator AOE = RC->allocation_order_end(*MF);
-
- for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
- unsigned PhysReg = *I;
+ ArrayRef<unsigned> RawOrder = RC->getRawAllocationOrder(*MF);
+ for (unsigned i = 0; i != RawOrder.size(); ++i) {
+ unsigned PhysReg = RawOrder[i];
// Remove reserved registers from the allocation order.
if (Reserved.test(PhysReg))
continue;
@@ -103,7 +101,7 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
DEBUG({
dbgs() << "AllocationOrder(" << RC->getName() << ") = [";
- for (unsigned I = 0; I != N; ++I)
+ for (unsigned I = 0; I != RCI.NumRegs; ++I)
dbgs() << ' ' << PrintReg(RCI.Order[I], TRI);
dbgs() << " ]\n";
});
diff --git a/lib/CodeGen/RegisterClassInfo.h b/lib/CodeGen/RegisterClassInfo.h
index 6f7d9c9..d21fd67 100644
--- a/lib/CodeGen/RegisterClassInfo.h
+++ b/lib/CodeGen/RegisterClassInfo.h
@@ -112,7 +112,7 @@ public:
/// register, so a register allocator needs to track its liveness and
/// availability.
bool isAllocatable(unsigned PhysReg) const {
- return TRI->get(PhysReg).inAllocatableClass && !isReserved(PhysReg);
+ return TRI->isInAllocatableClass(PhysReg) && !isReserved(PhysReg);
}
};
} // end namespace llvm
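
The one-line change keeps the same meaning behind a cleaner accessor: a
register is allocatable only if its class is allocatable at all (a static
TableGen property) and it is not reserved in the current function. A trivial
standalone restatement of that conjunction:

  #include <cassert>

  // Stand-ins for the two predicates composed above.
  bool inAllocatableClass(unsigned R) { return R != 0; } // assumed static fact
  bool isReserved(unsigned R)         { return R == 7; } // e.g. stack pointer

  bool isAllocatable(unsigned R) {
    return inAllocatableClass(R) && !isReserved(R);
  }

  int main() {
    assert(!isAllocatable(0)); // not in any allocatable class
    assert(!isAllocatable(7)); // reserved in this function
    assert(isAllocatable(3));  // allocatable and unreserved
  }
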
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 407559a..b91f92c 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -13,38 +13,92 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/RegisterCoalescer.h"
+#define DEBUG_TYPE "regcoalescing"
+#include "RegisterCoalescer.h"
+#include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
+
+#include "llvm/Pass.h"
+#include "llvm/Value.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Pass.h"
-
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include <algorithm>
+#include <cmath>
using namespace llvm;
-// Register the RegisterCoalescer interface, providing a nice name to refer to.
-INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer",
- SimpleRegisterCoalescing)
-char RegisterCoalescer::ID = 0;
+STATISTIC(numJoins , "Number of interval joins performed");
+STATISTIC(numCrossRCs , "Number of cross class joins performed");
+STATISTIC(numCommutes , "Number of instruction commuting performed");
+STATISTIC(numExtends , "Number of copies extended");
+STATISTIC(NumReMats , "Number of instructions re-materialized");
+STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
+STATISTIC(numAborts , "Number of times interval joining aborted");
-// RegisterCoalescer destructor: DO NOT move this to the header file
-// for RegisterCoalescer or else clients of the RegisterCoalescer
-// class may not depend on the RegisterCoalescer.o file in the current
-// .a file, causing alias analysis support to not be included in the
-// tool correctly!
-//
-RegisterCoalescer::~RegisterCoalescer() {}
+static cl::opt<bool>
+EnableJoining("join-liveintervals",
+ cl::desc("Coalesce copies (default=true)"),
+ cl::init(true));
+
+static cl::opt<bool>
+DisableCrossClassJoin("disable-cross-class-join",
+ cl::desc("Avoid coalescing cross register class copies"),
+ cl::init(false), cl::Hidden);
-unsigned CoalescerPair::compose(unsigned a, unsigned b) const {
+static cl::opt<bool>
+EnablePhysicalJoin("join-physregs",
+ cl::desc("Join physical register copies"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
+ "Simple Register Coalescing", false, false)
+
+char RegisterCoalescer::ID = 0;
+
+static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
if (!a) return b;
if (!b) return a;
- return tri_.composeSubRegIndices(a, b);
+ return tri.composeSubRegIndices(a, b);
}
-bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
- unsigned &Src, unsigned &Dst,
- unsigned &SrcSub, unsigned &DstSub) const {
+static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
+ unsigned &Src, unsigned &Dst,
+ unsigned &SrcSub, unsigned &DstSub) {
if (MI->isCopy()) {
Dst = MI->getOperand(0).getReg();
DstSub = MI->getOperand(0).getSubReg();
@@ -52,7 +106,8 @@ bool CoalescerPair::isMoveInstr(const MachineInstr *MI,
SrcSub = MI->getOperand(1).getSubReg();
} else if (MI->isSubregToReg()) {
Dst = MI->getOperand(0).getReg();
- DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm());
+ DstSub = compose(tri, MI->getOperand(0).getSubReg(),
+ MI->getOperand(3).getImm());
Src = MI->getOperand(2).getReg();
SrcSub = MI->getOperand(2).getSubReg();
} else
@@ -66,7 +121,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
flipped_ = crossClass_ = false;
unsigned Src, Dst, SrcSub, DstSub;
- if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
return false;
partial_ = SrcSub || DstSub;
@@ -156,7 +211,7 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (!MI)
return false;
unsigned Src, Dst, SrcSub, DstSub;
- if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub))
+ if (!isMoveInstr(tri_, MI, Src, Dst, SrcSub, DstSub))
return false;
// Find the virtual register that is srcReg_.
@@ -185,13 +240,1558 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
if (dstReg_ != Dst)
return false;
// Registers match, do the subregisters line up?
- return compose(subIdx_, SrcSub) == DstSub;
+ return compose(tri_, subIdx_, SrcSub) == DstSub;
+ }
+}
+
+void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addPreservedID(StrongPHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(TwoAddressInstructionPassID);
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) {
+ /// Joined copies are not deleted immediately, but kept in JoinedCopies.
+ JoinedCopies.insert(CopyMI);
+
+ /// Mark all register operands of CopyMI as <undef> so they won't affect dead
+ /// code elimination.
+ for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
+ E = CopyMI->operands_end(); I != E; ++I)
+ if (I->isReg())
+ I->setIsUndef(true);
+}
+
+/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
+/// being the source and IntB being the dest, thus this defines a value number
+/// in IntB. If the source value number (in IntA) is defined by a copy from B,
+/// see if we can merge these two pieces of B into a single value number,
+/// eliminating a copy. For example:
+///
+/// A3 = B0
+/// ...
+/// B1 = A3 <- this copy
+///
+/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
+/// value number to be replaced with B0 (which simplifies the B liveinterval).
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ // Bail if there is no dst interval - can happen when merging physical subreg
+ // operations.
+ if (!li_->hasInterval(CP.getDstReg()))
+ return false;
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
+ if (BLR == IntB.end()) return false;
+ VNInfo *BValNo = BLR->valno;
+
+ // Get the location that B is defined at. Two options: either this value has
+ // an unknown definition point or it is defined at CopyIdx. If unknown, we
+ // can't process it.
+ if (!BValNo->isDefByCopy()) return false;
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
+ LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
+ // The live range might not exist after fun with physreg coalescing.
+ if (ALR == IntA.end()) return false;
+ VNInfo *AValNo = ALR->valno;
+ // If it's re-defined by an early clobber somewhere in the live range, then
+ // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
+ // See PR3149:
+ // 172 %ECX<def> = MOV32rr %reg1039<kill>
+ // 180 INLINEASM <es:subl $5,$1
+ // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
+ // %EAX<kill>,
+ // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
+ // 188 %EAX<def> = MOV32rr %EAX<kill>
+ // 196 %ECX<def> = MOV32rr %ECX<kill>
+ // 204 %ECX<def> = MOV32rr %ECX<kill>
+ // 212 %EAX<def> = MOV32rr %EAX<kill>
+ // 220 %EAX<def> = MOV32rr %EAX
+ // 228 %reg1039<def> = MOV32rr %ECX<kill>
+ // The early clobber operand ties ECX input to the ECX def.
+ //
+ // The live interval of ECX is represented as this:
+ // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
+ // The coalescer has no idea there was a def in the middle of [174,230].
+ if (AValNo->hasRedefByEC())
+ return false;
+
+ // If AValNo is defined as a copy from IntB, we can potentially process this.
+ // Get the instruction that defines this value number.
+ if (!CP.isCoalescable(AValNo->getCopy()))
+ return false;
+
+ // Get the LiveRange in IntB that this value number starts with.
+ LiveInterval::iterator ValLR =
+ IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
+ if (ValLR == IntB.end())
+ return false;
+
+ // Make sure that the end of the live range is inside the same block as
+ // CopyMI.
+ MachineInstr *ValLREndInst =
+ li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
+ if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
+ return false;
+
+ // Okay, we now know that ValLR ends in the same block that the CopyMI
+ // live-range starts. If there are no intervening live ranges between them in
+ // IntB, we can merge them.
+ if (ValLR+1 != BLR) return false;
+
+ // If a live interval is a physical register, conservatively check if any
+ // of its aliases is overlapping the live interval of the virtual register.
+ // If so, do not coalesce.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+ if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) {
+ DEBUG({
+ dbgs() << "\t\tInterfere with alias ";
+ li_->getInterval(*AS).print(dbgs(), tri_);
+ });
+ return false;
+ }
+ }
+
+ DEBUG({
+ dbgs() << "Extending: ";
+ IntB.print(dbgs(), tri_);
+ });
+
+ SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
+ // We are about to delete CopyMI, so need to remove it as the 'instruction
+ // that defines this value #'. Update the valnum with the new defining
+ // instruction #.
+ BValNo->def = FillerStart;
+ BValNo->setCopy(0);
+
+ // Okay, we can merge them. We need to insert a new liverange:
+ // [ValLR.end, BLR.begin) of either value number, then we merge the
+ // two value numbers.
+ IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
+
+ // If the IntB live range is assigned to a physical register, and if that
+ // physreg has sub-registers, update their live intervals as well.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
+ for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &SRLI = li_->getInterval(*SR);
+ SRLI.addRange(LiveRange(FillerStart, FillerEnd,
+ SRLI.getNextValue(FillerStart, 0,
+ li_->getVNInfoAllocator())));
+ }
+ }
+
+ // Okay, merge "B1" into the same value number as "B0".
+ if (BValNo != ValLR->valno) {
+ // If B1 is killed by a PHI, then the merged live range must also be killed
+ // by the same PHI, as B0 and B1 can not overlap.
+ bool HasPHIKill = BValNo->hasPHIKill();
+ IntB.MergeValueNumberInto(BValNo, ValLR->valno);
+ if (HasPHIKill)
+ ValLR->valno->setHasPHIKill(true);
+ }
+ DEBUG({
+ dbgs() << " result = ";
+ IntB.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
+
+ // If the source instruction was killing the source register before the
+ // merge, unset the isKill marker given the live range has been extended.
+ int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
+ if (UIdx != -1) {
+ ValLREndInst->getOperand(UIdx).setIsKill(false);
+ }
+
+ // If the copy instruction was killing the destination register before the
+ // merge, find the last use and trim the live range. That will also add the
+ // isKill marker.
+ if (ALR->end == CopyIdx)
+ li_->shrinkToUses(&IntA);
+
+ ++numExtends;
+ return true;
+}
+
+/// HasOtherReachingDefs - Return true if there are definitions of IntB
+/// other than BValNo val# that can reach uses of AValno val# of IntA.
+bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA,
+ LiveInterval &IntB,
+ VNInfo *AValNo,
+ VNInfo *BValNo) {
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ LiveInterval::Ranges::iterator BI =
+ std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
+ if (BI != IntB.ranges.begin())
+ --BI;
+ for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
+ if (BI->valno == BValNo)
+ continue;
+ if (BI->start <= AI->start && BI->end > AI->start)
+ return true;
+ if (BI->start > AI->start && BI->start < AI->end)
+ return true;
+ }
}
+ return false;
}
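
The loop above is an interval-overlap scan: for each segment of IntA carrying
AValNo, it binary-searches (upper_bound) to the first plausible segment of
IntB, then reports a conflict if any segment with a different value number
overlaps it. The two if-tests are the two ways half-open ranges [start,end)
can intersect. A standalone check with plain pairs:

  #include <cassert>
  #include <utility>

  typedef std::pair<int, int> Seg; // half-open [start, end)

  // Mirrors the two overlap tests in HasOtherReachingDefs above.
  bool overlaps(Seg A, Seg B) {
    if (B.first <= A.first && B.second > A.first) return true; // B covers A's start
    if (B.first > A.first && B.first < A.second)  return true; // B starts inside A
    return false;
  }

  int main() {
    assert(overlaps(Seg(10, 20), Seg(5, 15)));   // covers the start
    assert(overlaps(Seg(10, 20), Seg(12, 30)));  // starts inside
    assert(!overlaps(Seg(10, 20), Seg(20, 25))); // touching ends don't intersect
  }
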
-// Because of the way .a files work, we must force the SimpleRC
-// implementation to be pulled in if the RegisterCoalescer classes are
-// pulled in. Otherwise we run the risk of RegisterCoalescer being
-// used, but the default implementation not being linked into the tool
-// that uses it.
-DEFINING_FILE_FOR(RegisterCoalescer)
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
+///
+/// A3 = op A2 B0<kill>
+/// ...
+/// B1 = A3 <- this copy
+/// ...
+/// = op A3 <- more uses
+///
+/// ==>
+///
+/// B2 = op B0 A2<kill>
+/// ...
+/// B1 = B2 <- now an identity copy
+/// ...
+/// = op B2 <- more uses
+///
+/// This returns true if an interval was modified.
+///
+bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP,
+ MachineInstr *CopyMI) {
+ // FIXME: For now, only eliminate the copy by commuting its def when the
+ // source register is a virtual register. We want to guard against cases
+ // where the copy is a back-edge copy and commuting the def lengthens the
+ // live interval of the source register to the entire loop.
+ if (CP.isPhys() && CP.isFlipped())
+ return false;
+
+ // Bail if there is no dst interval.
+ if (!li_->hasInterval(CP.getDstReg()))
+ return false;
+
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+
+ LiveInterval &IntA =
+ li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
+ LiveInterval &IntB =
+ li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
+
+ // BValNo is a value number in B that is defined by a copy from A. 'B1' in
+ // the example above.
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ if (!BValNo || !BValNo->isDefByCopy())
+ return false;
+
+ assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
+
+ // AValNo is the value number in A that defines the copy, A3 in the example.
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ assert(AValNo && "COPY source not live");
+
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
+ if (!DefMI)
+ return false;
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (!MCID.isCommutable())
+ return false;
+ // If DefMI is a two-address instruction then commuting it will change the
+ // destination register.
+ int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
+ assert(DefIdx != -1);
+ unsigned UseOpIdx;
+ if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
+ return false;
+ unsigned Op1, Op2, NewDstIdx;
+ if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
+ return false;
+ if (Op1 == UseOpIdx)
+ NewDstIdx = Op2;
+ else if (Op2 == UseOpIdx)
+ NewDstIdx = Op1;
+ else
+ return false;
+
+ MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
+ unsigned NewReg = NewDstMO.getReg();
+ if (NewReg != IntB.reg || !NewDstMO.isKill())
+ return false;
+
+ // Make sure there are no other definitions of IntB that would reach the
+ // uses which the new definition can reach.
+ if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
+ return false;
+
+ // Abort if the aliases of IntB.reg have values that are not simply the
+ // clobbers from the superreg.
+ if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
+ for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
+ if (li_->hasInterval(*AS) &&
+ HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
+ return false;
+
+ // If some of the uses of IntA.reg are already coalesced away, return false.
+ // It's not possible to determine whether it's safe to perform the coalescing.
+ for (MachineRegisterInfo::use_nodbg_iterator UI =
+ mri_->use_nodbg_begin(IntA.reg),
+ UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end())
+ continue;
+ if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
+ return false;
+ }
+
+ DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
+
+ // At this point we have decided that it is legal to do this
+ // transformation. Start by commuting the instruction.
+ MachineBasicBlock *MBB = DefMI->getParent();
+ MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
+ if (!NewMI)
+ return false;
+ if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
+ TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg)))
+ return false;
+ if (NewMI != DefMI) {
+ li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
+ MBB->insert(DefMI, NewMI);
+ MBB->erase(DefMI);
+ }
+ unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
+ NewMI->getOperand(OpIdx).setIsKill();
+
+ // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR, e.g.
+ // A = or A, B
+ // ...
+ // B = A
+ // ...
+ // C = A<kill>
+ // ...
+ // = B
+
+ // Update uses of IntA of the specific Val# with IntB.
+ for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
+ UE = mri_->use_end(); UI != UE;) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (JoinedCopies.count(UseMI))
+ continue;
+ if (UseMI->isDebugValue()) {
+ // FIXME These don't have an instruction index. Not clear we have enough
+ // info to decide whether to do this replacement or not. For now do it.
+ UseMO.setReg(NewReg);
+ continue;
+ }
+ SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
+ LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
+ if (ULR == IntA.end() || ULR->valno != AValNo)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ UseMO.substPhysReg(NewReg, *tri_);
+ else
+ UseMO.setReg(NewReg);
+ if (UseMI == CopyMI)
+ continue;
+ if (!UseMI->isCopy())
+ continue;
+ if (UseMI->getOperand(0).getReg() != IntB.reg ||
+ UseMI->getOperand(0).getSubReg())
+ continue;
+
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
+ SlotIndex DefIdx = UseIdx.getDefIndex();
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
+ continue;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
+ markAsJoined(UseMI);
+ }
+
+ // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
+ // is updated.
+ VNInfo *ValNo = BValNo;
+ ValNo->def = AValNo->def;
+ ValNo->setCopy(0);
+ for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
+ AI != AE; ++AI) {
+ if (AI->valno != AValNo) continue;
+ IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
+ }
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
+
+ IntA.removeValNo(AValNo);
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
+ ++numCommutes;
+ return true;
+}
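
The operand bookkeeping in the middle of RemoveCopyByCommutingDef is easy to lose in the detail. The core selection step restated as a tiny helper (hypothetical name, same logic as the Op1/Op2/UseOpIdx code above): given the pair of commutable operand indices and the operand tied to the def, the other commutable operand becomes the new destination.

bool pickNewDstIdx(unsigned Op1, unsigned Op2, unsigned UseOpIdx,
                   unsigned &NewDstIdx) {
  if (Op1 == UseOpIdx)
    NewDstIdx = Op2;
  else if (Op2 == UseOpIdx)
    NewDstIdx = Op1;
  else
    return false;  // the tied operand is not one of the commutable pair
  return true;
}
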
+
+/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
+/// computation, replace the copy by rematerializing the definition.
+bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ bool preserveSrcInt,
+ unsigned DstReg,
+ unsigned DstSubIdx,
+ MachineInstr *CopyMI) {
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
+ LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
+ assert(SrcLR != SrcInt.end() && "Live range not found!");
+ VNInfo *ValNo = SrcLR->valno;
+ // If other defs can reach uses of this def, then it's not safe to perform
+ // the optimization.
+ if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
+ return false;
+ MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
+ assert(DefMI && "Defining instruction disappeared");
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (!MCID.isAsCheapAsAMove())
+ return false;
+ if (!tii_->isTriviallyReMaterializable(DefMI, AA))
+ return false;
+ bool SawStore = false;
+ if (!DefMI->isSafeToMove(tii_, AA, SawStore))
+ return false;
+ if (MCID.getNumDefs() != 1)
+ return false;
+ if (!DefMI->isImplicitDef()) {
+ // Make sure the copy destination register class fits the instruction
+ // definition register class. The mismatch can happen as a result of earlier
+ // extract_subreg, insert_subreg, subreg_to_reg coalescing.
+ const TargetRegisterClass *RC = tii_->getRegClass(MCID, 0, tri_);
+ if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (mri_->getRegClass(DstReg) != RC)
+ return false;
+ } else if (!RC->contains(DstReg))
+ return false;
+ }
+
+ // If the destination register has a sub-register index on it, make sure it
+ // matches the instruction register class.
+ if (DstSubIdx) {
+ const MCInstrDesc &MCID = DefMI->getDesc();
+ if (MCID.getNumDefs() != 1)
+ return false;
+ const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
+ const TargetRegisterClass *DstSubRC =
+ DstRC->getSubRegisterRegClass(DstSubIdx);
+ const TargetRegisterClass *DefRC = tii_->getRegClass(MCID, 0, tri_);
+ if (DefRC == DstRC)
+ DstSubIdx = 0;
+ else if (DefRC != DstSubRC)
+ return false;
+ }
+
+ RemoveCopyFlag(DstReg, CopyMI);
+
+ MachineBasicBlock *MBB = CopyMI->getParent();
+ MachineBasicBlock::iterator MII =
+ llvm::next(MachineBasicBlock::iterator(CopyMI));
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
+ MachineInstr *NewMI = prior(MII);
+
+ // CopyMI may have implicit operands; transfer them over to the newly
+ // rematerialized instruction, and update implicit def interval valnos.
+ for (unsigned i = CopyMI->getDesc().getNumOperands(),
+ e = CopyMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = CopyMI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ NewMI->addOperand(MO);
+ if (MO.isDef())
+ RemoveCopyFlag(MO.getReg(), CopyMI);
+ }
+
+ NewMI->copyImplicitOps(CopyMI);
+ li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
+ CopyMI->eraseFromParent();
+ ReMatCopies.insert(CopyMI);
+ ReMatDefs.insert(DefMI);
+ DEBUG(dbgs() << "Remat: " << *NewMI);
+ ++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ if (preserveSrcInt)
+ li_->shrinkToUses(&SrcInt);
+
+ return true;
+}
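
The guards at the top of ReMaterializeTrivialDef compose into a single predicate. A condensed sketch, with the VNInfo/TargetInstrInfo queries flattened into a plain struct of booleans (an illustrative stand-in, not the LLVM API):

struct RematCandidate {
  bool phiDef, unused, phiKill;            // VNInfo properties
  bool cheapAsMove, triviallyReMat, safeToMove;
  unsigned numDefs;
};

bool canRematTrivialDef(const RematCandidate &C) {
  if (C.phiDef || C.unused || C.phiKill)   // other defs may reach the uses
    return false;
  return C.cheapAsMove && C.triviallyReMat && C.safeToMove && C.numDefs == 1;
}
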
+
+/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
+/// update the subregister number if it is not zero. If DstReg is a
+/// physical register and the existing subregister number of the def / use
+/// being updated is not zero, make sure to set it to the correct physical
+/// subregister.
+void
+RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) {
+ bool DstIsPhys = CP.isPhys();
+ unsigned SrcReg = CP.getSrcReg();
+ unsigned DstReg = CP.getDstReg();
+ unsigned SubIdx = CP.getSubIdx();
+
+ // Update LiveDebugVariables.
+ ldv_->renameRegister(SrcReg, DstReg, SubIdx);
+
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ // A PhysReg copy that won't be coalesced can perhaps be rematerialized
+ // instead.
+ if (DstIsPhys) {
+ if (UseMI->isCopy() &&
+ !UseMI->getOperand(1).getSubReg() &&
+ !UseMI->getOperand(0).getSubReg() &&
+ UseMI->getOperand(1).getReg() == SrcReg &&
+ UseMI->getOperand(0).getReg() != SrcReg &&
+ UseMI->getOperand(0).getReg() != DstReg &&
+ !JoinedCopies.count(UseMI) &&
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
+ UseMI->getOperand(0).getReg(), 0, UseMI))
+ continue;
+ }
+
+ SmallVector<unsigned,8> Ops;
+ bool Reads, Writes;
+ tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
+ bool Kills = false, Deads = false;
+
+ // Replace SrcReg with DstReg in all UseMI operands.
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ MachineOperand &MO = UseMI->getOperand(Ops[i]);
+ Kills |= MO.isKill();
+ Deads |= MO.isDead();
+
+ if (DstIsPhys)
+ MO.substPhysReg(DstReg, *tri_);
+ else
+ MO.substVirtReg(DstReg, SubIdx, *tri_);
+ }
+
+ // This instruction is a copy that will be removed.
+ if (JoinedCopies.count(UseMI))
+ continue;
+
+ if (SubIdx) {
+ // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
+ // read-modify-write of DstReg.
+ if (Deads)
+ UseMI->addRegisterDead(DstReg, tri_);
+ else if (!Reads && Writes)
+ UseMI->addRegisterDefined(DstReg, tri_);
+
+ // Kill flags apply to the whole physical register.
+ if (DstIsPhys && Kills)
+ UseMI->addRegisterKilled(DstReg, tri_);
+ }
+
+ DEBUG({
+ dbgs() << "\t\tupdated: ";
+ if (!UseMI->isDebugValue())
+ dbgs() << li_->getInstructionIndex(UseMI) << "\t";
+ dbgs() << *UseMI;
+ });
+ }
+}
+
+/// removeIntervalIfEmpty - Check if the live interval of a physical register
+/// is empty; if so, remove it, along with the empty intervals of its
+/// sub-registers. Return true if the live interval is removed.
+static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
+ const TargetRegisterInfo *tri_) {
+ if (li.empty()) {
+ if (TargetRegisterInfo::isPhysicalRegister(li.reg))
+ for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ LiveInterval &sli = li_->getInterval(*SR);
+ if (sli.empty())
+ li_->removeInterval(*SR);
+ }
+ li_->removeInterval(li.reg);
+ return true;
+ }
+ return false;
+}
+
+/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
+/// the val# it defines. If the live interval becomes empty, remove it as well.
+bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li,
+ MachineInstr *DefMI) {
+ SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
+ LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
+ if (DefIdx != MLR->valno->def)
+ return false;
+ li.removeValNo(MLR->valno);
+ return removeIntervalIfEmpty(li, li_, tri_);
+}
+
+void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg,
+ const MachineInstr *CopyMI) {
+ SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
+ if (li_->hasInterval(DstReg)) {
+ LiveInterval &LI = li_->getInterval(DstReg);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->def == DefIdx)
+ LR->valno->setCopy(0);
+ }
+ if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return;
+ for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
+ if (!li_->hasInterval(*AS))
+ continue;
+ LiveInterval &LI = li_->getInterval(*AS);
+ if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
+ if (LR->valno->def == DefIdx)
+ LR->valno->setCopy(0);
+ }
+}
+
+/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
+/// We need to be careful about coalescing a source physical register with a
+/// virtual register. Once the coalescing is done, it cannot be broken and these
+/// are not spillable! If the destination interval uses are far away, think
+/// twice about coalescing them!
+bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) {
+ bool Allocatable = li_->isAllocatable(CP.getDstReg());
+ LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
+
+ // Always join simple intervals that are defined by a single copy from a
+ // reserved register. This doesn't increase register pressure, so it is
+ // always beneficial.
+ if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
+ return true;
+
+ if (!EnablePhysicalJoin) {
+ DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
+ return false;
+ }
+
+ // Only coalesce to allocatable physreg, we don't want to risk modifying
+ // reserved registers.
+ if (!Allocatable) {
+ DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
+ return false; // Not coalescable.
+ }
+
+ // Don't join with physregs that have a ridiculous number of live
+ // ranges. The data structure performance is really bad when that
+ // happens.
+ if (li_->hasInterval(CP.getDstReg()) &&
+ li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
+ ++numAborts;
+ DEBUG(dbgs()
+ << "\tPhysical register live interval too complicated, abort!\n");
+ return false;
+ }
+
+ // FIXME: Why are we skipping this test for partial copies?
+ // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
+ if (!CP.isPartial()) {
+ const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
+ unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
+ unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
+ if (Length > Threshold) {
+ ++numAborts;
+ DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
+ return false;
+ }
+ }
+ return true;
+}
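
The two throttles in shouldJoinPhys can be read as one standalone heuristic. A sketch with the measurements passed in directly (hypothetical signature; thresholds match the code above):

bool physJoinWorthIt(unsigned dstRangeCount, unsigned srcLengthInInstrs,
                     unsigned allocatableRegsInClass, bool partialCopy) {
  if (dstRangeCount > 1000)
    return false;  // physreg interval too complicated for the data structures
  if (!partialCopy && srcLengthInInstrs > 2 * allocatableRegsInClass)
    return false;  // may tie the physical register down for too long
  return true;
}
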
+
+/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
+/// two virtual registers from different register classes.
+bool
+RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg,
+ unsigned DstReg,
+ const TargetRegisterClass *SrcRC,
+ const TargetRegisterClass *DstRC,
+ const TargetRegisterClass *NewRC) {
+ unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
+ // This heuristic is good enough in practice, but it's obviously not *right*.
+ // 4 is a magic number that works well enough for x86, ARM, etc. It filters
+ // out all but the most restrictive register classes.
+ if (NewRCCount > 4 ||
+ // Early exit: coalesce aggressively if the function is fairly small.
+ // For really special register classes with 3 or fewer registers, be a
+ // bit more careful.
+ (li_->getFuncInstructionCount() / NewRCCount) < 8)
+ return true;
+ LiveInterval &SrcInt = li_->getInterval(SrcReg);
+ LiveInterval &DstInt = li_->getInterval(DstReg);
+ unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
+ unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
+
+ // Coalesce aggressively if the intervals are small compared to the number of
+ // registers in the new class. The number 4 is fairly arbitrary, chosen to be
+ // less aggressive than the 8 used for the whole function size.
+ const unsigned ThresSize = 4 * NewRCCount;
+ if (SrcSize <= ThresSize && DstSize <= ThresSize)
+ return true;
+
+ // Estimate *register use density*. If it doubles or more, abort.
+ unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
+ mri_->use_nodbg_end());
+ unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
+ mri_->use_nodbg_end());
+ unsigned NewUses = SrcUses + DstUses;
+ unsigned NewSize = SrcSize + DstSize;
+ if (SrcRC != NewRC && SrcSize > ThresSize) {
+ unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
+ if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
+ return false;
+ }
+ if (DstRC != NewRC && DstSize > ThresSize) {
+ unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
+ if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
+ return false;
+ }
+ return true;
+}
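
The density test is easiest to see with numbers plugged in. A worked example with made-up counts (10 uses of Src over 40 instructions in a 16-register class, growing to 30 uses over 60 instructions in an 8-register class):

#include <cstdio>

int main() {
  unsigned SrcUses = 10, SrcSize = 40, SrcRCCount = 16;
  unsigned NewUses = 30, NewSize = 60, NewRCCount = 8;
  // Abort when per-register use density at least doubles.
  bool abortJoin =
      NewUses * SrcSize * SrcRCCount > 2 * SrcUses * NewSize * NewRCCount;
  std::printf("abort = %d\n", abortJoin); // 19200 > 9600, so abort = 1
  return 0;
}
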
+
+
+/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
+/// which are the src/dst of the copy instruction CopyMI. This returns true
+/// if the copy was successfully coalesced away. If it is not currently
+/// possible to coalesce this interval, but it may be possible if other
+/// things get coalesced, then it returns true by reference in 'Again'.
+bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) {
+
+ Again = false;
+ if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
+ return false; // Already done.
+
+ DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
+
+ CoalescerPair CP(*tii_, *tri_);
+ if (!CP.setRegisters(CopyMI)) {
+ DEBUG(dbgs() << "\tNot coalescable.\n");
+ return false;
+ }
+
+ // If they are already joined we continue.
+ if (CP.getSrcReg() == CP.getDstReg()) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tCopy already coalesced.\n");
+ return false; // Not coalescable.
+ }
+
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)
+ << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+ << "\n");
+
+ // Enforce policies.
+ if (CP.isPhys()) {
+ if (!shouldJoinPhys(CP)) {
+ // Before giving up coalescing, if definition of source is defined by
+ // trivial computation, try rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), 0, CopyMI))
+ return true;
+ return false;
+ }
+ } else {
+ // Avoid constraining virtual register regclass too much.
+ if (CP.isCrossClass()) {
+ DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
+ if (DisableCrossClassJoin) {
+ DEBUG(dbgs() << "\tCross-class joins disabled.\n");
+ return false;
+ }
+ if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
+ mri_->getRegClass(CP.getSrcReg()),
+ mri_->getRegClass(CP.getDstReg()),
+ CP.getNewRC())) {
+ DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+ }
+
+ // When possible, let DstReg be the larger interval.
+ if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
+ li_->getInterval(CP.getDstReg()).ranges.size())
+ CP.flip();
+ }
+
+ // Okay, attempt to join these two intervals. On failure, this returns false.
+ // Otherwise, if one of the intervals being joined is a physreg, this method
+ // always canonicalizes DstInt to be it. The output "SrcInt" will not have
+ // been modified, so we can use this information below to update aliases.
+ if (!JoinIntervals(CP)) {
+ // Coalescing failed.
+
+ // If definition of source is defined by trivial computation, try
+ // rematerializing it.
+ if (!CP.isFlipped() &&
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
+ CP.getDstReg(), 0, CopyMI))
+ return true;
+
+ // If we can eliminate the copy without merging the live ranges, do so now.
+ if (!CP.isPartial()) {
+ if (AdjustCopiesBackFrom(CP, CopyMI) ||
+ RemoveCopyByCommutingDef(CP, CopyMI)) {
+ markAsJoined(CopyMI);
+ DEBUG(dbgs() << "\tTrivial!\n");
+ return true;
+ }
+ }
+
+ // Otherwise, we are unable to join the intervals.
+ DEBUG(dbgs() << "\tInterference!\n");
+ Again = true; // May be possible to coalesce later.
+ return false;
+ }
+
+ // Coalescing to a virtual register that is of a sub-register class of the
+ // other. Make sure the resulting register is set to the right register class.
+ if (CP.isCrossClass()) {
+ ++numCrossRCs;
+ mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
+ }
+
+ // Remember to delete the copy instruction.
+ markAsJoined(CopyMI);
+
+ UpdateRegDefsUses(CP);
+
+ // If we have extended the live range of a physical register, make sure we
+ // update live-in lists as well.
+ if (CP.isPhys()) {
+ SmallVector<MachineBasicBlock*, 16> BlockSeq;
+ // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
+ // ranges for this, and they are preserved.
+ LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
+ for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
+ I != E; ++I ) {
+ li_->findLiveInMBBs(I->start, I->end, BlockSeq);
+ for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
+ MachineBasicBlock &block = *BlockSeq[idx];
+ if (!block.isLiveIn(CP.getDstReg()))
+ block.addLiveIn(CP.getDstReg());
+ }
+ BlockSeq.clear();
+ }
+ }
+
+ // SrcReg is guaranteed to be the register whose live interval is
+ // being merged.
+ li_->removeInterval(CP.getSrcReg());
+
+ // Update regalloc hint.
+ tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
+
+ DEBUG({
+ LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
+ dbgs() << "\tJoined. Result = ";
+ DstInt.print(dbgs(), tri_);
+ dbgs() << "\n";
+ });
+
+ ++numJoins;
+ return true;
+}
+
+/// ComputeUltimateVN - Assuming we are going to join two live intervals,
+/// compute what the resultant value numbers for each value in the input two
+/// ranges will be. This is complicated by copies between the two which can
+/// and will commonly cause multiple value numbers to be merged into one.
+///
+/// VNI is the value number that we're trying to resolve. NewVNInfo keeps
+/// track of the new value numbers assigned for the result LiveInterval.
+/// ThisFromOther/OtherFromThis are maps that record, for a value defined as
+/// a copy from the opposite interval, which value it is copied from.
+/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
+/// already been assigned.
+///
+/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
+/// contains the value number the copy is from.
+///
+static unsigned ComputeUltimateVN(VNInfo *VNI,
+ SmallVector<VNInfo*, 16> &NewVNInfo,
+ DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
+ DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
+ SmallVector<int, 16> &ThisValNoAssignments,
+ SmallVector<int, 16> &OtherValNoAssignments) {
+ unsigned VN = VNI->id;
+
+ // If the VN has already been computed, just return it.
+ if (ThisValNoAssignments[VN] >= 0)
+ return ThisValNoAssignments[VN];
+ assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
+
+ // If this val is not a copy from the other val, then it must be a new value
+ // number in the destination.
+ DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
+ if (I == ThisFromOther.end()) {
+ NewVNInfo.push_back(VNI);
+ return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
+ }
+ VNInfo *OtherValNo = I->second;
+
+ // Otherwise, this *is* a copy from the RHS. If the other side has already
+ // been computed, return it.
+ if (OtherValNoAssignments[OtherValNo->id] >= 0)
+ return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
+
+ // Mark this value number as currently being computed, then ask what the
+ // ultimate value # of the other value is.
+ ThisValNoAssignments[VN] = -2;
+ unsigned UltimateVN =
+ ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
+ OtherValNoAssignments, ThisValNoAssignments);
+ return ThisValNoAssignments[VN] = UltimateVN;
+}
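
ComputeUltimateVN is a small mutually recursive resolution over two assignment tables. A self-contained model with ints for value numbers and explicit maps for the copied-from relation; as above, -1 means unassigned and -2 marks a value whose assignment is in progress:

#include <cassert>
#include <map>
#include <vector>

static int resolveVN(int VN, std::vector<int> &NewVNs,
                     const std::map<int, int> &ThisFromOther,
                     const std::map<int, int> &OtherFromThis,
                     std::vector<int> &ThisAssign,
                     std::vector<int> &OtherAssign) {
  if (ThisAssign[VN] >= 0)
    return ThisAssign[VN];
  assert(ThisAssign[VN] != -2 && "cyclic value numbers");
  std::map<int, int>::const_iterator I = ThisFromOther.find(VN);
  if (I == ThisFromOther.end()) {  // not a copy: a fresh result value
    NewVNs.push_back(VN);
    return ThisAssign[VN] = (int)NewVNs.size() - 1;
  }
  ThisAssign[VN] = -2;             // mark in progress, then recurse
  return ThisAssign[VN] = resolveVN(I->second, NewVNs, OtherFromThis,
                                    ThisFromOther, OtherAssign, ThisAssign);
}
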
+
+
+// Find out if we have something like
+// A = X
+// B = X
+// if so, we can pretend this is actually
+// A = X
+// B = A
+// which allows us to coalesce A and B.
+// VNI is the definition of B. LR is the live range of A that includes
+// the slot just before B. If we return true, we add "B = X" to DupCopies.
+static bool RegistersDefinedFromSameValue(LiveIntervals &li,
+ const TargetRegisterInfo &tri,
+ CoalescerPair &CP,
+ VNInfo *VNI,
+ LiveRange *LR,
+ SmallVector<MachineInstr*, 8> &DupCopies) {
+ // FIXME: This is very conservative. For example, we don't handle
+ // physical registers.
+
+ MachineInstr *MI = VNI->getCopy();
+
+ if (!MI->isFullCopy() || CP.isPartial() || CP.isPhys())
+ return false;
+
+ unsigned Dst = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(Src) ||
+ !TargetRegisterInfo::isVirtualRegister(Dst))
+ return false;
+
+ unsigned A = CP.getDstReg();
+ unsigned B = CP.getSrcReg();
+
+ if (B == Dst)
+ std::swap(A, B);
+ assert(Dst == A);
+
+ VNInfo *Other = LR->valno;
+ if (!Other->isDefByCopy())
+ return false;
+ const MachineInstr *OtherMI = Other->getCopy();
+
+ if (!OtherMI->isFullCopy())
+ return false;
+
+ unsigned OtherDst = OtherMI->getOperand(0).getReg();
+ unsigned OtherSrc = OtherMI->getOperand(1).getReg();
+
+ if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) ||
+ !TargetRegisterInfo::isVirtualRegister(OtherDst))
+ return false;
+
+ assert(OtherDst == B);
+
+ if (Src != OtherSrc)
+ return false;
+
+ // If the copies use two different value numbers of X, we cannot merge
+ // A and B.
+ LiveInterval &SrcInt = li.getInterval(Src);
+ if (SrcInt.getVNInfoAt(Other->def) != SrcInt.getVNInfoAt(VNI->def))
+ return false;
+
+ DupCopies.push_back(MI);
+
+ return true;
+}
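
Stripped of the virtual-register and full-copy guards, the test reduces to: both copies read the same source register and the same value number of it. A sketch with the copies flattened to a struct (hypothetical, for illustration):

struct FlatCopy { unsigned dst, src, srcValNoAtCopy; bool fullCopy; };

// A = X and B = X define the same value iff both are full copies of the
// same register and see the same value number of X at their def points.
bool definedFromSameValue(const FlatCopy &A, const FlatCopy &B) {
  return A.fullCopy && B.fullCopy && A.src == B.src &&
         A.srcValNoAtCopy == B.srcValNoAtCopy;
}
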
+
+/// JoinIntervals - Attempt to join these two intervals. On failure, this
+/// returns false.
+bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) {
+ LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
+ DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // If a live interval is a physical register, check for interference with any
+ // aliases. The interference check implemented here is a bit more conservative
+ // than the full interference check below. We allow overlapping live ranges
+ // only when one is a copy of the other.
+ if (CP.isPhys()) {
+ for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
+ if (!li_->hasInterval(*AS))
+ continue;
+ const LiveInterval &LHS = li_->getInterval(*AS);
+ LiveInterval::const_iterator LI = LHS.begin();
+ for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
+ RI != RE; ++RI) {
+ LI = std::lower_bound(LI, LHS.end(), RI->start);
+ // Does LHS have an overlapping live range starting before RI?
+ if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
+ (RI->start != RI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
+ });
+ return false;
+ }
+
+ // Check that LHS ranges beginning in this range are copies.
+ for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
+ if (LI->start != LI->valno->def ||
+ !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
+ DEBUG({
+ dbgs() << "\t\tInterference from alias: ";
+ LHS.print(dbgs(), tri_);
+ dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
+ });
+ return false;
+ }
+ }
+ }
+ }
+ }
+
+ // Compute the final value assignment, assuming that the live ranges can be
+ // coalesced.
+ SmallVector<int, 16> LHSValNoAssignments;
+ SmallVector<int, 16> RHSValNoAssignments;
+ DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
+ DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
+ SmallVector<VNInfo*, 16> NewVNInfo;
+
+ SmallVector<MachineInstr*, 8> DupCopies;
+
+ LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
+ DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
+
+ // Loop over the value numbers of the LHS, seeing if any are defined from
+ // the RHS.
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ continue;
+
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
+
+ // Figure out the value # from the RHS.
+ LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+
+ // DstReg is known to be a register in the LHS interval. If the src is
+ // from the RHS interval, we can use its value #.
+ MachineInstr *MI = VNI->getCopy();
+ if (!CP.isCoalescable(MI) &&
+ !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+ continue;
+
+ LHSValsDefinedFromRHS[VNI] = lr->valno;
+ }
+
+ // Loop over the value numbers of the RHS, seeing if any are defined from
+ // the LHS.
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
+ continue;
+
+ // Never join with a register that has EarlyClobber redefs.
+ if (VNI->hasRedefByEC())
+ return false;
+
+ // Figure out the value # from the LHS.
+ LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
+ // The copy could be to an aliased physreg.
+ if (!lr) continue;
+
+ // DstReg is known to be a register in the RHS interval. If the src is
+ // from the LHS interval, we can use its value #.
+ MachineInstr *MI = VNI->getCopy();
+ if (!CP.isCoalescable(MI) &&
+ !RegistersDefinedFromSameValue(*li_, *tri_, CP, VNI, lr, DupCopies))
+ continue;
+
+ RHSValsDefinedFromLHS[VNI] = lr->valno;
+ }
+
+ LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
+ RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
+ NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
+
+ for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ ComputeUltimateVN(VNI, NewVNInfo,
+ LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
+ LHSValNoAssignments, RHSValNoAssignments);
+ }
+ for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
+ i != e; ++i) {
+ VNInfo *VNI = *i;
+ unsigned VN = VNI->id;
+ if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
+ continue;
+ // If this value number isn't a copy from the LHS, it's a new number.
+ if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
+ NewVNInfo.push_back(VNI);
+ RHSValNoAssignments[VN] = NewVNInfo.size()-1;
+ continue;
+ }
+
+ ComputeUltimateVN(VNI, NewVNInfo,
+ RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
+ RHSValNoAssignments, LHSValNoAssignments);
+ }
+
+ // Armed with the mappings of LHS/RHS values to ultimate values, walk the
+ // interval lists to see if these intervals are coalescable.
+ LiveInterval::const_iterator I = LHS.begin();
+ LiveInterval::const_iterator IE = LHS.end();
+ LiveInterval::const_iterator J = RHS.begin();
+ LiveInterval::const_iterator JE = RHS.end();
+
+ // Skip ahead until the first place of potential sharing.
+ if (I != IE && J != JE) {
+ if (I->start < J->start) {
+ I = std::upper_bound(I, IE, J->start);
+ if (I != LHS.begin()) --I;
+ } else if (J->start < I->start) {
+ J = std::upper_bound(J, JE, I->start);
+ if (J != RHS.begin()) --J;
+ }
+ }
+
+ while (I != IE && J != JE) {
+ // Determine if these two live ranges overlap.
+ bool Overlaps;
+ if (I->start < J->start) {
+ Overlaps = I->end > J->start;
+ } else {
+ Overlaps = J->end > I->start;
+ }
+
+ // If so, check value # info to determine if they are really different.
+ if (Overlaps) {
+ // If the live range overlap will map to the same value number in the
+ // result liverange, we can still coalesce them. If not, we can't.
+ if (LHSValNoAssignments[I->valno->id] !=
+ RHSValNoAssignments[J->valno->id])
+ return false;
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+ return false;
+ }
+
+ if (I->end < J->end)
+ ++I;
+ else
+ ++J;
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
+ E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned LHSValID = LHSValNoAssignments[VNI->id];
+ if (VNI->hasPHIKill())
+ NewVNInfo[LHSValID]->setHasPHIKill(true);
+ }
+
+ // Update kill info. Some live ranges are extended due to copy coalescing.
+ for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
+ E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
+ VNInfo *VNI = I->first;
+ unsigned RHSValID = RHSValNoAssignments[VNI->id];
+ if (VNI->hasPHIKill())
+ NewVNInfo[RHSValID]->setHasPHIKill(true);
+ }
+
+ if (LHSValNoAssignments.empty())
+ LHSValNoAssignments.push_back(-1);
+ if (RHSValNoAssignments.empty())
+ RHSValNoAssignments.push_back(-1);
+
+ SmallVector<unsigned, 8> SourceRegisters;
+ for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
+ E = DupCopies.end(); I != E; ++I) {
+ MachineInstr *MI = *I;
+
+ // We have pretended that the assignment to B in
+ // A = X
+ // B = X
+ // was actually a copy from A. Now that we decided to coalesce A and B,
+ // transform the code into
+ // A = X
+ // X = X
+ // and mark the X as coalesced to keep the illusion.
+ unsigned Src = MI->getOperand(1).getReg();
+ SourceRegisters.push_back(Src);
+ MI->getOperand(0).substVirtReg(Src, 0, *tri_);
+
+ markAsJoined(MI);
+ }
+
+ // If B = X was the last use of X in a liverange, we have to shrink it now
+ // that B = X is gone.
+ for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
+ E = SourceRegisters.end(); I != E; ++I) {
+ li_->shrinkToUses(&li_->getInterval(*I));
+ }
+
+ // If we get here, we know that we can coalesce the live ranges. Ask the
+ // intervals to coalesce themselves now.
+ LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
+ mri_);
+ return true;
+}
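
The overlap walk near the end of JoinIntervals is the classic two-pointer sweep over sorted ranges. A standalone version of just that loop, under the same simplifications as the earlier sketches:

#include <cstddef>
#include <vector>

struct Range { unsigned start, end; int valno; }; // [start, end), sorted

// Advance whichever range ends first; reject any overlap whose two value
// numbers map to different result values.
bool rangesCompatible(const std::vector<Range> &L, const std::vector<Range> &R,
                      const std::vector<int> &LAssign,
                      const std::vector<int> &RAssign) {
  std::size_t i = 0, j = 0;
  while (i != L.size() && j != R.size()) {
    bool overlaps = L[i].start < R[j].start ? L[i].end > R[j].start
                                            : R[j].end > L[i].start;
    if (overlaps && LAssign[L[i].valno] != RAssign[R[j].valno])
      return false;
    if (L[i].end < R[j].end) ++i; else ++j;
  }
  return true;
}
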
+
+namespace {
+ // DepthMBBCompare - Comparison predicate that sorts first based on the loop
+ // depth of the basic block (the unsigned), and then on the MBB number.
+ struct DepthMBBCompare {
+ typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
+ bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
+ // Deeper loops first
+ if (LHS.first != RHS.first)
+ return LHS.first > RHS.first;
+
+ // Prefer blocks that are more connected in the CFG. This takes care of
+ // the most difficult copies first while intervals are short.
+ unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
+ unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
+ if (cl != cr)
+ return cl > cr;
+
+ // As a last resort, sort by block number.
+ return LHS.second->getNumber() < RHS.second->getNumber();
+ }
+ };
+}
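
The same ordering, lifted out of the pass so it can be exercised on its own. Blocks are reduced to their three sort keys (an illustrative stand-in for MachineBasicBlock):

#include <algorithm>
#include <vector>

struct BlockKey { unsigned loopDepth, connectivity; int number; };

static bool depthOrder(const BlockKey &a, const BlockKey &b) {
  if (a.loopDepth != b.loopDepth) return a.loopDepth > b.loopDepth;
  if (a.connectivity != b.connectivity) return a.connectivity > b.connectivity;
  return a.number < b.number;
}

void sortForCoalescing(std::vector<BlockKey> &blocks) {
  std::sort(blocks.begin(), blocks.end(), depthOrder);
}
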
+
+void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB,
+ std::vector<MachineInstr*> &TryAgain) {
+ DEBUG(dbgs() << MBB->getName() << ":\n");
+
+ SmallVector<MachineInstr*, 8> VirtCopies;
+ SmallVector<MachineInstr*, 8> PhysCopies;
+ SmallVector<MachineInstr*, 8> ImpDefCopies;
+ for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+ MII != E;) {
+ MachineInstr *Inst = MII++;
+
+ // If this isn't a copy or a subreg_to_reg, we can't join intervals.
+ unsigned SrcReg, DstReg;
+ if (Inst->isCopy()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(1).getReg();
+ } else if (Inst->isSubregToReg()) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ } else
+ continue;
+
+ bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
+ bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+ ImpDefCopies.push_back(Inst);
+ else if (SrcIsPhys || DstIsPhys)
+ PhysCopies.push_back(Inst);
+ else
+ VirtCopies.push_back(Inst);
+ }
+
+ // Try coalescing implicit copies and insert_subreg <undef> first,
+ // followed by copies to / from physical registers, then finally copies
+ // from virtual registers to virtual registers.
+ for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = ImpDefCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = PhysCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+ for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
+ MachineInstr *TheCopy = VirtCopies[i];
+ bool Again = false;
+ if (!JoinCopy(TheCopy, Again))
+ if (Again)
+ TryAgain.push_back(TheCopy);
+ }
+}
+
+void RegisterCoalescer::joinIntervals() {
+ DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
+
+ std::vector<MachineInstr*> TryAgainList;
+ if (loopInfo->empty()) {
+ // If there are no loops in the function, join intervals in function order.
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
+ I != E; ++I)
+ CopyCoalesceInMBB(I, TryAgainList);
+ } else {
+ // Otherwise, join intervals in inner loops before other intervals.
+ // Unfortunately we can't just iterate over loop hierarchy here because
+ // there may be more MBB's than BB's. Collect MBB's for sorting.
+
+ // Join intervals in the function prolog first. We want to join physical
+ // registers with virtual registers before the intervals get too long.
+ std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
+ for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
+ MachineBasicBlock *MBB = I;
+ MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
+ }
+
+ // Sort by loop depth.
+ std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
+
+ // Finally, join intervals in loop nest order.
+ for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
+ CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
+ }
+
+ // Joining intervals can allow other intervals to be joined. Iteratively join
+ // until we make no progress.
+ bool ProgressMade = true;
+ while (ProgressMade) {
+ ProgressMade = false;
+
+ for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
+ MachineInstr *&TheCopy = TryAgainList[i];
+ if (!TheCopy)
+ continue;
+
+ bool Again = false;
+ bool Success = JoinCopy(TheCopy, Again);
+ if (Success || !Again) {
+ TheCopy = 0; // Mark this one as done.
+ ProgressMade = true;
+ }
+ }
+ }
+}
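
The retry loop in joinIntervals is a generic worklist-to-fixpoint pattern. A sketch with JoinCopy abstracted behind a callable (hypothetical template, not LLVM code):

#include <vector>

template <typename Item, typename TryFn>
void retryUntilFixpoint(std::vector<Item *> &worklist, TryFn tryJoin) {
  bool progress = true;
  while (progress) {
    progress = false;
    for (typename std::vector<Item *>::iterator I = worklist.begin(),
         E = worklist.end(); I != E; ++I) {
      if (!*I) continue;
      bool again = false;
      if (tryJoin(*I, again) || !again) {
        *I = 0;            // mark done, exactly like TryAgainList above
        progress = true;
      }
    }
  }
}
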
+
+void RegisterCoalescer::releaseMemory() {
+ JoinedCopies.clear();
+ ReMatCopies.clear();
+ ReMatDefs.clear();
+}
+
+bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
+ mf_ = &fn;
+ mri_ = &fn.getRegInfo();
+ tm_ = &fn.getTarget();
+ tri_ = tm_->getRegisterInfo();
+ tii_ = tm_->getInstrInfo();
+ li_ = &getAnalysis<LiveIntervals>();
+ ldv_ = &getAnalysis<LiveDebugVariables>();
+ AA = &getAnalysis<AliasAnalysis>();
+ loopInfo = &getAnalysis<MachineLoopInfo>();
+
+ DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
+ << "********** Function: "
+ << ((Value*)mf_->getFunction())->getName() << '\n');
+
+ if (VerifyCoalescing)
+ mf_->verify(this, "Before register coalescing");
+
+ RegClassInfo.runOnMachineFunction(fn);
+
+ // Join (coalesce) intervals if requested.
+ if (EnableJoining) {
+ joinIntervals();
+ DEBUG({
+ dbgs() << "********** INTERVALS POST JOINING **********\n";
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+ I != E; ++I){
+ I->second->print(dbgs(), tri_);
+ dbgs() << "\n";
+ }
+ });
+ }
+
+ // Perform a final pass over the instructions and compute spill weights
+ // and remove identity moves.
+ SmallVector<unsigned, 4> DeadDefs;
+ for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ) {
+ MachineInstr *MI = mii;
+ if (JoinedCopies.count(MI)) {
+ // Delete all coalesced copies.
+ bool DoDelete = true;
+ assert(MI->isCopyLike() && "Unrecognized copy instruction");
+ unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ MI->getNumOperands() > 2)
+ // Do not delete extract_subreg, insert_subreg of physical
+ // registers unless the definition is dead, e.g.
+ // %D0<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
+ // or else the scavenger may complain. LowerSubregs will
+ // delete them later.
+ DoDelete = false;
+
+ if (MI->allDefsAreDead()) {
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ li_->hasInterval(SrcReg))
+ li_->shrinkToUses(&li_->getInterval(SrcReg));
+ DoDelete = true;
+ }
+ if (!DoDelete) {
+ // We need the instruction to adjust liveness, so make it a KILL.
+ if (MI->isSubregToReg()) {
+ MI->RemoveOperand(3);
+ MI->RemoveOperand(1);
+ }
+ MI->setDesc(tii_->get(TargetOpcode::KILL));
+ mii = llvm::next(mii);
+ } else {
+ li_->RemoveMachineInstrFromMaps(MI);
+ mii = mbbi->erase(mii);
+ ++numPeep;
+ }
+ continue;
+ }
+
+ // Now check if this is a remat'ed def instruction which is now dead.
+ if (ReMatDefs.count(MI)) {
+ bool isDead = true;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ DeadDefs.push_back(Reg);
+ if (MO.isDead())
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !mri_->use_nodbg_empty(Reg)) {
+ isDead = false;
+ break;
+ }
+ }
+ if (isDead) {
+ while (!DeadDefs.empty()) {
+ unsigned DeadDef = DeadDefs.back();
+ DeadDefs.pop_back();
+ RemoveDeadDef(li_->getInterval(DeadDef), MI);
+ }
+ li_->RemoveMachineInstrFromMaps(mii);
+ mii = mbbi->erase(mii);
+ continue;
+ } else
+ DeadDefs.clear();
+ }
+
+ ++mii;
+
+ // Check for now unnecessary kill flags.
+ if (li_->isNotInMIMap(MI)) continue;
+ SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !MO.isKill()) continue;
+ unsigned reg = MO.getReg();
+ if (!reg || !li_->hasInterval(reg)) continue;
+ if (!li_->getInterval(reg).killedAt(DefIdx)) {
+ MO.setIsKill(false);
+ continue;
+ }
+ // When leaving a kill flag on a physreg, check if any subregs should
+ // remain alive.
+ if (!TargetRegisterInfo::isPhysicalRegister(reg))
+ continue;
+ for (const unsigned *SR = tri_->getSubRegisters(reg);
+ unsigned S = *SR; ++SR)
+ if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
+ MI->addRegisterDefined(S, tri_);
+ }
+ }
+ }
+
+ DEBUG(dump());
+ DEBUG(ldv_->dump());
+ if (VerifyCoalescing)
+ mf_->verify(this, "After register coalescing");
+ return true;
+}
+
+/// print - Implement the dump method.
+void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
+ li_->print(O, m);
+}
+
+RegisterCoalescer *llvm::createRegisterCoalescer() {
+ return new RegisterCoalescer();
+}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/RegisterCoalescer.h
index 92f6c64..4131d91 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,4 +1,4 @@
-//===-- SimpleRegisterCoalescing.h - Register Coalescing --------*- C++ -*-===//
+//===-- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,37 +7,38 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a simple register copy coalescing phase.
+// This file contains the abstract interface for register coalescers,
+// allowing them to interact with and query register allocators.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
-#define LLVM_CODEGEN_SIMPLE_REGISTER_COALESCING_H
-
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "RegisterClassInfo.h"
+#include "llvm/Support/IncludeFile.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+#ifndef LLVM_CODEGEN_REGISTER_COALESCER_H
+#define LLVM_CODEGEN_REGISTER_COALESCER_H
namespace llvm {
- class SimpleRegisterCoalescing;
- class LiveDebugVariables;
+
+ class MachineFunction;
+ class RegallocQuery;
+ class AnalysisUsage;
+ class MachineInstr;
class TargetRegisterInfo;
+ class TargetRegisterClass;
class TargetInstrInfo;
+ class LiveDebugVariables;
class VirtRegMap;
class MachineLoopInfo;
- /// CopyRec - Representation for copy instructions in coalescer queue.
- ///
- struct CopyRec {
- MachineInstr *MI;
- unsigned LoopDepth;
- CopyRec(MachineInstr *mi, unsigned depth)
- : MI(mi), LoopDepth(depth) {}
- };
+ class CoalescerPair;
- class SimpleRegisterCoalescing : public MachineFunctionPass,
- public RegisterCoalescer {
+ /// An abstract interface for register coalescers. Coalescers must
+ /// implement this interface to be part of the coalescer analysis
+ /// group.
+ class RegisterCoalescer : public MachineFunctionPass {
MachineFunction* mf_;
MachineRegisterInfo* mri_;
const TargetMachine* tm_;
@@ -61,41 +62,20 @@ namespace llvm {
/// been remat'ed.
SmallPtrSet<MachineInstr*, 8> ReMatDefs;
- public:
- static char ID; // Pass identifcation, replacement for typeid
- SimpleRegisterCoalescing() : MachineFunctionPass(ID) {
- initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry());
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- virtual void releaseMemory();
-
- /// runOnMachineFunction - pass entry point
- virtual bool runOnMachineFunction(MachineFunction&);
-
- bool coalesceFunction(MachineFunction &mf, RegallocQuery &) {
- // This runs as an independent pass, so don't do anything.
- return false;
- }
-
- /// print - Implement the dump method.
- virtual void print(raw_ostream &O, const Module* = 0) const;
-
- private:
/// joinIntervals - join compatible live intervals
void joinIntervals();
/// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting
/// copies that cannot yet be coalesced into the "TryAgain" list.
void CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<CopyRec> &TryAgain);
+ std::vector<MachineInstr*> &TryAgain);
/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
/// possible to coalesce this interval, but it may be possible if other
/// things get coalesced, then it returns true by reference in 'Again'.
- bool JoinCopy(CopyRec &TheCopy, bool &Again);
+ bool JoinCopy(MachineInstr *TheCopy, bool &Again);
/// JoinIntervals - Attempt to join these two intervals. On failure, this
/// returns false. The output "SrcInt" will not have been modified, so we can
@@ -155,8 +135,109 @@ namespace llvm {
/// markAsJoined - Remember that CopyMI has already been joined.
void markAsJoined(MachineInstr *CopyMI);
+
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ RegisterCoalescer() : MachineFunctionPass(ID) {
+ initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Register allocators must call this from their own
+ /// getAnalysisUsage to cover the case where the coalescer is not
+ /// a Pass in the proper sense and isn't managed by PassManager.
+ /// PassManager needs to know which analyses to make available and
+ /// which to invalidate when running the register allocator or any
+ /// pass that might call coalescing. The long-term solution is to
+ /// allow hierarchies of PassManagers.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ /// runOnMachineFunction - pass entry point
+ virtual bool runOnMachineFunction(MachineFunction&);
+
+ /// print - Implement the dump method.
+ virtual void print(raw_ostream &O, const Module* = 0) const;
};
+ /// CoalescerPair - A helper class for register coalescers. When deciding if
+ /// two registers can be coalesced, CoalescerPair can determine if a copy
+ /// instruction would become an identity copy after coalescing.
+ class CoalescerPair {
+ const TargetInstrInfo &tii_;
+ const TargetRegisterInfo &tri_;
+
+ /// dstReg_ - The register that will be left after coalescing. It can be a
+ /// virtual or physical register.
+ unsigned dstReg_;
+
+ /// srcReg_ - the virtual register that will be coalesced into dstReg.
+ unsigned srcReg_;
+
+ /// subIdx_ - The subregister index of srcReg_ in dstReg_. It is possible to
+ /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a
+ /// virtual register.
+ unsigned subIdx_;
+
+ /// partial_ - True when the original copy was a partial subregister copy.
+ bool partial_;
+
+ /// crossClass_ - True when both regs are virtual, and newRC is constrained.
+ bool crossClass_;
+
+ /// flipped_ - True when DstReg and SrcReg are reversed from the original copy
+ /// instruction.
+ bool flipped_;
+
+ /// newRC_ - The register class of the coalesced register, or NULL if dstReg_
+ /// is a physreg.
+ const TargetRegisterClass *newRC_;
+
+ public:
+ CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri)
+ : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0),
+ partial_(false), crossClass_(false), flipped_(false), newRC_(0) {}
+
+ /// setRegisters - set registers to match the copy instruction MI. Return
+ /// false if MI is not a coalescable copy instruction.
+ bool setRegisters(const MachineInstr*);
+
+ /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible
+ /// because dstReg_ is a physical register, or subIdx_ is set.
+ bool flip();
+
+ /// isCoalescable - Return true if MI is a copy instruction that will become
+ /// an identity copy after coalescing.
+ bool isCoalescable(const MachineInstr*) const;
+
+ /// isPhys - Return true if DstReg is a physical register.
+ bool isPhys() const { return !newRC_; }
+
+ /// isPartial - Return true if the original copy instruction did not copy the
+ /// full register, but was a subreg operation.
+ bool isPartial() const { return partial_; }
+
+ /// isCrossClass - Return true if DstReg is virtual and NewRC is a smaller
+ /// register class than DstReg's.
+ bool isCrossClass() const { return crossClass_; }
+
+ /// isFlipped - Return true when getSrcReg is the register being defined by
+ /// the original copy instruction.
+ bool isFlipped() const { return flipped_; }
+
+ /// getDstReg - Return the register (virtual or physical) that will remain
+ /// after coalescing.
+ unsigned getDstReg() const { return dstReg_; }
+
+ /// getSrcReg - Return the virtual register that will be coalesced away.
+ unsigned getSrcReg() const { return srcReg_; }
+
+ /// getSubIdx - Return the subregister index in DstReg that SrcReg will be
+ /// coalesced into, or 0.
+ unsigned getSubIdx() const { return subIdx_; }
+
+ /// getNewRC - Return the register class of the coalesced register.
+ const TargetRegisterClass *getNewRC() const { return newRC_; }
+ };
} // End llvm namespace
#endif
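
The identity-copy question CoalescerPair answers can be stated compactly, ignoring subregister indices (which setRegisters handles in full): map both copy operands through the coalescing and compare. A sketch:

bool becomesIdentityCopy(unsigned copyDst, unsigned copySrc,
                         unsigned srcReg, unsigned dstReg) {
  unsigned d = (copyDst == srcReg) ? dstReg : copyDst;  // remap SrcReg -> DstReg
  unsigned s = (copySrc == srcReg) ? dstReg : copySrc;
  return d == s;  // both sides land on the same register
}
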
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
index c8de382..8b02ec4 100644
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -434,8 +434,7 @@ namespace llvm {
rcEnd = tri->regclass_end();
rcItr != rcEnd; ++rcItr) {
const TargetRegisterClass *trc = *rcItr;
- unsigned capacity = std::distance(trc->allocation_order_begin(*mf),
- trc->allocation_order_end(*mf));
+ unsigned capacity = trc->getRawAllocationOrder(*mf).size();
if (capacity != 0)
capacityMap[trc] = capacity;
@@ -482,8 +481,7 @@ namespace llvm {
rcItr != rcEnd; ++rcItr) {
const TargetRegisterClass *trc = *rcItr;
- if (trc->allocation_order_begin(*mf) ==
- trc->allocation_order_end(*mf))
+ if (trc->getRawAllocationOrder(*mf).empty())
continue;
unsigned worstAtI = getWorst(li->reg, trc);
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1302395..21375b2 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -19,23 +19,33 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <climits>
using namespace llvm;
+#ifndef NDEBUG
+cl::opt<bool> StressSchedOpt(
+ "stress-sched", cl::Hidden, cl::init(false),
+ cl::desc("Stress test instruction scheduling"));
+#endif
+
ScheduleDAG::ScheduleDAG(MachineFunction &mf)
: TM(mf.getTarget()),
TII(TM.getInstrInfo()),
TRI(TM.getRegisterInfo()),
MF(mf), MRI(mf.getRegInfo()),
EntrySU(), ExitSU() {
+#ifndef NDEBUG
+ StressSched = StressSchedOpt;
+#endif
}
ScheduleDAG::~ScheduleDAG() {}
/// getInstrDesc helper to handle SDNodes.
-const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
if (!Node || !Node->isMachineOpcode()) return NULL;
return &TII->get(Node->getMachineOpcode());
}
@@ -307,6 +317,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
+ if (I->isAssignedRegDep())
+ dbgs() << " Reg=" << G->TRI->getName(I->getReg());
dbgs() << "\n";
}
}
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index 6b7a8c64..f8b1bc7 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -45,6 +45,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
unsigned Reg = 0;
for (SUnit::const_succ_iterator II = SU->Succs.begin(),
EE = SU->Succs.end(); II != EE; ++II) {
+ if (II->isCtrl()) continue; // ignore chain preds
if (II->getReg()) {
Reg = II->getReg();
break;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 2363df4..446adfc 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -21,10 +21,11 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
@@ -205,7 +206,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
bool UnitLatencies = ForceUnitLatencies();
// Ask the target if address-backscheduling is desirable, and if so how much.
- const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
unsigned SpecialAddressLatency = ST.getSpecialAddressLatency();
// Remove any stale debug info; sometimes BuildSchedGraph is called again
@@ -236,13 +237,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
continue;
}
- const TargetInstrDesc &TID = MI->getDesc();
- assert(!TID.isTerminator() && !MI->isLabel() &&
+ const MCInstrDesc &MCID = MI->getDesc();
+ assert(!MCID.isTerminator() && !MI->isLabel() &&
"Cannot schedule terminators or labels!");
// Create the SUnit for this MI.
SUnit *SU = NewSUnit(MI);
- SU->isCall = TID.isCall();
- SU->isCommutable = TID.isCommutable();
+ SU->isCall = MCID.isCall();
+ SU->isCommutable = MCID.isCommutable();
// Assign the Latency field of SU using target-provided information.
if (UnitLatencies)
@@ -309,13 +310,13 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
if (SpecialAddressLatency != 0 && !UnitLatencies &&
UseSU != &ExitSU) {
MachineInstr *UseMI = UseSU->getInstr();
- const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
if (RegUseIndex >= 0 &&
- (UseTID.mayLoad() || UseTID.mayStore()) &&
- (unsigned)RegUseIndex < UseTID.getNumOperands() &&
- UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+ (UseMCID.mayLoad() || UseMCID.mayStore()) &&
+ (unsigned)RegUseIndex < UseMCID.getNumOperands() &&
+ UseMCID.OpInfo[RegUseIndex].isLookupPtrRegClass())
LDataLatency += SpecialAddressLatency;
}
// Adjust the dependence latency using operand def/use
@@ -352,17 +353,17 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
unsigned Count = I->second.second;
const MachineInstr *UseMI = UseMO->getParent();
unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
- const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
// TODO: If we knew the total depth of the region here, we could
// handle the case where the whole loop is inside the region but
// is large enough that the isScheduleHigh trick isn't needed.
- if (UseMOIdx < UseTID.getNumOperands()) {
+ if (UseMOIdx < UseMCID.getNumOperands()) {
// Currently, we only support scheduling regions consisting of
// single basic blocks. Check to see if the instruction is in
// the same region by checking to see if it has the same parent.
if (UseMI->getParent() != MI->getParent()) {
unsigned Latency = SU->Latency;
- if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+ if (UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass())
Latency += SpecialAddressLatency;
// This is a wild guess as to the portion of the latency which
// will be overlapped by work done outside the current
@@ -374,7 +375,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
/*isMustAlias=*/false,
/*isArtificial=*/true));
} else if (SpecialAddressLatency > 0 &&
- UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+ UseMCID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
// The entire loop body is within the current scheduling region
// and the latency of this operation is assumed to be greater
// than the latency of the loop.
@@ -417,9 +418,9 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
+ if (MCID.isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
- (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ (!MCID.mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
// references, even those that are known to not alias.
for (std::map<const Value *, SUnit *>::iterator I =
@@ -458,7 +459,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
PendingLoads.clear();
AliasMemDefs.clear();
AliasMemUses.clear();
- } else if (TID.mayStore()) {
+ } else if (MCID.mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
@@ -514,7 +515,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
/*Reg=*/0, /*isNormalMemory=*/false,
/*isMustAlias=*/false,
/*isArtificial=*/true));
- } else if (TID.mayLoad()) {
+ } else if (MCID.mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index e6d7ded..0e005d3 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -16,11 +16,11 @@
#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
using namespace llvm;
@@ -115,12 +115,12 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
// Use the itinerary for the underlying instruction to check for
// free FU's in the scoreboard at the appropriate future cycles.
- const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
- if (TID == NULL) {
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ if (MCID == NULL) {
// Don't check hazards for non-machineinstr Nodes.
return NoHazard;
}
- unsigned idx = TID->getSchedClass();
+ unsigned idx = MCID->getSchedClass();
for (const InstrStage *IS = ItinData->beginStage(idx),
*E = ItinData->endStage(idx); IS != E; ++IS) {
// We must find one of the stage's units free for every cycle the
@@ -173,16 +173,16 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
// Use the itinerary for the underlying instruction to reserve FU's
// in the scoreboard at the appropriate future cycles.
- const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
- assert(TID && "The scheduler must filter non-machineinstrs");
- if (DAG->TII->isZeroCost(TID->Opcode))
+ const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
+ assert(MCID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(MCID->Opcode))
return;
++IssueCount;
unsigned cycle = 0;
- unsigned idx = TID->getSchedClass();
+ unsigned idx = MCID->getSchedClass();
for (const InstrStage *IS = ItinData->beginStage(idx),
*E = ItinData->endStage(idx); IS != E; ++IS) {
// We must reserve one of the stage's units for every cycle the
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index e843f5f..4f0d2ca 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -138,6 +138,10 @@ namespace {
SDValue PromoteExtend(SDValue Op);
bool PromoteLoad(SDValue Op);
+ void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType);
+
/// combine - call the node-specific routine that knows how to fold each
/// particular type of node. If that doesn't do anything, try the
/// target-specific DAG combines.
@@ -234,6 +238,9 @@ namespace {
SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits = true);
+ SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
@@ -994,7 +1001,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
dbgs() << '\n');
-
+
// Transfer debug value.
DAG.TransferDbgValues(SDValue(N, 0), RV);
WorkListRemover DeadNodes(*this);
@@ -1303,16 +1310,6 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
return SDValue();
}
-/// isCarryMaterialization - Returns true if V is an ADDE node that is known to
-/// return 0 or 1 depending on the carry flag.
-static bool isCarryMaterialization(SDValue V) {
- if (V.getOpcode() != ISD::ADDE)
- return false;
-
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(0));
- return C && C->isNullValue() && V.getOperand(0) == V.getOperand(1);
-}
-
SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1476,18 +1473,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
- // add (adde 0, 0, glue), X -> adde X, 0, glue
- if (N0->hasOneUse() && isCarryMaterialization(N0))
- return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
- DAG.getVTList(VT, MVT::Glue), N1, N0.getOperand(0),
- N0.getOperand(2));
-
- // add X, (adde 0, 0, glue) -> adde X, 0, glue
- if (N1->hasOneUse() && isCarryMaterialization(N1))
- return DAG.getNode(ISD::ADDE, N->getDebugLoc(),
- DAG.getVTList(VT, MVT::Glue), N0, N1.getOperand(0),
- N1.getOperand(2));
-
return SDValue();
}
@@ -1531,16 +1516,6 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
N->getDebugLoc(), MVT::Glue));
}
- // addc (adde 0, 0, glue), X -> adde X, 0, glue
- if (N0->hasOneUse() && isCarryMaterialization(N0))
- return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N1,
- DAG.getConstant(0, VT), N0.getOperand(2));
-
- // addc X, (adde 0, 0, glue) -> adde X, 0, glue
- if (N1->hasOneUse() && isCarryMaterialization(N1))
- return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), N0,
- DAG.getConstant(0, VT), N1.getOperand(2));
-
return SDValue();
}
@@ -1591,6 +1566,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
+ ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
+ dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
EVT VT = N0.getValueType();
// fold vector ops
@@ -1622,6 +1599,12 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+ // fold C2-(A+C1) -> (C2-C1)-A
+ if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
+ SDValue NewC = DAG.getConstant((N0C->getAPIntValue() - N1C1->getAPIntValue()), VT);
+ return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
+ N1.getOperand(0));
+ }
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
@@ -2508,6 +2491,244 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue();
}
+/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16
+///
+SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
+ bool DemandHighBits) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
+ bool LookPassAnd0 = false;
+ bool LookPassAnd1 = false;
+ if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
+ std::swap(N0, N1);
+ if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() == ISD::AND) {
+ if (!N0.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!N01C || N01C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N0 = N0.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ if (N1.getOpcode() == ISD::AND) {
+ if (!N1.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N11C || N11C->getZExtValue() != 0xFF)
+ return SDValue();
+ N1 = N1.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
+ std::swap(N0, N1);
+ if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
+ return SDValue();
+ if (!N0.getNode()->hasOneUse() ||
+ !N1.getNode()->hasOneUse())
+ return SDValue();
+
+ ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (!N01C || !N11C)
+ return SDValue();
+ if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
+ return SDValue();
+
+ // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
+ SDValue N00 = N0->getOperand(0);
+ if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
+ if (!N00.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
+ if (!N001C || N001C->getZExtValue() != 0xFF)
+ return SDValue();
+ N00 = N00.getOperand(0);
+ LookPassAnd0 = true;
+ }
+
+ SDValue N10 = N1->getOperand(0);
+ if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
+ if (!N10.getNode()->hasOneUse())
+ return SDValue();
+ ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
+ if (!N101C || N101C->getZExtValue() != 0xFF00)
+ return SDValue();
+ N10 = N10.getOperand(0);
+ LookPassAnd1 = true;
+ }
+
+ if (N00 != N10)
+ return SDValue();
+
+ // Make sure everything beyond the low halfword is zero since the SRL 16
+ // will clear the top bits.
+ unsigned OpSizeInBits = VT.getSizeInBits();
+ if (DemandHighBits && OpSizeInBits > 16 &&
+ (!LookPassAnd0 || !LookPassAnd1) &&
+ !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
+ return SDValue();
+
+ SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+ if (OpSizeInBits > 16)
+ Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+ DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
+ return Res;
+}
+
+/// isBSwapHWordElement - Return true if the specified node is an element
+/// that makes up a 32-bit packed halfword byteswap, i.e.
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
+ if (!N.getNode()->hasOneUse())
+ return false;
+
+ unsigned Opc = N.getOpcode();
+ if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
+ return false;
+
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!N1C)
+ return false;
+
+ unsigned Num;
+ switch (N1C->getZExtValue()) {
+ default:
+ return false;
+ case 0xFF: Num = 0; break;
+ case 0xFF00: Num = 1; break;
+ case 0xFF0000: Num = 2; break;
+ case 0xFF000000: Num = 3; break;
+ }
+
+ // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
+ SDValue N0 = N.getOperand(0);
+ if (Opc == ISD::AND) {
+ if (Num == 0 || Num == 2) {
+ // (x >> 8) & 0xff
+ // (x >> 8) & 0xff0000
+ if (N0.getOpcode() != ISD::SRL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else {
+ // (x << 8) & 0xff00
+ // (x << 8) & 0xff000000
+ if (N0.getOpcode() != ISD::SHL)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+ } else if (Opc == ISD::SHL) {
+ // (x & 0xff) << 8
+ // (x & 0xff0000) << 8
+ if (Num != 0 && Num != 2)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ } else { // Opc == ISD::SRL
+ // (x & 0xff00) >> 8
+ // (x & 0xff000000) >> 8
+ if (Num != 1 && Num != 3)
+ return false;
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
+ if (!C || C->getZExtValue() != 8)
+ return false;
+ }
+
+ if (Parts[Num])
+ return false;
+
+ Parts[Num] = N0.getOperand(0).getNode();
+ return true;
+}
+
+/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
+/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
+/// => (rotl (bswap x), 16)
+SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
+ if (!LegalOperations)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+ if (!TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
+ // Look for either
+ // (or (or (and), (and)), (or (and), (and)))
+ // (or (or (or (and), (and)), (and)), (and))
+ if (N0.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+
+ if (N1.getOpcode() == ISD::OR) {
+ // (or (or (and), (and)), (or (and), (and)))
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ SDValue N010 = N01.getOperand(0);
+ if (!isBSwapHWordElement(N010, Parts))
+ return SDValue();
+ SDValue N011 = N01.getOperand(1);
+ if (!isBSwapHWordElement(N011, Parts))
+ return SDValue();
+ } else {
+ // (or (or (or (and), (and)), (and)), (and))
+ if (!isBSwapHWordElement(N1, Parts))
+ return SDValue();
+ if (!isBSwapHWordElement(N01, Parts))
+ return SDValue();
+ if (N00.getOpcode() != ISD::OR)
+ return SDValue();
+ SDValue N000 = N00.getOperand(0);
+ if (!isBSwapHWordElement(N000, Parts))
+ return SDValue();
+ SDValue N001 = N00.getOperand(1);
+ if (!isBSwapHWordElement(N001, Parts))
+ return SDValue();
+ }
+
+ // Make sure the parts are all coming from the same node.
+ if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
+ return SDValue();
+
+ SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+ SDValue(Parts[0],0));
+
+ // Result of the bswap should be rotated by 16. If it's not legal, then
+ // do (x << 16) | (x >> 16).
+ SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
+ if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
+ return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
+ else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
+ return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
+ return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
+ DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
+}
+
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -2543,6 +2764,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
// fold (or x, c) -> c iff (x & ~c) == 0
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;
+
+ // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl 16)
+ SDValue BSwap = MatchBSwapHWord(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+ BSwap = MatchBSwapHWordLow(N, N0, N1);
+ if (BSwap.getNode() != 0)
+ return BSwap;
+
// reassociate or
SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
if (ROR.getNode() != 0)
@@ -3030,6 +3260,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl x, 0) -> x
if (N1C && N1C->isNullValue())
return N0;
+ // fold (shl undef, x) -> 0
+ if (N0.getOpcode() == ISD::UNDEF)
+ return DAG.getConstant(0, VT);
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
@@ -3696,6 +3929,28 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
return true;
}
+void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
+ SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
+ ISD::NodeType ExtType) {
+ // Extend SetCC uses if necessary.
+ for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
+ SDNode *SetCC = SetCCs[i];
+ SmallVector<SDValue, 4> Ops;
+
+ for (unsigned j = 0; j != 2; ++j) {
+ SDValue SOp = SetCC->getOperand(j);
+ if (SOp == Trunc)
+ Ops.push_back(ExtLoad);
+ else
+ Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
+ }
+
+ Ops.push_back(SetCC->getOperand(2));
+ CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
+ &Ops[0], Ops.size()));
+ }
+}
+
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -3784,27 +4039,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
-
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
- }
-
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -3832,6 +4068,45 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
}
}
+ // fold (sext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (sextload x), (sext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.sext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::SIGN_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
+ }
+
if (N0.getOpcode() == ISD::SETCC) {
// sext(setcc) -> sext_in_reg(vsetcc) for vectors.
// Only do this before legalize for now.
@@ -3990,27 +4265,48 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+ }
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
+ // fold (zext (and/or/xor (load x), cst)) ->
+ // (and/or/xor (zextload x), (zext cst))
+ if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
+ N0.getOpcode() == ISD::XOR) &&
+ isa<LoadSDNode>(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
+ (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
+ if (LN0->getExtensionType() != ISD::SEXTLOAD) {
+ bool DoXform = true;
+ SmallVector<SDNode*, 4> SetCCs;
+ if (!N0.hasOneUse())
+ DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
+ SetCCs, TLI);
+ if (DoXform) {
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
+ LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(),
+ LN0->getMemoryVT(),
+ LN0->isVolatile(),
+ LN0->isNonTemporal(),
+ LN0->getAlignment());
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask = Mask.zext(VT.getSizeInBits());
+ SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
+ ExtLoad, DAG.getConstant(Mask, VT));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
+ N0.getOperand(0).getDebugLoc(),
+ N0.getOperand(0).getValueType(), ExtLoad);
+ CombineTo(N, And);
+ CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ZERO_EXTEND);
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -4198,27 +4494,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
-
- // Extend SetCC uses if necessary.
- for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
- SDNode *SetCC = SetCCs[i];
- SmallVector<SDValue, 4> Ops;
-
- for (unsigned j = 0; j != 2; ++j) {
- SDValue SOp = SetCC->getOperand(j);
- if (SOp == Trunc)
- Ops.push_back(ExtLoad);
- else
- Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
- N->getDebugLoc(), VT, SOp));
- }
-
- Ops.push_back(SetCC->getOperand(2));
- CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
- SetCC->getValueType(0),
- &Ops[0], Ops.size()));
- }
-
+ ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
+ ISD::ANY_EXTEND);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -4555,6 +4832,16 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+
+ // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
+ if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
+ SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+ N0.getOperand(1), false);
+ if (BSwap.getNode() != 0)
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+ BSwap, N1);
+ }
+
return SDValue();
}
@@ -5180,7 +5467,8 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
// fold (sint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ (Level == llvm::Unrestricted ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
// If the input is a legal type, and SINT_TO_FP is not legal on this target,
@@ -5204,7 +5492,8 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
// fold (uint_to_fp c1) -> c1fp
if (N0C && OpVT != MVT::ppcf128 &&
// ...but only if the target supports immediate floating-point values
- (Level == llvm::Unrestricted || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
+ (Level == llvm::Unrestricted ||
+ TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
// If the input is a legal type, and UINT_TO_FP is not legal on this target,
@@ -5648,12 +5937,17 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Now check for #3 and #4.
bool RealUse = false;
+
+ // Caches for hasPredecessorHelper
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+
for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
E = Ptr.getNode()->use_end(); I != E; ++I) {
SDNode *Use = *I;
if (Use == N)
continue;
- if (Use->isPredecessorOf(N))
+ if (N->hasPredecessorHelper(Use, Visited, Worklist))
return false;
if (!((Use->getOpcode() == ISD::LOAD &&
@@ -6431,8 +6725,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
SDValue Shorter =
GetDemandedBits(Value,
- APInt::getLowBitsSet(Value.getValueSizeInBits(),
- ST->getMemoryVT().getSizeInBits()));
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits()));
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
@@ -7156,7 +7451,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
const TargetData &TD = *TLI.getTargetData();
// Create a ConstantArray of the two constants.
- Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
+ Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
TD.getPrefTypeAlignment(FPTy));
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
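A note on the DAGCombiner changes above, for readers tracing the new matchers: they encode plain bit-twiddling identities. The standalone C++ sketch below (not part of the patch; the rotl32 helper and the use of __builtin_bswap32 are illustration-only assumptions) checks the identity behind MatchBSwapHWord, the low-halfword form behind MatchBSwapHWordLow, and the new visitSUB constant fold:

#include <cassert>
#include <cstdint>

// Local helper, not an LLVM API: rotate a 32-bit value left by n (0 < n < 32).
static uint32_t rotl32(uint32_t x, unsigned n) {
  return (x << n) | (x >> (32 - n));
}

int main() {
  uint32_t x = 0xAABBCCDDu;

  // MatchBSwapHWord: the four masked byte moves equal (rotl (bswap x), 16).
  uint32_t packed = ((x & 0x000000FFu) << 8) | ((x & 0x0000FF00u) >> 8) |
                    ((x & 0x00FF0000u) << 8) | ((x & 0xFF000000u) >> 8);
  assert(packed == rotl32(__builtin_bswap32(x), 16));

  // MatchBSwapHWordLow: when the high bits are known zero (the
  // MaskedValueIsZero check), (a >> 8) | (a << 8) is (bswap a) >> 16.
  uint32_t a = x & 0xFFFFu;
  uint32_t low = ((a & 0x00FFu) << 8) | ((a & 0xFF00u) >> 8);
  assert(low == (__builtin_bswap32(a) >> 16));

  // visitSUB: C2 - (A + C1) == (C2 - C1) - A under wrapping arithmetic.
  uint32_t A = 12345u, C1 = 7u, C2 = 100u;
  assert(C2 - (A + C1) == (C2 - C1) - A);
  return 0;
}

When ROTL and ROTR are both unsupported, MatchBSwapHWord falls back to the shl/srl pair, which produces the same value because the two shifted halves occupy disjoint bit positions.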
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 797f174..54a7d43 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -547,7 +547,7 @@ bool FastISel::SelectCall(const User *I) {
case Intrinsic::dbg_value: {
// This form of DBG_VALUE is target-independent.
const DbgValueInst *DI = cast<DbgValueInst>(Call);
- const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
const Value *V = DI->getValue();
if (!V) {
// Currently the optimizer can produce this; insert an undef to
@@ -556,9 +556,14 @@ bool FastISel::SelectCall(const User *I) {
.addReg(0U).addImm(DI->getOffset())
.addMetadata(DI->getVariable());
} else if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
- .addImm(CI->getZExtValue()).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
+ if (CI->getBitWidth() > 64)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addCImm(CI).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
+ else
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
+ .addImm(CI->getZExtValue()).addImm(DI->getOffset())
+ .addMetadata(DI->getVariable());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
.addFPImm(CF).addImm(DI->getOffset())
@@ -847,7 +852,7 @@ FastISel::SelectExtractValue(const User *U) {
return false; // fast-isel can't handle aggregate constants at the moment
// Get the actual result register, which is an offset from the base register.
- unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->idx_begin(), EVI->idx_end());
+ unsigned VTIndex = ComputeLinearIndex(AggTy, EVI->getIndices());
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -1085,7 +1090,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass* RC) {
unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass* RC) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg);
return ResultReg;
@@ -1095,7 +1100,7 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1115,7 +1120,7 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1137,7 +1142,7 @@ unsigned FastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned Op1, bool Op1IsKill,
unsigned Op2, bool Op2IsKill) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1160,7 +1165,7 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1181,7 +1186,7 @@ unsigned FastISel::FastEmitInst_rii(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
uint64_t Imm1, uint64_t Imm2) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1204,7 +1209,7 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
const ConstantFP *FPImm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1226,7 +1231,7 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned Op1, bool Op1IsKill,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1248,7 +1253,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
@@ -1264,7 +1269,7 @@ unsigned FastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm1, uint64_t Imm2) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
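For context on the dbg_value change above (and the matching one in InstrEmitter::EmitDbgValue further down): a plain MachineOperand immediate holds an int64_t, so integer constants wider than 64 bits now travel as ConstantInt (CImm) operands instead of being dropped. A minimal sketch of that width test, with a hypothetical helper name that is not LLVM API:

#include <cstdio>

// Hypothetical stand-in for the decision FastISel::SelectCall now makes.
static const char *dbgValueOperandKind(unsigned bitWidth) {
  // addImm carries an int64_t; anything wider must go through addCImm.
  return bitWidth > 64 ? "addCImm" : "addImm";
}

int main() {
  std::printf("i32  -> %s\n", dbgValueOperandKind(32));   // addImm
  std::printf("i64  -> %s\n", dbgValueOperandKind(64));   // addImm
  std::printf("i128 -> %s\n", dbgValueOperandKind(128));  // addCImm
  return 0;
}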
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index cb49a80..f0f4743 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -76,6 +76,12 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// the CopyToReg'd destination register instead of creating a new vreg.
bool MatchReg = true;
const TargetRegisterClass *UseRC = NULL;
+ EVT VT = Node->getValueType(ResNo);
+
+ // Stick to the preferred register classes for legal types.
+ if (TLI->isTypeLegal(VT))
+ UseRC = TLI->getRegClassFor(VT);
+
if (!IsClone && !IsCloned)
for (SDNode::use_iterator UI = Node->use_begin(), E = Node->use_end();
UI != E; ++UI) {
@@ -100,10 +106,10 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
continue;
Match = false;
if (User->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(User->getMachineOpcode());
+ const MCInstrDesc &II = TII->get(User->getMachineOpcode());
const TargetRegisterClass *RC = 0;
if (i+II.getNumDefs() < II.getNumOperands())
- RC = II.OpInfo[i+II.getNumDefs()].getRegClass(TRI);
+ RC = TII->getRegClass(II, i+II.getNumDefs(), TRI);
if (!UseRC)
UseRC = RC;
else if (RC) {
@@ -121,10 +127,9 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
break;
}
- EVT VT = Node->getValueType(ResNo);
const TargetRegisterClass *SrcRC = 0, *DstRC = 0;
SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT);
-
+
// Figure out the register class to create for the destreg.
if (VRBase) {
DstRC = MRI->getRegClass(VRBase);
@@ -173,7 +178,7 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
}
void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
- const TargetInstrDesc &II,
+ const MCInstrDesc &II,
bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap) {
assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
@@ -184,7 +189,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
// is a vreg in the same register class, use the CopyToReg'd destination
// register instead of creating a new vreg.
unsigned VRBase = 0;
- const TargetRegisterClass *RC = II.OpInfo[i].getRegClass(TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(II, i, TRI);
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
unsigned NumResults = CountResults(Node);
@@ -237,7 +242,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Add an IMPLICIT_DEF instruction before every use.
unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
- // IMPLICIT_DEF can produce any type of result so its TargetInstrDesc
+ // IMPLICIT_DEF can produce any type of result so its MCInstrDesc
// does not include operand register class info.
if (!VReg) {
const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType());
@@ -260,7 +265,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
void
InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
- const TargetInstrDesc *II,
+ const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
@@ -270,9 +275,9 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned VReg = getVR(Op, VRBaseMap);
assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
- const TargetInstrDesc &TID = MI->getDesc();
- bool isOptDef = IIOpNum < TID.getNumOperands() &&
- TID.OpInfo[IIOpNum].isOptionalDef();
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool isOptDef = IIOpNum < MCID.getNumOperands() &&
+ MCID.OpInfo[IIOpNum].isOptionalDef();
// If the instruction requires a register in a different class, create
// a new virtual register and copy the value into it.
@@ -280,8 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg);
const TargetRegisterClass *DstRC = 0;
if (IIOpNum < II->getNumOperands())
- DstRC = II->OpInfo[IIOpNum].getRegClass(TRI);
- assert((DstRC || (TID.isVariadic() && IIOpNum >= TID.getNumOperands())) &&
+ DstRC = TII->getRegClass(*II, IIOpNum, TRI);
+ assert((DstRC || (MCID.isVariadic() && IIOpNum >= MCID.getNumOperands())) &&
"Don't have operand info for this instruction!");
if (DstRC && !SrcRC->hasSuperClassEq(DstRC)) {
unsigned NewVReg = MRI->createVirtualRegister(DstRC);
@@ -307,7 +312,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
while (Idx > 0 &&
MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit())
--Idx;
- bool isTied = MI->getDesc().getOperandConstraint(Idx, TOI::TIED_TO) != -1;
+ bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1;
if (isTied)
isKill = false;
}
@@ -325,7 +330,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
/// assertions only.
void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
- const TargetInstrDesc *II,
+ const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
if (Op.isMachineOpcode()) {
@@ -543,17 +548,18 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
void InstrEmitter::EmitRegSequence(SDNode *Node,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsClone, bool IsCloned) {
- const TargetRegisterClass *RC = TLI->getRegClassFor(Node->getValueType(0));
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
unsigned NewVReg = MRI->createVirtualRegister(RC);
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
TII->get(TargetOpcode::REG_SEQUENCE), NewVReg);
unsigned NumOps = Node->getNumOperands();
- assert((NumOps & 1) == 0 &&
- "REG_SEQUENCE must have an even number of operands!");
- const TargetInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
- for (unsigned i = 0; i != NumOps; ++i) {
+ assert((NumOps & 1) == 1 &&
+ "REG_SEQUENCE must have an odd number of operands!");
+ const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
+ for (unsigned i = 1; i != NumOps; ++i) {
SDValue Op = Node->getOperand(i);
- if (i & 1) {
+ if ((i & 1) == 0) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
@@ -591,7 +597,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL);
}
// Otherwise, we're going to create an instruction here.
- const TargetInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
+ const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE);
MachineInstrBuilder MIB = BuildMI(*MF, DL, II);
if (SD->getKind() == SDDbgValue::SDNODE) {
SDNode *Node = SD->getSDNode();
@@ -610,12 +616,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
} else if (SD->getKind() == SDDbgValue::CONST) {
const Value *V = SD->getConst();
if (const ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // FIXME: SDDbgValue constants aren't updated with legalization, so it's
- // possible to have i128 constants in them at this point. Dwarf writer
- // does not handle i128 constants at the moment so, as a crude workaround,
- // just drop the debug info if this happens.
- if (!CI->getValue().isSignedIntN(64))
- MIB.addReg(0U);
+ if (CI->getBitWidth() > 64)
+ MIB.addCImm(CI);
else
MIB.addImm(CI->getSExtValue());
} else if (const ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
@@ -666,7 +668,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// We want a unique VR for each IMPLICIT_DEF use.
return;
- const TargetInstrDesc &II = TII->get(Opc);
+ const MCInstrDesc &II = TII->get(Opc);
unsigned NumResults = CountResults(Node);
unsigned NodeOperands = CountOperands(Node);
bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0;
@@ -695,9 +697,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
else {
// Collect declared implicit uses.
- const TargetInstrDesc &TID = TII->get(F->getMachineOpcode());
- UsedRegs.append(TID.getImplicitUses(),
- TID.getImplicitUses() + TID.getNumImplicitUses());
+ const MCInstrDesc &MCID = TII->get(F->getMachineOpcode());
+ UsedRegs.append(MCID.getImplicitUses(),
+ MCID.getImplicitUses() + MCID.getNumImplicitUses());
// In addition to declared implicit uses, we must also check for
// direct RegisterSDNode operands.
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
@@ -849,6 +851,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
+ case InlineAsm::Kind_Clobber:
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true,
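The EmitRegSequence hunk above tracks a change in the REG_SEQUENCE operand layout: operand 0 now holds the destination register-class index, followed by (value, sub-register-index) pairs, which is why the operand count must be odd and the walk starts at i = 1 with even i naming the sub-index. A standalone sketch of that layout (all values invented for illustration):

#include <cassert>
#include <cstdio>
#include <vector>

int main() {
  // Operand 0 is the destination register-class id; the rest are
  // (value, subreg-index) pairs, giving an odd operand count overall.
  std::vector<unsigned> ops = {/*rc id*/ 7,
                               /*vreg*/ 100, /*subidx*/ 1,
                               /*vreg*/ 101, /*subidx*/ 2};
  assert((ops.size() & 1) == 1 && "REG_SEQUENCE must have an odd operand count");
  for (unsigned i = 1; i != ops.size(); ++i)
    if ((i & 1) == 0) // even i: subreg index, paired with the value at i-1
      std::printf("rc %u: value %u -> subreg %u\n", ops[0], ops[i - 1], ops[i]);
  return 0;
}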
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 02c044c..19fc044 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -22,7 +22,7 @@
namespace llvm {
-class TargetInstrDesc;
+class MCInstrDesc;
class SDDbgValue;
class InstrEmitter {
@@ -49,7 +49,7 @@ class InstrEmitter {
unsigned ResNo) const;
void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
- const TargetInstrDesc &II,
+ const MCInstrDesc &II,
bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap);
@@ -63,7 +63,7 @@ class InstrEmitter {
/// not in the required register class.
void AddRegisterOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
- const TargetInstrDesc *II,
+ const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
@@ -73,7 +73,7 @@ class InstrEmitter {
/// assertions only.
void AddOperand(MachineInstr *MI, SDValue Op,
unsigned IIOpNum,
- const TargetInstrDesc *II,
+ const MCInstrDesc *II,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 62d777c..d06e2bd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -58,17 +58,6 @@ class SelectionDAGLegalize {
/// against each other, including inserted libcalls.
SmallVector<SDValue, 8> LastCALLSEQ;
- enum LegalizeAction {
- Legal, // The target natively supports this operation.
- Promote, // This operation should be executed in a larger type.
- Expand // Try to expand this to other ops, otherwise use a libcall.
- };
-
- /// ValueTypeActions - This is a bitvector that contains two bits for each
- /// value type, where the two bits correspond to the LegalizeAction enum.
- /// This can be queried with "getTypeAction(VT)".
- TargetLowering::ValueTypeActionImpl ValueTypeActions;
-
/// LegalizedNodes - For nodes that are of legal width, and that have more
/// than one use, this map indicates what regularized operand to use. This
/// allows us to avoid legalizing the same thing more than once.
@@ -87,25 +76,11 @@ class SelectionDAGLegalize {
public:
explicit SelectionDAGLegalize(SelectionDAG &DAG);
- /// getTypeAction - Return how we should legalize values of this type, either
- /// it is already legal or we need to expand it into multiple registers of
- /// smaller integer type, or we need to promote it to a larger type.
- LegalizeAction getTypeAction(EVT VT) const {
- return (LegalizeAction)TLI.getTypeAction(*DAG.getContext(), VT);
- }
-
- /// isTypeLegal - Return true if this type is legal on this target.
- ///
- bool isTypeLegal(EVT VT) const {
- return getTypeAction(VT) == Legal;
- }
-
void LegalizeDAG();
private:
- /// LegalizeOp - We know that the specified value has a legal type.
- /// Recursively ensure that the operands have legal types, then return the
- /// result.
+ /// LegalizeOp - Return a legal replacement for the given operation, with
+ /// all legal operands.
SDValue LegalizeOp(SDValue O);
SDValue OptimizeFloatStore(StoreSDNode *ST);
@@ -220,10 +195,7 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag)
: TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()),
- DAG(dag),
- ValueTypeActions(TLI.getValueTypeActions()) {
- assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
- "Too many value types for ValueTypeActions to hold!");
+ DAG(dag) {
}
void SelectionDAGLegalize::LegalizeDAG() {
@@ -753,7 +725,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
DebugLoc dl = ST->getDebugLoc();
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
if (CFP->getValueType(0) == MVT::f32 &&
- getTypeAction(MVT::i32) == Legal) {
+ TLI.isTypeLegal(MVT::i32)) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
@@ -763,14 +735,14 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
- if (getTypeAction(MVT::i64) == Legal) {
+ if (TLI.isTypeLegal(MVT::i64)) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
}
- if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
// Otherwise, if the target supports 32-bit registers, use 2 32-bit
// stores. If the target supports neither 32- nor 64-bits, this
// xform is certainly not worth it.
@@ -794,10 +766,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
return SDValue(0, 0);
}
-/// LegalizeOp - We know that the specified value has a legal type, and
-/// that its operands are legal. Now ensure that the operation itself
-/// is legal, recursively ensuring that the operands' operations remain
-/// legal.
+/// LegalizeOp - Return a legal replacement for the given operation, with
+/// all legal operands.
SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes.
return Op;
@@ -806,11 +776,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
DebugLoc dl = Node->getDebugLoc();
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert(getTypeAction(Node->getValueType(i)) == Legal &&
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
"Unexpected illegal type!");
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i)
- assert((isTypeLegal(Node->getOperand(i).getValueType()) ||
+ assert((TLI.getTypeAction(*DAG.getContext(),
+ Node->getOperand(i).getValueType()) ==
+ TargetLowering::TypeLegal ||
Node->getOperand(i).getOpcode() == ISD::TargetConstant) &&
"Unexpected illegal type!");
@@ -1354,7 +1327,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
break;
case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
@@ -1374,6 +1347,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
+
+ // If this is a promoted vector load, and the vector element types are
+ // legal, then scalarize it.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector() &&
+ TLI.isTypeLegal(Node->getValueType(0).getScalarType())) {
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ Node->getValueType(0).getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ Node->getValueType(0), &LoadVals[0], LoadVals.size());
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+ }
+
+ // If this is a promoted vector load, and the vector element types are
+ // illegal, create the promoted vector from bitcasted segments.
+ if (ExtType == ISD::EXTLOAD && SrcVT.isVector()) {
+ EVT MemElemTy = Node->getValueType(0).getScalarType();
+ EVT SrcSclrTy = SrcVT.getScalarType();
+ unsigned SizeRatio =
+ (MemElemTy.getSizeInBits() / SrcSclrTy.getSizeInBits());
+
+ SmallVector<SDValue, 8> LoadVals;
+ SmallVector<SDValue, 8> LoadChains;
+ unsigned NumElem = SrcVT.getVectorNumElements();
+ unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8;
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl,
+ SrcVT.getScalarType(),
+ Tmp1, Tmp2, LD->getPointerInfo().getWithOffset(Idx * Stride),
+ SrcVT.getScalarType(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ if (TLI.isBigEndian()) {
+ // MSB (which is garbage, comes first)
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ } else {
+ // LSB (which is data, comes first)
+ for (unsigned i = 0; i<SizeRatio-1; ++i)
+ LoadVals.push_back(DAG.getUNDEF(SrcVT.getScalarType()));
+ LoadVals.push_back(ScalarLoad.getValue(0));
+ }
+ LoadChains.push_back(ScalarLoad.getValue(1));
+ }
+
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &LoadChains[0], LoadChains.size());
+ EVT TempWideVector = EVT::getVectorVT(*DAG.getContext(),
+ SrcVT.getScalarType(), NumElem*SizeRatio);
+ SDValue ValRes = DAG.getNode(ISD::BUILD_VECTOR, dl,
+ TempWideVector, &LoadVals[0], LoadVals.size());
+
+ // Cast to the correct type
+ ValRes = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0), ValRes);
+
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(0)); // Relegalize new nodes.
+ break;
+
+ }
+
// FIXME: This does not work for vectors on most targets. Sign- and
// zero-extend operations are currently folded into extending loads,
// whether they are legal or not, and then we end up here without any
@@ -1548,9 +1606,91 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case TargetLowering::Custom:
Result = TLI.LowerOperation(Result, DAG);
break;
- case Expand:
+ case TargetLowering::Expand:
+
+ EVT WideScalarVT = Tmp3.getValueType().getScalarType();
+ EVT NarrowScalarVT = StVT.getScalarType();
+
+ // The store type is illegal, so we must scalarize the vector store.
+ SmallVector<SDValue, 8> Stores;
+ bool ScalarLegal = TLI.isTypeLegal(WideScalarVT);
+ if (!TLI.isTypeLegal(StVT) && StVT.isVector() && ScalarLegal) {
+ unsigned NumElem = StVT.getVectorNumElements();
+
+ unsigned ScalarSize = StVT.getScalarType().getSizeInBits();
+ // Round odd types to the next pow of two.
+ if (!isPowerOf2_32(ScalarSize))
+ ScalarSize = NextPowerOf2(ScalarSize);
+ // Types smaller than 8 bits are promoted to 8 bits.
+ ScalarSize = std::max<unsigned>(ScalarSize, 8);
+ // Store stride
+ unsigned Stride = ScalarSize/8;
+ assert(isPowerOf2_32(Stride) && "Stride must be a power of two");
+
+ for (unsigned Idx=0; Idx<NumElem; Idx++) {
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ WideScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+
+
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), ScalarSize);
+
+ Ex = DAG.getNode(ISD::TRUNCATE, dl, NVT, Ex);
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
+ // The store type is illegal, so we must scalarize the vector store.
+ // However, the scalar type is also illegal, so we must bitcast the
+ // result and store it in smaller parts.
+ if (!TLI.isTypeLegal(StVT) && StVT.isVector()) {
+ unsigned WideNumElem = StVT.getVectorNumElements();
+ unsigned Stride = NarrowScalarVT.getSizeInBits()/8;
+
+ unsigned SizeRatio =
+ (WideScalarVT.getSizeInBits() / NarrowScalarVT.getSizeInBits());
+
+ EVT CastValueVT = EVT::getVectorVT(*DAG.getContext(), NarrowScalarVT,
+ SizeRatio*WideNumElem);
+
+ // Cast the wide elem vector to wider vec with smaller elem type.
+ // Example <2 x i64> -> <4 x i32>
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl, CastValueVT, Tmp3);
+
+ for (unsigned Idx=0; Idx<WideNumElem*SizeRatio; Idx++) {
+ // Extract element i
+ SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
+ NarrowScalarVT, Tmp3, DAG.getIntPtrConstant(Idx));
+ // bump pointer.
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(Stride));
+
+ // Store this element if it is:
+ // - First element on big endian, or
+ // - Last element on little endian
+ if (( TLI.isBigEndian() && (Idx%SizeRatio == 0)) ||
+ ((!TLI.isBigEndian() && (Idx%SizeRatio == SizeRatio-1)))) {
+ SDValue Store = DAG.getStore(Tmp1, dl, Ex, Tmp2,
+ ST->getPointerInfo().getWithOffset(Idx*Stride),
+ isVolatile, isNonTemporal, Alignment);
+ Stores.push_back(Store);
+ }
+ }
+ Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &Stores[0], Stores.size());
+ break;
+ }
+
// TRUNCSTORE:i16 i32 -> STORE i16
- assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
+ assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
@@ -1709,7 +1849,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
SDValue SignBit;
EVT FloatVT = Tmp2.getValueType();
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
- if (isTypeLegal(IVT)) {
+ if (TLI.isTypeLegal(IVT)) {
// Convert to an integer with the same sign bit.
SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
} else {
@@ -3031,7 +3171,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
EVT VT = Node->getValueType(0);
EVT EltVT = VT.getVectorElementType();
- if (getTypeAction(EltVT) == Promote)
+ if (!TLI.isTypeLegal(EltVT))
EltVT = TLI.getTypeToTransformTo(*DAG.getContext(), EltVT);
unsigned NumElems = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops;
@@ -3184,6 +3324,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
RTLIB::REM_F80, RTLIB::REM_PPCF128));
break;
+ case ISD::FMA:
+ Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64,
+ RTLIB::FMA_F80, RTLIB::FMA_PPCF128));
+ break;
case ISD::FP16_TO_FP32:
Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false));
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 27a466b..e6835d8 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -74,6 +74,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG: R = SoftenFloatRes_FLOG(N); break;
case ISD::FLOG2: R = SoftenFloatRes_FLOG2(N); break;
case ISD::FLOG10: R = SoftenFloatRes_FLOG10(N); break;
+ case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
@@ -294,6 +295,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
NVT, &Op, 1, false, N->getDebugLoc());
}
+SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
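+ // Soften fma by calling the matching libcall (fmaf, fma or fmal) on
+ // the three softened operands.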
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
+ GetSoftenedFloat(N->getOperand(1)),
+ GetSoftenedFloat(N->getOperand(2)) };
+ return MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ NVT, Ops, 3, false, N->getDebugLoc());
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
@@ -837,6 +851,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FLOG: ExpandFloatRes_FLOG(N, Lo, Hi); break;
case ISD::FLOG2: ExpandFloatRes_FLOG2(N, Lo, Hi); break;
case ISD::FLOG10: ExpandFloatRes_FLOG10(N, Lo, Hi); break;
+ case ISD::FMA: ExpandFloatRes_FMA(N, Lo, Hi); break;
case ISD::FMUL: ExpandFloatRes_FMUL(N, Lo, Hi); break;
case ISD::FNEARBYINT: ExpandFloatRes_FNEARBYINT(N, Lo, Hi); break;
case ISD::FNEG: ExpandFloatRes_FNEG(N, Lo, Hi); break;
@@ -989,6 +1004,19 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
GetPairElements(Call, Lo, Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
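+ // Expand fma on a two-part float type through the fma libcall, then
+ // split the call result into its low and high halves.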
+ SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0),
+ RTLIB::FMA_F32,
+ RTLIB::FMA_F64,
+ RTLIB::FMA_F80,
+ RTLIB::FMA_PPCF128),
+ N->getValueType(0), Ops, 3, false,
+ N->getDebugLoc());
+ GetPairElements(Call, Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b8da57f..e7c77dd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -19,6 +19,7 @@
//===----------------------------------------------------------------------===//
#include "LegalizeTypes.h"
+#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -191,10 +192,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
if (NOutVT.bitsEq(NInVT))
// The input promotes to the same size. Convert the promoted value.
return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
- if (NInVT.isVector())
- // Promote vector element via memory load/store.
- return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
- CreateStackStoreLoad(InOp, OutVT));
break;
case TargetLowering::TypeSoftenFloat:
// Promote the integer operand by hand.
@@ -204,8 +201,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
break;
case TargetLowering::TypeScalarizeVector:
// Convert the element to an integer and promote it by hand.
- return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
- BitConvertToInteger(GetScalarizedVector(InOp)));
+ if (!NOutVT.isVector())
+ return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ BitConvertToInteger(GetScalarizedVector(InOp)));
+ break;
case TargetLowering::TypeSplitVector: {
// For example, i32 = BITCAST v2i16 on alpha. Convert the split
// pieces of the input into integers and reassemble in the final type.
@@ -339,8 +338,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
- ISD::AssertZext : ISD::AssertSext, dl,
- NVT, Res, DAG.getValueType(N->getValueType(0)));
+ ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
+ DAG.getValueType(N->getValueType(0).getScalarType()));
}
SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) {
@@ -370,7 +369,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NVT, Res,
DAG.getValueType(N->getOperand(0).getValueType()));
if (N->getOpcode() == ISD::ZERO_EXTEND)
- return DAG.getZeroExtendInReg(Res, dl, N->getOperand(0).getValueType());
+ return DAG.getZeroExtendInReg(Res, dl,
+ N->getOperand(0).getValueType().getScalarType());
assert(N->getOpcode() == ISD::ANY_EXTEND && "Unknown integer extension!");
return Res;
}
@@ -520,20 +520,44 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
+ SDValue InOp = N->getOperand(0);
+ DebugLoc dl = N->getDebugLoc();
- switch (getTypeAction(N->getOperand(0).getValueType())) {
+ switch (getTypeAction(InOp.getValueType())) {
default: llvm_unreachable("Unknown type action!");
case TargetLowering::TypeLegal:
case TargetLowering::TypeExpandInteger:
- Res = N->getOperand(0);
+ Res = InOp;
break;
case TargetLowering::TypePromoteInteger:
- Res = GetPromotedInteger(N->getOperand(0));
+ Res = GetPromotedInteger(InOp);
break;
+ case TargetLowering::TypeSplitVector:
+ EVT InVT = InOp.getValueType();
+ assert(InVT.isVector() && "Cannot split scalar types");
+ unsigned NumElts = InVT.getVectorNumElements();
+ assert(NumElts == NVT.getVectorNumElements() &&
+ "Dst and Src must have the same number of elements");
+ EVT EltVT = InVT.getScalarType();
+ assert(isPowerOf2_32(NumElts) &&
+ "Promoted vector type must be a power of two");
+
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts/2);
+ EVT HalfNVT = EVT::getVectorVT(*DAG.getContext(), NVT.getScalarType(),
+ NumElts/2);
+
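+ // Split the input in half, truncate each half to the corresponding
+ // narrow type, and concatenate the results.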
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+ DAG.getIntPtrConstant(0));
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, InOp,
+ DAG.getIntPtrConstant(NumElts/2));
+ EOp1 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp1);
+ EOp2 = DAG.getNode(ISD::TRUNCATE, dl, HalfNVT, EOp2);
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, NVT, EOp1, EOp2);
}
// Truncate to NVT instead of VT
- return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Res);
+ return DAG.getNode(ISD::TRUNCATE, dl, NVT, Res);
}
SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
@@ -970,7 +994,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SDValue Op = GetPromotedInteger(N->getOperand(0));
Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op);
- return DAG.getZeroExtendInReg(Op, dl, N->getOperand(0).getValueType());
+ return DAG.getZeroExtendInReg(Op, dl,
+ N->getOperand(0).getValueType().getScalarType());
}
@@ -1069,6 +1094,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
case ISD::UADDO:
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_XMULO(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2146,6 +2173,86 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Ofl);
}
+void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ EVT VT = N->getValueType(0);
+ const Type *RetTy = VT.getTypeForEVT(*DAG.getContext());
+ EVT PtrVT = TLI.getPointerTy();
+ const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
+ DebugLoc dl = N->getDebugLoc();
+
+ // A divide for UMULO should be faster than a function call.
+ if (N->getOpcode() == ISD::UMULO) {
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+
+ SDValue MUL = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+ SplitInteger(MUL, Lo, Hi);
+
+ // A divide for UMULO will be faster than a function call. Select a
+ // divisor of one when RHS is zero so the divide cannot trap.
+ SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+ RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+ SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+ DAG.getConstant(1, VT), RHS);
+ SDValue DIV = DAG.getNode(ISD::UDIV, DL, LHS.getValueType(), MUL, NotZero);
+ SDValue Overflow;
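+ // When RHS is nonzero, (LHS * RHS) / RHS != LHS exactly when the
+ // product wrapped, i.e. the multiplication overflowed.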
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), DIV, LHS, ISD::SETNE);
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return;
+ }
+
+ // Replace this with a libcall that will check overflow.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (VT == MVT::i32)
+ LC = RTLIB::MULO_I32;
+ else if (VT == MVT::i64)
+ LC = RTLIB::MULO_I64;
+ else if (VT == MVT::i128)
+ LC = RTLIB::MULO_I128;
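+ // These correspond to compiler-rt's __mulosi4, __mulodi4 and __muloti4.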
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XMULO!");
+
+ SDValue Temp = DAG.CreateStackTemporary(PtrVT);
+ // Stack temporary for the overflow result; default it to zero.
+ SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl,
+ DAG.getConstant(0, PtrVT), Temp,
+ MachinePointerInfo(), false, false, 0);
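+ // The libcall stores a nonzero value through this pointer argument if
+ // the multiplication overflows; it is reloaded after the call below.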
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = N->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = N->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+ }
+
+ // Also pass the address of the overflow temporary.
+ Entry.Node = Temp;
+ Entry.Ty = PtrTy->getPointerTo();
+ Entry.isSExt = true;
+ Entry.isZExt = false;
+ Args.push_back(Entry);
+
+ SDValue Func = DAG.getExternalSymbol(TLI.getLibcallName(LC), PtrVT);
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(Chain, RetTy, true, false, false, false,
+ 0, TLI.getLibcallCallingConv(LC), false,
+ true, Func, Args, DAG, dl);
+
+ SplitInteger(CallInfo.first, Lo, Hi);
+ SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp,
+ MachinePointerInfo(), false, false, 0);
+ SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2,
+ DAG.getConstant(0, PtrVT),
+ ISD::SETNE);
+ // Use the overflow from the libcall everywhere.
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -2638,18 +2745,18 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue InOp0 = N->getOperand(0);
EVT InVT = InOp0.getValueType();
- EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
- unsigned OutNumElems = N->getValueType(0).getVectorNumElements();
+ unsigned OutNumElems = OutVT.getVectorNumElements();
EVT NOutVTElem = NOutVT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
SDValue BaseIdx = N->getOperand(1);
SmallVector<SDValue, 8> Ops;
+ Ops.reserve(OutNumElems);
for (unsigned i = 0; i != OutNumElems; ++i) {
// Extract the element from the original vector.
@@ -2681,18 +2788,13 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) {
SDValue V0 = GetPromotedInteger(N->getOperand(0));
SDValue V1 = GetPromotedInteger(N->getOperand(1));
- EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ EVT OutVT = V0.getValueType();
- return DAG.getVectorShuffle(OutVT, dl, V0,V1, &NewMask[0]);
+ return DAG.getVectorShuffle(OutVT, dl, V0, V1, &NewMask[0]);
}
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
-
- SDValue InOp0 = N->getOperand(0);
- EVT InVT = InOp0.getValueType();
- EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
-
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
@@ -2702,6 +2804,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
SmallVector<SDValue, 8> Ops;
+ Ops.reserve(NumElems);
for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i));
Ops.push_back(Op);
@@ -2714,10 +2817,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- SDValue InOp0 = N->getOperand(0);
- EVT InVT = InOp0.getValueType();
- EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
- assert(!InVT.isVector() && "Input must not be a scalar");
+ assert(!N->getOperand(0).getValueType().isVector() &&
+ "Input must be a scalar");
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -2730,12 +2831,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
-
- SDValue InOp0 = N->getOperand(0);
- EVT InVT = InOp0.getValueType();
- EVT InElVT = InVT.getVectorElementType();
- EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
-
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
assert(NOutVT.isVector() && "This type must be promoted to a vector type");
@@ -2744,7 +2839,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
- SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, InOp0);
+ SDValue ConvertedVector = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
+ N->getOperand(0));
SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl,
NOutVTElem, N->getOperand(1));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index b2f966b..952797d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -201,7 +201,7 @@ private:
EVT OldVT = Op.getValueType();
DebugLoc dl = Op.getDebugLoc();
Op = GetPromotedInteger(Op);
- return DAG.getZeroExtendInReg(Op, dl, OldVT);
+ return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType());
}
// Integer Result Promotion.
@@ -318,6 +318,7 @@ private:
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
@@ -377,6 +378,7 @@ private:
SDValue SoftenFloatRes_FLOG(SDNode *N);
SDValue SoftenFloatRes_FLOG2(SDNode *N);
SDValue SoftenFloatRes_FLOG10(SDNode *N);
+ SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
SDValue SoftenFloatRes_FNEG(SDNode *N);
@@ -441,6 +443,7 @@ private:
void ExpandFloatRes_FLOG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG2 (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FLOG10 (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandFloatRes_FMA (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FMUL (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FNEARBYINT(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FNEG (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 5d0f923..ffff10c 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -182,9 +182,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
break;
- case ISD::SIGN_EXTEND_INREG:
case ISD::FP_ROUND_INREG:
QueryType = cast<VTSDNode>(Node->getOperand(1))->getVT();
break;
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 9595f69..b5698f9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2164,6 +2164,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2179,6 +2180,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned MemVTWidth = MemVT.getSizeInBits();
if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
+ isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
if (RetVT.getSizeInBits() < MemVTWidth || MemVT == WidenVT)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 7b560d1..b275c63 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -249,14 +249,14 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
- if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
NewSU->isTwoAddress = true;
break;
}
}
- if (TID.isCommutable())
+ if (MCID.isCommutable())
NewSU->isCommutable = true;
// LoadNode may already exist. This can happen when there is another
@@ -422,10 +422,10 @@ void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
- unsigned NumRes = TID.getNumDefs();
- for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
+ for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -490,7 +490,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
++i; // Skip the ID value.
if (InlineAsm::isRegDefKind(Flags) ||
- InlineAsm::isRegDefEarlyClobberKind(Flags)) {
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
@@ -504,10 +505,10 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
if (!Node->isMachineOpcode())
continue;
- const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
- if (!TID.ImplicitDefs)
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg) {
+ for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg) {
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8d61a89..12b1838 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -276,6 +276,43 @@ private:
};
} // end anonymous namespace
+/// GetCostForDef - Looks up the register class and cost for a given definition.
+/// Typically this just means looking up the representative register class,
+/// but for untyped values (MVT::untyped) it means inspecting the node's
+/// opcode to determine what register class is being generated.
+static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI,
+ unsigned &RegClass, unsigned &Cost) {
+ EVT VT = RegDefPos.GetValue();
+
+ // Special handling for untyped values. These values can only come from
+ // the expansion of custom DAG-to-DAG patterns.
+ if (VT == MVT::untyped) {
+ const SDNode *Node = RegDefPos.GetNode();
+ unsigned Opcode = Node->getMachineOpcode();
+
+ if (Opcode == TargetOpcode::REG_SEQUENCE) {
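+ // Operand 0 of a REG_SEQUENCE is the ID of the destination register
+ // class.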
+ unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
+ const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
+ RegClass = RC->getID();
+ Cost = 1;
+ return;
+ }
+
+ unsigned Idx = RegDefPos.GetIdx();
+ const MCInstrDesc &Desc = TII->get(Opcode);
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, Idx, TRI);
+ RegClass = RC->getID();
+ // FIXME: Cost arbitrarily set to 1 because there doesn't seem to be a
+ // better way to determine it.
+ Cost = 1;
+ } else {
+ RegClass = TLI->getRepRegClassFor(VT)->getID();
+ Cost = TLI->getRepRegClassCostFor(VT);
+ }
+}
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGRRList::Schedule() {
@@ -800,14 +837,14 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
- if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
NewSU->isTwoAddress = true;
break;
}
}
- if (TID.isCommutable())
+ if (MCID.isCommutable())
NewSU->isCommutable = true;
InitNumRegDefsLeft(NewSU);
@@ -987,10 +1024,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
/// FIXME: Move to SelectionDAG?
static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
const TargetInstrInfo *TII) {
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
- assert(TID.ImplicitDefs && "Physical reg def must be in implicit def list!");
- unsigned NumRes = TID.getNumDefs();
- for (const unsigned *ImpDef = TID.getImplicitDefs(); *ImpDef; ++ImpDef) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!");
+ unsigned NumRes = MCID.getNumDefs();
+ for (const unsigned *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) {
if (Reg == *ImpDef)
break;
++NumRes;
@@ -1055,7 +1092,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
++i; // Skip the ID value.
if (InlineAsm::isRegDefKind(Flags) ||
- InlineAsm::isRegDefEarlyClobberKind(Flags)) {
+ InlineAsm::isRegDefEarlyClobberKind(Flags) ||
+ InlineAsm::isClobberKind(Flags)) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
@@ -1070,10 +1108,10 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (!Node->isMachineOpcode())
continue;
- const TargetInstrDesc &TID = TII->get(Node->getMachineOpcode());
- if (!TID.ImplicitDefs)
+ const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode());
+ if (!MCID.ImplicitDefs)
continue;
- for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
+ for (const unsigned *Reg = MCID.ImplicitDefs; *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
@@ -1369,6 +1407,21 @@ struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
};
+#ifndef NDEBUG
+template<class SF>
+struct reverse_sort : public queue_sort {
+ SF &SortFunc;
+ reverse_sort(SF &sf) : SortFunc(sf) {}
+ reverse_sort(const reverse_sort &RHS) : SortFunc(RHS.SortFunc) {}
+
+ bool operator()(SUnit* left, SUnit* right) const {
+ // reverse left/right rather than simply !SortFunc(left, right)
+ // to expose different paths in the comparison logic.
+ return SortFunc(right, left);
+ }
+};
+#endif // NDEBUG
+
/// bu_ls_rr_sort - Priority function for bottom up register pressure
// reduction scheduler.
struct bu_ls_rr_sort : public queue_sort {
@@ -1569,20 +1622,33 @@ protected:
};
template<class SF>
-class RegReductionPriorityQueue : public RegReductionPQBase {
- static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
- std::vector<SUnit *>::iterator Best = Q.begin();
- for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
- E = Q.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != prior(Q.end()))
- std::swap(*Best, Q.back());
- Q.pop_back();
- return V;
+static SUnit *popFromQueueImpl(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+}
+
+template<class SF>
+SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker, ScheduleDAG *DAG) {
+#ifndef NDEBUG
+ if (DAG->StressSched) {
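+ // Stress mode: reverse the priority function to flush out hidden
+ // assumptions about the scheduling order.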
+ reverse_sort<SF> RPicker(Picker);
+ return popFromQueueImpl(Q, RPicker);
}
+#endif
+ (void)DAG;
+ return popFromQueueImpl(Q, Picker);
+}
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
SF Picker;
public:
@@ -1603,7 +1669,7 @@ public:
SUnit *pop() {
if (Queue.empty()) return NULL;
- SUnit *V = popFromQueue(Queue, Picker);
+ SUnit *V = popFromQueue(Queue, Picker, scheduleDAG);
V->NodeQueueId = 0;
return V;
}
@@ -1613,7 +1679,7 @@ public:
std::vector<SUnit*> DumpQueue = Queue;
SF DumpPicker = Picker;
while (!DumpQueue.empty()) {
- SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG);
if (isBottomUp())
dbgs() << "Height " << SU->getHeight() << ": ";
else
@@ -1778,9 +1844,9 @@ bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
}
for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
RegDefPos.IsValid(); RegDefPos.Advance()) {
- EVT VT = RegDefPos.GetValue();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+
if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
return true;
}
@@ -1891,9 +1957,10 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
if (SkipRegDefs)
continue;
- EVT VT = RegDefPos.GetValue();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ RegPressure[RCId] += Cost;
break;
}
}
@@ -1906,16 +1973,16 @@ void RegReductionPQBase::ScheduledNode(SUnit *SU) {
RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
if (SkipRegDefs > 0)
continue;
- EVT VT = RegDefPos.GetValue();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+ unsigned RCId, Cost;
+ GetCostForDef(RegDefPos, TLI, TII, TRI, RCId, Cost);
+ if (RegPressure[RCId] < Cost) {
// Register pressure tracking is imprecise. This can happen. But we try
// hard not to let it happen because it likely results in poor scheduling.
DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
RegPressure[RCId] = 0;
}
else {
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] -= Cost;
}
}
dumpRegPressure();
@@ -1962,13 +2029,9 @@ void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
unsigned POpc = PN->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF)
continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
+ if (POpc == TargetOpcode::EXTRACT_SUBREG ||
+ POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
EVT VT = PN->getValueType(0);
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
@@ -2543,11 +2606,11 @@ void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
if (SU->isTwoAddress) {
unsigned Opc = SU->getNode()->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- unsigned NumRes = TID.getNumDefs();
- unsigned NumOps = TID.getNumOperands() - NumRes;
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
for (unsigned i = 0; i != NumOps; ++i) {
- if (TID.getOperandConstraint(i+NumRes, TOI::TIED_TO) != -1) {
+ if (MCID.getOperandConstraint(i+NumRes, MCOI::TIED_TO) != -1) {
SDNode *DU = SU->getNode()->getOperand(i).getNode();
if (DU->getNodeId() != -1 &&
Op->OrigNode == &(*SUnits)[DU->getNodeId()])
@@ -2727,11 +2790,11 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
bool isLiveOut = hasOnlyLiveOutUses(SU);
unsigned Opc = Node->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- unsigned NumRes = TID.getNumDefs();
- unsigned NumOps = TID.getNumOperands() - NumRes;
+ const MCInstrDesc &MCID = TII->get(Opc);
+ unsigned NumRes = MCID.getNumDefs();
+ unsigned NumOps = MCID.getNumOperands() - NumRes;
for (unsigned j = 0; j != NumOps; ++j) {
- if (TID.getOperandConstraint(j+NumRes, TOI::TIED_TO) == -1)
+ if (MCID.getOperandConstraint(j+NumRes, MCOI::TIED_TO) == -1)
continue;
SDNode *DU = SU->getNode()->getOperand(j).getNode();
if (DU->getNodeId() == -1)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 9f2f012..71f07d6 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -17,11 +17,12 @@
#include "ScheduleDAGSDNodes.h"
#include "InstrEmitter.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -111,7 +112,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
unsigned ResNo = User->getOperand(2).getResNo();
if (Def->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
+ const MCInstrDesc &II = TII->get(Def->getMachineOpcode());
if (ResNo >= II.getNumDefs() &&
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
PhysReg = Reg;
@@ -255,8 +256,8 @@ void ScheduleDAGSDNodes::ClusterNodes() {
continue;
unsigned Opc = Node->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- if (TID.mayLoad())
+ const MCInstrDesc &MCID = TII->get(Opc);
+ if (MCID.mayLoad())
// Cluster loads from "near" addresses into combined SUnits.
ClusterNeighboringLoads(Node);
}
@@ -378,7 +379,7 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
}
void ScheduleDAGSDNodes::AddSchedEdges() {
- const TargetSubtarget &ST = TM.getSubtarget<TargetSubtarget>();
+ const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
@@ -390,14 +391,14 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (MainNode->isMachineOpcode()) {
unsigned Opc = MainNode->getMachineOpcode();
- const TargetInstrDesc &TID = TII->get(Opc);
- for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
- if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
+ const MCInstrDesc &MCID = TII->get(Opc);
+ for (unsigned i = 0; i != MCID.getNumOperands(); ++i) {
+ if (MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1) {
SU->isTwoAddress = true;
break;
}
}
- if (TID.isCommutable())
+ if (MCID.isCommutable())
SU->isCommutable = true;
}
@@ -435,7 +436,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
// it requires a cross class copy (cost < 0). That means we are only
// treating "expensive to copy" register dependency as physical register
// dependency. This may change in the future though.
- if (Cost >= 0)
+ if (Cost >= 0 && !StressSched)
PhysReg = 0;
// If this is a ctrl dep, latency is 1.
@@ -520,14 +521,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
for (;DefIdx < NodeNumDefs; ++DefIdx) {
if (!Node->hasAnyUseOfValue(DefIdx))
continue;
- if (Node->isMachineOpcode() &&
- Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
- // Propagate the incoming (full-register) type. I doubt it's needed.
- ValueType = Node->getOperand(0).getValueType();
- }
- else {
- ValueType = Node->getValueType(DefIdx);
- }
+ ValueType = Node->getValueType(DefIdx);
++DefIdx;
return; // Found a normal regdef.
}
@@ -649,7 +643,7 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
// order number right after the N.
MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
- SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
+ ArrayRef<SDDbgValue*> DVs = DAG->GetDbgValues(N);
for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
if (DVs[i]->isInvalidated())
continue;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index b5f68f3..9c27b2e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -135,6 +135,14 @@ namespace llvm {
return ValueType;
}
+ const SDNode *GetNode() const {
+ return Node;
+ }
+
+ unsigned GetIdx() const {
+ return DefIdx-1;
+ }
+
void Advance();
private:
void InitNodeNumDefs();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 68eeb60..35ea0bb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -598,7 +598,7 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
Ordering->remove(N);
// If any of the SDDbgValue nodes refer to this SDNode, invalidate them.
- SmallVector<SDDbgValue*, 2> &DbgVals = DbgInfo->getSDDbgValues(N);
+ ArrayRef<SDDbgValue*> DbgVals = DbgInfo->getSDDbgValues(N);
for (unsigned i = 0, e = DbgVals.size(); i != e; ++i)
DbgVals[i]->setIsInvalidated();
}
@@ -3326,13 +3326,13 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
const TargetLowering &TLI) {
assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
"Expecting memcpy / memset source to meet alignment requirement!");
- // If 'SrcAlign' is zero, that means the memory operation does not need load
- // the value, i.e. memset or memcpy from constant string. Otherwise, it's
- // the inferred alignment of the source. 'DstAlign', on the other hand, is the
- // specified alignment of the memory operation. If it is zero, that means
- // it's possible to change the alignment of the destination. 'MemcpyStrSrc'
- // indicates whether the memcpy source is constant so it does not need to be
- // loaded.
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
NonScalarIntSafe, MemcpyStrSrc,
DAG.getMachineFunction());
@@ -4037,6 +4037,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(VT);
@@ -4142,6 +4144,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
unsigned Alignment, const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
@@ -4165,6 +4169,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
SDValue Ptr, MachineMemOperand *MMO) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -4191,6 +4197,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
EVT SVT,bool isVolatile, bool isNonTemporal,
unsigned Alignment,
const MDNode *TBAAInfo) {
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
@@ -4216,6 +4224,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
MachineMemOperand *MMO) {
EVT VT = Val.getValueType();
+ assert(Chain.getValueType() == MVT::Other &&
+ "Invalid chain type");
if (VT == SVT)
return getStore(Chain, dl, Val, Ptr, MMO);
@@ -5508,9 +5518,9 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
return;
SDNode *FromNode = From.getNode();
SDNode *ToNode = To.getNode();
- SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode);
+ ArrayRef<SDDbgValue *> DVs = GetDbgValues(FromNode);
SmallVector<SDDbgValue *, 2> ClonedDVs;
- for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end();
+ for (ArrayRef<SDDbgValue *>::iterator I = DVs.begin(), E = DVs.end();
I != E; ++I) {
SDDbgValue *Dbg = *I;
if (Dbg->getKind() == SDDbgValue::SDNODE) {
@@ -5691,24 +5701,39 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
return false;
}
-/// isPredecessorOf - Return true if this node is a predecessor of N. This node
-/// is either an operand of N or it can be reached by traversing up the operands.
-/// NOTE: this is an expensive method. Use it carefully.
-bool SDNode::isPredecessorOf(SDNode *N) const {
- SmallPtrSet<SDNode *, 32> Visited;
- SmallVector<SDNode *, 16> Worklist;
- Worklist.push_back(N);
+/// hasPredecessor - Return true if N is a predecessor of this node.
+/// N is either an operand of this node, or can be reached by recursively
+/// traversing up the operands.
+/// NOTE: This is an expensive method. Use it carefully.
+bool SDNode::hasPredecessor(const SDNode *N) const {
+ SmallPtrSet<const SDNode *, 32> Visited;
+ SmallVector<const SDNode *, 16> Worklist;
+ return hasPredecessorHelper(N, Visited, Worklist);
+}
- do {
- N = Worklist.pop_back_val();
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
- SDNode *Op = N->getOperand(i).getNode();
- if (Op == this)
- return true;
+bool SDNode::hasPredecessorHelper(const SDNode *N,
+ SmallPtrSet<const SDNode *, 32> &Visited,
+ SmallVector<const SDNode *, 16> &Worklist) const {
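+ // Visited and Worklist may be reused across calls so repeated queries
+ // share work; an empty Visited set marks a fresh search from this node.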
+ if (Visited.empty()) {
+ Worklist.push_back(this);
+ } else {
+ // Take a look in the visited set. If we've already encountered this node
+ // we needn't search further.
+ if (Visited.count(N))
+ return true;
+ }
+
+ // Haven't visited N yet. Continue the search.
+ while (!Worklist.empty()) {
+ const SDNode *M = Worklist.pop_back_val();
+ for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
+ SDNode *Op = M->getOperand(i).getNode();
if (Visited.insert(Op))
Worklist.push_back(Op);
+ if (Op == N)
+ return true;
}
- } while (!Worklist.empty());
+ }
return false;
}
@@ -5863,6 +5888,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FSUB: return "fsub";
case ISD::FMUL: return "fmul";
case ISD::FDIV: return "fdiv";
+ case ISD::FMA: return "fma";
case ISD::FREM: return "frem";
case ISD::FCOPYSIGN: return "fcopysign";
case ISD::FGETSIGN: return "fgetsign";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7a8a975..81b03ee 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -286,22 +286,10 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
"Cannot handle this kind of promotion");
// Promoted vector extract
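+ // Element counts match here, so one vector-wide TRUNCATE or
+ // ANY_EXTEND converts every lane at once.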
- unsigned NumElts = ValueVT.getVectorNumElements();
- SmallVector<SDValue, 8> NewOps;
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- PartVT.getScalarType(), Val ,DAG.getIntPtrConstant(i));
- SDValue Cast;
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT, Val);
- bool Smaller = ValueVT.bitsLE(PartVT);
-
- Cast = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
- DL, ValueVT.getScalarType(), Ext);
-
- NewOps.push_back(Cast);
- }
- return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT,
- &NewOps[0], NewOps.size());
}
// Trivial bitcast if the types are the same size and the destination
@@ -310,9 +298,17 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
TLI.isTypeLegal(ValueVT))
return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
+ // Handle cases such as i8 -> <1 x i1>
+ assert(ValueVT.getVectorNumElements() == 1 &&
"Only trivial scalar-to-vector conversions should get here!");
+
+ if (ValueVT.getVectorNumElements() == 1 &&
+ ValueVT.getVectorElementType() != PartVT) {
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, ValueVT.getScalarType(), Val);
+ }
+
return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val);
}
@@ -453,7 +449,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
// Bitconvert vector->vector case.
Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (PartVT.isVector() &&
- PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
+ PartVT.getVectorElementType() == ValueVT.getVectorElementType() &&
PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
EVT ElementVT = PartVT.getVectorElementType();
// Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
@@ -475,28 +471,23 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
} else if (PartVT.isVector() &&
PartVT.getVectorElementType().bitsGE(
- ValueVT.getVectorElementType())&&
+ ValueVT.getVectorElementType()) &&
PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
// Promoted vector extract
- unsigned NumElts = ValueVT.getVectorNumElements();
- SmallVector<SDValue, 8> NewOps;
- for (unsigned i = 0; i < NumElts; ++i) {
- SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
- ValueVT.getScalarType(), Val ,DAG.getIntPtrConstant(i));
- SDValue Cast = DAG.getNode(ISD::ANY_EXTEND,
- DL, PartVT.getScalarType(), Ext);
- NewOps.push_back(Cast);
- }
- Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT,
- &NewOps[0], NewOps.size());
+ bool Smaller = PartVT.bitsLE(ValueVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
} else{
// Vector -> scalar conversion.
- assert(ValueVT.getVectorElementType() == PartVT &&
- ValueVT.getVectorNumElements() == 1 &&
+ assert(ValueVT.getVectorNumElements() == 1 &&
"Only trivial vector-to-scalar conversions should get here!");
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
PartVT, Val, DAG.getIntPtrConstant(0));
+
+ bool Smaller = ValueVT.bitsLE(PartVT);
+ Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND),
+ DL, PartVT, Val);
}
Parts[0] = Val;
@@ -1280,6 +1271,24 @@ bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
return true;
}
+/// getEdgeWeight - Return the edge weight computed by BranchProbabilityInfo
+/// for the IR basic blocks underlying Src and Dst.
+uint32_t SelectionDAGBuilder::getEdgeWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ if (!BPI)
+ return 0;
+ BasicBlock *SrcBB = const_cast<BasicBlock*>(Src->getBasicBlock());
+ BasicBlock *DstBB = const_cast<BasicBlock*>(Dst->getBasicBlock());
+ return BPI->getEdgeWeight(SrcBB, DstBB);
+}
+
+void SelectionDAGBuilder::addSuccessorWithWeight(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst) {
+ uint32_t weight = getEdgeWeight(Src, Dst);
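+ // A zero weight (no BranchProbabilityInfo) leaves the successor with
+ // the default edge weight.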
+ Src->addSuccessor(Dst, weight);
+}
+
static bool InBlock(const Value *V, const BasicBlock *BB) {
if (const Instruction *I = dyn_cast<Instruction>(V))
return I->getParent() == BB;
@@ -1549,8 +1558,8 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
}
// Update successor info
- SwitchBB->addSuccessor(CB.TrueBB);
- SwitchBB->addSuccessor(CB.FalseBB);
+ addSuccessorWithWeight(SwitchBB, CB.TrueBB);
+ addSuccessorWithWeight(SwitchBB, CB.FalseBB);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1694,8 +1703,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- SwitchBB->addSuccessor(B.Default);
- SwitchBB->addSuccessor(MBB);
+ addSuccessorWithWeight(SwitchBB, B.Default);
+ addSuccessorWithWeight(SwitchBB, MBB);
SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, CopyTo, RangeCmp,
@@ -1718,7 +1727,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
Reg, VT);
SDValue Cmp;
- if (CountPopulation_64(B.Mask) == 1) {
+ unsigned PopCount = CountPopulation_64(B.Mask);
+ if (PopCount == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(getCurDebugLoc(),
@@ -1726,6 +1736,13 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
ShiftOp,
DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
ISD::SETEQ);
+ } else if (PopCount == BB.Range) {
+ // There is only one zero bit in the range, test for it directly.
+ Cmp = DAG.getSetCC(getCurDebugLoc(),
+ TLI.getSetCCResultType(VT),
+ ShiftOp,
+ DAG.getConstant(CountTrailingOnes_64(B.Mask), VT),
+ ISD::SETNE);
} else {
// Make desired shift
SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
@@ -1740,8 +1757,8 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
ISD::SETNE);
}
- SwitchBB->addSuccessor(B.TargetBB);
- SwitchBB->addSuccessor(NextMBB);
+ addSuccessorWithWeight(SwitchBB, B.TargetBB);
+ addSuccessorWithWeight(SwitchBB, NextMBB);
SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -1981,8 +1998,9 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
// table.
MachineBasicBlock *JumpTableBB = CurMF->CreateMachineBasicBlock(LLVMBB);
CurMF->insert(BBI, JumpTableBB);
- CR.CaseBB->addSuccessor(Default);
- CR.CaseBB->addSuccessor(JumpTableBB);
+
+ addSuccessorWithWeight(CR.CaseBB, Default);
+ addSuccessorWithWeight(CR.CaseBB, JumpTableBB);
// Build a vector of destination BBs, corresponding to each target
// of the jump table. If the value of the jump table slot corresponds to
@@ -2009,7 +2027,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec& CR,
E = DestBBs.end(); I != E; ++I) {
if (!SuccsHandled[(*I)->getNumber()]) {
SuccsHandled[(*I)->getNumber()] = true;
- JumpTableBB->addSuccessor(*I);
+ addSuccessorWithWeight(JumpTableBB, *I);
}
}
@@ -2428,8 +2446,10 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
succs.push_back(I.getSuccessor(i));
array_pod_sort(succs.begin(), succs.end());
succs.erase(std::unique(succs.begin(), succs.end()), succs.end());
- for (unsigned i = 0, e = succs.size(); i != e; ++i)
- IndirectBrMBB->addSuccessor(FuncInfo.MBBMap[succs[i]]);
+ for (unsigned i = 0, e = succs.size(); i != e; ++i) {
+ MachineBasicBlock *Succ = FuncInfo.MBBMap[succs[i]];
+ addSuccessorWithWeight(IndirectBrMBB, Succ);
+ }
DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(),
MVT::Other, getControlRoot(),
@@ -2489,6 +2509,22 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
Op1.getValueType(), Op1, Op2));
}
+void SelectionDAGBuilder::visitSDiv(const User &I) {
+ SDValue Op1 = getValue(I.getOperand(0));
+ SDValue Op2 = getValue(I.getOperand(1));
+
+ // Turn exact SDivs into multiplications.
+ // FIXME: This should be in DAGCombiner, but it doesn't have access to the
+ // exact bit.
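+ // BuildExactSDIV shifts out the divisor's trailing zero bits and then
+ // multiplies by its multiplicative inverse, which is safe because the
+ // division is known to be exact.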
+ if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
+ !isa<ConstantSDNode>(Op1) &&
+ isa<ConstantSDNode>(Op2) && !cast<ConstantSDNode>(Op2)->isNullValue())
+ setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
+ else
+ setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(),
+ Op1, Op2));
+}
+
void SelectionDAGBuilder::visitICmp(const User &I) {
ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
@@ -2855,7 +2891,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
- unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2895,7 +2931,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
const Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
- unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices());
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, ValTy, ValValueVTs);
@@ -4623,6 +4659,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::pow:
visitPow(I);
return 0;
+ case Intrinsic::fma:
+ setValue(&I, DAG.getNode(ISD::FMA, dl,
+ getValue(I.getArgOperand(0)).getValueType(),
+ getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)),
+ getValue(I.getArgOperand(2))));
+ return 0;
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl,
MVT::i16, getValue(I.getArgOperand(0))));
@@ -4759,6 +4802,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32));
return 0;
+
+ case Intrinsic::expect: {
+ // Just replace __builtin_expect(exp, c) with exp.
+ setValue(&I, getValue(I.getArgOperand(0)));
+ return 0;
+ }
+
case Intrinsic::trap: {
StringRef TrapFuncName = getTrapFunctionName();
if (TrapFuncName.empty()) {
@@ -4789,15 +4839,16 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
return implVisitAluOverflow(I, ISD::SMULO);
case Intrinsic::prefetch: {
- SDValue Ops[4];
+ SDValue Ops[5];
unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
+ Ops[4] = getValue(I.getArgOperand(3));
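+ // The new fourth operand is llvm.prefetch's cache-type argument
+ // (1 = data cache, 0 = instruction cache).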
DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
DAG.getVTList(MVT::Other),
- &Ops[0], 4,
+ &Ops[0], 5,
EVT::getIntegerVT(*Context, 8),
MachinePointerInfo(I.getArgOperand(0)),
0, /* align */
@@ -5415,54 +5466,6 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
} // end anonymous namespace
-/// isAllocatableRegister - If the specified register is safe to allocate,
-/// i.e. it isn't a stack pointer or some other special register, return the
-/// register class for the register. Otherwise, return null.
-static const TargetRegisterClass *
-isAllocatableRegister(unsigned Reg, MachineFunction &MF,
- const TargetLowering &TLI,
- const TargetRegisterInfo *TRI) {
- EVT FoundVT = MVT::Other;
- const TargetRegisterClass *FoundRC = 0;
- for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(),
- E = TRI->regclass_end(); RCI != E; ++RCI) {
- EVT ThisVT = MVT::Other;
-
- const TargetRegisterClass *RC = *RCI;
- // If none of the value types for this register class are valid, we
- // can't use it. For example, 64-bit reg classes on 32-bit targets.
- for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
- I != E; ++I) {
- if (TLI.isTypeLegal(*I)) {
- // If we have already found this register in a different register class,
- // choose the one with the largest VT specified. For example, on
- // PowerPC, we favor f64 register classes over f32.
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) {
- ThisVT = *I;
- break;
- }
- }
- }
-
- if (ThisVT == MVT::Other) continue;
-
- // NOTE: This isn't ideal. In particular, this might allocate the
- // frame pointer in functions that need it (due to them not being taken
- // out of allocation, because a variable sized allocation hasn't been seen
- // yet). This is a slight code pessimization, but should still work.
- for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
- E = RC->allocation_order_end(MF); I != E; ++I)
- if (*I == Reg) {
- // We found a matching register class. Keep looking at others in case
- // we find one with larger registers that this physreg is also in.
- FoundRC = RC;
- FoundVT = ThisVT;
- break;
- }
- }
- return FoundRC;
-}
-
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process. However, if the asm
@@ -5597,52 +5600,6 @@ static void GetRegistersForValue(SelectionDAG &DAG,
return;
}
- // This is a reference to a register class that doesn't directly correspond
- // to an LLVM register class. Allocate NumRegs consecutive, available,
- // registers from the class.
- std::vector<unsigned> RegClassRegs
- = TLI.getRegClassForInlineAsmConstraint(OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
-
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
- unsigned NumAllocated = 0;
- for (unsigned i = 0, e = RegClassRegs.size(); i != e; ++i) {
- unsigned Reg = RegClassRegs[i];
- // See if this register is available.
- if ((isOutReg && OutputRegs.count(Reg)) || // Already used.
- (isInReg && InputRegs.count(Reg))) { // Already used.
- // Make sure we find consecutive registers.
- NumAllocated = 0;
- continue;
- }
-
- // Check to see if this register is allocatable (i.e. don't give out the
- // stack pointer).
- const TargetRegisterClass *RC = isAllocatableRegister(Reg, MF, TLI, TRI);
- if (!RC) { // Couldn't allocate this register.
- // Reset NumAllocated to make sure we return consecutive registers.
- NumAllocated = 0;
- continue;
- }
-
- // Okay, this register is good, we can use it.
- ++NumAllocated;
-
- // If we allocated enough consecutive registers, succeed.
- if (NumAllocated == NumRegs) {
- unsigned RegStart = (i-NumAllocated)+1;
- unsigned RegEnd = i+1;
- // Mark all of the allocated registers used.
- for (unsigned i = RegStart; i != RegEnd; ++i)
- Regs.push_back(RegClassRegs[i]);
-
- OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(),
- OpInfo.ConstraintVT);
- OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI);
- return;
- }
- }
-
// Otherwise, we couldn't allocate enough registers for this.
}
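With the consecutive-register scavenging above removed, a nonstandard constraint letter is now described entirely by the target's getRegForInlineAsmConstraint hook: return a (0, register class) pair to let the allocator pick any member, or a (specific physreg, class) pair to pin one register. A hedged sketch of such an override; MyTarget, GR32RegisterClass, and the 'r' mapping are placeholders, and this compiles only inside an LLVM backend of this era:

// Sketch of a target hook, not a real target.
std::pair<unsigned, const TargetRegisterClass*>
MyTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                               EVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r': // any 32-bit GPR; reg 0 means "allocator's choice" in the class
      return std::make_pair(0U, MyTarget::GR32RegisterClass);
    default:
      break;
    }
  }
  // Defer anything unrecognized to the generic handling.
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}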
@@ -5749,10 +5706,13 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
- (OpInfo.ConstraintVT.getSizeInBits() !=
- Input.ConstraintVT.getSizeInBits())) {
+ (MatchRC.second != InputRC.second)) {
report_fatal_error("Unsupported asm: input constraint"
" with a matching output constraint of"
" incompatible type!");
@@ -6015,8 +5975,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
"Don't know how to handle indirect register inputs yet!");
// Copy the input into the appropriate registers.
- if (OpInfo.AssignedRegs.Regs.empty() ||
- !OpInfo.AssignedRegs.areValueTypesLegal(TLI))
+ if (OpInfo.AssignedRegs.Regs.empty())
report_fatal_error("Couldn't allocate input reg for constraint '" +
Twine(OpInfo.ConstraintCode) + "'!");
@@ -6031,8 +5990,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Add the clobbered value to the operand list, so that the register
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
- OpInfo.AssignedRegs.AddInlineAsmOperands(
- InlineAsm::Kind_RegDefEarlyClobber,
+ OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_Clobber,
false, 0, DAG,
AsmNodeOperands);
break;
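Clobbered registers now get the dedicated Kind_Clobber operand kind instead of piggybacking on Kind_RegDefEarlyClobber, so later passes can tell a genuine early-clobber def from a plain clobber. The kind travels in the INLINEASM flag word next to the register count; a self-contained model of that packing (the layout mirrors llvm/InlineAsm.h's getFlagWord; treat the enum values as illustrative):

#include <cassert>

enum { Kind_RegUse = 1, Kind_RegDef = 2, Kind_RegDefEarlyClobber = 3,
       Kind_Clobber = 4 };

// Low 3 bits: operand kind; the bits above: how many register operands follow.
unsigned getFlagWord(unsigned Kind, unsigned NumOps) {
  assert(Kind < 8 && NumOps < (1u << 13));
  return Kind | (NumOps << 3);
}

int main() {
  unsigned Flag = getFlagWord(Kind_Clobber, 1);
  assert((Flag & 7) == Kind_Clobber && (Flag >> 3) == 1);
  return 0;
}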
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8376d41..a0884eb 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -434,6 +434,9 @@ private:
const Value* SV,
MachineBasicBlock* Default,
MachineBasicBlock *SwitchBB);
+
+ uint32_t getEdgeWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
+ void addSuccessorWithWeight(MachineBasicBlock *Src, MachineBasicBlock *Dst);
public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
@@ -464,7 +467,7 @@ private:
void visitSRem(const User &I) { visitBinary(I, ISD::SREM); }
void visitFRem(const User &I) { visitBinary(I, ISD::FREM); }
void visitUDiv(const User &I) { visitBinary(I, ISD::UDIV); }
- void visitSDiv(const User &I) { visitBinary(I, ISD::SDIV); }
+ void visitSDiv(const User &I);
void visitFDiv(const User &I) { visitBinary(I, ISD::FDIV); }
void visitAnd (const User &I) { visitBinary(I, ISD::AND); }
void visitOr (const User &I) { visitBinary(I, ISD::OR); }
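visitSDiv moves out of line because signed division now has a fast path: an IR sdiv marked exact with a constant, nonzero divisor can be rewritten as a multiply by the divisor's multiplicative inverse (see BuildExactSDIV in the TargetLowering.cpp hunk below). A rough, hedged sketch of the new body:

void SelectionDAGBuilder::visitSDiv(const User &I) {
  SDValue Op1 = getValue(I.getOperand(0));
  SDValue Op2 = getValue(I.getOperand(1));

  // Exact division by a constant becomes a multiply; everything else stays
  // a plain ISD::SDIV node.
  if (isa<BinaryOperator>(&I) && cast<BinaryOperator>(&I)->isExact() &&
      !isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
      !cast<ConstantSDNode>(Op2)->isNullValue())
    setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG));
  else
    setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(),
                             Op1.getValueType(), Op1, Op2));
}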
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 771b008..87bb296 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
@@ -68,6 +69,11 @@ static cl::opt<bool>
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction fails"));
+static cl::opt<bool>
+UseMBPI("use-mbpi",
+ cl::desc("use Machine Branch Probability Info"),
+ cl::init(true), cl::Hidden);
+
#ifndef NDEBUG
static cl::opt<bool>
ViewDAGCombine1("view-dag-combine1-dags", cl::Hidden,
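The new -use-mbpi flag follows the stock CommandLine pattern: a hidden boolean defaulting to on, so the branch-probability plumbing can be switched off when bisecting codegen differences. For reference, a minimal standalone use of cl::opt (the flag name here is invented):

#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Hidden boolean, on by default; disable with -my-experiment=false.
static cl::opt<bool>
MyExperiment("my-experiment",
             cl::desc("Enable an experimental codegen path"),
             cl::init(true), cl::Hidden);

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return MyExperiment ? 0 : 1;
}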
@@ -186,6 +192,7 @@ SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
DAGSize(0) {
initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeBranchProbabilityInfoPass(*PassRegistry::getPassRegistry());
}
SelectionDAGISel::~SelectionDAGISel() {
@@ -199,6 +206,8 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<AliasAnalysis>();
AU.addRequired<GCModuleInfo>();
AU.addPreserved<GCModuleInfo>();
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ AU.addRequired<BranchProbabilityInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -262,6 +271,12 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF);
+
+ if (UseMBPI && OptLevel != CodeGenOpt::None)
+ FuncInfo->BPI = &getAnalysis<BranchProbabilityInfo>();
+ else
+ FuncInfo->BPI = 0;
+
SDB->init(GFI, *AA);
SelectAllBasicBlocks(Fn);
@@ -339,9 +354,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
const MachineBasicBlock *MBB = I;
for (MachineBasicBlock::const_iterator
II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
- const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode());
- if ((TID.isCall() && !TID.isReturn()) ||
+ if ((MCID.isCall() && !MCID.isReturn()) ||
II->isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
goto done;
@@ -666,7 +681,7 @@ void SelectionDAGISel::PrepareEHLandingPad() {
// landing pad can thus be detected via the MachineModuleInfo.
MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB);
- const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
+ const MCInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL);
BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
.addSym(Label);
@@ -2596,9 +2611,9 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (EmitNodeInfo & OPFL_MemRefs) {
// Only attach load or store memory operands if the generated
// instruction may load or store.
- const TargetInstrDesc &TID = TM.getInstrInfo()->get(TargetOpc);
- bool mayLoad = TID.mayLoad();
- bool mayStore = TID.mayStore();
+ const MCInstrDesc &MCID = TM.getInstrInfo()->get(TargetOpc);
+ bool mayLoad = MCID.mayLoad();
+ bool mayStore = MCID.mayStore();
unsigned NumMemRefs = 0;
for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index cf6069a..2626ac3 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -81,6 +81,9 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::MUL_I32] = "__mulsi3";
Names[RTLIB::MUL_I64] = "__muldi3";
Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::MULO_I32] = "__mulosi4";
+ Names[RTLIB::MULO_I64] = "__mulodi4";
+ Names[RTLIB::MULO_I128] = "__muloti4";
Names[RTLIB::SDIV_I8] = "__divqi3";
Names[RTLIB::SDIV_I16] = "__divhi3";
Names[RTLIB::SDIV_I32] = "__divsi3";
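The MULO_* entries name compiler-rt's overflow-checking multiplies, used when a [su]mul.with.overflow that cannot be expanded inline is turned into a libcall. Per compiler-rt, __mulosi4(a, b, &ov) returns the truncated product and sets ov when it does not fit; a portable C++ model of that contract (the real routine lives in compiler-rt, this only sketches its semantics):

#include <cstdint>
#include <cassert>

// Model of __mulosi4: 32-bit signed multiply with out-of-band overflow flag.
int32_t mulosi4_model(int32_t a, int32_t b, int *overflow) {
  int64_t wide = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  *overflow = (wide != static_cast<int32_t>(wide));
  return static_cast<int32_t>(wide);
}

int main() {
  int ov = 0;
  assert(mulosi4_model(3, 7, &ov) == 21 && !ov);
  mulosi4_model(INT32_MAX, 2, &ov);
  assert(ov);
  return 0;
}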
@@ -136,6 +139,10 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::REM_F64] = "fmod";
Names[RTLIB::REM_F80] = "fmodl";
Names[RTLIB::REM_PPCF128] = "fmodl";
+ Names[RTLIB::FMA_F32] = "fmaf";
+ Names[RTLIB::FMA_F64] = "fma";
+ Names[RTLIB::FMA_F80] = "fmal";
+ Names[RTLIB::FMA_PPCF128] = "fmal";
Names[RTLIB::POWI_F32] = "__powisf2";
Names[RTLIB::POWI_F64] = "__powidf2";
Names[RTLIB::POWI_F80] = "__powixf2";
@@ -673,10 +680,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
NewVT = EltTy;
IntermediateVT = NewVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
EVT DestVT = TLI->getRegisterType(NewVT);
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
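The guard rounds odd-width types such as i33 up to the next power of two before dividing by the register size, so an i33 element is costed as an i64 rather than hitting a 33/64 = 0 truncation. NextPowerOf2 (from MathExtras.h) returns the smallest power of two strictly greater than its argument by smearing the top bit downward, which is exactly why the call is guarded by !isPowerOf2_32: an already-power-of-two size must not be doubled. A standalone sketch:

#include <cstdint>
#include <cassert>

// Smear the highest set bit into every lower position, then add one:
// 33 -> 63 -> 64. Mirrors MathExtras.h's NextPowerOf2.
uint64_t nextPowerOf2(uint64_t A) {
  A |= (A >> 1);
  A |= (A >> 2);
  A |= (A >> 4);
  A |= (A >> 8);
  A |= (A >> 16);
  A |= (A >> 32);
  return A + 1;
}

int main() {
  assert(nextPowerOf2(33) == 64);
  assert(nextPowerOf2(64) == 128); // strictly greater: hence the pre-check
  return 0;
}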
@@ -965,8 +978,14 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
EVT DestVT = getRegisterType(Context, NewVT);
RegisterVT = DestVT;
+ unsigned NewVTSize = NewVT.getSizeInBits();
+
+ // Convert sizes such as i33 to i64.
+ if (!isPowerOf2_32(NewVTSize))
+ NewVTSize = NextPowerOf2(NewVTSize);
+
if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
- return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
+ return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits());
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
@@ -1762,9 +1781,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::BITCAST:
// If this is an FP->Int bitcast and if the sign bit is the only
// thing demanded, turn this into a FGETSIGN.
- if (NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
- Op.getOperand(0).getValueType().isFloatingPoint() &&
- !Op.getOperand(0).getValueType().isVector()) {
+ if (!Op.getOperand(0).getValueType().isVector() &&
+ NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) {
@@ -1902,7 +1921,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// comparisons.
if (isa<ConstantSDNode>(N0.getNode()))
return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
-
+
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
@@ -2608,7 +2627,6 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
TargetLowering::ConstraintType
TargetLowering::getConstraintType(const std::string &Constraint) const {
- // FIXME: lots more standard ones to handle.
if (Constraint.size() == 1) {
switch (Constraint[0]) {
default: break;
@@ -2661,9 +2679,9 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
std::string &Constraint,
std::vector<SDValue> &Ops,
SelectionDAG &DAG) const {
-
+
if (Constraint.length() > 1) return;
-
+
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
@@ -2722,13 +2740,6 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
}
}
-std::vector<unsigned> TargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
- return std::vector<unsigned>();
-}
-
-
std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
@@ -2853,7 +2864,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
report_fatal_error("Indirect operand for inline asm not a pointer!");
OpTy = PtrTy->getElementType();
}
-
+
// Look for vector wrapped in a struct. e.g. { <16 x i8> }.
if (const StructType *STy = dyn_cast<StructType>(OpTy))
if (STy->getNumElements() == 1)
@@ -2955,10 +2966,13 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ std::pair<unsigned, const TargetRegisterClass*> MatchRC =
+ getRegForInlineAsmConstraint(OpInfo.ConstraintCode, OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass*> InputRC =
+ getRegForInlineAsmConstraint(Input.ConstraintCode, Input.ConstraintVT);
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
- (OpInfo.ConstraintVT.getSizeInBits() !=
- Input.ConstraintVT.getSizeInBits())) {
+ (MatchRC.second != InputRC.second)) {
report_fatal_error("Unsupported asm: input constraint"
" with a matching output constraint of"
" incompatible type!");
@@ -3204,6 +3218,32 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
+/// BuildExactSDIV - Given an exact SDIV by a constant, create a multiplication
+/// with the multiplicative inverse of the constant.
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+ SelectionDAG &DAG) const {
+ ConstantSDNode *C = cast<ConstantSDNode>(Op2);
+ APInt d = C->getAPIntValue();
+ assert(d != 0 && "Division by zero!");
+
+ // Shift the value upfront if it is even, so the LSB is one.
+ unsigned ShAmt = d.countTrailingZeros();
+ if (ShAmt) {
+ // TODO: For UDIV use SRL instead of SRA.
+ SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType()));
+ Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt);
+ d = d.ashr(ShAmt);
+ }
+
+ // Calculate the multiplicative inverse, using Newton's method.
+ APInt t, xn = d;
+ while ((t = d*xn) != 1)
+ xn *= APInt(d.getBitWidth(), 2) - t;
+
+ Op2 = DAG.getConstant(xn, Op1.getValueType());
+ return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
+}
+
/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
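The Newton loop in BuildExactSDIV terminates quickly: for odd d, the seed x = d already satisfies d*x == 1 (mod 8), and each iteration x *= 2 - d*x doubles the number of correct low bits, so a 32-bit inverse needs at most a handful of steps. A standalone check of the same recurrence at fixed width:

#include <cstdint>
#include <cassert>

// Multiplicative inverse of an odd d modulo 2^32, via the same iteration
// BuildExactSDIV runs on APInts. Unsigned wraparound supplies the modulus.
uint32_t inverseMod2_32(uint32_t d) {
  assert((d & 1) && "only odd values are invertible mod 2^32");
  uint32_t x = d; // d*d == 1 (mod 8) for odd d, so the seed has 3 good bits
  while (d * x != 1)
    x *= 2 - d * x;
  return x;
}

int main() {
  uint32_t inv = inverseMod2_32(7); // 0xB6DB6DB7
  assert(7 * inv == 1u);
  // An exact division 21/7 becomes a multiply: 21 * inv wraps around to 3.
  assert(21 * inv == 3u);
  return 0;
}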
diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp
index 6ab0cb0..5a253a4 100644
--- a/lib/CodeGen/ShadowStackGC.cpp
+++ b/lib/CodeGen/ShadowStackGC.cpp
@@ -45,7 +45,8 @@ namespace {
/// StackEntryTy - Abstract type of a link in the shadow stack.
///
- const StructType *StackEntryTy;
+ StructType *StackEntryTy;
+ StructType *FrameMapTy;
/// Roots - GC roots in the current function. Each is a pair of the
/// intrinsic call and its corresponding alloca.
@@ -164,8 +165,7 @@ namespace {
InvokeInst *II = InvokeInst::Create(CI->getCalledValue(),
NewBB, CleanupBB,
- Args.begin(), Args.end(),
- CI->getName(), CallBB);
+ Args, CI->getName(), CallBB);
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
CI->replaceAllUsesWith(II);
@@ -194,31 +194,31 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
// Truncate the ShadowStackDescriptor if some metadata is null.
unsigned NumMeta = 0;
- SmallVector<Constant*,16> Metadata;
+ SmallVector<Constant*, 16> Metadata;
for (unsigned I = 0; I != Roots.size(); ++I) {
Constant *C = cast<Constant>(Roots[I].first->getArgOperand(1));
if (!C->isNullValue())
NumMeta = I + 1;
Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr));
}
+ Metadata.resize(NumMeta);
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+
Constant *BaseElts[] = {
- ConstantInt::get(Type::getInt32Ty(F.getContext()), Roots.size(), false),
- ConstantInt::get(Type::getInt32Ty(F.getContext()), NumMeta, false),
+ ConstantInt::get(Int32Ty, Roots.size(), false),
+ ConstantInt::get(Int32Ty, NumMeta, false),
};
Constant *DescriptorElts[] = {
- ConstantStruct::get(F.getContext(), BaseElts, 2, false),
- ConstantArray::get(ArrayType::get(VoidPtr, NumMeta),
- Metadata.begin(), NumMeta)
+ ConstantStruct::get(FrameMapTy, BaseElts),
+ ConstantArray::get(ArrayType::get(VoidPtr, NumMeta), Metadata)
};
- Constant *FrameMap = ConstantStruct::get(F.getContext(), DescriptorElts, 2,
- false);
-
- std::string TypeName("gc_map.");
- TypeName += utostr(NumMeta);
- F.getParent()->addTypeName(TypeName, FrameMap->getType());
+ Type *EltTys[] = { DescriptorElts[0]->getType(),DescriptorElts[1]->getType()};
+ StructType *STy = StructType::createNamed("gc_map."+utostr(NumMeta), EltTys);
+
+ Constant *FrameMap = ConstantStruct::get(STy, DescriptorElts);
// FIXME: Is this actually dangerous as WritingAnLLVMPass.html claims? Seems
// that, short of multithreaded LLVM, it should be safe; all that is
@@ -246,17 +246,12 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) {
const Type* ShadowStackGC::GetConcreteStackEntryType(Function &F) {
// doInitialization creates the generic version of this type.
- std::vector<const Type*> EltTys;
+ std::vector<Type*> EltTys;
EltTys.push_back(StackEntryTy);
for (size_t I = 0; I != Roots.size(); I++)
EltTys.push_back(Roots[I].second->getAllocatedType());
- Type *Ty = StructType::get(F.getContext(), EltTys);
-
- std::string TypeName("gc_stackentry.");
- TypeName += F.getName();
- F.getParent()->addTypeName(TypeName, Ty);
-
- return Ty;
+
+ return StructType::createNamed("gc_stackentry."+F.getName().str(), EltTys);
}
/// doInitialization - If this module uses the GC intrinsics, find them now. If
@@ -267,13 +262,12 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
// int32_t NumMeta; // Number of metadata descriptors. May be < NumRoots.
// void *Meta[]; // May be absent for roots without metadata.
// };
- std::vector<const Type*> EltTys;
+ std::vector<Type*> EltTys;
// 32 bits is ok up to a 32GB stack frame. :)
EltTys.push_back(Type::getInt32Ty(M.getContext()));
// Specifies length of variable length array.
EltTys.push_back(Type::getInt32Ty(M.getContext()));
- StructType *FrameMapTy = StructType::get(M.getContext(), EltTys);
- M.addTypeName("gc_map", FrameMapTy);
+ FrameMapTy = StructType::createNamed("gc_map", EltTys);
PointerType *FrameMapPtrTy = PointerType::getUnqual(FrameMapTy);
// struct StackEntry {
@@ -281,18 +275,14 @@ bool ShadowStackGC::initializeCustomLowering(Module &M) {
// FrameMap *Map; // Pointer to constant FrameMap.
// void *Roots[]; // Stack roots (in-place array, so we pretend).
// };
- OpaqueType *RecursiveTy = OpaqueType::get(M.getContext());
-
+
+ StackEntryTy = StructType::createNamed(M.getContext(), "gc_stackentry");
+
EltTys.clear();
- EltTys.push_back(PointerType::getUnqual(RecursiveTy));
+ EltTys.push_back(PointerType::getUnqual(StackEntryTy));
EltTys.push_back(FrameMapPtrTy);
- PATypeHolder LinkTyH = StructType::get(M.getContext(), EltTys);
-
- RecursiveTy->refineAbstractTypeTo(LinkTyH.get());
- StackEntryTy = cast<StructType>(LinkTyH.get());
+ StackEntryTy->setBody(EltTys);
const PointerType *StackEntryPtrTy = PointerType::getUnqual(StackEntryTy);
- M.addTypeName("gc_stackentry", LinkTyH.get()); // FIXME: Is this safe from
- // a FunctionPass?
// Get the root chain if it already exists.
Head = M.getGlobalVariable("llvm_gc_root_chain");
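This hunk is the commit's type-system migration in miniature: the old recursive-type dance (allocate an OpaqueType placeholder, build the struct around a pointer to it, then refineAbstractTypeTo to tie the knot) collapses into createNamed followed by setBody on one StructType. The same idiom in isolation, as a hedged sketch against the 3.0-era headers:

#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"

using namespace llvm;

// struct node { node *next; int32_t payload; } -- name first, body second,
// so the pointer-to-self element can be formed before the body is set.
StructType *makeNodeType(LLVMContext &Ctx) {
  StructType *Node = StructType::createNamed(Ctx, "node");
  Type *Elts[] = { PointerType::getUnqual(Node), Type::getInt32Ty(Ctx) };
  Node->setBody(Elts);
  return Node;
}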
@@ -399,7 +389,7 @@ bool ShadowStackGC::performCustomLowering(Function &F) {
Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, StackEntry,
0,1,"gc_frame.map");
- AtEntry.CreateStore(FrameMap, EntryMapPtr);
+ AtEntry.CreateStore(FrameMap, EntryMapPtr);
// After all the allocas...
for (unsigned I = 0, E = Roots.size(); I != E; ++I) {
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
deleted file mode 100644
index 221bec5..0000000
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ /dev/null
@@ -1,1539 +0,0 @@
-//===-- SimpleRegisterCoalescing.cpp - Register Coalescing ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a simple register coalescing pass that attempts to
-// aggressively coalesce every register copy that it can.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "regcoalescing"
-#include "SimpleRegisterCoalescing.h"
-#include "VirtRegMap.h"
-#include "LiveDebugVariables.h"
-#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/Value.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include <algorithm>
-#include <cmath>
-using namespace llvm;
-
-STATISTIC(numJoins , "Number of interval joins performed");
-STATISTIC(numCrossRCs , "Number of cross class joins performed");
-STATISTIC(numCommutes , "Number of instruction commuting performed");
-STATISTIC(numExtends , "Number of copies extended");
-STATISTIC(NumReMats , "Number of instructions re-materialized");
-STATISTIC(numPeep , "Number of identity moves eliminated after coalescing");
-STATISTIC(numAborts , "Number of times interval joining aborted");
-
-char SimpleRegisterCoalescing::ID = 0;
-static cl::opt<bool>
-EnableJoining("join-liveintervals",
- cl::desc("Coalesce copies (default=true)"),
- cl::init(true));
-
-static cl::opt<bool>
-DisableCrossClassJoin("disable-cross-class-join",
- cl::desc("Avoid coalescing cross register class copies"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-EnablePhysicalJoin("join-physregs",
- cl::desc("Join physical register copies"),
- cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-VerifyCoalescing("verify-coalescing",
- cl::desc("Verify machine instrs before and after register coalescing"),
- cl::Hidden);
-
-INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer,
- "simple-register-coalescing", "Simple Register Coalescing",
- false, false, true)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
-INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
-INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
-INITIALIZE_PASS_DEPENDENCY(PHIElimination)
-INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
-INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
-INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer,
- "simple-register-coalescing", "Simple Register Coalescing",
- false, false, true)
-
-char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID;
-
-void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<AliasAnalysis>();
- AU.addRequired<LiveIntervals>();
- AU.addPreserved<LiveIntervals>();
- AU.addRequired<LiveDebugVariables>();
- AU.addPreserved<LiveDebugVariables>();
- AU.addPreserved<SlotIndexes>();
- AU.addRequired<MachineLoopInfo>();
- AU.addPreserved<MachineLoopInfo>();
- AU.addPreservedID(MachineDominatorsID);
- AU.addPreservedID(StrongPHIEliminationID);
- AU.addPreservedID(PHIEliminationID);
- AU.addPreservedID(TwoAddressInstructionPassID);
- MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-void SimpleRegisterCoalescing::markAsJoined(MachineInstr *CopyMI) {
- /// Joined copies are not deleted immediately, but kept in JoinedCopies.
- JoinedCopies.insert(CopyMI);
-
- /// Mark all register operands of CopyMI as <undef> so they won't affect dead
- /// code elimination.
- for (MachineInstr::mop_iterator I = CopyMI->operands_begin(),
- E = CopyMI->operands_end(); I != E; ++I)
- if (I->isReg())
- I->setIsUndef(true);
-}
-
-/// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
-/// being the source and IntB being the dest, thus this defines a value number
-/// in IntB. If the source value number (in IntA) is defined by a copy from B,
-/// see if we can merge these two pieces of B into a single value number,
-/// eliminating a copy. For example:
-///
-/// A3 = B0
-/// ...
-/// B1 = A3 <- this copy
-///
-/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
-/// value number to be replaced with B0 (which simplifies the B liveinterval).
-///
-/// This returns true if an interval was modified.
-///
-bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
- // Bail if there is no dst interval - can happen when merging physical subreg
- // operations.
- if (!li_->hasInterval(CP.getDstReg()))
- return false;
-
- LiveInterval &IntA =
- li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
- LiveInterval &IntB =
- li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
- SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
-
- // BValNo is a value number in B that is defined by a copy from A. 'B1' in
- // the example above.
- LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
- if (BLR == IntB.end()) return false;
- VNInfo *BValNo = BLR->valno;
-
- // Get the location that B is defined at. Two options: either this value has
- // an unknown definition point or it is defined at CopyIdx. If unknown, we
- // can't process it.
- if (!BValNo->isDefByCopy()) return false;
- assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
-
- // AValNo is the value number in A that defines the copy, A3 in the example.
- SlotIndex CopyUseIdx = CopyIdx.getUseIndex();
- LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
- // The live range might not exist after fun with physreg coalescing.
- if (ALR == IntA.end()) return false;
- VNInfo *AValNo = ALR->valno;
- // If it's re-defined by an early clobber somewhere in the live range, then
- // it's not safe to eliminate the copy. FIXME: This is a temporary workaround.
- // See PR3149:
- // 172 %ECX<def> = MOV32rr %reg1039<kill>
- // 180 INLINEASM <es:subl $5,$1
- // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
- // %EAX<kill>,
- // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
- // 188 %EAX<def> = MOV32rr %EAX<kill>
- // 196 %ECX<def> = MOV32rr %ECX<kill>
- // 204 %ECX<def> = MOV32rr %ECX<kill>
- // 212 %EAX<def> = MOV32rr %EAX<kill>
- // 220 %EAX<def> = MOV32rr %EAX
- // 228 %reg1039<def> = MOV32rr %ECX<kill>
- // The early clobber operand ties ECX input to the ECX def.
- //
- // The live interval of ECX is represented as this:
- // %reg20,inf = [46,47:1)[174,230:0) 0@174-(230) 1@46-(47)
- // The coalescer has no idea there was a def in the middle of [174,230].
- if (AValNo->hasRedefByEC())
- return false;
-
- // If AValNo is defined as a copy from IntB, we can potentially process this.
- // Get the instruction that defines this value number.
- if (!CP.isCoalescable(AValNo->getCopy()))
- return false;
-
- // Get the LiveRange in IntB that this value number starts with.
- LiveInterval::iterator ValLR =
- IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
- if (ValLR == IntB.end())
- return false;
-
- // Make sure that the end of the live range is inside the same block as
- // CopyMI.
- MachineInstr *ValLREndInst =
- li_->getInstructionFromIndex(ValLR->end.getPrevSlot());
- if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
- return false;
-
- // Okay, we now know that ValLR ends in the same block that the CopyMI
- // live-range starts. If there are no intervening live ranges between them in
- // IntB, we can merge them.
- if (ValLR+1 != BLR) return false;
-
- // If a live interval is a physical register, conservatively check if any
- // of its aliases is overlapping the live interval of the virtual register.
- // If so, do not coalesce.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
- if (li_->hasInterval(*AS) && IntA.overlaps(li_->getInterval(*AS))) {
- DEBUG({
- dbgs() << "\t\tInterfere with alias ";
- li_->getInterval(*AS).print(dbgs(), tri_);
- });
- return false;
- }
- }
-
- DEBUG({
- dbgs() << "Extending: ";
- IntB.print(dbgs(), tri_);
- });
-
- SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
- // We are about to delete CopyMI, so need to remove it as the 'instruction
- // that defines this value #'. Update the valnum with the new defining
- // instruction #.
- BValNo->def = FillerStart;
- BValNo->setCopy(0);
-
- // Okay, we can merge them. We need to insert a new liverange:
- // [ValLR.end, BLR.begin) of either value number, then we merge the
- // two value numbers.
- IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
-
- // If the IntB live range is assigned to a physical register, and if that
- // physreg has sub-registers, update their live intervals as well.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
- continue;
- LiveInterval &SRLI = li_->getInterval(*SR);
- SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart, 0,
- li_->getVNInfoAllocator())));
- }
- }
-
- // Okay, merge "B1" into the same value number as "B0".
- if (BValNo != ValLR->valno) {
- // If B1 is killed by a PHI, then the merged live range must also be killed
- // by the same PHI, as B0 and B1 can not overlap.
- bool HasPHIKill = BValNo->hasPHIKill();
- IntB.MergeValueNumberInto(BValNo, ValLR->valno);
- if (HasPHIKill)
- ValLR->valno->setHasPHIKill(true);
- }
- DEBUG({
- dbgs() << " result = ";
- IntB.print(dbgs(), tri_);
- dbgs() << "\n";
- });
-
- // If the source instruction was killing the source register before the
- // merge, unset the isKill marker given the live range has been extended.
- int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
- if (UIdx != -1) {
- ValLREndInst->getOperand(UIdx).setIsKill(false);
- }
-
- // If the copy instruction was killing the destination register before the
- // merge, find the last use and trim the live range. That will also add the
- // isKill marker.
- if (ALR->end == CopyIdx)
- li_->shrinkToUses(&IntA);
-
- ++numExtends;
- return true;
-}
-
-/// HasOtherReachingDefs - Return true if there are definitions of IntB
-/// other than BValNo val# that can reach uses of AValNo val# of IntA.
-bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
- LiveInterval &IntB,
- VNInfo *AValNo,
- VNInfo *BValNo) {
- for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
- AI != AE; ++AI) {
- if (AI->valno != AValNo) continue;
- LiveInterval::Ranges::iterator BI =
- std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
- if (BI != IntB.ranges.begin())
- --BI;
- for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
- if (BI->valno == BValNo)
- continue;
- if (BI->start <= AI->start && BI->end > AI->start)
- return true;
- if (BI->start > AI->start && BI->start < AI->end)
- return true;
- }
- }
- return false;
-}
-
-/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
-/// IntA being the source and IntB being the dest, thus this defines a value
-/// number in IntB. If the source value number (in IntA) is defined by a
-/// commutable instruction and its other operand is coalesced to the copy dest
-/// register, see if we can transform the copy into a noop by commuting the
-/// definition. For example,
-///
-/// A3 = op A2 B0<kill>
-/// ...
-/// B1 = A3 <- this copy
-/// ...
-/// = op A3 <- more uses
-///
-/// ==>
-///
-/// B2 = op B0 A2<kill>
-/// ...
-/// B1 = B2 <- now an identity copy
-/// ...
-/// = op B2 <- more uses
-///
-/// This returns true if an interval was modified.
-///
-bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
- MachineInstr *CopyMI) {
- // FIXME: For now, only eliminate the copy by commuting its def when the
- // source register is a virtual register. We want to guard against cases
- // where the copy is a back edge copy and commuting the def lengthens the
- // live interval of the source register to the entire loop.
- if (CP.isPhys() && CP.isFlipped())
- return false;
-
- // Bail if there is no dst interval.
- if (!li_->hasInterval(CP.getDstReg()))
- return false;
-
- SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
-
- LiveInterval &IntA =
- li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
- LiveInterval &IntB =
- li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
-
- // BValNo is a value number in B that is defined by a copy from A. 'B1' in
- // the example above.
- VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
- if (!BValNo || !BValNo->isDefByCopy())
- return false;
-
- assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
-
- // AValNo is the value number in A that defines the copy, A3 in the example.
- VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
- assert(AValNo && "COPY source not live");
-
- // If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
- return false;
- MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
- if (!DefMI)
- return false;
- const TargetInstrDesc &TID = DefMI->getDesc();
- if (!TID.isCommutable())
- return false;
- // If DefMI is a two-address instruction then commuting it will change the
- // destination register.
- int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
- assert(DefIdx != -1);
- unsigned UseOpIdx;
- if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
- return false;
- unsigned Op1, Op2, NewDstIdx;
- if (!tii_->findCommutedOpIndices(DefMI, Op1, Op2))
- return false;
- if (Op1 == UseOpIdx)
- NewDstIdx = Op2;
- else if (Op2 == UseOpIdx)
- NewDstIdx = Op1;
- else
- return false;
-
- MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
- unsigned NewReg = NewDstMO.getReg();
- if (NewReg != IntB.reg || !NewDstMO.isKill())
- return false;
-
- // Make sure there are no other definitions of IntB that would reach the
- // uses which the new definition can reach.
- if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
- return false;
-
- // Abort if the aliases of IntB.reg have values that are not simply the
- // clobbers from the superreg.
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg))
- for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS)
- if (li_->hasInterval(*AS) &&
- HasOtherReachingDefs(IntA, li_->getInterval(*AS), AValNo, 0))
- return false;
-
- // If some of the uses of IntA.reg are already coalesced away, return false.
- // It's not possible to determine whether it's safe to perform the coalescing.
- for (MachineRegisterInfo::use_nodbg_iterator UI =
- mri_->use_nodbg_begin(IntA.reg),
- UE = mri_->use_nodbg_end(); UI != UE; ++UI) {
- MachineInstr *UseMI = &*UI;
- SlotIndex UseIdx = li_->getInstructionIndex(UseMI);
- LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
- if (ULR == IntA.end())
- continue;
- if (ULR->valno == AValNo && JoinedCopies.count(UseMI))
- return false;
- }
-
- DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
- << *DefMI);
-
- // At this point we have decided that it is legal to do this
- // transformation. Start by commuting the instruction.
- MachineBasicBlock *MBB = DefMI->getParent();
- MachineInstr *NewMI = tii_->commuteInstruction(DefMI);
- if (!NewMI)
- return false;
- if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
- TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
- !mri_->constrainRegClass(IntB.reg, mri_->getRegClass(IntA.reg)))
- return false;
- if (NewMI != DefMI) {
- li_->ReplaceMachineInstrInMaps(DefMI, NewMI);
- MBB->insert(DefMI, NewMI);
- MBB->erase(DefMI);
- }
- unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
- NewMI->getOperand(OpIdx).setIsKill();
-
- // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
- // A = or A, B
- // ...
- // B = A
- // ...
- // C = A<kill>
- // ...
- // = B
-
- // Update uses of IntA of the specific Val# with IntB.
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
- UE = mri_->use_end(); UI != UE;) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = &*UI;
- ++UI;
- if (JoinedCopies.count(UseMI))
- continue;
- if (UseMI->isDebugValue()) {
- // FIXME These don't have an instruction index. Not clear we have enough
- // info to decide whether to do this replacement or not. For now do it.
- UseMO.setReg(NewReg);
- continue;
- }
- SlotIndex UseIdx = li_->getInstructionIndex(UseMI).getUseIndex();
- LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
- if (ULR == IntA.end() || ULR->valno != AValNo)
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(NewReg))
- UseMO.substPhysReg(NewReg, *tri_);
- else
- UseMO.setReg(NewReg);
- if (UseMI == CopyMI)
- continue;
- if (!UseMI->isCopy())
- continue;
- if (UseMI->getOperand(0).getReg() != IntB.reg ||
- UseMI->getOperand(0).getSubReg())
- continue;
-
- // This copy will become a noop. If it's defining a new val#, merge it into
- // BValNo.
- SlotIndex DefIdx = UseIdx.getDefIndex();
- VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
- if (!DVNI)
- continue;
- DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
- assert(DVNI->def == DefIdx);
- BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
- markAsJoined(UseMI);
- }
-
- // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
- // is updated.
- VNInfo *ValNo = BValNo;
- ValNo->def = AValNo->def;
- ValNo->setCopy(0);
- for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
- AI != AE; ++AI) {
- if (AI->valno != AValNo) continue;
- IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
- }
- DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
-
- IntA.removeValNo(AValNo);
- DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
- ++numCommutes;
- return true;
-}
-
-/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
-/// computation, replace the copy by rematerializing the definition.
-bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
- bool preserveSrcInt,
- unsigned DstReg,
- unsigned DstSubIdx,
- MachineInstr *CopyMI) {
- SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getUseIndex();
- LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
- assert(SrcLR != SrcInt.end() && "Live range not found!");
- VNInfo *ValNo = SrcLR->valno;
- // If other defs can reach uses of this def, then it's not safe to perform
- // the optimization.
- if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
- return false;
- MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
- if (!DefMI)
- return false;
- assert(DefMI && "Defining instruction disappeared");
- const TargetInstrDesc &TID = DefMI->getDesc();
- if (!TID.isAsCheapAsAMove())
- return false;
- if (!tii_->isTriviallyReMaterializable(DefMI, AA))
- return false;
- bool SawStore = false;
- if (!DefMI->isSafeToMove(tii_, AA, SawStore))
- return false;
- if (TID.getNumDefs() != 1)
- return false;
- if (!DefMI->isImplicitDef()) {
- // Make sure the copy destination register class fits the instruction
- // definition register class. The mismatch can happen as a result of earlier
- // extract_subreg, insert_subreg, subreg_to_reg coalescing.
- const TargetRegisterClass *RC = TID.OpInfo[0].getRegClass(tri_);
- if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
- if (mri_->getRegClass(DstReg) != RC)
- return false;
- } else if (!RC->contains(DstReg))
- return false;
- }
-
- // If destination register has a sub-register index on it, make sure it
- // matches the instruction register class.
- if (DstSubIdx) {
- const TargetInstrDesc &TID = DefMI->getDesc();
- if (TID.getNumDefs() != 1)
- return false;
- const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg);
- const TargetRegisterClass *DstSubRC =
- DstRC->getSubRegisterRegClass(DstSubIdx);
- const TargetRegisterClass *DefRC = TID.OpInfo[0].getRegClass(tri_);
- if (DefRC == DstRC)
- DstSubIdx = 0;
- else if (DefRC != DstSubRC)
- return false;
- }
-
- RemoveCopyFlag(DstReg, CopyMI);
-
- MachineBasicBlock *MBB = CopyMI->getParent();
- MachineBasicBlock::iterator MII =
- llvm::next(MachineBasicBlock::iterator(CopyMI));
- tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
- MachineInstr *NewMI = prior(MII);
-
- // CopyMI may have implicit operands, transfer them over to the newly
- // rematerialized instruction. And update implicit def interval valnos.
- for (unsigned i = CopyMI->getDesc().getNumOperands(),
- e = CopyMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = CopyMI->getOperand(i);
- if (MO.isReg() && MO.isImplicit())
- NewMI->addOperand(MO);
- if (MO.isDef())
- RemoveCopyFlag(MO.getReg(), CopyMI);
- }
-
- NewMI->copyImplicitOps(CopyMI);
- li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
- CopyMI->eraseFromParent();
- ReMatCopies.insert(CopyMI);
- ReMatDefs.insert(DefMI);
- DEBUG(dbgs() << "Remat: " << *NewMI);
- ++NumReMats;
-
- // The source interval can become smaller because we removed a use.
- if (preserveSrcInt)
- li_->shrinkToUses(&SrcInt);
-
- return true;
-}
-
-/// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
-/// update the subregister number if it is not zero. If DstReg is a
-/// physical register and the existing subregister number of the def / use
-/// being updated is not zero, make sure to set it to the correct physical
-/// subregister.
-void
-SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
- bool DstIsPhys = CP.isPhys();
- unsigned SrcReg = CP.getSrcReg();
- unsigned DstReg = CP.getDstReg();
- unsigned SubIdx = CP.getSubIdx();
-
- // Update LiveDebugVariables.
- ldv_->renameRegister(SrcReg, DstReg, SubIdx);
-
- for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
- MachineInstr *UseMI = I.skipInstruction();) {
- // A PhysReg copy that won't be coalesced can perhaps be rematerialized
- // instead.
- if (DstIsPhys) {
- if (UseMI->isCopy() &&
- !UseMI->getOperand(1).getSubReg() &&
- !UseMI->getOperand(0).getSubReg() &&
- UseMI->getOperand(1).getReg() == SrcReg &&
- UseMI->getOperand(0).getReg() != SrcReg &&
- UseMI->getOperand(0).getReg() != DstReg &&
- !JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
- UseMI->getOperand(0).getReg(), 0, UseMI))
- continue;
- }
-
- SmallVector<unsigned,8> Ops;
- bool Reads, Writes;
- tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
- bool Kills = false, Deads = false;
-
- // Replace SrcReg with DstReg in all UseMI operands.
- for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
- MachineOperand &MO = UseMI->getOperand(Ops[i]);
- Kills |= MO.isKill();
- Deads |= MO.isDead();
-
- if (DstIsPhys)
- MO.substPhysReg(DstReg, *tri_);
- else
- MO.substVirtReg(DstReg, SubIdx, *tri_);
- }
-
- // This instruction is a copy that will be removed.
- if (JoinedCopies.count(UseMI))
- continue;
-
- if (SubIdx) {
- // If UseMI was a simple SrcReg def, make sure we didn't turn it into a
- // read-modify-write of DstReg.
- if (Deads)
- UseMI->addRegisterDead(DstReg, tri_);
- else if (!Reads && Writes)
- UseMI->addRegisterDefined(DstReg, tri_);
-
- // Kill flags apply to the whole physical register.
- if (DstIsPhys && Kills)
- UseMI->addRegisterKilled(DstReg, tri_);
- }
-
- DEBUG({
- dbgs() << "\t\tupdated: ";
- if (!UseMI->isDebugValue())
- dbgs() << li_->getInstructionIndex(UseMI) << "\t";
- dbgs() << *UseMI;
- });
- }
-}
-
-/// removeIntervalIfEmpty - Check if the live interval of a physical register
-/// is empty, if so remove it and also remove the empty intervals of its
-/// sub-registers. Return true if live interval is removed.
-static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *li_,
- const TargetRegisterInfo *tri_) {
- if (li.empty()) {
- if (TargetRegisterInfo::isPhysicalRegister(li.reg))
- for (const unsigned* SR = tri_->getSubRegisters(li.reg); *SR; ++SR) {
- if (!li_->hasInterval(*SR))
- continue;
- LiveInterval &sli = li_->getInterval(*SR);
- if (sli.empty())
- li_->removeInterval(*SR);
- }
- li_->removeInterval(li.reg);
- return true;
- }
- return false;
-}
-
-/// RemoveDeadDef - If a def of a live interval is now determined dead, remove
-/// the val# it defines. If the live interval becomes empty, remove it as well.
-bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li,
- MachineInstr *DefMI) {
- SlotIndex DefIdx = li_->getInstructionIndex(DefMI).getDefIndex();
- LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx);
- if (DefIdx != MLR->valno->def)
- return false;
- li.removeValNo(MLR->valno);
- return removeIntervalIfEmpty(li, li_, tri_);
-}
-
-void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
- const MachineInstr *CopyMI) {
- SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
- if (li_->hasInterval(DstReg)) {
- LiveInterval &LI = li_->getInterval(DstReg);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->def == DefIdx)
- LR->valno->setCopy(0);
- }
- if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
- return;
- for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) {
- if (!li_->hasInterval(*AS))
- continue;
- LiveInterval &LI = li_->getInterval(*AS);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->def == DefIdx)
- LR->valno->setCopy(0);
- }
-}
-
-/// shouldJoinPhys - Return true if a copy involving a physreg should be joined.
-/// We need to be careful about coalescing a source physical register with a
-/// virtual register. Once the coalescing is done, it cannot be broken and these
-/// are not spillable! If the destination interval uses are far away, think
-/// twice about coalescing them!
-bool SimpleRegisterCoalescing::shouldJoinPhys(CoalescerPair &CP) {
- bool Allocatable = li_->isAllocatable(CP.getDstReg());
- LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg());
-
- /// Always join simple intervals that are defined by a single copy from a
- /// reserved register. This doesn't increase register pressure, so it is
- /// always beneficial.
- if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue())
- return true;
-
- if (!EnablePhysicalJoin) {
- DEBUG(dbgs() << "\tPhysreg joins disabled.\n");
- return false;
- }
-
- // Only coalesce to allocatable physreg, we don't want to risk modifying
- // reserved registers.
- if (!Allocatable) {
- DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
- return false; // Not coalescable.
- }
-
- // Don't join with physregs that have a ridiculous number of live
- // ranges. The data structure performance is really bad when that
- // happens.
- if (li_->hasInterval(CP.getDstReg()) &&
- li_->getInterval(CP.getDstReg()).ranges.size() > 1000) {
- ++numAborts;
- DEBUG(dbgs()
- << "\tPhysical register live interval too complicated, abort!\n");
- return false;
- }
-
- // FIXME: Why are we skipping this test for partial copies?
- // CodeGen/X86/phys_subreg_coalesce-3.ll needs it.
- if (!CP.isPartial()) {
- const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg());
- unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2;
- unsigned Length = li_->getApproximateInstructionCount(JoinVInt);
- if (Length > Threshold) {
- ++numAborts;
- DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n");
- return false;
- }
- }
- return true;
-}
-
-/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
-/// two virtual registers from different register classes.
-bool
-SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
- unsigned DstReg,
- const TargetRegisterClass *SrcRC,
- const TargetRegisterClass *DstRC,
- const TargetRegisterClass *NewRC) {
- unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC);
- // This heuristic is good enough in practice, but it's obviously not *right*.
- // 4 is a magic number that works well enough for x86, ARM, etc. It filters
- // out all but the most restrictive register classes.
- if (NewRCCount > 4 ||
- // Early exit if the function is fairly small, coalesce aggressively if
- // that's the case. For really special register classes with 3 or
- // fewer registers, be a bit more careful.
- (li_->getFuncInstructionCount() / NewRCCount) < 8)
- return true;
- LiveInterval &SrcInt = li_->getInterval(SrcReg);
- LiveInterval &DstInt = li_->getInterval(DstReg);
- unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
- unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
-
- // Coalesce aggressively if the intervals are small compared to the number of
- // registers in the new class. The number 4 is fairly arbitrary, chosen to be
- // less aggressive than the 8 used for the whole function size.
- const unsigned ThresSize = 4 * NewRCCount;
- if (SrcSize <= ThresSize && DstSize <= ThresSize)
- return true;
-
- // Estimate *register use density*. If it doubles or more, abort.
- unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
- mri_->use_nodbg_end());
- unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
- mri_->use_nodbg_end());
- unsigned NewUses = SrcUses + DstUses;
- unsigned NewSize = SrcSize + DstSize;
- if (SrcRC != NewRC && SrcSize > ThresSize) {
- unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC);
- if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount)
- return false;
- }
- if (DstRC != NewRC && DstSize > ThresSize) {
- unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC);
- if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount)
- return false;
- }
- return true;
-}
-
-
-/// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
-/// which are the src/dst of the copy instruction CopyMI. This returns true
-/// if the copy was successfully coalesced away. If it is not currently
-/// possible to coalesce this interval, but it may be possible if other
-/// things get coalesced, then it returns true by reference in 'Again'.
-bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
- MachineInstr *CopyMI = TheCopy.MI;
-
- Again = false;
- if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI))
- return false; // Already done.
-
- DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
-
- CoalescerPair CP(*tii_, *tri_);
- if (!CP.setRegisters(CopyMI)) {
- DEBUG(dbgs() << "\tNot coalescable.\n");
- return false;
- }
-
- // If they are already joined we continue.
- if (CP.getSrcReg() == CP.getDstReg()) {
- markAsJoined(CopyMI);
- DEBUG(dbgs() << "\tCopy already coalesced.\n");
- return false; // Not coalescable.
- }
-
- DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_)
- << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
- << "\n");
-
- // Enforce policies.
- if (CP.isPhys()) {
- if (!shouldJoinPhys(CP)) {
- // Before giving up coalescing, if definition of source is defined by
- // trivial computation, try rematerializing it.
- if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
- CP.getDstReg(), 0, CopyMI))
- return true;
- return false;
- }
- } else {
- // Avoid constraining virtual register regclass too much.
- if (CP.isCrossClass()) {
- DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n");
- if (DisableCrossClassJoin) {
- DEBUG(dbgs() << "\tCross-class joins disabled.\n");
- return false;
- }
- if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(),
- mri_->getRegClass(CP.getSrcReg()),
- mri_->getRegClass(CP.getDstReg()),
- CP.getNewRC())) {
- DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
- }
-
- // When possible, let DstReg be the larger interval.
- if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() >
- li_->getInterval(CP.getDstReg()).ranges.size())
- CP.flip();
- }
-
- // Okay, attempt to join these two intervals. On failure, this returns false.
- // Otherwise, if one of the intervals being joined is a physreg, this method
- // always canonicalizes DstInt to be it. The output "SrcInt" will not have
- // been modified, so we can use this information below to update aliases.
- if (!JoinIntervals(CP)) {
- // Coalescing failed.
-
- // If definition of source is defined by trivial computation, try
- // rematerializing it.
- if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
- CP.getDstReg(), 0, CopyMI))
- return true;
-
- // If we can eliminate the copy without merging the live ranges, do so now.
- if (!CP.isPartial()) {
- if (AdjustCopiesBackFrom(CP, CopyMI) ||
- RemoveCopyByCommutingDef(CP, CopyMI)) {
- markAsJoined(CopyMI);
- DEBUG(dbgs() << "\tTrivial!\n");
- return true;
- }
- }
-
- // Otherwise, we are unable to join the intervals.
- DEBUG(dbgs() << "\tInterference!\n");
- Again = true; // May be possible to coalesce later.
- return false;
- }
-
- // Coalescing to a virtual register that is of a sub-register class of the
- // other. Make sure the resulting register is set to the right register class.
- if (CP.isCrossClass()) {
- ++numCrossRCs;
- mri_->setRegClass(CP.getDstReg(), CP.getNewRC());
- }
-
- // Remember to delete the copy instruction.
- markAsJoined(CopyMI);
-
- UpdateRegDefsUses(CP);
-
- // If we have extended the live range of a physical register, make sure we
- // update live-in lists as well.
- if (CP.isPhys()) {
- SmallVector<MachineBasicBlock*, 16> BlockSeq;
- // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the
- // ranges for this, and they are preserved.
- LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg());
- for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end();
- I != E; ++I ) {
- li_->findLiveInMBBs(I->start, I->end, BlockSeq);
- for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) {
- MachineBasicBlock &block = *BlockSeq[idx];
- if (!block.isLiveIn(CP.getDstReg()))
- block.addLiveIn(CP.getDstReg());
- }
- BlockSeq.clear();
- }
- }
-
- // SrcReg is guaranteed to be the register whose live interval is
- // being merged.
- li_->removeInterval(CP.getSrcReg());
-
- // Update regalloc hint.
- tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_);
-
- DEBUG({
- LiveInterval &DstInt = li_->getInterval(CP.getDstReg());
- dbgs() << "\tJoined. Result = ";
- DstInt.print(dbgs(), tri_);
- dbgs() << "\n";
- });
-
- ++numJoins;
- return true;
-}
-
-/// ComputeUltimateVN - Assuming we are going to join two live intervals,
-/// compute what the resultant value numbers for each value in the input two
-/// ranges will be. This is complicated by copies between the two which can
-/// and will commonly cause multiple value numbers to be merged into one.
-///
-/// VN is the value number that we're trying to resolve. InstDefiningValue
-/// keeps track of the new InstDefiningValue assignment for the result
-/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
-/// whether a value in this or other is a copy from the opposite set.
-/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
-/// already been assigned.
-///
-/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
-/// contains the value number the copy is from.
-///
-static unsigned ComputeUltimateVN(VNInfo *VNI,
- SmallVector<VNInfo*, 16> &NewVNInfo,
- DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
- DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
- SmallVector<int, 16> &ThisValNoAssignments,
- SmallVector<int, 16> &OtherValNoAssignments) {
- unsigned VN = VNI->id;
-
- // If the VN has already been computed, just return it.
- if (ThisValNoAssignments[VN] >= 0)
- return ThisValNoAssignments[VN];
- assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
-
- // If this val is not a copy from the other val, then it must be a new value
- // number in the destination.
- DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
- if (I == ThisFromOther.end()) {
- NewVNInfo.push_back(VNI);
- return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
- }
- VNInfo *OtherValNo = I->second;
-
- // Otherwise, this *is* a copy from the RHS. If the other side has already
- // been computed, return it.
- if (OtherValNoAssignments[OtherValNo->id] >= 0)
- return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];
-
- // Mark this value number as currently being computed, then ask what the
- // ultimate value # of the other value is.
- ThisValNoAssignments[VN] = -2;
- unsigned UltimateVN =
- ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
- OtherValNoAssignments, ThisValNoAssignments);
- return ThisValNoAssignments[VN] = UltimateVN;
-}
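
The three sentinel states in the assignment vectors do all the work here: -1 means unassigned, -2 means a resolution is in flight (to trip the cyclic-value assert), and anything non-negative is final. Below is a standalone sketch of the same memoized mutual recursion, with plain ints standing in for VNInfo and all names invented for illustration:

    #include <cassert>
    #include <map>
    #include <vector>

    // Simplified stand-in for ComputeUltimateVN: values are ints, and
    // ThisFromOther maps a value to the value it copies on the other side.
    // Assignments use -1 for "unassigned", -2 for "being resolved".
    static int resolveValue(int VN,
                            std::vector<int> &NewValues,
                            const std::map<int, int> &ThisFromOther,
                            const std::map<int, int> &OtherFromThis,
                            std::vector<int> &ThisAssign,
                            std::vector<int> &OtherAssign) {
      if (ThisAssign[VN] >= 0)
        return ThisAssign[VN];                // Already resolved.
      assert(ThisAssign[VN] != -2 && "cyclic value numbers");

      std::map<int, int>::const_iterator I = ThisFromOther.find(VN);
      if (I == ThisFromOther.end()) {         // Not a copy: fresh number.
        NewValues.push_back(VN);
        return ThisAssign[VN] = NewValues.size() - 1;
      }
      int OtherVN = I->second;
      if (OtherAssign[OtherVN] >= 0)          // Other side already resolved.
        return ThisAssign[VN] = OtherAssign[OtherVN];

      ThisAssign[VN] = -2;                    // Mark in progress, then
      return ThisAssign[VN] =                 // recurse with sides swapped.
          resolveValue(OtherVN, NewValues, OtherFromThis, ThisFromOther,
                       OtherAssign, ThisAssign);
    }
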
-
-/// JoinIntervals - Attempt to join these two intervals. On failure, this
-/// returns false.
-bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
- LiveInterval &RHS = li_->getInterval(CP.getSrcReg());
- DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; });
-
- // If a live interval is a physical register, check for interference with any
- // aliases. The interference check implemented here is a bit more conservative
-  // than the full interference check below. We allow overlapping live ranges
- // only when one is a copy of the other.
- if (CP.isPhys()) {
- for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){
- if (!li_->hasInterval(*AS))
- continue;
- const LiveInterval &LHS = li_->getInterval(*AS);
- LiveInterval::const_iterator LI = LHS.begin();
- for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end();
- RI != RE; ++RI) {
- LI = std::lower_bound(LI, LHS.end(), RI->start);
- // Does LHS have an overlapping live range starting before RI?
- if ((LI != LHS.begin() && LI[-1].end > RI->start) &&
- (RI->start != RI->valno->def ||
- !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) {
- DEBUG({
- dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), tri_);
- dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n";
- });
- return false;
- }
-
- // Check that LHS ranges beginning in this range are copies.
- for (; LI != LHS.end() && LI->start < RI->end; ++LI) {
- if (LI->start != LI->valno->def ||
- !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) {
- DEBUG({
- dbgs() << "\t\tInterference from alias: ";
- LHS.print(dbgs(), tri_);
- dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n";
- });
- return false;
- }
- }
- }
- }
- }
-
- // Compute the final value assignment, assuming that the live ranges can be
- // coalesced.
- SmallVector<int, 16> LHSValNoAssignments;
- SmallVector<int, 16> RHSValNoAssignments;
- DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
- DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
- SmallVector<VNInfo*, 16> NewVNInfo;
-
- LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg());
- DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; });
-
- // Loop over the value numbers of the LHS, seeing if any are defined from
- // the RHS.
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
- continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
-
- // DstReg is known to be a register in the LHS interval. If the src is
- // from the RHS interval, we can use its value #.
- if (!CP.isCoalescable(VNI->getCopy()))
- continue;
-
- // Figure out the value # from the RHS.
- LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot());
- // The copy could be to an aliased physreg.
- if (!lr) continue;
- LHSValsDefinedFromRHS[VNI] = lr->valno;
- }
-
- // Loop over the value numbers of the RHS, seeing if any are defined from
- // the LHS.
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
- continue;
-
- // Never join with a register that has EarlyClobber redefs.
- if (VNI->hasRedefByEC())
- return false;
-
- // DstReg is known to be a register in the RHS interval. If the src is
- // from the LHS interval, we can use its value #.
- if (!CP.isCoalescable(VNI->getCopy()))
- continue;
-
- // Figure out the value # from the LHS.
- LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot());
- // The copy could be to an aliased physreg.
- if (!lr) continue;
- RHSValsDefinedFromLHS[VNI] = lr->valno;
- }
-
- LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
- RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
- NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());
-
- for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
- continue;
- ComputeUltimateVN(VNI, NewVNInfo,
- LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
- LHSValNoAssignments, RHSValNoAssignments);
- }
- for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
- i != e; ++i) {
- VNInfo *VNI = *i;
- unsigned VN = VNI->id;
- if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
- continue;
- // If this value number isn't a copy from the LHS, it's a new number.
- if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
- NewVNInfo.push_back(VNI);
- RHSValNoAssignments[VN] = NewVNInfo.size()-1;
- continue;
- }
-
- ComputeUltimateVN(VNI, NewVNInfo,
- RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
- RHSValNoAssignments, LHSValNoAssignments);
- }
-
- // Armed with the mappings of LHS/RHS values to ultimate values, walk the
- // interval lists to see if these intervals are coalescable.
- LiveInterval::const_iterator I = LHS.begin();
- LiveInterval::const_iterator IE = LHS.end();
- LiveInterval::const_iterator J = RHS.begin();
- LiveInterval::const_iterator JE = RHS.end();
-
- // Skip ahead until the first place of potential sharing.
- if (I != IE && J != JE) {
- if (I->start < J->start) {
- I = std::upper_bound(I, IE, J->start);
- if (I != LHS.begin()) --I;
- } else if (J->start < I->start) {
- J = std::upper_bound(J, JE, I->start);
- if (J != RHS.begin()) --J;
- }
- }
-
- while (I != IE && J != JE) {
- // Determine if these two live ranges overlap.
- bool Overlaps;
- if (I->start < J->start) {
- Overlaps = I->end > J->start;
- } else {
- Overlaps = J->end > I->start;
- }
-
- // If so, check value # info to determine if they are really different.
- if (Overlaps) {
-      // If the overlapping live ranges map to the same value number in the
-      // result live range, we can still coalesce them. If not, we can't.
- if (LHSValNoAssignments[I->valno->id] !=
- RHSValNoAssignments[J->valno->id])
- return false;
- // If it's re-defined by an early clobber somewhere in the live range,
- // then conservatively abort coalescing.
- if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
- return false;
- }
-
- if (I->end < J->end)
- ++I;
- else
- ++J;
- }
-
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(),
- E = LHSValsDefinedFromRHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned LHSValID = LHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[LHSValID]->setHasPHIKill(true);
- }
-
- // Update kill info. Some live ranges are extended due to copy coalescing.
- for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(),
- E = RHSValsDefinedFromLHS.end(); I != E; ++I) {
- VNInfo *VNI = I->first;
- unsigned RHSValID = RHSValNoAssignments[VNI->id];
- if (VNI->hasPHIKill())
- NewVNInfo[RHSValID]->setHasPHIKill(true);
- }
-
- if (LHSValNoAssignments.empty())
- LHSValNoAssignments.push_back(-1);
- if (RHSValNoAssignments.empty())
- RHSValNoAssignments.push_back(-1);
-
- // If we get here, we know that we can coalesce the live ranges. Ask the
- // intervals to coalesce themselves now.
- LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
- mri_);
- return true;
-}
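
The sweep after the value-number assignment is a classic two-cursor walk over sorted segment lists: overlap is tolerated only where both segments land on the same joined value number. A simplified standalone version, with invented names and the early-clobber re-check omitted:

    #include <vector>

    struct Seg { int start, end, valno; };   // Half-open [start, end).

    // Both lists sorted by start; overlapping segments must map to the
    // same value number in the joined interval, else they interfere.
    static bool rangesCompatible(const std::vector<Seg> &L,
                                 const std::vector<Seg> &R,
                                 const std::vector<int> &LAssign,
                                 const std::vector<int> &RAssign) {
      size_t i = 0, j = 0;
      while (i != L.size() && j != R.size()) {
        bool Overlaps = L[i].start < R[j].start ? L[i].end > R[j].start
                                                : R[j].end > L[i].start;
        if (Overlaps && LAssign[L[i].valno] != RAssign[R[j].valno])
          return false;                      // Different values: conflict.
        // Advance whichever segment ends first.
        if (L[i].end < R[j].end) ++i; else ++j;
      }
      return true;
    }
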
-
-namespace {
-  // DepthMBBCompare - Comparison predicate that sorts first on the loop
- // depth of the basic block (the unsigned), and then on the MBB number.
- struct DepthMBBCompare {
- typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
- bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
- // Deeper loops first
- if (LHS.first != RHS.first)
- return LHS.first > RHS.first;
-
- // Prefer blocks that are more connected in the CFG. This takes care of
- // the most difficult copies first while intervals are short.
- unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
- unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
- if (cl != cr)
- return cl > cr;
-
- // As a last resort, sort by block number.
- return LHS.second->getNumber() < RHS.second->getNumber();
- }
- };
-}
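
The comparator reads as a three-level lexicographic sort: deeper loops first, then better-connected blocks, then block number as the tiebreak. The same ordering in a standalone sketch, with a hypothetical BlockStats record in place of the (depth, MBB) pairs:

    #include <algorithm>
    #include <vector>

    struct BlockStats { unsigned Depth, Degree; int Number; };

    static bool deeperFirst(const BlockStats &A, const BlockStats &B) {
      if (A.Depth != B.Depth)   return A.Depth > B.Depth;   // Deeper loops.
      if (A.Degree != B.Degree) return A.Degree > B.Degree; // More edges.
      return A.Number < B.Number;                           // Stable tiebreak.
    }

    void sortBlocks(std::vector<BlockStats> &Blocks) {
      std::sort(Blocks.begin(), Blocks.end(), deeperFirst);
    }
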
-
-void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
- std::vector<CopyRec> &TryAgain) {
- DEBUG(dbgs() << MBB->getName() << ":\n");
-
- SmallVector<CopyRec, 8> VirtCopies;
- SmallVector<CopyRec, 8> PhysCopies;
- SmallVector<CopyRec, 8> ImpDefCopies;
- for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
- MII != E;) {
- MachineInstr *Inst = MII++;
-
-    // If this is neither a copy nor an extract_subreg, we can't join intervals.
- unsigned SrcReg, DstReg;
- if (Inst->isCopy()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(1).getReg();
- } else if (Inst->isSubregToReg()) {
- DstReg = Inst->getOperand(0).getReg();
- SrcReg = Inst->getOperand(2).getReg();
- } else
- continue;
-
- bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
- ImpDefCopies.push_back(CopyRec(Inst, 0));
- else if (SrcIsPhys || DstIsPhys)
- PhysCopies.push_back(CopyRec(Inst, 0));
- else
- VirtCopies.push_back(CopyRec(Inst, 0));
- }
-
- // Try coalescing implicit copies and insert_subreg <undef> first,
- // followed by copies to / from physical registers, then finally copies
- // from virtual registers to virtual registers.
- for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
- CopyRec &TheCopy = ImpDefCopies[i];
- bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
- }
- for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) {
- CopyRec &TheCopy = PhysCopies[i];
- bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
- }
- for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) {
- CopyRec &TheCopy = VirtCopies[i];
- bool Again = false;
- if (!JoinCopy(TheCopy, Again))
- if (Again)
- TryAgain.push_back(TheCopy);
- }
-}
-
-void SimpleRegisterCoalescing::joinIntervals() {
- DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
-
- std::vector<CopyRec> TryAgainList;
- if (loopInfo->empty()) {
- // If there are no loops in the function, join intervals in function order.
- for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();
- I != E; ++I)
- CopyCoalesceInMBB(I, TryAgainList);
- } else {
- // Otherwise, join intervals in inner loops before other intervals.
-    // Unfortunately we can't just iterate over the loop hierarchy here because
-    // there may be more MBBs than BBs. Collect MBBs for sorting.
-
- // Join intervals in the function prolog first. We want to join physical
-    // registers with virtual registers before the intervals get too long.
- std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
- for (MachineFunction::iterator I = mf_->begin(), E = mf_->end();I != E;++I){
- MachineBasicBlock *MBB = I;
- MBBs.push_back(std::make_pair(loopInfo->getLoopDepth(MBB), I));
- }
-
- // Sort by loop depth.
- std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
-
- // Finally, join intervals in loop nest order.
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
- CopyCoalesceInMBB(MBBs[i].second, TryAgainList);
- }
-
- // Joining intervals can allow other intervals to be joined. Iteratively join
- // until we make no progress.
- bool ProgressMade = true;
- while (ProgressMade) {
- ProgressMade = false;
-
- for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) {
- CopyRec &TheCopy = TryAgainList[i];
- if (!TheCopy.MI)
- continue;
-
- bool Again = false;
- bool Success = JoinCopy(TheCopy, Again);
- if (Success || !Again) {
- TheCopy.MI = 0; // Mark this one as done.
- ProgressMade = true;
- }
- }
- }
-}
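
The retry loop that closes joinIntervals is a plain fixpoint iteration: keep re-running the deferred copies until one full pass changes nothing. The pattern in isolation, with a hypothetical tryJoin callback and negative entries marking finished work:

    #include <vector>

    struct Attempt { bool Success, Again; };
    typedef Attempt (*TryJoinFn)(int);

    void joinToFixpoint(std::vector<int> &Work, TryJoinFn tryJoin) {
      bool Progress = true;
      while (Progress) {
        Progress = false;
        for (size_t i = 0; i != Work.size(); ++i) {
          if (Work[i] < 0)
            continue;                 // Already done.
          Attempt R = tryJoin(Work[i]);
          if (R.Success || !R.Again) {
            Work[i] = -1;             // Done: either joined or hopeless.
            Progress = true;
          }
        }
      }
    }
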
-
-void SimpleRegisterCoalescing::releaseMemory() {
- JoinedCopies.clear();
- ReMatCopies.clear();
- ReMatDefs.clear();
-}
-
-bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
- mf_ = &fn;
- mri_ = &fn.getRegInfo();
- tm_ = &fn.getTarget();
- tri_ = tm_->getRegisterInfo();
- tii_ = tm_->getInstrInfo();
- li_ = &getAnalysis<LiveIntervals>();
- ldv_ = &getAnalysis<LiveDebugVariables>();
- AA = &getAnalysis<AliasAnalysis>();
- loopInfo = &getAnalysis<MachineLoopInfo>();
-
- DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
- << "********** Function: "
- << ((Value*)mf_->getFunction())->getName() << '\n');
-
- if (VerifyCoalescing)
- mf_->verify(this, "Before register coalescing");
-
- RegClassInfo.runOnMachineFunction(fn);
-
- // Join (coalesce) intervals if requested.
- if (EnableJoining) {
- joinIntervals();
- DEBUG({
- dbgs() << "********** INTERVALS POST JOINING **********\n";
- for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
- I != E; ++I){
- I->second->print(dbgs(), tri_);
- dbgs() << "\n";
- }
- });
- }
-
- // Perform a final pass over the instructions and compute spill weights
- // and remove identity moves.
- SmallVector<unsigned, 4> DeadDefs;
- for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
- mbbi != mbbe; ++mbbi) {
- MachineBasicBlock* mbb = mbbi;
- for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end();
- mii != mie; ) {
- MachineInstr *MI = mii;
- if (JoinedCopies.count(MI)) {
- // Delete all coalesced copies.
- bool DoDelete = true;
- assert(MI->isCopyLike() && "Unrecognized copy instruction");
- unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
- MI->getNumOperands() > 2)
- // Do not delete extract_subreg, insert_subreg of physical
- // registers unless the definition is dead. e.g.
-        // %D0<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1
- // or else the scavenger may complain. LowerSubregs will
- // delete them later.
- DoDelete = false;
-
- if (MI->allDefsAreDead()) {
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- li_->hasInterval(SrcReg))
- li_->shrinkToUses(&li_->getInterval(SrcReg));
- DoDelete = true;
- }
- if (!DoDelete) {
- // We need the instruction to adjust liveness, so make it a KILL.
- if (MI->isSubregToReg()) {
- MI->RemoveOperand(3);
- MI->RemoveOperand(1);
- }
- MI->setDesc(tii_->get(TargetOpcode::KILL));
- mii = llvm::next(mii);
- } else {
- li_->RemoveMachineInstrFromMaps(MI);
- mii = mbbi->erase(mii);
- ++numPeep;
- }
- continue;
- }
-
- // Now check if this is a remat'ed def instruction which is now dead.
- if (ReMatDefs.count(MI)) {
- bool isDead = true;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!Reg)
- continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- DeadDefs.push_back(Reg);
- if (MO.isDead())
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !mri_->use_nodbg_empty(Reg)) {
- isDead = false;
- break;
- }
- }
- if (isDead) {
- while (!DeadDefs.empty()) {
- unsigned DeadDef = DeadDefs.back();
- DeadDefs.pop_back();
- RemoveDeadDef(li_->getInterval(DeadDef), MI);
- }
- li_->RemoveMachineInstrFromMaps(mii);
- mii = mbbi->erase(mii);
- continue;
- } else
- DeadDefs.clear();
- }
-
- ++mii;
-
-      // Check for kill flags that are now unnecessary.
- if (li_->isNotInMIMap(MI)) continue;
- SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !MO.isKill()) continue;
- unsigned reg = MO.getReg();
- if (!reg || !li_->hasInterval(reg)) continue;
- if (!li_->getInterval(reg).killedAt(DefIdx)) {
- MO.setIsKill(false);
- continue;
- }
- // When leaving a kill flag on a physreg, check if any subregs should
- // remain alive.
- if (!TargetRegisterInfo::isPhysicalRegister(reg))
- continue;
- for (const unsigned *SR = tri_->getSubRegisters(reg);
- unsigned S = *SR; ++SR)
- if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
- MI->addRegisterDefined(S, tri_);
- }
- }
- }
-
- DEBUG(dump());
- DEBUG(ldv_->dump());
- if (VerifyCoalescing)
- mf_->verify(this, "After register coalescing");
- return true;
-}
-
-/// print - Implement the dump method.
-void SimpleRegisterCoalescing::print(raw_ostream &O, const Module* m) const {
- li_->print(O, m);
-}
-
-RegisterCoalescer* llvm::createSimpleRegisterCoalescer() {
- return new SimpleRegisterCoalescing();
-}
-
-// Make sure that anything that uses RegisterCoalescer pulls in this file...
-DEFINING_FILE_FOR(SimpleRegisterCoalescing)
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 92970e4..65a33da 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -87,12 +87,10 @@ FunctionPass *llvm::createSjLjEHPass(const TargetLowering *TLI) {
bool SjLjEHPass::doInitialization(Module &M) {
// Build the function context structure.
// builtin_setjmp uses a five word jbuf
- const Type *VoidPtrTy =
- Type::getInt8PtrTy(M.getContext());
- const Type *Int32Ty = Type::getInt32Ty(M.getContext());
+ Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
+ Type *Int32Ty = Type::getInt32Ty(M.getContext());
FunctionContextTy =
- StructType::get(M.getContext(),
- VoidPtrTy, // __prev
+ StructType::get(VoidPtrTy, // __prev
Int32Ty, // call_site
ArrayType::get(Int32Ty, 4), // __data
VoidPtrTy, // __personality
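
This hunk tracks the LLVM 3.0-era type-system rewrite: StructType::get now infers the LLVMContext from its (non-const) element types. A minimal sketch of the new call shape, assuming the variadic NULL-terminated overload; the patch's real struct continues with further trailing fields that the hunk truncates:

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    using namespace llvm;

    StructType *buildFunctionContextTy(LLVMContext &C) {
      Type *VoidPtrTy = Type::getInt8PtrTy(C);
      Type *Int32Ty = Type::getInt32Ty(C);
      // Context is inferred from the elements; the list ends with NULL.
      return StructType::get(VoidPtrTy,                  // __prev
                             Int32Ty,                    // call_site
                             ArrayType::get(Int32Ty, 4), // __data
                             VoidPtrTy,                  // __personality
                             NULL);
    }
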
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index bf27cc8..761cab7 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -76,12 +76,14 @@ SlotIndex SplitAnalysis::computeLastSplitPoint(unsigned Num) {
return LSP.first;
// There may not be a call instruction (?) in which case we ignore LPad.
LSP.second = LSP.first;
- for (MachineBasicBlock::const_iterator I = FirstTerm, E = MBB->begin();
- I != E; --I)
+ for (MachineBasicBlock::const_iterator I = MBB->end(), E = MBB->begin();
+ I != E;) {
+ --I;
if (I->getDesc().isCall()) {
LSP.second = LIS.getInstructionIndex(I);
break;
}
+ }
}
// If CurLI is live into a landing pad successor, move the last split point
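
The old loop started its cursor at FirstTerm and decremented in the for-header, which can step past begin(); the fix is the standard test-then-decrement idiom for scanning a bidirectional range backwards, including its first element. The same idiom in isolation, with invented names:

    #include <list>

    // Test first, then decrement: visits every element, including the
    // first, and never moves the iterator before begin().
    template <typename Pred>
    bool findLastIf(const std::list<int> &L, Pred P, int &Out) {
      for (std::list<int>::const_iterator I = L.end(), E = L.begin();
           I != E;) {
        --I;
        if (P(*I)) { Out = *I; return true; }
      }
      return false;
    }
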
@@ -122,7 +124,7 @@ void SplitAnalysis::analyzeUses() {
// Compute per-live block info.
if (!calcLiveBlockInfo()) {
// FIXME: calcLiveBlockInfo found inconsistencies in the live range.
- // I am looking at you, SimpleRegisterCoalescing!
+ // I am looking at you, RegisterCoalescer!
DidRepairRange = true;
++NumRepairs;
DEBUG(dbgs() << "*** Fixing inconsistent live interval! ***\n");
@@ -165,7 +167,7 @@ bool SplitAnalysis::calcLiveBlockInfo() {
tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
// If the block contains no uses, the range must be live through. At one
- // point, SimpleRegisterCoalescing could create dangling ranges that ended
+ // point, RegisterCoalescer could create dangling ranges that ended
// mid-block.
if (UseI == UseE || *UseI >= Stop) {
++NumThroughBlocks;
@@ -634,6 +636,7 @@ unsigned SplitEditor::openIntv() {
void SplitEditor::selectIntv(unsigned Idx) {
assert(Idx != 0 && "Cannot select the complement interval");
assert(Idx < Edit->size() && "Can only select previously opened interval");
+ DEBUG(dbgs() << " selectIntv " << OpenIdx << " -> " << Idx << '\n');
OpenIdx = Idx;
}
@@ -654,6 +657,24 @@ SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
return VNI->def;
}
+SlotIndex SplitEditor::enterIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvAfter");
+ DEBUG(dbgs() << " enterIntvAfter " << Idx);
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
+ }
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvAfter called with invalid index");
+
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
+}
+
SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
SlotIndex End = LIS.getMBBEndIdx(&MBB);
@@ -1005,12 +1026,6 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
markComplexMapped(i, ParentVNI);
}
-#ifndef NDEBUG
- // Every new interval must have a def by now, otherwise the split is bogus.
- for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I)
- assert((*I)->hasAtLeastOneValue() && "Split interval has no value");
-#endif
-
// Transfer the simply mapped values, check if any are skipped.
bool Skipped = transferValues();
if (Skipped)
@@ -1109,3 +1124,263 @@ void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
}
finish();
}
+
+
+//===----------------------------------------------------------------------===//
+// Global Live Range Splitting Support
+//===----------------------------------------------------------------------===//
+
+// These methods support a scheme for global live range splitting that uses a
+// global algorithm to decide intervals for CFG edges. They will insert split
+// points and color intervals in basic blocks while avoiding interference.
+//
+// Note that splitSingleBlock is also useful for blocks where both CFG edges
+// are on the stack.
+
+void SplitEditor::splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter){
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(MBBNum);
+
+ DEBUG(dbgs() << "BB#" << MBBNum << " [" << Start << ';' << Stop
+ << ") intf " << LeaveBefore << '-' << EnterAfter
+ << ", live-through " << IntvIn << " -> " << IntvOut);
+
+ assert((IntvIn || IntvOut) && "Use splitSingleBlock for isolated blocks");
+
+ if (!IntvOut) {
+ DEBUG(dbgs() << ", spill on entry.\n");
+ //
+ // <<<<<<<<< Possible LeaveBefore interference.
+ // |-----------| Live through.
+ // -____________ Spill on entry.
+ //
+ selectIntv(IntvIn);
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+ SlotIndex Idx = leaveIntvAtTop(*MBB);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (!IntvIn) {
+ DEBUG(dbgs() << ", reload on exit.\n");
+ //
+ // >>>>>>> Possible EnterAfter interference.
+ // |-----------| Live through.
+ // ___________-- Reload on exit.
+ //
+ selectIntv(IntvOut);
+ MachineBasicBlock *MBB = VRM.getMachineFunction().getBlockNumbered(MBBNum);
+ SlotIndex Idx = enterIntvAtEnd(*MBB);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ (void)Idx;
+ return;
+ }
+
+ if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter) {
+ DEBUG(dbgs() << ", straight through.\n");
+ //
+ // |-----------| Live through.
+ // ------------- Straight through, same intv, no interference.
+ //
+ selectIntv(IntvOut);
+ useIntv(Start, Stop);
+ return;
+ }
+
+ // We cannot legally insert splits after LSP.
+ SlotIndex LSP = SA.getLastSplitPoint(MBBNum);
+
+ if (IntvIn != IntvOut && (!LeaveBefore || !EnterAfter ||
+ LeaveBefore.getBaseIndex() > EnterAfter.getBoundaryIndex())) {
+ DEBUG(dbgs() << ", switch avoiding interference.\n");
+ //
+ // >>>> <<<< Non-overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ------======= Switch intervals between interference.
+ //
+ SlotIndex Cut = (LeaveBefore && LeaveBefore < LSP) ? LeaveBefore : LSP;
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(Cut);
+ useIntv(Idx, Stop);
+ selectIntv(IntvIn);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ DEBUG(dbgs() << ", create local intv for interference.\n");
+ //
+ // >>><><><><<<< Overlapping EnterAfter/LeaveBefore interference.
+ // |-----------| Live through.
+ // ==---------== Switch intervals before/after interference.
+ //
+ assert(LeaveBefore <= EnterAfter && "Missed case");
+
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ selectIntv(IntvIn);
+ Idx = leaveIntvBefore(LeaveBefore);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+}
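
The four diagrams above amount to a small decision tree on the entry interval, the exit interval, and where the interference sits. A hypothetical distillation, where the names and the single boolean summarizing overlap are inventions, not this patch's API:

    // Which of the live-through strategies applies to a block.
    enum ThroughBlockPlan {
      SpillOnEntry,     // No IntvOut: leave the register at the top.
      ReloadOnExit,     // No IntvIn: enter the register at the end.
      StraightThrough,  // Same interval both ways, no interference.
      SwitchBetween,    // Different intervals, interference doesn't overlap.
      LocalInterval     // Overlapping interference: bridge with a local intv.
    };

    ThroughBlockPlan classify(unsigned IntvIn, unsigned IntvOut,
                              bool LeaveBefore, bool EnterAfter,
                              bool InterferenceOverlaps) {
      if (!IntvOut) return SpillOnEntry;
      if (!IntvIn)  return ReloadOnExit;
      if (IntvIn == IntvOut && !LeaveBefore && !EnterAfter)
        return StraightThrough;
      if (IntvIn != IntvOut && !InterferenceOverlaps)
        return SwitchBetween;
      return LocalInterval;
    }
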
+
+
+void SplitEditor::splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstUse << '-' << BI.LastUse
+ << ", reg-in " << IntvIn << ", leave before " << LeaveBefore
+ << (BI.LiveOut ? ", stack-out" : ", killed in block"));
+
+ assert(IntvIn && "Must have register in");
+ assert(BI.LiveIn && "Must be live-in");
+ assert((!LeaveBefore || LeaveBefore > Start) && "Bad interference");
+
+ if (!BI.LiveOut && (!LeaveBefore || LeaveBefore >= BI.LastUse)) {
+ DEBUG(dbgs() << " before interference.\n");
+ //
+ // <<< Interference after kill.
+ // |---o---x | Killed in block.
+ // ========= Use IntvIn everywhere.
+ //
+ selectIntv(IntvIn);
+ useIntv(Start, BI.LastUse);
+ return;
+ }
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ if (!LeaveBefore || LeaveBefore > BI.LastUse.getBoundaryIndex()) {
+ //
+ // <<< Possible interference after last use.
+ // |---o---o---| Live-out on stack.
+ // =========____ Leave IntvIn after last use.
+ //
+ // < Interference after last use.
+ // |---o---o--o| Live-out on stack, late last use.
+ // ============ Copy to stack after LSP, overlap IntvIn.
+ // \_____ Stack interval is live-out.
+ //
+ if (BI.LastUse < LSP) {
+ DEBUG(dbgs() << ", spill after last use before interference.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvAfter(BI.LastUse);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ } else {
+ DEBUG(dbgs() << ", spill before last split point.\n");
+ selectIntv(IntvIn);
+ SlotIndex Idx = leaveIntvBefore(LSP);
+ overlapIntv(Idx, BI.LastUse);
+ useIntv(Start, Idx);
+ assert((!LeaveBefore || Idx <= LeaveBefore) && "Interference");
+ }
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvIn. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ unsigned LocalIntv = openIntv();
+ (void)LocalIntv;
+ DEBUG(dbgs() << ", creating local interval " << LocalIntv << ".\n");
+
+ if (!BI.LiveOut || BI.LastUse < LSP) {
+ //
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o---| Live-out on stack.
+ // =====----____ Leave IntvIn before interference, then spill.
+ //
+ SlotIndex To = leaveIntvAfter(BI.LastUse);
+ SlotIndex From = enterIntvBefore(LeaveBefore);
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+ return;
+ }
+
+ // <<<<<<< Interference overlapping uses.
+ // |---o---o--o| Live-out on stack, late last use.
+ // =====------- Copy to stack before LSP, overlap LocalIntv.
+ // \_____ Stack interval is live-out.
+ //
+ SlotIndex To = leaveIntvBefore(LSP);
+ overlapIntv(To, BI.LastUse);
+ SlotIndex From = enterIntvBefore(std::min(To, LeaveBefore));
+ useIntv(From, To);
+ selectIntv(IntvIn);
+ useIntv(Start, From);
+ assert((!LeaveBefore || From <= LeaveBefore) && "Interference");
+}
+
+void SplitEditor::splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter) {
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " [" << Start << ';' << Stop
+ << "), uses " << BI.FirstUse << '-' << BI.LastUse
+ << ", reg-out " << IntvOut << ", enter after " << EnterAfter
+ << (BI.LiveIn ? ", stack-in" : ", defined in block"));
+
+ SlotIndex LSP = SA.getLastSplitPoint(BI.MBB->getNumber());
+
+ assert(IntvOut && "Must have register out");
+ assert(BI.LiveOut && "Must be live-out");
+ assert((!EnterAfter || EnterAfter < LSP) && "Bad interference");
+
+ if (!BI.LiveIn && (!EnterAfter || EnterAfter <= BI.FirstUse)) {
+ DEBUG(dbgs() << " after interference.\n");
+ //
+ // >>>> Interference before def.
+ // | o---o---| Defined in block.
+ // ========= Use IntvOut everywhere.
+ //
+ selectIntv(IntvOut);
+ useIntv(BI.FirstUse, Stop);
+ return;
+ }
+
+ if (!EnterAfter || EnterAfter < BI.FirstUse.getBaseIndex()) {
+ DEBUG(dbgs() << ", reload after interference.\n");
+ //
+ // >>>> Interference before def.
+ // |---o---o---| Live-through, stack-in.
+ // ____========= Enter IntvOut before first use.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvBefore(std::min(LSP, BI.FirstUse));
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+ return;
+ }
+
+ // The interference is overlapping somewhere we wanted to use IntvOut. That
+ // means we need to create a local interval that can be allocated a
+ // different register.
+ DEBUG(dbgs() << ", interference overlaps uses.\n");
+ //
+ // >>>>>>> Interference overlapping uses.
+ // |---o---o---| Live-through, stack-in.
+ // ____---====== Create local interval for interference range.
+ //
+ selectIntv(IntvOut);
+ SlotIndex Idx = enterIntvAfter(EnterAfter);
+ useIntv(Idx, Stop);
+ assert((!EnterAfter || Idx >= EnterAfter) && "Interference");
+
+ openIntv();
+ SlotIndex From = enterIntvBefore(std::min(Idx, BI.FirstUse));
+ useIntv(From, Idx);
+}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 7174c0b..7948b72 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -81,6 +81,12 @@ public:
bool LiveThrough; ///< Live in whole block (Templ 5. above).
bool LiveIn; ///< Current reg is live in.
bool LiveOut; ///< Current reg is live out.
+
+ /// isOneInstr - Returns true when this BlockInfo describes a single
+ /// instruction.
+ bool isOneInstr() const {
+ return SlotIndex::isSameInstr(FirstUse, LastUse);
+ }
};
private:
@@ -360,6 +366,10 @@ public:
/// Return the beginning of the new live range.
SlotIndex enterIntvBefore(SlotIndex Idx);
+ /// enterIntvAfter - Enter the open interval after the instruction at Idx.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAfter(SlotIndex Idx);
+
/// enterIntvAtEnd - Enter the open interval at the end of MBB.
  /// Use the open interval from the inserted copy to the MBB end.
/// Return the beginning of the new live range.
@@ -416,6 +426,42 @@ public:
/// splitSingleBlocks - Split CurLI into a separate live interval inside each
/// basic block in Blocks.
void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+
+ /// splitLiveThroughBlock - Split CurLI in the given block such that it
+ /// enters the block in IntvIn and leaves it in IntvOut. There may be uses in
+ /// the block, but they will be ignored when placing split points.
+ ///
+ /// @param MBBNum Block number.
+ /// @param IntvIn Interval index entering the block.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitLiveThroughBlock(unsigned MBBNum,
+ unsigned IntvIn, SlotIndex LeaveBefore,
+ unsigned IntvOut, SlotIndex EnterAfter);
+
+ /// splitRegInBlock - Split CurLI in the given block such that it enters the
+ /// block in IntvIn and leaves it on the stack (or not at all). Split points
+ /// are placed in a way that avoids putting uses in the stack interval. This
+ /// may require creating a local interval when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvIn Interval index entering the block. Not 0.
+ /// @param LeaveBefore When set, leave IntvIn before this point.
+ void splitRegInBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvIn, SlotIndex LeaveBefore);
+
+ /// splitRegOutBlock - Split CurLI in the given block such that it enters the
+ /// block on the stack (or isn't live-in at all) and leaves it in IntvOut.
+ /// Split points are placed to avoid interference and such that the uses are
+ /// not in the stack interval. This may require creating a local interval
+ /// when there is interference.
+ ///
+ /// @param BI Block descriptor.
+ /// @param IntvOut Interval index leaving the block.
+ /// @param EnterAfter When set, enter IntvOut after this point.
+ void splitRegOutBlock(const SplitAnalysis::BlockInfo &BI,
+ unsigned IntvOut, SlotIndex EnterAfter);
};
}
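
As a rough picture of how a register allocator might drive these three entry points once a global solver has picked an interval for each block boundary; this caller is hypothetical and not part of the patch:

    // Dispatch one block's split decision to the matching entry point.
    // IntvIn/IntvOut of 0 mean the value crosses that boundary on the stack.
    void applyBlockDecision(SplitEditor &SE,
                            const SplitAnalysis::BlockInfo &BI,
                            unsigned IntvIn, SlotIndex LeaveBefore,
                            unsigned IntvOut, SlotIndex EnterAfter) {
      if (BI.LiveThrough && (IntvIn || IntvOut))
        SE.splitLiveThroughBlock(BI.MBB->getNumber(),
                                 IntvIn, LeaveBefore, IntvOut, EnterAfter);
      else if (IntvIn)
        SE.splitRegInBlock(BI, IntvIn, LeaveBefore);   // Enters in a reg.
      else if (IntvOut)
        SE.splitRegOutBlock(BI, IntvOut, EnterAfter);  // Leaves in a reg.
    }
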
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
index 08aee82..ec75df4 100644
--- a/lib/CodeGen/Splitter.cpp
+++ b/lib/CodeGen/Splitter.cpp
@@ -11,7 +11,7 @@
#include "Splitter.h"
-#include "SimpleRegisterCoalescing.h"
+#include "RegisterCoalescer.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index f0a44ab..d3cbd15 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -186,7 +186,7 @@ bool StackProtector::InsertStackProtectors() {
Value *Args[] = { LI, AI };
CallInst::
Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector),
- &Args[0], array_endof(Args), "", InsPt);
+ Args, "", InsPt);
// Create the basic block to jump to when the guard check fails.
FailBB = CreateFailBB();
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 01f5b56..57cbe1ba 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -504,7 +504,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
bool FoundDef = false; // Not counting 2address def.
Uses.clear();
- const TargetInstrDesc &TID = MII->getDesc();
+ const MCInstrDesc &MCID = MII->getDesc();
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MII->getOperand(i);
if (!MO.isReg())
@@ -521,7 +521,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII,
if (MO.getSubReg() || MII->isSubregToReg())
return false;
- const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
if (RC && !RC->contains(NewReg))
return false;
@@ -566,7 +566,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
SmallVector<MachineOperand*, 4> Uses;
while (++MII != MBB->end()) {
bool FoundKill = false;
- const TargetInstrDesc &TID = MII->getDesc();
+ const MCInstrDesc &MCID = MII->getDesc();
for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MII->getOperand(i);
if (!MO.isReg())
@@ -583,7 +583,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII,
if (MO.getSubReg())
return false;
- const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI);
+ const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI);
if (RC && !RC->contains(NewReg))
return false;
if (MO.isKill())
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index e8eab8f..6b801cb 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -95,10 +95,22 @@ namespace {
SmallSetVector<MachineBasicBlock*, 8> &Succs);
bool TailDuplicateBlocks(MachineFunction &MF);
bool shouldTailDuplicate(const MachineFunction &MF,
- MachineBasicBlock &TailBB);
- bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+ bool IsSimple, MachineBasicBlock &TailBB);
+ bool isSimpleBB(MachineBasicBlock *TailBB);
+ bool canCompletelyDuplicateBB(MachineBasicBlock &BB);
+ bool duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &RegsUsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
SmallVector<MachineBasicBlock*, 8> &TDBBs,
SmallVector<MachineInstr*, 16> &Copies);
+ bool TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF);
+
void RemoveDeadBlock(MachineBasicBlock *MBB);
};
@@ -169,6 +181,109 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
}
+/// TailDuplicateAndUpdate - Tail duplicate the block and cleanup.
+bool
+TailDuplicatePass::TailDuplicateAndUpdate(MachineBasicBlock *MBB,
+ bool IsSimple,
+ MachineFunction &MF) {
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock*, 8> TDBBs;
+ SmallVector<MachineInstr*, 16> Copies;
+ if (!TailDuplicate(MBB, IsSimple, MF, TDBBs, Copies))
+ return false;
+
+ ++NumTails;
+
+ SmallVector<MachineInstr*, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
+ if (PreRegAlloc)
+ UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = 0;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->isDebugValue()) {
+ // SSAUpdate can replace the use with an undef. That creates
+ // a debug instruction that is a kill.
+          // FIXME: Should it be SSAUpdate's job to delete debug instructions
+ // instead of replacing the use with undef?
+ UseMI->eraseFromParent();
+ continue;
+ }
+ if (UseMI->getParent() == DefBB && !UseMI->isPHI())
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ if (!Copy->isCopy())
+ continue;
+ unsigned Dst = Copy->getOperand(0).getReg();
+ unsigned Src = Copy->getOperand(1).getReg();
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+ if (++UI == MRI->use_end()) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+
+ if (NewPHIs.size())
+ NumAddedPHIs += NewPHIs.size();
+
+ return true;
+}
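
The copy cleanup loop above deletes a COPY whose source has no user besides the copy itself, after renaming the destination to the source everywhere. The same idea in a standalone sketch over plain ints, with invented names:

    #include <vector>

    // A "dst = src" copy whose src has no other use can be removed by
    // renaming dst to src (mirrors replaceRegWith + eraseFromParent).
    struct Copy { int Dst, Src; bool Erased; };

    void propagateTrivialCopies(std::vector<Copy> &Copies,
                                std::vector<int> &Operands,
                                const std::vector<int> &UseCounts) {
      for (size_t i = 0; i != Copies.size(); ++i) {
        if (UseCounts[Copies[i].Src] != 1)  // The copy is src's only use?
          continue;
        for (size_t j = 0; j != Operands.size(); ++j)
          if (Operands[j] == Copies[i].Dst)
            Operands[j] = Copies[i].Src;    // Rename dst -> src.
        Copies[i].Erased = true;            // Drop the identity copy.
      }
    }
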
+
/// TailDuplicateBlocks - Look for small blocks that are unconditionally
/// branched to and do not fall through. Tail-duplicate their instructions
/// into their predecessors to eliminate (dynamic) branches.
@@ -180,100 +295,22 @@ bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
VerifyPHIs(MF, true);
}
- SmallVector<MachineInstr*, 8> NewPHIs;
- MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
-
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
MachineBasicBlock *MBB = I++;
if (NumTails == TailDupLimit)
break;
- // Save the successors list.
- SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
- MBB->succ_end());
-
- SmallVector<MachineBasicBlock*, 8> TDBBs;
- SmallVector<MachineInstr*, 16> Copies;
- if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
- ++NumTails;
-
- // TailBB's immediate successors are now successors of those predecessors
- // which duplicated TailBB. Add the predecessors as sources to the PHI
- // instructions.
- bool isDead = MBB->pred_empty();
- if (PreRegAlloc)
- UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
-
- // If it is dead, remove it.
- if (isDead) {
- NumInstrDups -= MBB->size();
- RemoveDeadBlock(MBB);
- ++NumDeadBlocks;
- }
-
- // Update SSA form.
- if (!SSAUpdateVRs.empty()) {
- for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
- unsigned VReg = SSAUpdateVRs[i];
- SSAUpdate.Initialize(VReg);
-
- // If the original definition is still around, add it as an available
- // value.
- MachineInstr *DefMI = MRI->getVRegDef(VReg);
- MachineBasicBlock *DefBB = 0;
- if (DefMI) {
- DefBB = DefMI->getParent();
- SSAUpdate.AddAvailableValue(DefBB, VReg);
- }
-
- // Add the new vregs as available values.
- DenseMap<unsigned, AvailableValsTy>::iterator LI =
- SSAUpdateVals.find(VReg);
- for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
- MachineBasicBlock *SrcBB = LI->second[j].first;
- unsigned SrcReg = LI->second[j].second;
- SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
- }
-
- // Rewrite uses that are outside of the original def's block.
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
- while (UI != MRI->use_end()) {
- MachineOperand &UseMO = UI.getOperand();
- MachineInstr *UseMI = &*UI;
- ++UI;
- if (UseMI->getParent() == DefBB && !UseMI->isPHI())
- continue;
- SSAUpdate.RewriteUse(UseMO);
- }
- }
+ bool IsSimple = isSimpleBB(MBB);
- SSAUpdateVRs.clear();
- SSAUpdateVals.clear();
- }
-
- // Eliminate some of the copies inserted by tail duplication to maintain
- // SSA form.
- for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
- MachineInstr *Copy = Copies[i];
- if (!Copy->isCopy())
- continue;
- unsigned Dst = Copy->getOperand(0).getReg();
- unsigned Src = Copy->getOperand(1).getReg();
- MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
- if (++UI == MRI->use_end()) {
- // Copy is the only use. Do trivial copy propagation here.
- MRI->replaceRegWith(Dst, Src);
- Copy->eraseFromParent();
- }
- }
+ if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ continue;
- if (PreRegAlloc && TailDupVerify)
- VerifyPHIs(MF, false);
- MadeChange = true;
- }
+ MadeChange |= TailDuplicateAndUpdate(MBB, IsSimple, MF);
}
- NumAddedPHIs += NewPHIs.size();
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
return MadeChange;
}
@@ -283,6 +320,8 @@ static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
UE = MRI->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
+ if (UseMI->isDebugValue())
+ continue;
if (UseMI->getParent() != BB)
return true;
}
@@ -485,11 +524,16 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
/// shouldTailDuplicate - Determine if it is profitable to duplicate this block.
bool
TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
+ bool IsSimple,
MachineBasicBlock &TailBB) {
// Only duplicate blocks that end with unconditional branches.
if (TailBB.canFallThrough())
return false;
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB.isSuccessor(&TailBB))
+ return false;
+
// Set the limit on the cost to duplicate. When optimizing for size,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
@@ -500,73 +544,208 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF,
else
MaxDuplicateCount = TailDuplicateSize;
- if (PreRegAlloc) {
- if (TailBB.empty())
- return false;
- const TargetInstrDesc &TID = TailBB.back().getDesc();
- // Pre-regalloc tail duplication hurts compile time and doesn't help
- // much except for indirect branches.
- if (!TID.isIndirectBranch())
- return false;
- // If the target has hardware branch prediction that can handle indirect
- // branches, duplicating them can often make them predictable when there
- // are common paths through the code. The limit needs to be high enough
- // to allow undoing the effects of tail merging and other optimizations
- // that rearrange the predecessors of the indirect branch.
- MaxDuplicateCount = 20;
- }
+ // If the target has hardware branch prediction that can handle indirect
+ // branches, duplicating them can often make them predictable when there
+ // are common paths through the code. The limit needs to be high enough
+ // to allow undoing the effects of tail merging and other optimizations
+ // that rearrange the predecessors of the indirect branch.
- // Don't try to tail-duplicate single-block loops.
- if (TailBB.isSuccessor(&TailBB))
- return false;
+ bool HasIndirectbr = false;
+ if (!TailBB.empty())
+ HasIndirectbr = TailBB.back().getDesc().isIndirectBranch();
+
+ if (HasIndirectbr && PreRegAlloc)
+ MaxDuplicateCount = 20;
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
unsigned InstrCount = 0;
- bool HasCall = false;
for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end();
++I) {
// Non-duplicable things shouldn't be tail-duplicated.
- if (I->getDesc().isNotDuplicable()) return false;
+ if (I->getDesc().isNotDuplicable())
+ return false;
+
// Do not duplicate 'return' instructions if this is a pre-regalloc run.
// A return may expand into a lot more instructions (e.g. reload of callee
// saved registers) after PEI.
- if (PreRegAlloc && I->getDesc().isReturn()) return false;
- // Don't duplicate more than the threshold.
- if (InstrCount == MaxDuplicateCount) return false;
- // Remember if we saw a call.
- if (I->getDesc().isCall()) HasCall = true;
+ if (PreRegAlloc && I->getDesc().isReturn())
+ return false;
+
+    // Avoid duplicating calls before register allocation. Calls present a
+ // barrier to register allocation so duplicating them may end up increasing
+ // spills.
+ if (PreRegAlloc && I->getDesc().isCall())
+ return false;
+
if (!I->isPHI() && !I->isDebugValue())
InstrCount += 1;
+
+ if (InstrCount > MaxDuplicateCount)
+ return false;
}
-  // Don't tail-duplicate calls before register allocation. Calls present a
- // barrier to register allocation so duplicating them may end up increasing
- // spills.
- if (InstrCount > 1 && (PreRegAlloc && HasCall))
+
+ if (HasIndirectbr && PreRegAlloc)
+ return true;
+
+ if (IsSimple)
+ return true;
+
+ if (!PreRegAlloc)
+ return true;
+
+ return canCompletelyDuplicateBB(TailBB);
+}
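
A hedged distillation of the duplication budget chosen above; note the 20-instruction indirect-branch allowance overrides even the optimize-for-size limit, matching the control flow of the new code:

    // How many instructions we are willing to duplicate into a predecessor.
    unsigned maxDuplicateCount(bool OptForSize, unsigned FlagValue,
                               bool HasIndirectBr, bool PreRegAlloc) {
      if (HasIndirectBr && PreRegAlloc)
        return 20;           // Room to undo tail merging and similar passes.
      if (OptForSize)
        return 1;            // One removed branch pays for one duplicate.
      return FlagValue;      // The TailDuplicateSize command-line default.
    }
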
+
+/// isSimpleBB - True if this BB has only one unconditional jump.
+bool
+TailDuplicatePass::isSimpleBB(MachineBasicBlock *TailBB) {
+ if (TailBB->succ_size() != 1)
+ return false;
+ if (TailBB->pred_empty())
return false;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ MachineBasicBlock::iterator E = TailBB->end();
+ while (I != E && I->isDebugValue())
+ ++I;
+ if (I == E)
+ return true;
+ return I->getDesc().isUnconditionalBranch();
+}
+
+static bool
+bothUsedInPHI(const MachineBasicBlock &A,
+ SmallPtrSet<MachineBasicBlock*, 8> SuccsB) {
+ for (MachineBasicBlock::const_succ_iterator SI = A.succ_begin(),
+ SE = A.succ_end(); SI != SE; ++SI) {
+ MachineBasicBlock *BB = *SI;
+ if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
+ return true;
+ }
+
+ return false;
+}
+
+bool
+TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(BB.succ_begin(), BB.succ_end());
+
+ for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(),
+ PE = BB.pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ return false;
+ if (!PredCond.empty())
+ return false;
+ }
return true;
}
+bool
+TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ const DenseSet<unsigned> &UsedByPhi,
+ SmallVector<MachineInstr*, 16> &Copies) {
+ SmallPtrSet<MachineBasicBlock*, 8> Succs(TailBB->succ_begin(),
+ TailBB->succ_end());
+ SmallVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ bool Changed = false;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ if (PredBB->getLandingPadSuccessor())
+ continue;
+
+ if (bothUsedInPHI(*PredBB, Succs))
+ continue;
+
+ MachineBasicBlock *PredTBB = NULL, *PredFBB = NULL;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+
+ Changed = true;
+ DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From simple Succ: " << *TailBB);
+
+ MachineBasicBlock *NewTarget = *TailBB->succ_begin();
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(PredBB));
+
+ // Make PredFBB explicit.
+ if (PredCond.empty())
+ PredFBB = PredTBB;
+
+ // Make fall through explicit.
+ if (!PredTBB)
+ PredTBB = NextBB;
+ if (!PredFBB)
+ PredFBB = NextBB;
+
+    // Redirect edges that targeted TailBB to its single successor.
+ if (PredFBB == TailBB)
+ PredFBB = NewTarget;
+ if (PredTBB == TailBB)
+ PredTBB = NewTarget;
+
+ // Make the branch unconditional if possible
+ if (PredTBB == PredFBB) {
+ PredCond.clear();
+ PredFBB = NULL;
+ }
+
+ // Avoid adding fall through branches.
+ if (PredFBB == NextBB)
+ PredFBB = NULL;
+ if (PredTBB == NextBB && PredFBB == NULL)
+ PredTBB = NULL;
+
+ TII->RemoveBranch(*PredBB);
+
+ if (PredTBB)
+ TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+
+ PredBB->removeSuccessor(TailBB);
+ unsigned NumSuccessors = PredBB->succ_size();
+ assert(NumSuccessors <= 1);
+ if (NumSuccessors == 0 || *PredBB->succ_begin() != NewTarget)
+ PredBB->addSuccessor(NewTarget);
+
+ TDBBs.push_back(PredBB);
+ }
+ return Changed;
+}
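
The block of rewrites above follows a fixed recipe: make both branch targets explicit, redirect edges off the bypassed block, then re-canonicalize by dropping redundant conditions and fall-through branches. A standalone sketch with block numbers and 0 standing for "none":

    // Old is the bypassed block, New its single successor, Next the
    // layout successor of the predecessor being rewritten.
    struct Branches { int TBB, FBB; bool Conditional; };

    Branches retarget(Branches B, int Old, int New, int Next) {
      if (!B.Conditional) B.FBB = B.TBB;   // Make FBB explicit.
      if (!B.TBB) B.TBB = Next;            // Make fall-through explicit.
      if (!B.FBB) B.FBB = Next;
      if (B.TBB == Old) B.TBB = New;       // Redirect both edges.
      if (B.FBB == Old) B.FBB = New;
      if (B.TBB == B.FBB) {                // Edges agree: unconditional.
        B.Conditional = false;
        B.FBB = 0;
      }
      if (B.FBB == Next) B.FBB = 0;        // Avoid explicit fall-through.
      if (B.TBB == Next && !B.FBB && !B.Conditional) B.TBB = 0;
      return B;
    }
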
+
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
bool
-TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
+ bool IsSimple,
+ MachineFunction &MF,
SmallVector<MachineBasicBlock*, 8> &TDBBs,
SmallVector<MachineInstr*, 16> &Copies) {
- if (!shouldTailDuplicate(MF, *TailBB))
- return false;
-
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+ DenseSet<unsigned> UsedByPhi;
+ getRegsUsedByPHIs(*TailBB, &UsedByPhi);
+
+ if (IsSimple)
+ return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
+
// Iterate through all the unique predecessors and tail-duplicate this
// block into them, if possible. Copying the list ahead of time also
// avoids trouble with the predecessor list reallocating.
bool Changed = false;
SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
TailBB->pred_end());
- DenseSet<unsigned> UsedByPhi;
- getRegsUsedByPHIs(*TailBB, &UsedByPhi);
for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
PE = Preds.end(); PI != PE; ++PI) {
MachineBasicBlock *PredBB = *PI;
@@ -618,6 +797,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
TII->get(TargetOpcode::COPY),
CopyInfos[i].first).addReg(CopyInfos[i].second));
}
+
+    // Simplify the branch by re-analyzing PredBB with AllowModify set.
+ TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+
NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
// Update the CFG.
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 34e2b33..86e71d8 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -59,8 +59,8 @@ TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// the two operands returned by findCommutedOpIndices.
MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool NewMI) const {
- const TargetInstrDesc &TID = MI->getDesc();
- bool HasDef = TID.getNumDefs();
+ const MCInstrDesc &MCID = MI->getDesc();
+ bool HasDef = MCID.getNumDefs();
if (HasDef && !MI->getOperand(0).isReg())
// No idea how to commute this instruction. Target should implement its own.
return 0;
@@ -81,7 +81,7 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool ChangeReg0 = false;
if (HasDef && MI->getOperand(0).getReg() == Reg1) {
// Must be two address instruction!
- assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
"Expecting a two-address instruction!");
Reg2IsKill = false;
ChangeReg0 = true;
@@ -119,12 +119,12 @@ MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI,
bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isCommutable())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isCommutable())
return false;
// This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this
// is not true, then the target must implement this.
- SrcOpIdx1 = TID.getNumDefs();
+ SrcOpIdx1 = MCID.getNumDefs();
SrcOpIdx2 = SrcOpIdx1 + 1;
if (!MI->getOperand(SrcOpIdx1).isReg() ||
!MI->getOperand(SrcOpIdx2).isReg())
@@ -137,12 +137,12 @@ bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI,
bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const {
bool MadeChange = false;
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isPredicable())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
return false;
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
- if (TID.OpInfo[i].isPredicate()) {
+ if (MCID.OpInfo[i].isPredicate()) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
MO.setReg(Pred[j].getReg());
@@ -332,10 +332,10 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
return true;
- const TargetInstrDesc &TID = MI->getDesc();
+ const MCInstrDesc &MCID = MI->getDesc();
// Avoid instructions obviously unsafe for remat.
- if (TID.isNotDuplicable() || TID.mayStore() ||
+ if (MCID.isNotDuplicable() || MCID.mayStore() ||
MI->hasUnmodeledSideEffects())
return false;
@@ -345,7 +345,7 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
return false;
// Avoid instructions which load from potentially varying memory.
- if (TID.mayLoad() && !MI->isInvariantLoad(AA))
+ if (MCID.mayLoad() && !MI->isInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
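
Condensing the generic test this hunk touches: an instruction may be rematerialized only if replaying it elsewhere can never observe or mutate changing machine state. A hypothetical boolean summary, with the operand inspection elided since the full function also walks the register uses:

    // True if the instruction's descriptor flags permit remat at all.
    bool looksTriviallyRematerializable(bool NotDuplicable, bool MayStore,
                                        bool HasSideEffects,
                                        bool MayLoad, bool IsInvariantLoad) {
      if (NotDuplicable || MayStore || HasSideEffects)
        return false;                 // Replaying it is plainly unsafe.
      if (MayLoad && !IsInvariantLoad)
        return false;                 // Memory may differ at the new site.
      return true;                    // Register inputs checked by caller.
    }
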
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index cdac42d..a3c5620 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -43,6 +43,19 @@ using namespace dwarf;
// ELF
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileELF::TargetLoweringObjectFileELF()
+ : TargetLoweringObjectFile(),
+ TLSDataSection(0),
+ TLSBSSSection(0),
+ DataRelSection(0),
+ DataRelLocalSection(0),
+ DataRelROSection(0),
+ DataRelROLocalSection(0),
+ MergeableConst4Section(0),
+ MergeableConst8Section(0),
+ MergeableConst16Section(0) {
+}
+
void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFile::Initialize(Ctx, TM);
@@ -189,8 +202,8 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
return Mang->getSymbol(GV);
break;
case dwarf::DW_EH_PE_pcrel: {
- Twine FullName = StringRef("DW.ref.") + Mang->getSymbol(GV)->getName();
- return getContext().GetOrCreateSymbol(FullName);
+ return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
+ Mang->getSymbol(GV)->getName());
break;
}
}
@@ -199,13 +212,13 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer,
const TargetMachine &TM,
const MCSymbol *Sym) const {
- Twine FullName = StringRef("DW.ref.") + Sym->getName();
- MCSymbol *Label = getContext().GetOrCreateSymbol(FullName);
+ SmallString<64> NameData("DW.ref.");
+ NameData += Sym->getName();
+ MCSymbol *Label = getContext().GetOrCreateSymbol(NameData);
Streamer.EmitSymbolAttribute(Label, MCSA_Hidden);
Streamer.EmitSymbolAttribute(Label, MCSA_Weak);
- Twine SectionName = StringRef(".data.") + Label->getName();
- SmallString<64> NameData;
- SectionName.toVector(NameData);
+ StringRef Prefix = ".data.";
+ NameData.insert(NameData.begin(), Prefix.begin(), Prefix.end());
unsigned Flags = ELF::SHF_ALLOC | ELF::SHF_WRITE | ELF::SHF_GROUP;
const MCSection *Sec = getContext().getELFSection(NameData,
ELF::SHT_PROGBITS,
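The emitPersonalityValue rewrite drops the locally stored Twines in favor of an explicit SmallString buffer, most likely because a named Twine can refer to temporaries that are already dead by the time it is used. A hedged sketch of the buffer pattern (names illustrative, mirroring the calls in this hunk):

    SmallString<64> Name("DW.ref.");
    Name += Sym->getName();                         // append in place; no Twine kept alive
    MCSymbol *Label = Ctx.GetOrCreateSymbol(Name);  // SmallString passed as in the patch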
@@ -480,6 +493,27 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
// MachO
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
+ : TargetLoweringObjectFile(),
+ TLSDataSection(0),
+ TLSBSSSection(0),
+ TLSTLVSection(0),
+ TLSThreadInitSection(0),
+ CStringSection(0),
+ UStringSection(0),
+ TextCoalSection(0),
+ ConstTextCoalSection(0),
+ ConstDataSection(0),
+ DataCoalSection(0),
+ DataCommonSection(0),
+ DataBSSSection(0),
+ FourByteConstantSection(0),
+ EightByteConstantSection(0),
+ SixteenByteConstantSection(0),
+ LazySymbolPointerSection(0),
+ NonLazySymbolPointerSection(0) {
+}
+
void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
IsFunctionEHFrameSymbolPrivate = false;
@@ -605,6 +639,13 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
// Exception Handling.
LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
SectionKind::getReadOnlyWithRel());
+
+ if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6))
+ CompactUnwindSection =
+ getContext().getMachOSection("__LD", "__compact_unwind",
+ MCSectionMachO::S_ATTR_DEBUG,
+ SectionKind::getReadOnly());
+
// Debug Information.
DwarfAbbrevSection =
getContext().getMachOSection("__DWARF", "__debug_abbrev",
@@ -884,6 +925,13 @@ unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
// COFF
//===----------------------------------------------------------------------===//
+TargetLoweringObjectFileCOFF::TargetLoweringObjectFileCOFF()
+ : TargetLoweringObjectFile(),
+ DrectveSection(0),
+ PDataSection(0),
+ XDataSection(0) {
+}
+
void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFile::Initialize(Ctx, TM);
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index f54d879..6d6244e 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -280,8 +280,8 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
/// isTwoAddrUse - Return true if the specified MI is using the specified
/// register as a two-address operand.
static bool isTwoAddrUse(MachineInstr *UseMI, unsigned Reg) {
- const TargetInstrDesc &TID = UseMI->getDesc();
- for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
+ const MCInstrDesc &MCID = UseMI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
MachineOperand &MO = UseMI->getOperand(i);
if (MO.isReg() && MO.getReg() == Reg &&
(MO.isDef() || UseMI->isRegTiedToDefOperand(i)))
@@ -443,8 +443,9 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
/// isTwoAddrUse - Return true if the specified MI uses the specified register
/// as a two-address use. If so, return the destination register by reference.
static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) {
- const TargetInstrDesc &TID = MI.getDesc();
- unsigned NumOps = MI.isInlineAsm() ? MI.getNumOperands():TID.getNumOperands();
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned NumOps = MI.isInlineAsm()
+ ? MI.getNumOperands() : MCID.getNumOperands();
for (unsigned i = 0; i != NumOps; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg)
@@ -761,10 +762,10 @@ void TwoAddressInstructionPass::ProcessCopy(MachineInstr *MI,
static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrInfo *TII,
SmallVector<unsigned, 4> &Kills) {
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.mayStore() || TID.isCall())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayStore() || MCID.isCall())
return false;
- if (TID.isTerminator() || MI->hasUnmodeledSideEffects())
+ if (MCID.isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -854,7 +855,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
MachineFunction::iterator &mbbi,
unsigned SrcIdx, unsigned DstIdx, unsigned Dist,
SmallPtrSet<MachineInstr*, 8> &Processed) {
- const TargetInstrDesc &TID = mi->getDesc();
+ const MCInstrDesc &MCID = mi->getDesc();
unsigned regA = mi->getOperand(DstIdx).getReg();
unsigned regB = mi->getOperand(SrcIdx).getReg();
@@ -876,7 +877,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
unsigned regCIdx = ~0U;
bool TryCommute = false;
bool AggressiveCommute = false;
- if (TID.isCommutable() && mi->getNumOperands() >= 3 &&
+ if (MCID.isCommutable() && mi->getNumOperands() >= 3 &&
TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) {
if (SrcIdx == SrcOp1)
regCIdx = SrcOp2;
@@ -907,7 +908,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (TargetRegisterInfo::isVirtualRegister(regA))
ScanUses(regA, &*mbbi, Processed);
- if (TID.isConvertibleTo3Addr()) {
+ if (MCID.isConvertibleTo3Addr()) {
// This instruction is potentially convertible to a true
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA, regB)) {
@@ -927,7 +928,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// movq (%rax), %rcx
// addq %rdx, %rcx
// because it's preferable to schedule a load than a register copy.
- if (TID.mayLoad() && !regBKilled) {
+ if (MCID.mayLoad() && !regBKilled) {
// Determine if a load can be unfolded.
unsigned LoadRegIndex;
unsigned NewOpc =
@@ -936,14 +937,14 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
/*UnfoldStore=*/false,
&LoadRegIndex);
if (NewOpc != 0) {
- const TargetInstrDesc &UnfoldTID = TII->get(NewOpc);
- if (UnfoldTID.getNumDefs() == 1) {
+ const MCInstrDesc &UnfoldMCID = TII->get(NewOpc);
+ if (UnfoldMCID.getNumDefs() == 1) {
MachineFunction &MF = *mbbi->getParent();
// Unfold the load.
DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi);
const TargetRegisterClass *RC =
- UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI);
+ TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI);
unsigned Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
if (!TII->unfoldMemoryOperand(MF, mi, Reg,
@@ -1067,7 +1068,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
if (mi->isRegSequence())
RegSequences.push_back(&*mi);
- const TargetInstrDesc &TID = mi->getDesc();
+ const MCInstrDesc &MCID = mi->getDesc();
bool FirstTied = true;
DistanceMap.insert(std::make_pair(mi, ++Dist));
@@ -1077,7 +1078,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// First scan through all the tied register uses in this instruction
// and record a list of pairs of tied operands for each register.
unsigned NumOps = mi->isInlineAsm()
- ? mi->getNumOperands() : TID.getNumOperands();
+ ? mi->getNumOperands() : MCID.getNumOperands();
for (unsigned SrcIdx = 0; SrcIdx < NumOps; ++SrcIdx) {
unsigned DstIdx = 0;
if (!mi->isRegTiedToDefOperand(SrcIdx, &DstIdx))
@@ -1095,12 +1096,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
"two address instruction invalid");
unsigned regB = mi->getOperand(SrcIdx).getReg();
- TiedOperandMap::iterator OI = TiedOperands.find(regB);
- if (OI == TiedOperands.end()) {
- SmallVector<std::pair<unsigned, unsigned>, 4> TiedPair;
- OI = TiedOperands.insert(std::make_pair(regB, TiedPair)).first;
- }
- OI->second.push_back(std::make_pair(SrcIdx, DstIdx));
+ TiedOperands[regB].push_back(std::make_pair(SrcIdx, DstIdx));
}
// Now iterate over the information collected above.
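The TiedOperands simplification above relies on map operator[] value-initializing the mapped container on first access, so the explicit find/insert dance is unnecessary. A self-contained sketch of the same idiom (std::map and std::vector used here purely for illustration):

    #include <map>
    #include <utility>
    #include <vector>

    std::map<unsigned, std::vector<std::pair<unsigned, unsigned> > > TiedOperands;

    void record(unsigned Reg, unsigned SrcIdx, unsigned DstIdx) {
      // operator[] default-constructs the vector if Reg is not present yet.
      TiedOperands[Reg].push_back(std::make_pair(SrcIdx, DstIdx));
    }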
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index ba50f4e..03abff3 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -208,6 +208,11 @@ namespace llvm {
/// @brief returns the register allocation preference.
unsigned getRegAllocPref(unsigned virtReg);
+ /// @brief returns true if VirtReg is assigned to its preferred physreg.
+ bool hasPreferredPhys(unsigned VirtReg) {
+ return getPhys(VirtReg) == getRegAllocPref(VirtReg);
+ }
+
/// @brief records virtReg is a split live interval from SReg.
void setIsSplitFromReg(unsigned virtReg, unsigned SReg) {
Virt2SplitMap[virtReg] = SReg;
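hasPreferredPhys simply compares the current assignment against getRegAllocPref. A small illustrative use (the statistics loop is hypothetical and assumes every register in VRegs already has a physreg assigned):

    static unsigned countPreferred(VirtRegMap &VRM,
                                   const SmallVectorImpl<unsigned> &VRegs) {
      unsigned Hits = 0;
      for (unsigned i = 0, e = VRegs.size(); i != e; ++i)
        if (VRM.hasPreferredPhys(VRegs[i]))  // getPhys(Reg) == getRegAllocPref(Reg)
          ++Hits;
      return Hits;
    }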
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 1850658..a5ec797 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -679,8 +679,8 @@ static void ReMaterialize(MachineBasicBlock &MBB,
VirtRegMap &VRM) {
MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg);
#ifndef NDEBUG
- const TargetInstrDesc &TID = ReMatDefMI->getDesc();
- assert(TID.getNumDefs() == 1 &&
+ const MCInstrDesc &MCID = ReMatDefMI->getDesc();
+ assert(MCID.getNumDefs() == 1 &&
"Don't know how to remat instructions that define > 1 values!");
#endif
TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI);
@@ -1483,11 +1483,11 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII,
/// where SrcReg is r1 and it is tied to r0. Return true if after
/// commuting this instruction it will be r0 = op r2, r1.
static bool CommuteChangesDestination(MachineInstr *DefMI,
- const TargetInstrDesc &TID,
+ const MCInstrDesc &MCID,
unsigned SrcReg,
const TargetInstrInfo *TII,
unsigned &DstIdx) {
- if (TID.getNumDefs() != 1 && TID.getNumOperands() != 3)
+ if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3)
return false;
if (!DefMI->getOperand(1).isReg() ||
DefMI->getOperand(1).getReg() != SrcReg)
@@ -1527,11 +1527,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
MachineInstr &MI = *MII;
MachineBasicBlock::iterator DefMII = prior(MII);
MachineInstr *DefMI = DefMII;
- const TargetInstrDesc &TID = DefMI->getDesc();
+ const MCInstrDesc &MCID = DefMI->getDesc();
unsigned NewDstIdx;
if (DefMII != MBB->begin() &&
- TID.isCommutable() &&
- CommuteChangesDestination(DefMI, TID, SrcReg, TII, NewDstIdx)) {
+ MCID.isCommutable() &&
+ CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) {
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
unsigned NewReg = NewDstMO.getReg();
if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg))
@@ -1658,9 +1658,9 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
/// isSafeToDelete - Return true if this instruction doesn't produce any side
/// effect and all of its defs are dead.
static bool isSafeToDelete(MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
- if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() ||
- TID.isCall() || TID.isBarrier() || TID.isReturn() ||
+ const MCInstrDesc &MCID = MI.getDesc();
+ if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() ||
+ MCID.isCall() || MCID.isBarrier() || MCID.isReturn() ||
MI.isLabel() || MI.isDebugValue() ||
MI.hasUnmodeledSideEffects())
return false;
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 062256a..f7e2a4d 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -78,7 +78,6 @@ static char getTypeID(const Type *Ty) {
case Type::FunctionTyID:return 'M';
case Type::StructTyID: return 'T';
case Type::ArrayTyID: return 'A';
- case Type::OpaqueTyID: return 'O';
default: return 'U';
}
}
@@ -282,10 +281,10 @@ GenericValue Interpreter::callExternalFunction(Function *F,
if (F->getName() == "__main")
errs() << "Tried to execute an unknown external function: "
- << F->getType()->getDescription() << " __main\n";
+ << *F->getType() << " __main\n";
else
report_fatal_error("Tried to execute an unknown external function: " +
- F->getType()->getDescription() + " " +F->getName());
+ F->getName());
#ifndef USE_LIBFFI
errs() << "Recompiling LLVM with --enable-libffi might help.\n";
#endif
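The interpreter change above leans on Type's raw_ostream operator<< rather than the removed getDescription(). A minimal sketch of the printing idiom used here:

    static void printUnknownExternal(const Function *F) {
      // Types now stream directly; there is no getDescription() any more.
      errs() << *F->getType() << " " << F->getName() << "\n";
    }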
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 8fceaf2..445d2d0 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -533,8 +533,7 @@ GenericValue JIT::runFunction(Function *F,
Args.push_back(C);
}
- CallInst *TheCall = CallInst::Create(F, Args.begin(), Args.end(),
- "", StubBB);
+ CallInst *TheCall = CallInst::Create(F, Args, "", StubBB);
TheCall->setCallingConv(F->getCallingConv());
TheCall->setTailCall();
if (!TheCall->getType()->isVoidTy())
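The JIT stub change uses the container-taking CallInst::Create overload instead of the begin/end iterator pair; the SmallVector converts to the argument list directly. A hedged sketch of the calling pattern (F, StubBB, and the argument values are assumed to exist):

    SmallVector<Value*, 8> Args;     // collected actual arguments
    // ... push_back each argument Value* ...
    CallInst *Call = CallInst::Create(F, Args, "", StubBB);  // overload used above
    Call->setCallingConv(F->getCallingConv());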
diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
index 9e53f87..59bdfee 100644
--- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
+++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt
@@ -1,3 +1,4 @@
add_llvm_library(LLVMRuntimeDyld
RuntimeDyld.cpp
+ RuntimeDyldMachO.cpp
)
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index eda4cbb..33dd705 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -1,4 +1,4 @@
-//===-- RuntimeDyld.h - Run-time dynamic linker for MC-JIT ------*- C++ -*-===//
+//===-- RuntimeDyld.cpp - Run-time dynamic linker for MC-JIT -----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,118 +12,15 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "dyld"
-#include "llvm/ADT/OwningPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/Twine.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/Object/MachOObject.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/system_error.h"
-#include "llvm/Support/raw_ostream.h"
+#include "RuntimeDyldImpl.h"
using namespace llvm;
using namespace llvm::object;
// Empty out-of-line virtual destructor as the key function.
RTDyldMemoryManager::~RTDyldMemoryManager() {}
+RuntimeDyldImpl::~RuntimeDyldImpl() {}
namespace llvm {
-class RuntimeDyldImpl {
- unsigned CPUType;
- unsigned CPUSubtype;
-
- // The MemoryManager to load objects into.
- RTDyldMemoryManager *MemMgr;
-
- // FIXME: This all assumes we're dealing with external symbols for anything
- // explicitly referenced. I.e., we can index by name and things
- // will work out. In practice, this may not be the case, so we
- // should find a way to effectively generalize.
-
- // For each function, we have a MemoryBlock of it's instruction data.
- StringMap<sys::MemoryBlock> Functions;
-
- // Master symbol table. As modules are loaded and external symbols are
- // resolved, their addresses are stored here.
- StringMap<uint8_t*> SymbolTable;
-
- // For each symbol, keep a list of relocations based on it. Anytime
- // its address is reassigned (the JIT re-compiled the function, e.g.),
- // the relocations get re-resolved.
- struct RelocationEntry {
- std::string Target; // Object this relocation is contained in.
- uint64_t Offset; // Offset into the object for the relocation.
- uint32_t Data; // Second word of the raw macho relocation entry.
- int64_t Addend; // Addend encoded in the instruction itself, if any.
- bool isResolved; // Has this relocation been resolved previously?
-
- RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
- : Target(t), Offset(offset), Data(data), Addend(addend),
- isResolved(false) {}
- };
- typedef SmallVector<RelocationEntry, 4> RelocationList;
- StringMap<RelocationList> Relocations;
-
- // FIXME: Also keep a map of all the relocations contained in an object. Use
- // this to dynamically answer whether all of the relocations in it have
- // been resolved or not.
-
- bool HasError;
- std::string ErrorStr;
-
- // Set the error state and record an error string.
- bool Error(const Twine &Msg) {
- ErrorStr = Msg.str();
- HasError = true;
- return true;
- }
-
- void extractFunction(StringRef Name, uint8_t *StartAddress,
- uint8_t *EndAddress);
- bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
- unsigned Type, unsigned Size);
- bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size);
- bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
- unsigned Type, unsigned Size);
-
- bool loadSegment32(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
- bool loadSegment64(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
-
-public:
- RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
-
- bool loadObject(MemoryBuffer *InputBuffer);
-
- void *getSymbolAddress(StringRef Name) {
- // FIXME: Just look up as a function for now. Overly simple of course.
- // Work in progress.
- return SymbolTable.lookup(Name);
- }
-
- void resolveRelocations();
-
- void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
-
- // Is the linker in an error state?
- bool hasError() { return HasError; }
-
- // Mark the error condition as handled and continue.
- void clearError() { HasError = false; }
-
- // Get the error message.
- StringRef getErrorString() { return ErrorStr; }
-};
void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
uint8_t *EndAddress) {
@@ -144,472 +41,6 @@ void RuntimeDyldImpl::extractFunction(StringRef Name, uint8_t *StartAddress,
DEBUG(dbgs() << " allocated to [" << Mem << ", " << Mem + Size << "]\n");
}
-bool RuntimeDyldImpl::
-resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
- unsigned Type, unsigned Size) {
- // This just dispatches to the proper target specific routine.
- switch (CPUType) {
- default: assert(0 && "Unsupported CPU type!");
- case mach::CTM_x86_64:
- return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
- isPCRel, Type, Size);
- case mach::CTM_ARM:
- return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
- isPCRel, Type, Size);
- }
- llvm_unreachable("");
-}
-
-bool RuntimeDyldImpl::
-resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
- bool isPCRel, unsigned Type,
- unsigned Size) {
- // If the relocation is PC-relative, the value to be encoded is the
- // pointer difference.
- if (isPCRel)
- // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
- // address. Is that expected? Only for branches, perhaps?
- Value -= Address + 4;
-
- switch(Type) {
- default:
- llvm_unreachable("Invalid relocation type!");
- case macho::RIT_X86_64_Unsigned:
- case macho::RIT_X86_64_Branch: {
- // Mask in the target value a byte at a time (we don't have an alignment
- // guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t*)Address;
- for (unsigned i = 0; i < Size; ++i) {
- *p++ = (uint8_t)Value;
- Value >>= 8;
- }
- return false;
- }
- case macho::RIT_X86_64_Signed:
- case macho::RIT_X86_64_GOTLoad:
- case macho::RIT_X86_64_GOT:
- case macho::RIT_X86_64_Subtractor:
- case macho::RIT_X86_64_Signed1:
- case macho::RIT_X86_64_Signed2:
- case macho::RIT_X86_64_Signed4:
- case macho::RIT_X86_64_TLV:
- return Error("Relocation type not implemented yet!");
- }
- return false;
-}
-
-bool RuntimeDyldImpl::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
- bool isPCRel, unsigned Type,
- unsigned Size) {
- // If the relocation is PC-relative, the value to be encoded is the
- // pointer difference.
- if (isPCRel) {
- Value -= Address;
- // ARM PCRel relocations have an effective-PC offset of two instructions
- // (four bytes in Thumb mode, 8 bytes in ARM mode).
- // FIXME: For now, assume ARM mode.
- Value -= 8;
- }
-
- switch(Type) {
- default:
- llvm_unreachable("Invalid relocation type!");
- case macho::RIT_Vanilla: {
- llvm_unreachable("Invalid relocation type!");
- // Mask in the target value a byte at a time (we don't have an alignment
- // guarantee for the target address, so this is safest).
- uint8_t *p = (uint8_t*)Address;
- for (unsigned i = 0; i < Size; ++i) {
- *p++ = (uint8_t)Value;
- Value >>= 8;
- }
- break;
- }
- case macho::RIT_ARM_Branch24Bit: {
- // Mask the value into the target address. We know instructions are
- // 32-bit aligned, so we can do it all at once.
- uint32_t *p = (uint32_t*)Address;
- // The low two bits of the value are not encoded.
- Value >>= 2;
- // Mask the value to 24 bits.
- Value &= 0xffffff;
- // FIXME: If the destination is a Thumb function (and the instruction
- // is a non-predicated BL instruction), we need to change it to a BLX
- // instruction instead.
-
- // Insert the value into the instruction.
- *p = (*p & ~0xffffff) | Value;
- break;
- }
- case macho::RIT_ARM_ThumbBranch22Bit:
- case macho::RIT_ARM_ThumbBranch32Bit:
- case macho::RIT_ARM_Half:
- case macho::RIT_ARM_HalfDifference:
- case macho::RIT_Pair:
- case macho::RIT_Difference:
- case macho::RIT_ARM_LocalDifference:
- case macho::RIT_ARM_PreboundLazyPointer:
- return Error("Relocation type not implemented yet!");
- }
- return false;
-}
-
-bool RuntimeDyldImpl::
-loadSegment32(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
- Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
- if (!SegmentLC)
- return Error("unable to load segment load command");
-
- for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section> Sect;
- Obj->ReadSection(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
-
- // FIXME: For the time being, we're only loading text segments.
- if (Sect->Flags != 0x80000400)
- continue;
-
- // Address and names of symbols in the section.
- typedef std::pair<uint64_t, StringRef> SymbolEntry;
- SmallVector<SymbolEntry, 64> Symbols;
- // Index of all the names, in this section or not. Used when we're
- // dealing with relocation entries.
- SmallVector<StringRef, 64> SymbolNames;
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
- InMemoryStruct<macho::SymbolTableEntry> STE;
- Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
- if (!STE)
- return Error("unable to read symbol: '" + Twine(i) + "'");
- if (STE->SectionIndex > SegmentLC->NumSections)
- return Error("invalid section index for symbol: '" + Twine(i) + "'");
- // Get the symbol name.
- StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
- SymbolNames.push_back(Name);
-
- // Just skip symbols not defined in this section.
- if ((unsigned)STE->SectionIndex - 1 != SectNum)
- continue;
-
- // FIXME: Check the symbol type and flags.
- if (STE->Type != 0xF) // external, defined in this section.
- continue;
- // Flags == 0x8 marks a thumb function for ARM, which is fine as it
- // doesn't require any special handling here.
- if (STE->Flags != 0x0 && STE->Flags != 0x8)
- continue;
-
- // Remember the symbol.
- Symbols.push_back(SymbolEntry(STE->Value, Name));
-
- DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
- (Sect->Address + STE->Value) << "\n");
- }
- // Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
-
- // If there weren't any functions (odd, but just in case...)
- if (!Symbols.size())
- continue;
-
- // Extract the function data.
- uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
- SegmentLC->FileSize).data();
- for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
- uint64_t StartOffset = Sect->Address + Symbols[i].first;
- uint64_t EndOffset = Symbols[i + 1].first - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
- }
- // The last symbol we do after since the end address is calculated
- // differently because there is no next symbol to reference.
- uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
- uint64_t EndOffset = Sect->Size - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[Symbols.size()-1].second,
- Base + StartOffset, Base + EndOffset);
-
- // Now extract the relocation information for each function and process it.
- for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
- if (RE->Word0 & macho::RF_Scattered)
- return Error("NOT YET IMPLEMENTED: scattered relocations.");
- // Word0 of the relocation is the offset into the section where the
- // relocation should be applied. We need to translate that into an
- // offset into a function since that's our atom.
- uint32_t Offset = RE->Word0;
- // Look for the function containing the address. This is used for JIT
- // code, so the number of functions in section is almost always going
- // to be very small (usually just one), so until we have use cases
- // where that's not true, just use a trivial linear search.
- unsigned SymbolNum;
- unsigned NumSymbols = Symbols.size();
- assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
- "No symbol containing relocation!");
- for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
- if (Symbols[SymbolNum + 1].first > Offset)
- break;
- // Adjust the offset to be relative to the symbol.
- Offset -= Symbols[SymbolNum].first;
- // Get the name of the symbol containing the relocation.
- StringRef TargetName = SymbolNames[SymbolNum];
-
- bool isExtern = (RE->Word1 >> 27) & 1;
- // Figure out the source symbol of the relocation. If isExtern is true,
- // this relocation references the symbol table, otherwise it references
- // a section in the same object, numbered from 1 through NumSections
- // (SectionBases is [0, NumSections-1]).
- // FIXME: Some targets (ARM) use internal relocations even for
- // externally visible symbols, if the definition is in the same
- // file as the reference. We need to convert those back to by-name
- // references. We can resolve the address based on the section
- // offset and see if we have a symbol at that address. If we do,
- // use that; otherwise, puke.
- if (!isExtern)
- return Error("Internal relocations not supported.");
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
- StringRef SourceName = SymbolNames[SourceNum];
-
- // FIXME: Get the relocation addend from the target address.
-
- // Now store the relocation information. Associate it with the source
- // symbol.
- Relocations[SourceName].push_back(RelocationEntry(TargetName,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
- << " from '" << SourceName << "(Word1: "
- << format("0x%x", RE->Word1) << ")\n");
- }
- }
- return false;
-}
-
-
-bool RuntimeDyldImpl::
-loadSegment64(const MachOObject *Obj,
- const MachOObject::LoadCommandInfo *SegmentLCI,
- const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
- InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
- Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
- if (!Segment64LC)
- return Error("unable to load segment load command");
-
- for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
- InMemoryStruct<macho::Section64> Sect;
- Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
- if (!Sect)
- return Error("unable to load section: '" + Twine(SectNum) + "'");
-
- // FIXME: For the time being, we're only loading text segments.
- if (Sect->Flags != 0x80000400)
- continue;
-
- // Address and names of symbols in the section.
- typedef std::pair<uint64_t, StringRef> SymbolEntry;
- SmallVector<SymbolEntry, 64> Symbols;
- // Index of all the names, in this section or not. Used when we're
- // dealing with relocation entries.
- SmallVector<StringRef, 64> SymbolNames;
- for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
- InMemoryStruct<macho::Symbol64TableEntry> STE;
- Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
- if (!STE)
- return Error("unable to read symbol: '" + Twine(i) + "'");
- if (STE->SectionIndex > Segment64LC->NumSections)
- return Error("invalid section index for symbol: '" + Twine(i) + "'");
- // Get the symbol name.
- StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
- SymbolNames.push_back(Name);
-
- // Just skip symbols not defined in this section.
- if ((unsigned)STE->SectionIndex - 1 != SectNum)
- continue;
-
- // FIXME: Check the symbol type and flags.
- if (STE->Type != 0xF) // external, defined in this section.
- continue;
- if (STE->Flags != 0x0)
- continue;
-
- // Remember the symbol.
- Symbols.push_back(SymbolEntry(STE->Value, Name));
-
- DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
- (Sect->Address + STE->Value) << "\n");
- }
- // Sort the symbols by address, just in case they didn't come in that way.
- array_pod_sort(Symbols.begin(), Symbols.end());
-
- // If there weren't any functions (odd, but just in case...)
- if (!Symbols.size())
- continue;
-
- // Extract the function data.
- uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
- Segment64LC->FileSize).data();
- for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
- uint64_t StartOffset = Sect->Address + Symbols[i].first;
- uint64_t EndOffset = Symbols[i + 1].first - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
- }
- // The last symbol we do after since the end address is calculated
- // differently because there is no next symbol to reference.
- uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
- uint64_t EndOffset = Sect->Size - 1;
- DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
- << " from [" << StartOffset << ", " << EndOffset << "]\n");
- extractFunction(Symbols[Symbols.size()-1].second,
- Base + StartOffset, Base + EndOffset);
-
- // Now extract the relocation information for each function and process it.
- for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
- InMemoryStruct<macho::RelocationEntry> RE;
- Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
- if (RE->Word0 & macho::RF_Scattered)
- return Error("NOT YET IMPLEMENTED: scattered relocations.");
- // Word0 of the relocation is the offset into the section where the
- // relocation should be applied. We need to translate that into an
- // offset into a function since that's our atom.
- uint32_t Offset = RE->Word0;
- // Look for the function containing the address. This is used for JIT
- // code, so the number of functions in section is almost always going
- // to be very small (usually just one), so until we have use cases
- // where that's not true, just use a trivial linear search.
- unsigned SymbolNum;
- unsigned NumSymbols = Symbols.size();
- assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
- "No symbol containing relocation!");
- for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
- if (Symbols[SymbolNum + 1].first > Offset)
- break;
- // Adjust the offset to be relative to the symbol.
- Offset -= Symbols[SymbolNum].first;
- // Get the name of the symbol containing the relocation.
- StringRef TargetName = SymbolNames[SymbolNum];
-
- bool isExtern = (RE->Word1 >> 27) & 1;
- // Figure out the source symbol of the relocation. If isExtern is true,
- // this relocation references the symbol table, otherwise it references
- // a section in the same object, numbered from 1 through NumSections
- // (SectionBases is [0, NumSections-1]).
- if (!isExtern)
- return Error("Internal relocations not supported.");
- uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
- StringRef SourceName = SymbolNames[SourceNum];
-
- // FIXME: Get the relocation addend from the target address.
-
- // Now store the relocation information. Associate it with the source
- // symbol.
- Relocations[SourceName].push_back(RelocationEntry(TargetName,
- Offset,
- RE->Word1,
- 0 /*Addend*/));
- DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
- << " from '" << SourceName << "(Word1: "
- << format("0x%x", RE->Word1) << ")\n");
- }
- }
- return false;
-}
-
-bool RuntimeDyldImpl::loadObject(MemoryBuffer *InputBuffer) {
- // If the linker is in an error state, don't do anything.
- if (hasError())
- return true;
- // Load the Mach-O wrapper object.
- std::string ErrorStr;
- OwningPtr<MachOObject> Obj(
- MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
- if (!Obj)
- return Error("unable to load object: '" + ErrorStr + "'");
-
- // Get the CPU type information from the header.
- const macho::Header &Header = Obj->getHeader();
-
- // FIXME: Error checking that the loaded object is compatible with
- // the system we're running on.
- CPUType = Header.CPUType;
- CPUSubtype = Header.CPUSubtype;
-
- // Validate that the load commands match what we expect.
- const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
- *DysymtabLCI = 0;
- for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
- const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
- switch (LCI.Command.Type) {
- case macho::LCT_Segment:
- case macho::LCT_Segment64:
- if (SegmentLCI)
- return Error("unexpected input object (multiple segments)");
- SegmentLCI = &LCI;
- break;
- case macho::LCT_Symtab:
- if (SymtabLCI)
- return Error("unexpected input object (multiple symbol tables)");
- SymtabLCI = &LCI;
- break;
- case macho::LCT_Dysymtab:
- if (DysymtabLCI)
- return Error("unexpected input object (multiple symbol tables)");
- DysymtabLCI = &LCI;
- break;
- default:
- return Error("unexpected input object (unexpected load command");
- }
- }
-
- if (!SymtabLCI)
- return Error("no symbol table found in object");
- if (!SegmentLCI)
- return Error("no symbol table found in object");
-
- // Read and register the symbol table data.
- InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
- Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
- if (!SymtabLC)
- return Error("unable to load symbol table load command");
- Obj->RegisterStringTable(*SymtabLC);
-
- // Read the dynamic link-edit information, if present (not present in static
- // objects).
- if (DysymtabLCI) {
- InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
- Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
- if (!DysymtabLC)
- return Error("unable to load dynamic link-exit load command");
-
- // FIXME: We don't support anything interesting yet.
-// if (DysymtabLC->LocalSymbolsIndex != 0)
-// return Error("NOT YET IMPLEMENTED: local symbol entries");
-// if (DysymtabLC->ExternalSymbolsIndex != 0)
-// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
-// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
-// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
- }
-
- // Load the segment load command.
- if (SegmentLCI->Command.Type == macho::LCT_Segment) {
- if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
- return true;
- } else {
- if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
- return true;
- }
-
- return false;
-}
-
// Resolve the relocations for all symbols we currently know about.
void RuntimeDyldImpl::resolveRelocations() {
// Just iterate over the symbols in our symbol table and assign their
@@ -620,35 +51,11 @@ void RuntimeDyldImpl::resolveRelocations() {
reassignSymbolAddress(i->getKey(), i->getValue());
}
-// Assign an address to a symbol name and resolve all the relocations
-// associated with it.
-void RuntimeDyldImpl::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
- // Assign the address in our symbol table.
- SymbolTable[Name] = Addr;
-
- RelocationList &Relocs = Relocations[Name];
- for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- RelocationEntry &RE = Relocs[i];
- uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
- bool isPCRel = (RE.Data >> 24) & 1;
- unsigned Type = (RE.Data >> 28) & 0xf;
- unsigned Size = 1 << ((RE.Data >> 25) & 3);
-
- DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
- << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
- << " from '" << Name << " (" << format("%p", Addr) << ")"
- << "(" << (isPCRel ? "pcrel" : "absolute")
- << ", type: " << Type << ", Size: " << Size << ").\n");
-
- resolveRelocation(Target, Addr, isPCRel, Type, Size);
- RE.isResolved = true;
- }
-}
-
//===----------------------------------------------------------------------===//
// RuntimeDyld class implementation
-RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *MM) {
- Dyld = new RuntimeDyldImpl(MM);
+RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) {
+ Dyld = 0;
+ MM = mm;
}
RuntimeDyld::~RuntimeDyld() {
@@ -656,6 +63,16 @@ RuntimeDyld::~RuntimeDyld() {
}
bool RuntimeDyld::loadObject(MemoryBuffer *InputBuffer) {
+ if (!Dyld) {
+ if (RuntimeDyldMachO::isKnownFormat(InputBuffer))
+ Dyld = new RuntimeDyldMachO(MM);
+ else
+ report_fatal_error("Unknown object format!");
+ } else {
+ if (!Dyld->isCompatibleFormat(InputBuffer))
+ report_fatal_error("Incompatible object format!");
+ }
+
return Dyld->loadObject(InputBuffer);
}
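With the format dispatch above, the public class creates the right back-end on first load. A hedged usage sketch, assuming the RuntimeDyld wrappers (loadObject, resolveRelocations, getSymbolAddress, getErrorString) forward to the impl as before:

    RuntimeDyld Dyld(MM);                // MM is an RTDyldMemoryManager*
    if (Dyld.loadObject(Buffer))         // instantiates RuntimeDyldMachO on demand
      report_fatal_error(Dyld.getErrorString());
    Dyld.resolveRelocations();
    void *FnAddr = Dyld.getSymbolAddress("_main");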
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
new file mode 100644
index 0000000..bcdfb04
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -0,0 +1,152 @@
+//===-- RuntimeDyldImpl.h - Run-time dynamic linker for MC-JIT ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Interface for the implementations of runtime dynamic linker facilities.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_RUNTIME_DYLD_IMPL_H
+#define LLVM_RUNTIME_DYLD_IMPL_H
+
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/Object/MachOObject.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Memory.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/system_error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+class RuntimeDyldImpl {
+protected:
+ unsigned CPUType;
+ unsigned CPUSubtype;
+
+ // The MemoryManager to load objects into.
+ RTDyldMemoryManager *MemMgr;
+
+ // FIXME: This all assumes we're dealing with external symbols for anything
+ // explicitly referenced. I.e., we can index by name and things
+ // will work out. In practice, this may not be the case, so we
+ // should find a way to effectively generalize.
+
+ // For each function, we have a MemoryBlock of its instruction data.
+ StringMap<sys::MemoryBlock> Functions;
+
+ // Master symbol table. As modules are loaded and external symbols are
+ // resolved, their addresses are stored here.
+ StringMap<uint8_t*> SymbolTable;
+
+ bool HasError;
+ std::string ErrorStr;
+
+ // Set the error state and record an error string.
+ bool Error(const Twine &Msg) {
+ ErrorStr = Msg.str();
+ HasError = true;
+ return true;
+ }
+
+ void extractFunction(StringRef Name, uint8_t *StartAddress,
+ uint8_t *EndAddress);
+
+public:
+ RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {}
+
+ virtual ~RuntimeDyldImpl();
+
+ virtual bool loadObject(MemoryBuffer *InputBuffer) = 0;
+
+ void *getSymbolAddress(StringRef Name) {
+ // FIXME: Just look up as a function for now. Overly simple of course.
+ // Work in progress.
+ return SymbolTable.lookup(Name);
+ }
+
+ void resolveRelocations();
+
+ virtual void reassignSymbolAddress(StringRef Name, uint8_t *Addr) = 0;
+
+ // Is the linker in an error state?
+ bool hasError() { return HasError; }
+
+ // Mark the error condition as handled and continue.
+ void clearError() { HasError = false; }
+
+ // Get the error message.
+ StringRef getErrorString() { return ErrorStr; }
+
+ virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const = 0;
+};
+
+
+class RuntimeDyldMachO : public RuntimeDyldImpl {
+
+ // For each symbol, keep a list of relocations based on it. Anytime
+ // its address is reassigned (e.g., the JIT recompiled the function),
+ // the relocations get re-resolved.
+ struct RelocationEntry {
+ std::string Target; // Object this relocation is contained in.
+ uint64_t Offset; // Offset into the object for the relocation.
+ uint32_t Data; // Second word of the raw macho relocation entry.
+ int64_t Addend; // Addend encoded in the instruction itself, if any.
+ bool isResolved; // Has this relocation been resolved previously?
+
+ RelocationEntry(StringRef t, uint64_t offset, uint32_t data, int64_t addend)
+ : Target(t), Offset(offset), Data(data), Addend(addend),
+ isResolved(false) {}
+ };
+ typedef SmallVector<RelocationEntry, 4> RelocationList;
+ StringMap<RelocationList> Relocations;
+
+ // FIXME: Also keep a map of all the relocations contained in an object. Use
+ // this to dynamically answer whether all of the relocations in it have
+ // been resolved or not.
+
+ bool resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+ bool resolveX86_64Relocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+ bool resolveARMRelocation(uintptr_t Address, uintptr_t Value, bool isPCRel,
+ unsigned Type, unsigned Size);
+
+ bool loadSegment32(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+ bool loadSegment64(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC);
+
+public:
+ RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ bool loadObject(MemoryBuffer *InputBuffer);
+
+ void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
+
+ static bool isKnownFormat(const MemoryBuffer *InputBuffer);
+
+ bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const {
+ return isKnownFormat(InputBuffer);
+ }
+};
+
+} // end namespace llvm
+
+
+#endif
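RuntimeDyldImpl is now an abstract interface; a new object format only has to override the three pure virtuals. A hypothetical second back-end (not part of this patch) would follow the RuntimeDyldMachO shape:

    class RuntimeDyldELF : public RuntimeDyldImpl {  // hypothetical, for illustration
    public:
      RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
      virtual bool loadObject(MemoryBuffer *InputBuffer);
      virtual void reassignSymbolAddress(StringRef Name, uint8_t *Addr);
      virtual bool isCompatibleFormat(const MemoryBuffer *InputBuffer) const;
    };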
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
new file mode 100644
index 0000000..623e9b2
--- /dev/null
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -0,0 +1,524 @@
+//===-- RuntimeDyldMachO.cpp - Run-time dynamic linker for MC-JIT -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the MC-JIT runtime dynamic linker.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dyld"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "RuntimeDyldImpl.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace llvm {
+
+bool RuntimeDyldMachO::
+resolveRelocation(uint8_t *Address, uint8_t *Value, bool isPCRel,
+ unsigned Type, unsigned Size) {
+ // This just dispatches to the proper target specific routine.
+ switch (CPUType) {
+ default: assert(0 && "Unsupported CPU type!");
+ case mach::CTM_x86_64:
+ return resolveX86_64Relocation((uintptr_t)Address, (uintptr_t)Value,
+ isPCRel, Type, Size);
+ case mach::CTM_ARM:
+ return resolveARMRelocation((uintptr_t)Address, (uintptr_t)Value,
+ isPCRel, Type, Size);
+ }
+ llvm_unreachable("");
+}
+
+bool RuntimeDyldMachO::
+resolveX86_64Relocation(uintptr_t Address, uintptr_t Value,
+ bool isPCRel, unsigned Type,
+ unsigned Size) {
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (isPCRel)
+ // FIXME: It seems this value needs to be adjusted by 4 for an effective PC
+ // address. Is that expected? Only for branches, perhaps?
+ Value -= Address + 4;
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_X86_64_Unsigned:
+ case macho::RIT_X86_64_Branch: {
+ // Mask in the target value a byte at a time (we don't have an alignment
+ // guarantee for the target address, so this is safest).
+ uint8_t *p = (uint8_t*)Address;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+ return false;
+ }
+ case macho::RIT_X86_64_Signed:
+ case macho::RIT_X86_64_GOTLoad:
+ case macho::RIT_X86_64_GOT:
+ case macho::RIT_X86_64_Subtractor:
+ case macho::RIT_X86_64_Signed1:
+ case macho::RIT_X86_64_Signed2:
+ case macho::RIT_X86_64_Signed4:
+ case macho::RIT_X86_64_TLV:
+ return Error("Relocation type not implemented yet!");
+ }
+ return false;
+}
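The byte-at-a-time store above writes the value little-endian, so no alignment is required at the patch address. A self-contained worked example of the same loop:

    #include <stdint.h>

    // Patch a 4-byte field at 'p' with Value, one byte at a time.
    // For Value = 0x12345678: p[0]=0x78, p[1]=0x56, p[2]=0x34, p[3]=0x12.
    static void patch32(uint8_t *p, uintptr_t Value) {
      for (unsigned i = 0; i < 4; ++i) {
        *p++ = (uint8_t)Value;  // emit the low byte
        Value >>= 8;            // shift the next byte down
      }
    }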
+
+bool RuntimeDyldMachO::resolveARMRelocation(uintptr_t Address, uintptr_t Value,
+ bool isPCRel, unsigned Type,
+ unsigned Size) {
+ // If the relocation is PC-relative, the value to be encoded is the
+ // pointer difference.
+ if (isPCRel) {
+ Value -= Address;
+ // ARM PCRel relocations have an effective-PC offset of two instructions
+ // (four bytes in Thumb mode, eight bytes in ARM mode).
+ // FIXME: For now, assume ARM mode.
+ Value -= 8;
+ }
+
+ switch(Type) {
+ default:
+ llvm_unreachable("Invalid relocation type!");
+ case macho::RIT_Vanilla: {
+ llvm_unreachable("Invalid relocation type!");
+ // Mask in the target value a byte at a time (we don't have an alignment
+ // guarantee for the target address, so this is safest).
+ uint8_t *p = (uint8_t*)Address;
+ for (unsigned i = 0; i < Size; ++i) {
+ *p++ = (uint8_t)Value;
+ Value >>= 8;
+ }
+ break;
+ }
+ case macho::RIT_ARM_Branch24Bit: {
+ // Mask the value into the target address. We know instructions are
+ // 32-bit aligned, so we can do it all at once.
+ uint32_t *p = (uint32_t*)Address;
+ // The low two bits of the value are not encoded.
+ Value >>= 2;
+ // Mask the value to 24 bits.
+ Value &= 0xffffff;
+ // FIXME: If the destination is a Thumb function (and the instruction
+ // is a non-predicated BL instruction), we need to change it to a BLX
+ // instruction instead.
+
+ // Insert the value into the instruction.
+ *p = (*p & ~0xffffff) | Value;
+ break;
+ }
+ case macho::RIT_ARM_ThumbBranch22Bit:
+ case macho::RIT_ARM_ThumbBranch32Bit:
+ case macho::RIT_ARM_Half:
+ case macho::RIT_ARM_HalfDifference:
+ case macho::RIT_Pair:
+ case macho::RIT_Difference:
+ case macho::RIT_ARM_LocalDifference:
+ case macho::RIT_ARM_PreboundLazyPointer:
+ return Error("Relocation type not implemented yet!");
+ }
+ return false;
+}
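The RIT_ARM_Branch24Bit case rewrites only the low 24 bits of the instruction word: the displacement is shifted right by two (ARM instructions are 4-byte aligned, so the low bits are implicit) and masked to a 24-bit field. A self-contained worked example:

    #include <stdint.h>

    // Splice a PC-relative displacement into the imm24 field of an ARM branch.
    static uint32_t encodeBranch24(uint32_t Insn, uintptr_t Value) {
      Value >>= 2;        // low two bits are implicit (4-byte instructions)
      Value &= 0xffffff;  // keep a 24-bit field
      return (Insn & ~0xffffff) | (uint32_t)Value;
    }
    // Example: encodeBranch24(0xEB000000 /* BL */, 0x100) == 0xEB000040.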
+
+bool RuntimeDyldMachO::
+loadSegment32(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+ InMemoryStruct<macho::SegmentLoadCommand> SegmentLC;
+ Obj->ReadSegmentLoadCommand(*SegmentLCI, SegmentLC);
+ if (!SegmentLC)
+ return Error("unable to load segment load command");
+
+ for (unsigned SectNum = 0; SectNum != SegmentLC->NumSections; ++SectNum) {
+ InMemoryStruct<macho::Section> Sect;
+ Obj->ReadSection(*SegmentLCI, SectNum, Sect);
+ if (!Sect)
+ return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+ // FIXME: For the time being, we're only loading text segments.
+ if (Sect->Flags != 0x80000400)
+ continue;
+
+ // Address and names of symbols in the section.
+ typedef std::pair<uint64_t, StringRef> SymbolEntry;
+ SmallVector<SymbolEntry, 64> Symbols;
+ // Index of all the symbol names, whether defined in this section or not;
+ // used when processing relocation entries.
+ SmallVector<StringRef, 64> SymbolNames;
+ for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+ InMemoryStruct<macho::SymbolTableEntry> STE;
+ Obj->ReadSymbolTableEntry(SymtabLC->SymbolTableOffset, i, STE);
+ if (!STE)
+ return Error("unable to read symbol: '" + Twine(i) + "'");
+ if (STE->SectionIndex > SegmentLC->NumSections)
+ return Error("invalid section index for symbol: '" + Twine(i) + "'");
+ // Get the symbol name.
+ StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+ SymbolNames.push_back(Name);
+
+ // Just skip symbols not defined in this section.
+ if ((unsigned)STE->SectionIndex - 1 != SectNum)
+ continue;
+
+ // FIXME: Check the symbol type and flags.
+ if (STE->Type != 0xF) // external, defined in this section.
+ continue;
+ // Flags == 0x8 marks a thumb function for ARM, which is fine as it
+ // doesn't require any special handling here.
+ if (STE->Flags != 0x0 && STE->Flags != 0x8)
+ continue;
+
+ // Remember the symbol.
+ Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+ DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+ (Sect->Address + STE->Value) << "\n");
+ }
+ // Sort the symbols by address, just in case they didn't come in that way.
+ array_pod_sort(Symbols.begin(), Symbols.end());
+
+ // If there weren't any functions (odd, but just in case...)
+ if (!Symbols.size())
+ continue;
+
+ // Extract the function data.
+ uint8_t *Base = (uint8_t*)Obj->getData(SegmentLC->FileOffset,
+ SegmentLC->FileSize).data();
+ for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+ uint64_t StartOffset = Sect->Address + Symbols[i].first;
+ uint64_t EndOffset = Symbols[i + 1].first - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+ }
+ // Process the last symbol separately, since its end address must be
+ // calculated differently: there is no next symbol to reference.
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+ uint64_t EndOffset = Sect->Size - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[Symbols.size()-1].second,
+ Base + StartOffset, Base + EndOffset);
+
+ // Now extract the relocation information for each function and process it.
+ for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+ if (RE->Word0 & macho::RF_Scattered)
+ return Error("NOT YET IMPLEMENTED: scattered relocations.");
+ // Word0 of the relocation is the offset into the section where the
+ // relocation should be applied. We need to translate that into an
+ // offset into a function since that's our atom.
+ uint32_t Offset = RE->Word0;
+ // Look for the function containing the address. This is used for JIT
+ // code, so the number of functions in the section is almost always going
+ // to be very small (usually just one), so until we have use cases
+ // where that's not true, just use a trivial linear search.
+ unsigned SymbolNum;
+ unsigned NumSymbols = Symbols.size();
+ assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+ "No symbol containing relocation!");
+ for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+ if (Symbols[SymbolNum + 1].first > Offset)
+ break;
+ // Adjust the offset to be relative to the symbol.
+ Offset -= Symbols[SymbolNum].first;
+ // Get the name of the symbol containing the relocation.
+ StringRef TargetName = SymbolNames[SymbolNum];
+
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ // Figure out the source symbol of the relocation. If isExtern is true,
+ // this relocation references the symbol table, otherwise it references
+ // a section in the same object, numbered from 1 through NumSections
+ // (SectionBases is [0, NumSections-1]).
+ // FIXME: Some targets (ARM) use internal relocations even for
+ // externally visible symbols, if the definition is in the same
+ // file as the reference. We need to convert those back to by-name
+ // references. We can resolve the address based on the section
+ // offset and see if we have a symbol at that address. If we do,
+ // use that; otherwise, puke.
+ if (!isExtern)
+ return Error("Internal relocations not supported.");
+ uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+ StringRef SourceName = SymbolNames[SourceNum];
+
+ // FIXME: Get the relocation addend from the target address.
+
+ // Now store the relocation information. Associate it with the source
+ // symbol.
+ Relocations[SourceName].push_back(RelocationEntry(TargetName,
+ Offset,
+ RE->Word1,
+ 0 /*Addend*/));
+ DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+ << " from '" << SourceName << "(Word1: "
+ << format("0x%x", RE->Word1) << ")\n");
+ }
+ }
+ return false;
+}
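Word1 of a plain Mach-O relocation entry packs the fields consumed above and in reassignSymbolAddress: bits 0-23 are the symbol (or section) number, bit 24 PC-relative, bits 25-26 the log2 length, bit 27 external, bits 28-31 the type. A self-contained decoder matching the shifts used in this file (the struct is a hypothetical illustration type):

    #include <stdint.h>

    struct RelocWord1Fields {   // hypothetical; layout matches the shifts above
      uint32_t SymbolNum;       // bits 0-23: symbol (or section) number
      bool     IsPCRel;         // bit 24
      unsigned Size;            // bits 25-26: length field, expanded to bytes
      bool     IsExtern;        // bit 27
      unsigned Type;            // bits 28-31
    };

    static RelocWord1Fields decodeWord1(uint32_t W) {
      RelocWord1Fields R;
      R.SymbolNum = W & 0xffffff;
      R.IsPCRel   = (W >> 24) & 1;
      R.Size      = 1u << ((W >> 25) & 3);
      R.IsExtern  = (W >> 27) & 1;
      R.Type      = (W >> 28) & 0xf;
      return R;
    }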
+
+
+bool RuntimeDyldMachO::
+loadSegment64(const MachOObject *Obj,
+ const MachOObject::LoadCommandInfo *SegmentLCI,
+ const InMemoryStruct<macho::SymtabLoadCommand> &SymtabLC) {
+ InMemoryStruct<macho::Segment64LoadCommand> Segment64LC;
+ Obj->ReadSegment64LoadCommand(*SegmentLCI, Segment64LC);
+ if (!Segment64LC)
+ return Error("unable to load segment load command");
+
+ for (unsigned SectNum = 0; SectNum != Segment64LC->NumSections; ++SectNum) {
+ InMemoryStruct<macho::Section64> Sect;
+ Obj->ReadSection64(*SegmentLCI, SectNum, Sect);
+ if (!Sect)
+ return Error("unable to load section: '" + Twine(SectNum) + "'");
+
+ // FIXME: For the time being, we're only loading text segments.
+ if (Sect->Flags != 0x80000400)
+ continue;
+
+ // Address and names of symbols in the section.
+ typedef std::pair<uint64_t, StringRef> SymbolEntry;
+ SmallVector<SymbolEntry, 64> Symbols;
+ // Index of all the symbol names, whether defined in this section or not;
+ // used when processing relocation entries.
+ SmallVector<StringRef, 64> SymbolNames;
+ for (unsigned i = 0; i != SymtabLC->NumSymbolTableEntries; ++i) {
+ InMemoryStruct<macho::Symbol64TableEntry> STE;
+ Obj->ReadSymbol64TableEntry(SymtabLC->SymbolTableOffset, i, STE);
+ if (!STE)
+ return Error("unable to read symbol: '" + Twine(i) + "'");
+ if (STE->SectionIndex > Segment64LC->NumSections)
+ return Error("invalid section index for symbol: '" + Twine(i) + "'");
+ // Get the symbol name.
+ StringRef Name = Obj->getStringAtIndex(STE->StringIndex);
+ SymbolNames.push_back(Name);
+
+ // Just skip symbols not defined in this section.
+ if ((unsigned)STE->SectionIndex - 1 != SectNum)
+ continue;
+
+ // FIXME: Check the symbol type and flags.
+ if (STE->Type != 0xF) // external, defined in this section.
+ continue;
+ if (STE->Flags != 0x0)
+ continue;
+
+ // Remember the symbol.
+ Symbols.push_back(SymbolEntry(STE->Value, Name));
+
+ DEBUG(dbgs() << "Function sym: '" << Name << "' @ " <<
+ (Sect->Address + STE->Value) << "\n");
+ }
+ // Sort the symbols by address, just in case they didn't come in that way.
+ array_pod_sort(Symbols.begin(), Symbols.end());
+
+ // If there weren't any functions (odd, but just in case...)
+ if (!Symbols.size())
+ continue;
+
+ // Extract the function data.
+ uint8_t *Base = (uint8_t*)Obj->getData(Segment64LC->FileOffset,
+ Segment64LC->FileSize).data();
+ for (unsigned i = 0, e = Symbols.size() - 1; i != e; ++i) {
+ uint64_t StartOffset = Sect->Address + Symbols[i].first;
+ uint64_t EndOffset = Symbols[i + 1].first - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[i].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[i].second, Base + StartOffset, Base + EndOffset);
+ }
+ // Process the last symbol separately, since its end address is computed
+ // differently: there is no next symbol to reference.
+ uint64_t StartOffset = Symbols[Symbols.size() - 1].first;
+ uint64_t EndOffset = Sect->Size - 1;
+ DEBUG(dbgs() << "Extracting function: " << Symbols[Symbols.size()-1].second
+ << " from [" << StartOffset << ", " << EndOffset << "]\n");
+ extractFunction(Symbols[Symbols.size()-1].second,
+ Base + StartOffset, Base + EndOffset);
+
+ // Now extract the relocation information for each function and process it.
+ for (unsigned j = 0; j != Sect->NumRelocationTableEntries; ++j) {
+ InMemoryStruct<macho::RelocationEntry> RE;
+ Obj->ReadRelocationEntry(Sect->RelocationTableOffset, j, RE);
+ if (RE->Word0 & macho::RF_Scattered)
+ return Error("NOT YET IMPLEMENTED: scattered relocations.");
+ // Word0 of the relocation is the offset into the section where the
+ // relocation should be applied. We need to translate that into an
+ // offset into a function since that's our atom.
+ uint32_t Offset = RE->Word0;
+ // Look for the function containing the address. This is used for JIT
+ // code, so the number of functions in a section is almost always going
+ // to be very small (usually just one), so until we have use cases
+ // where that's not true, just use a trivial linear search.
+ unsigned SymbolNum;
+ unsigned NumSymbols = Symbols.size();
+ assert(NumSymbols > 0 && Symbols[0].first <= Offset &&
+ "No symbol containing relocation!");
+ for (SymbolNum = 0; SymbolNum < NumSymbols - 1; ++SymbolNum)
+ if (Symbols[SymbolNum + 1].first > Offset)
+ break;
+ // Adjust the offset to be relative to the symbol.
+ Offset -= Symbols[SymbolNum].first;
+ // Get the name of the symbol containing the relocation.
+ StringRef TargetName = SymbolNames[SymbolNum];
+
+ bool isExtern = (RE->Word1 >> 27) & 1;
+ // Figure out the source symbol of the relocation. If isExtern is true,
+ // this relocation references the symbol table, otherwise it references
+ // a section in the same object, numbered from 1 through NumSections
+ // (SectionBases is [0, NumSections-1]).
+ if (!isExtern)
+ return Error("Internal relocations not supported.");
+ uint32_t SourceNum = RE->Word1 & 0xffffff; // 24-bit value
+ StringRef SourceName = SymbolNames[SourceNum];
+
+ // FIXME: Get the relocation addend from the target address.
+
+ // Now store the relocation information. Associate it with the source
+ // symbol.
+ Relocations[SourceName].push_back(RelocationEntry(TargetName,
+ Offset,
+ RE->Word1,
+ 0 /*Addend*/));
+ DEBUG(dbgs() << "Relocation at '" << TargetName << "' + " << Offset
+ << " from '" << SourceName << "(Word1: "
+ << format("0x%x", RE->Word1) << ")\n");
+ }
+ }
+ return false;
+}
+
+bool RuntimeDyldMachO::loadObject(MemoryBuffer *InputBuffer) {
+ // If the linker is in an error state, don't do anything.
+ if (hasError())
+ return true;
+ // Load the Mach-O wrapper object.
+ std::string ErrorStr;
+ OwningPtr<MachOObject> Obj(
+ MachOObject::LoadFromBuffer(InputBuffer, &ErrorStr));
+ if (!Obj)
+ return Error("unable to load object: '" + ErrorStr + "'");
+
+ // Get the CPU type information from the header.
+ const macho::Header &Header = Obj->getHeader();
+
+ // FIXME: Error checking that the loaded object is compatible with
+ // the system we're running on.
+ CPUType = Header.CPUType;
+ CPUSubtype = Header.CPUSubtype;
+
+ // Validate that the load commands match what we expect.
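+ // For a relocatable object we expect exactly one segment load command,
+ // exactly one symbol table, and at most one dynamic symbol table; anything
+ // else is rejected below.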
+ const MachOObject::LoadCommandInfo *SegmentLCI = 0, *SymtabLCI = 0,
+ *DysymtabLCI = 0;
+ for (unsigned i = 0; i != Header.NumLoadCommands; ++i) {
+ const MachOObject::LoadCommandInfo &LCI = Obj->getLoadCommandInfo(i);
+ switch (LCI.Command.Type) {
+ case macho::LCT_Segment:
+ case macho::LCT_Segment64:
+ if (SegmentLCI)
+ return Error("unexpected input object (multiple segments)");
+ SegmentLCI = &LCI;
+ break;
+ case macho::LCT_Symtab:
+ if (SymtabLCI)
+ return Error("unexpected input object (multiple symbol tables)");
+ SymtabLCI = &LCI;
+ break;
+ case macho::LCT_Dysymtab:
+ if (DysymtabLCI)
+ return Error("unexpected input object (multiple symbol tables)");
+ DysymtabLCI = &LCI;
+ break;
+ default:
+ return Error("unexpected input object (unexpected load command");
+ }
+ }
+
+ if (!SymtabLCI)
+ return Error("no symbol table found in object");
+ if (!SegmentLCI)
+ return Error("no symbol table found in object");
+
+ // Read and register the symbol table data.
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLC;
+ Obj->ReadSymtabLoadCommand(*SymtabLCI, SymtabLC);
+ if (!SymtabLC)
+ return Error("unable to load symbol table load command");
+ Obj->RegisterStringTable(*SymtabLC);
+
+ // Read the dynamic link-edit information, if present (not present in static
+ // objects).
+ if (DysymtabLCI) {
+ InMemoryStruct<macho::DysymtabLoadCommand> DysymtabLC;
+ Obj->ReadDysymtabLoadCommand(*DysymtabLCI, DysymtabLC);
+ if (!DysymtabLC)
+ return Error("unable to load dynamic link-exit load command");
+
+ // FIXME: We don't support anything interesting yet.
+// if (DysymtabLC->LocalSymbolsIndex != 0)
+// return Error("NOT YET IMPLEMENTED: local symbol entries");
+// if (DysymtabLC->ExternalSymbolsIndex != 0)
+// return Error("NOT YET IMPLEMENTED: non-external symbol entries");
+// if (DysymtabLC->UndefinedSymbolsIndex != SymtabLC->NumSymbolTableEntries)
+// return Error("NOT YET IMPLEMENTED: undefined symbol entries");
+ }
+
+ // Load the segment load command.
+ if (SegmentLCI->Command.Type == macho::LCT_Segment) {
+ if (loadSegment32(Obj.get(), SegmentLCI, SymtabLC))
+ return true;
+ } else {
+ if (loadSegment64(Obj.get(), SegmentLCI, SymtabLC))
+ return true;
+ }
+
+ return false;
+}
+
+// Assign an address to a symbol name and resolve all the relocations
+// associated with it.
+void RuntimeDyldMachO::reassignSymbolAddress(StringRef Name, uint8_t *Addr) {
+ // Assign the address in our symbol table.
+ SymbolTable[Name] = Addr;
+
+ RelocationList &Relocs = Relocations[Name];
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ RelocationEntry &RE = Relocs[i];
+ uint8_t *Target = SymbolTable[RE.Target] + RE.Offset;
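+ // RE.Data is Word1 of the original relocation entry: bits 0-23 hold the
+ // symbol number, bit 24 the pcrel flag, bits 25-26 log2 of the length,
+ // bit 27 the extern flag and bits 28-31 the type. E.g., 0x2d000003
+ // decodes to symbol 3, pcrel, 4-byte, extern, type 2.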
+ bool isPCRel = (RE.Data >> 24) & 1;
+ unsigned Type = (RE.Data >> 28) & 0xf;
+ unsigned Size = 1 << ((RE.Data >> 25) & 3);
+
+ DEBUG(dbgs() << "Resolving relocation at '" << RE.Target
+ << "' + " << RE.Offset << " (" << format("%p", Target) << ")"
+ << " from '" << Name << " (" << format("%p", Addr) << ")"
+ << "(" << (isPCRel ? "pcrel" : "absolute")
+ << ", type: " << Type << ", Size: " << Size << ").\n");
+
+ resolveRelocation(Target, Addr, isPCRel, Type, Size);
+ RE.isResolved = true;
+ }
+}
+
+bool RuntimeDyldMachO::isKnownFormat(const MemoryBuffer *InputBuffer) {
+ StringRef Magic = InputBuffer->getBuffer().slice(0, 4);
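+ // Accept 32- and 64-bit Mach-O magic in either byte order (MH_MAGIC,
+ // MH_CIGAM, MH_MAGIC_64 and MH_CIGAM_64, respectively).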
+ if (Magic == "\xFE\xED\xFA\xCE") return true;
+ if (Magic == "\xCE\xFA\xED\xFE") return true;
+ if (Magic == "\xFE\xED\xFA\xCF") return true;
+ if (Magic == "\xCF\xFA\xED\xFE") return true;
+ return false;
+}
+
+} // end namespace llvm
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index a8822e5..f51aff3 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -16,10 +16,10 @@
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/Module.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
-#include "llvm/Target/SubtargetFeature.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
@@ -75,9 +75,8 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
- if (!MCPU.empty() || !MAttrs.empty()) {
+ if (!MAttrs.empty()) {
SubtargetFeatures Features;
- Features.setCPU(MCPU);
for (unsigned i = 0; i != MAttrs.size(); ++i)
Features.AddFeature(MAttrs[i]);
FeaturesStr = Features.getString();
@@ -85,7 +84,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod,
// Allocate a target...
TargetMachine *Target =
- TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr);
+ TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr);
assert(Target && "Could not allocate target machine!");
return Target;
}
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index f372db2..55aa9bf 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -9,337 +9,404 @@
//
// This file implements the LLVM module linker.
//
-// Specifically, this:
-// * Merges global variables between the two modules
-// * Uninit + Uninit = Init, Init + Uninit = Init, Init + Init = Error if !=
-// * Merges functions between two modules
-//
//===----------------------------------------------------------------------===//
#include "llvm/Linker.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/ValueSymbolTable.h"
-#include "llvm/Instructions.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Path.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/ADT/DenseMap.h"
using namespace llvm;
-// Error - Simple wrapper function to conditionally assign to E and return true.
-// This just makes error return conditions a little bit simpler...
-static inline bool Error(std::string *E, const Twine &Message) {
- if (E) *E = Message.str();
- return true;
-}
-
-// Function: ResolveTypes()
-//
-// Description:
-// Attempt to link the two specified types together.
-//
-// Inputs:
-// DestTy - The type to which we wish to resolve.
-// SrcTy - The original type which we want to resolve.
-//
-// Outputs:
-// DestST - The symbol table in which the new type should be placed.
-//
-// Return value:
-// true - There is an error and the types cannot yet be linked.
-// false - No errors.
-//
-static bool ResolveTypes(const Type *DestTy, const Type *SrcTy) {
- if (DestTy == SrcTy) return false; // If already equal, noop
- assert(DestTy && SrcTy && "Can't handle null types");
-
- if (const OpaqueType *OT = dyn_cast<OpaqueType>(DestTy)) {
- // Type _is_ in module, just opaque...
- const_cast<OpaqueType*>(OT)->refineAbstractTypeTo(SrcTy);
- } else if (const OpaqueType *OT = dyn_cast<OpaqueType>(SrcTy)) {
- const_cast<OpaqueType*>(OT)->refineAbstractTypeTo(DestTy);
- } else {
- return true; // Cannot link types... not-equal and neither is opaque.
- }
- return false;
-}
+//===----------------------------------------------------------------------===//
+// TypeMap implementation.
+//===----------------------------------------------------------------------===//
-/// LinkerTypeMap - This implements a map of types that is stable
-/// even if types are resolved/refined to other types. This is not a general
-/// purpose map, it is specific to the linker's use.
namespace {
-class LinkerTypeMap : public AbstractTypeUser {
- typedef DenseMap<const Type*, PATypeHolder> TheMapTy;
- TheMapTy TheMap;
-
- LinkerTypeMap(const LinkerTypeMap&); // DO NOT IMPLEMENT
- void operator=(const LinkerTypeMap&); // DO NOT IMPLEMENT
+class TypeMapTy : public ValueMapTypeRemapper {
+ /// MappedTypes - This is a mapping from a source type to a destination type
+ /// to use.
+ DenseMap<Type*, Type*> MappedTypes;
+
+ /// SpeculativeTypes - When checking to see if two subgraphs are isomorphic,
+ /// we speculatively add types to MappedTypes, but keep track of them here in
+ /// case we need to roll back.
+ SmallVector<Type*, 16> SpeculativeTypes;
+
+ /// DefinitionsToResolve - This is a list of non-opaque structs in the source
+ /// module that are mapped to an opaque struct in the destination module.
+ SmallVector<StructType*, 16> DefinitionsToResolve;
public:
- LinkerTypeMap() {}
- ~LinkerTypeMap() {
- for (DenseMap<const Type*, PATypeHolder>::iterator I = TheMap.begin(),
- E = TheMap.end(); I != E; ++I)
- I->first->removeAbstractTypeUser(this);
- }
-
- /// lookup - Return the value for the specified type or null if it doesn't
- /// exist.
- const Type *lookup(const Type *Ty) const {
- TheMapTy::const_iterator I = TheMap.find(Ty);
- if (I != TheMap.end()) return I->second;
- return 0;
- }
-
- /// insert - This returns true if the pointer was new to the set, false if it
- /// was already in the set.
- bool insert(const Type *Src, const Type *Dst) {
- if (!TheMap.insert(std::make_pair(Src, PATypeHolder(Dst))).second)
- return false; // Already in map.
- if (Src->isAbstract())
- Src->addAbstractTypeUser(this);
- return true;
- }
-
-protected:
- /// refineAbstractType - The callback method invoked when an abstract type is
- /// resolved to another type. An object must override this method to update
- /// its internal state to reference NewType instead of OldType.
- ///
- virtual void refineAbstractType(const DerivedType *OldTy,
- const Type *NewTy) {
- TheMapTy::iterator I = TheMap.find(OldTy);
- const Type *DstTy = I->second;
-
- TheMap.erase(I);
- if (OldTy->isAbstract())
- OldTy->removeAbstractTypeUser(this);
-
- // Don't reinsert into the map if the key is concrete now.
- if (NewTy->isAbstract())
- insert(NewTy, DstTy);
+
+ /// addTypeMapping - Indicate that the specified type in the destination
+ /// module is conceptually equivalent to the specified type in the source
+ /// module.
+ void addTypeMapping(Type *DstTy, Type *SrcTy);
+
+ /// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+ /// module from a type definition in the source module.
+ void linkDefinedTypeBodies();
+
+ /// get - Return the mapped type to use for the specified input type from the
+ /// source module.
+ Type *get(Type *SrcTy);
+
+ FunctionType *get(FunctionType *T) {return cast<FunctionType>(get((Type*)T));}
+
+private:
+ Type *getImpl(Type *T);
+ /// remapType - Implement the ValueMapTypeRemapper interface.
+ Type *remapType(Type *SrcTy) {
+ return get(SrcTy);
}
+
+ bool areTypesIsomorphic(Type *DstTy, Type *SrcTy);
+};
+}
- /// The other case which AbstractTypeUsers must be aware of is when a type
- /// makes the transition from being abstract (where it has clients on it's
- /// AbstractTypeUsers list) to concrete (where it does not). This method
- /// notifies ATU's when this occurs for a type.
- virtual void typeBecameConcrete(const DerivedType *AbsTy) {
- TheMap.erase(AbsTy);
- AbsTy->removeAbstractTypeUser(this);
+void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) {
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry) return;
+
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
+ return;
}
-
- // for debugging...
- virtual void dump() const {
- dbgs() << "AbstractTypeSet!\n";
+
+ // Check to see if these types are recursively isomorphic and establish a
+ // mapping between them if so.
+ if (!areTypesIsomorphic(DstTy, SrcTy)) {
+ // Oops, they aren't isomorphic. Just discard this request by rolling back
+ // any speculative mappings we've established.
+ for (unsigned i = 0, e = SpeculativeTypes.size(); i != e; ++i)
+ MappedTypes.erase(SpeculativeTypes[i]);
}
-};
+ SpeculativeTypes.clear();
}
-
-// RecursiveResolveTypes - This is just like ResolveTypes, except that it
-// recurses down into derived types, merging the used types if the parent types
-// are compatible.
-static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy,
- LinkerTypeMap &Pointers) {
- if (DstTy == SrcTy) return false; // If already equal, noop
-
- // If we found our opaque type, resolve it now!
- if (DstTy->isOpaqueTy() || SrcTy->isOpaqueTy())
- return ResolveTypes(DstTy, SrcTy);
-
- // Two types cannot be resolved together if they are of different primitive
- // type. For example, we cannot resolve an int to a float.
- if (DstTy->getTypeID() != SrcTy->getTypeID()) return true;
-
- // If neither type is abstract, then they really are just different types.
- if (!DstTy->isAbstract() && !SrcTy->isAbstract())
- return true;
-
- // Otherwise, resolve the used type used by this derived type...
- switch (DstTy->getTypeID()) {
- default:
+/// areTypesIsomorphic - Recursively walk this pair of types, returning true
+/// if they are isomorphic, false if they are not.
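+/// For example, a source "%T = type { %T*, i32 }" and a renamed destination
+/// copy "%T.0 = type { %T.0*, i32 }" are isomorphic; the recursive
+/// self-reference works because the pair is entered into MappedTypes
+/// speculatively before the elements are visited.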
+bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
+ // Two types with differing kinds are clearly not isomorphic.
+ if (DstTy->getTypeID() != SrcTy->getTypeID()) return false;
+
+ // If we have an entry in the MappedTypes table, then we have our answer.
+ Type *&Entry = MappedTypes[SrcTy];
+ if (Entry)
+ return Entry == DstTy;
+
+ // Two identical types are clearly isomorphic. Remember this
+ // non-speculatively.
+ if (DstTy == SrcTy) {
+ Entry = DstTy;
return true;
- case Type::FunctionTyID: {
- const FunctionType *DstFT = cast<FunctionType>(DstTy);
- const FunctionType *SrcFT = cast<FunctionType>(SrcTy);
- if (DstFT->isVarArg() != SrcFT->isVarArg() ||
- DstFT->getNumContainedTypes() != SrcFT->getNumContainedTypes())
- return true;
-
- // Use TypeHolder's so recursive resolution won't break us.
- PATypeHolder ST(SrcFT), DT(DstFT);
- for (unsigned i = 0, e = DstFT->getNumContainedTypes(); i != e; ++i) {
- const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i);
- if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers))
- return true;
- }
- return false;
}
- case Type::StructTyID: {
- const StructType *DstST = cast<StructType>(DstTy);
- const StructType *SrcST = cast<StructType>(SrcTy);
- if (DstST->getNumContainedTypes() != SrcST->getNumContainedTypes())
+
+ // Okay, we have two types with identical kinds that we haven't seen before.
+
+ // If this is an opaque struct type, special case it.
+ if (StructType *SSTy = dyn_cast<StructType>(SrcTy)) {
+ // Mapping an opaque source type to any struct: just keep the dest struct.
+ if (SSTy->isOpaque()) {
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
return true;
+ }
- PATypeHolder ST(SrcST), DT(DstST);
- for (unsigned i = 0, e = DstST->getNumContainedTypes(); i != e; ++i) {
- const Type *SE = ST->getContainedType(i), *DE = DT->getContainedType(i);
- if (SE != DE && RecursiveResolveTypesI(DE, SE, Pointers))
- return true;
+ // Mapping a non-opaque source type to an opaque dest. Keep the dest, but
+ // fill it in later. This doesn't need to be speculative.
+ if (cast<StructType>(DstTy)->isOpaque()) {
+ Entry = DstTy;
+ DefinitionsToResolve.push_back(SSTy);
+ return true;
}
- return false;
- }
- case Type::ArrayTyID: {
- const ArrayType *DAT = cast<ArrayType>(DstTy);
- const ArrayType *SAT = cast<ArrayType>(SrcTy);
- if (DAT->getNumElements() != SAT->getNumElements()) return true;
- return RecursiveResolveTypesI(DAT->getElementType(), SAT->getElementType(),
- Pointers);
}
- case Type::VectorTyID: {
- const VectorType *DVT = cast<VectorType>(DstTy);
- const VectorType *SVT = cast<VectorType>(SrcTy);
- if (DVT->getNumElements() != SVT->getNumElements()) return true;
- return RecursiveResolveTypesI(DVT->getElementType(), SVT->getElementType(),
- Pointers);
+
+ // If the number of subtypes disagree between the two types, then we fail.
+ if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes())
+ return false;
+
+ // Fail if any of the extra properties (e.g. array size) of the type disagree.
+ if (isa<IntegerType>(DstTy))
+ return false; // bitwidth disagrees.
+ if (PointerType *PT = dyn_cast<PointerType>(DstTy)) {
+ if (PT->getAddressSpace() != cast<PointerType>(SrcTy)->getAddressSpace())
+ return false;
+ } else if (FunctionType *FT = dyn_cast<FunctionType>(DstTy)) {
+ if (FT->isVarArg() != cast<FunctionType>(SrcTy)->isVarArg())
+ return false;
+ } else if (StructType *DSTy = dyn_cast<StructType>(DstTy)) {
+ StructType *SSTy = cast<StructType>(SrcTy);
+ if (DSTy->isAnonymous() != SSTy->isAnonymous() ||
+ DSTy->isPacked() != SSTy->isPacked())
+ return false;
+ } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
+ if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
+ return false;
+ } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
+ if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
+ return false;
}
- case Type::PointerTyID: {
- const PointerType *DstPT = cast<PointerType>(DstTy);
- const PointerType *SrcPT = cast<PointerType>(SrcTy);
- if (DstPT->getAddressSpace() != SrcPT->getAddressSpace())
- return true;
+ // Otherwise, we speculate that these two types will line up and recursively
+ // check the subelements.
+ Entry = DstTy;
+ SpeculativeTypes.push_back(SrcTy);
+
+ for (unsigned i = 0, e = SrcTy->getNumContainedTypes(); i != e; ++i)
+ if (!areTypesIsomorphic(DstTy->getContainedType(i),
+ SrcTy->getContainedType(i)))
+ return false;
+
+ // If everything seems to have lined up, then everything is great.
+ return true;
+}
- // If this is a pointer type, check to see if we have already seen it. If
- // so, we are in a recursive branch. Cut off the search now. We cannot use
- // an associative container for this search, because the type pointers (keys
- // in the container) change whenever types get resolved.
- if (SrcPT->isAbstract())
- if (const Type *ExistingDestTy = Pointers.lookup(SrcPT))
- return ExistingDestTy != DstPT;
-
- if (DstPT->isAbstract())
- if (const Type *ExistingSrcTy = Pointers.lookup(DstPT))
- return ExistingSrcTy != SrcPT;
- // Otherwise, add the current pointers to the vector to stop recursion on
- // this pair.
- if (DstPT->isAbstract())
- Pointers.insert(DstPT, SrcPT);
- if (SrcPT->isAbstract())
- Pointers.insert(SrcPT, DstPT);
-
- return RecursiveResolveTypesI(DstPT->getElementType(),
- SrcPT->getElementType(), Pointers);
- }
+/// linkDefinedTypeBodies - Produce a body for an opaque type in the dest
+/// module from a type definition in the source module.
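+/// For example, if the destination has "%S = type opaque" and the source
+/// defines "%S = type { i32, %S* }", this fills in the destination body as
+/// "{ i32, %S* }" with the element types remapped through the TypeMap.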
+void TypeMapTy::linkDefinedTypeBodies() {
+ SmallVector<Type*, 16> Elements;
+ SmallString<16> TmpName;
+
+ // Note that processing entries in this loop (calling 'get') can add new
+ // entries to the DefinitionsToResolve vector.
+ while (!DefinitionsToResolve.empty()) {
+ StructType *SrcSTy = DefinitionsToResolve.pop_back_val();
+ StructType *DstSTy = cast<StructType>(MappedTypes[SrcSTy]);
+
+ // TypeMap is a many-to-one mapping; if multiple source types provided a
+ // body for DstSTy, then a previous iteration of this loop may already have
+ // handled it. Just ignore this case.
+ if (!DstSTy->isOpaque()) continue;
+ assert(!SrcSTy->isOpaque() && "Not resolving a definition?");
+
+ // Map the body of the source type over to a new body for the dest type.
+ Elements.resize(SrcSTy->getNumElements());
+ for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+ Elements[i] = getImpl(SrcSTy->getElementType(i));
+
+ DstSTy->setBody(Elements, SrcSTy->isPacked());
+
+ // If DstSTy has no name or a longer name than SrcSTy, then viciously steal
+ // SrcSTy's name.
+ if (!SrcSTy->hasName()) continue;
+ StringRef SrcName = SrcSTy->getName();
+
+ if (!DstSTy->hasName() || DstSTy->getName().size() > SrcName.size()) {
+ TmpName.insert(TmpName.end(), SrcName.begin(), SrcName.end());
+ SrcSTy->setName("");
+ DstSTy->setName(TmpName.str());
+ TmpName.clear();
+ }
}
}
-static bool RecursiveResolveTypes(const Type *DestTy, const Type *SrcTy) {
- LinkerTypeMap PointerTypes;
- return RecursiveResolveTypesI(DestTy, SrcTy, PointerTypes);
-}
+/// get - Return the mapped type to use for the specified input type from the
+/// source module.
+Type *TypeMapTy::get(Type *Ty) {
+ Type *Result = getImpl(Ty);
+
+ // If this caused a reference to any struct type, resolve it before returning.
+ if (!DefinitionsToResolve.empty())
+ linkDefinedTypeBodies();
+ return Result;
+}
-// LinkTypes - Go through the symbol table of the Src module and see if any
-// types are named in the src module that are not named in the Dst module.
-// Make sure there are no type name conflicts.
-static bool LinkTypes(Module *Dest, const Module *Src, std::string *Err) {
- TypeSymbolTable *DestST = &Dest->getTypeSymbolTable();
- const TypeSymbolTable *SrcST = &Src->getTypeSymbolTable();
-
- // Look for a type plane for Type's...
- TypeSymbolTable::const_iterator TI = SrcST->begin();
- TypeSymbolTable::const_iterator TE = SrcST->end();
- if (TI == TE) return false; // No named types, do nothing.
-
- // Some types cannot be resolved immediately because they depend on other
- // types being resolved to each other first. This contains a list of types we
- // are waiting to recheck.
- std::vector<std::string> DelayedTypesToResolve;
-
- for ( ; TI != TE; ++TI ) {
- const std::string &Name = TI->first;
- const Type *RHS = TI->second;
-
- // Check to see if this type name is already in the dest module.
- Type *Entry = DestST->lookup(Name);
-
- // If the name is just in the source module, bring it over to the dest.
- if (Entry == 0) {
- if (!Name.empty())
- DestST->insert(Name, const_cast<Type*>(RHS));
- } else if (ResolveTypes(Entry, RHS)) {
- // They look different, save the types 'till later to resolve.
- DelayedTypesToResolve.push_back(Name);
+/// getImpl - This is the recursive version of get().
+Type *TypeMapTy::getImpl(Type *Ty) {
+ // If we already have an entry for this type, return it.
+ Type **Entry = &MappedTypes[Ty];
+ if (*Entry) return *Entry;
+
+ // If this is not a named struct type, then just map all of the elements and
+ // then rebuild the type from inside out.
+ if (!isa<StructType>(Ty) || cast<StructType>(Ty)->isAnonymous()) {
+ // If there are no element types to map, then the type maps to itself. This is
+ // true for the anonymous {} struct, things like 'float', integers, etc.
+ if (Ty->getNumContainedTypes() == 0)
+ return *Entry = Ty;
+
+ // Remap all of the elements, keeping track of whether any of them change.
+ bool AnyChange = false;
+ SmallVector<Type*, 4> ElementTypes;
+ ElementTypes.resize(Ty->getNumContainedTypes());
+ for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i) {
+ ElementTypes[i] = getImpl(Ty->getContainedType(i));
+ AnyChange |= ElementTypes[i] != Ty->getContainedType(i);
+ }
+
+ // If we found our type while recursively processing stuff, just use it.
+ Entry = &MappedTypes[Ty];
+ if (*Entry) return *Entry;
+
+ // If all of the element types mapped directly over, then the type is usable
+ // as-is.
+ if (!AnyChange)
+ return *Entry = Ty;
+
+ // Otherwise, rebuild a modified type.
+ switch (Ty->getTypeID()) {
+ default: assert(0 && "unknown derived type to remap");
+ case Type::ArrayTyID:
+ return *Entry = ArrayType::get(ElementTypes[0],
+ cast<ArrayType>(Ty)->getNumElements());
+ case Type::VectorTyID:
+ return *Entry = VectorType::get(ElementTypes[0],
+ cast<VectorType>(Ty)->getNumElements());
+ case Type::PointerTyID:
+ return *Entry = PointerType::get(ElementTypes[0],
+ cast<PointerType>(Ty)->getAddressSpace());
+ case Type::FunctionTyID:
+ return *Entry = FunctionType::get(ElementTypes[0],
+ ArrayRef<Type*>(ElementTypes).slice(1),
+ cast<FunctionType>(Ty)->isVarArg());
+ case Type::StructTyID:
+ // Note that this is only reached for anonymous structs.
+ return *Entry = StructType::get(Ty->getContext(), ElementTypes,
+ cast<StructType>(Ty)->isPacked());
}
}
- // Iteratively resolve types while we can...
- while (!DelayedTypesToResolve.empty()) {
- // Loop over all of the types, attempting to resolve them if possible...
- unsigned OldSize = DelayedTypesToResolve.size();
-
- // Try direct resolution by name...
- for (unsigned i = 0; i != DelayedTypesToResolve.size(); ++i) {
- const std::string &Name = DelayedTypesToResolve[i];
- Type *T1 = SrcST->lookup(Name);
- Type *T2 = DestST->lookup(Name);
- if (!ResolveTypes(T2, T1)) {
- // We are making progress!
- DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i);
- --i;
- }
- }
+ // Otherwise, this is an unmapped named struct. If the struct can be directly
+ // mapped over, just use it as-is. This happens, for example, when the
+ // linked-in module has something like:
+ // %T = type {%T*, i32}
+ // @GV = global %T* null
+ // where T does not exist at all in the destination module.
+ //
+ // The other case we watch for is when the type is not in the destination
+ // module, but that it has to be rebuilt because it refers to something that
+ // is already mapped. For example, if the destination module has:
+ // %A = type { i32 }
+ // and the source module has something like
+ // %A' = type { i32 }
+ // %B = type { %A'* }
+ // @GV = global %B* null
+ // then we want to create a new type: "%B = type { %A*}" and have it take the
+ // pristine "%B" name from the source module.
+ //
+ // To determine which case this is, we have to recursively walk the type graph
+ // speculating that we'll be able to reuse it unmodified. Only if this is
+ // safe would we map the entire thing over. Because this is an optimization,
+ // and is not required for the prettiness of the linked module, we just skip
+ // it and always rebuild a type here.
+ StructType *STy = cast<StructType>(Ty);
+
+ // If the type is opaque, we can just use it directly.
+ if (STy->isOpaque())
+ return *Entry = STy;
+
+ // Otherwise we create a new type and resolve its body later. This will be
+ // resolved by the top level of get().
+ DefinitionsToResolve.push_back(STy);
+ return *Entry = StructType::createNamed(STy->getContext(), "");
+}
- // Did we not eliminate any types?
- if (DelayedTypesToResolve.size() == OldSize) {
- // Attempt to resolve subelements of types. This allows us to merge these
- // two types: { int* } and { opaque* }
- for (unsigned i = 0, e = DelayedTypesToResolve.size(); i != e; ++i) {
- const std::string &Name = DelayedTypesToResolve[i];
- if (!RecursiveResolveTypes(SrcST->lookup(Name), DestST->lookup(Name))) {
- // We are making progress!
- DelayedTypesToResolve.erase(DelayedTypesToResolve.begin()+i);
-
- // Go back to the main loop, perhaps we can resolve directly by name
- // now...
- break;
- }
- }
- // If we STILL cannot resolve the types, then there is something wrong.
- if (DelayedTypesToResolve.size() == OldSize) {
- // Remove the symbol name from the destination.
- DelayedTypesToResolve.pop_back();
- }
- }
- }
+//===----------------------------------------------------------------------===//
+// ModuleLinker implementation.
+//===----------------------------------------------------------------------===//
- return false;
+namespace {
+ /// ModuleLinker - This is an implementation class for the LinkModules
+ /// function, which is the entrypoint for this file.
+ class ModuleLinker {
+ Module *DstM, *SrcM;
+
+ TypeMapTy TypeMap;
+
+ /// ValueMap - Mapping of values from what they used to be in Src, to what
+ /// they are now in DstM. ValueToValueMapTy is a ValueMap, which involves
+ /// some overhead due to the use of Value handles which the Linker doesn't
+ /// actually need, but this allows us to reuse the ValueMapper code.
+ ValueToValueMapTy ValueMap;
+
+ struct AppendingVarInfo {
+ GlobalVariable *NewGV; // New aggregate global in dest module.
+ Constant *DstInit; // Old initializer from dest module.
+ Constant *SrcInit; // Old initializer from src module.
+ };
+
+ std::vector<AppendingVarInfo> AppendingVars;
+
+ public:
+ std::string ErrorMsg;
+
+ ModuleLinker(Module *dstM, Module *srcM) : DstM(dstM), SrcM(srcM) { }
+
+ bool run();
+
+ private:
+ /// emitError - Helper method for setting a message and returning an error
+ /// code.
+ bool emitError(const Twine &Message) {
+ ErrorMsg = Message.str();
+ return true;
+ }
+
+ /// getLinkageResult - This analyzes the two global values and determines
+ /// what the result will look like in the destination module.
+ bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT, bool &LinkFromSrc);
+
+ /// getLinkedToGlobal - Given a global in the source module, return the
+ /// global in the destination module that is being linked to, if any.
+ GlobalValue *getLinkedToGlobal(GlobalValue *SrcGV) {
+ // If the source has no name it can't link. If it has local linkage,
+ // there is no name match-up going on.
+ if (!SrcGV->hasName() || SrcGV->hasLocalLinkage())
+ return 0;
+
+ // Otherwise see if we have a match in the destination module's symtab.
+ GlobalValue *DGV = DstM->getNamedValue(SrcGV->getName());
+ if (DGV == 0) return 0;
+
+ // If we found a global with the same name in the dest module, but it has
+ // internal linkage, we are really not doing any linkage here.
+ if (DGV->hasLocalLinkage())
+ return 0;
+
+ // Otherwise, we do in fact link to the destination global.
+ return DGV;
+ }
+
+ void computeTypeMapping();
+
+ bool linkAppendingVarProto(GlobalVariable *DstGV, GlobalVariable *SrcGV);
+ bool linkGlobalProto(GlobalVariable *SrcGV);
+ bool linkFunctionProto(Function *SrcF);
+ bool linkAliasProto(GlobalAlias *SrcA);
+
+ void linkAppendingVarInit(const AppendingVarInfo &AVI);
+ void linkGlobalInits();
+ void linkFunctionBody(Function *Dst, Function *Src);
+ void linkAliasBodies();
+ void linkNamedMDNodes();
+ };
}
-/// ForceRenaming - The LLVM SymbolTable class autorenames globals that conflict
+
+
+/// forceRenaming - The LLVM SymbolTable class autorenames globals that conflict
/// in the symbol table. This is good for all clients except for us. Go
/// through the trouble to force this back.
-static void ForceRenaming(GlobalValue *GV, const std::string &Name) {
- assert(GV->getName() != Name && "Can't force rename to self");
- ValueSymbolTable &ST = GV->getParent()->getValueSymbolTable();
+static void forceRenaming(GlobalValue *GV, StringRef Name) {
+ // If the global doesn't force its name or if it already has the right name,
+ // there is nothing for us to do.
+ if (GV->hasLocalLinkage() || GV->getName() == Name)
+ return;
+
+ Module *M = GV->getParent();
// If there is a conflict, rename the conflict.
- if (GlobalValue *ConflictGV = cast_or_null<GlobalValue>(ST.lookup(Name))) {
- assert(ConflictGV->hasLocalLinkage() &&
- "Not conflicting with a static global, should link instead!");
+ if (GlobalValue *ConflictGV = M->getNamedValue(Name)) {
GV->takeName(ConflictGV);
ConflictGV->setName(Name); // This will cause ConflictGV to get renamed
- assert(ConflictGV->getName() != Name && "ForceRenaming didn't work");
+ assert(ConflictGV->getName() != Name && "forceRenaming didn't work");
} else {
GV->setName(Name); // Force the name back
}
@@ -352,30 +419,33 @@ static void CopyGVAttributes(GlobalValue *DestGV, const GlobalValue *SrcGV) {
unsigned Alignment = std::max(DestGV->getAlignment(), SrcGV->getAlignment());
DestGV->copyAttributesFrom(SrcGV);
DestGV->setAlignment(Alignment);
+
+ forceRenaming(DestGV, SrcGV->getName());
}
-/// GetLinkageResult - This analyzes the two global values and determines what
+/// getLinkageResult - This analyzes the two global values and determines what
/// the result will look like in the destination module. In particular, it
/// computes the resultant linkage type, computes whether the global in the
/// source should be copied over to the destination (replacing the existing
/// one), and computes whether this linkage is an error or not. It also performs
/// visibility checks: we cannot link together two symbols with different
/// visibilities.
-static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
- GlobalValue::LinkageTypes &LT, bool &LinkFromSrc,
- std::string *Err) {
- assert((!Dest || !Src->hasLocalLinkage()) &&
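+/// For example, a weak source definition linked against a strong external
+/// destination definition keeps the destination; two strong definitions are
+/// diagnosed as a multiply defined symbol.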
+bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
+ GlobalValue::LinkageTypes &LT,
+ bool &LinkFromSrc) {
+ assert(Dest && "Must have two globals being queried");
+ assert(!Src->hasLocalLinkage() &&
"If Src has internal linkage, Dest shouldn't be set!");
- if (!Dest) {
- // Linking something to nothing.
- LinkFromSrc = true;
- LT = Src->getLinkage();
- } else if (Src->isDeclaration()) {
+
+ bool SrcIsDeclaration = Src->isDeclaration();
+ bool DestIsDeclaration = Dest->isDeclaration();
+
+ if (SrcIsDeclaration) {
// If Src is external or if both Src & Dest are external.. Just link the
// external globals, we aren't adding anything.
if (Src->hasDLLImportLinkage()) {
// If one of GVs has DLLImport linkage, result should be dllimport'ed.
- if (Dest->isDeclaration()) {
+ if (DestIsDeclaration) {
LinkFromSrc = true;
LT = Src->getLinkage();
}
@@ -387,16 +457,10 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
LinkFromSrc = false;
LT = Dest->getLinkage();
}
- } else if (Dest->isDeclaration() && !Dest->hasDLLImportLinkage()) {
+ } else if (DestIsDeclaration && !Dest->hasDLLImportLinkage()) {
// If Dest is external but Src is not:
LinkFromSrc = true;
LT = Src->getLinkage();
- } else if (Src->hasAppendingLinkage() || Dest->hasAppendingLinkage()) {
- if (Src->getLinkage() != Dest->getLinkage())
- return Error(Err, "Linking globals named '" + Src->getName() +
- "': can only link appending global with another appending global!");
- LinkFromSrc = true; // Special cased.
- LT = Src->getLinkage();
} else if (Src->isWeakForLinker()) {
// At this point we know that Dest has LinkOnce, External*, Weak, Common,
// or DLL* linkage.
@@ -420,883 +484,485 @@ static bool GetLinkageResult(GlobalValue *Dest, const GlobalValue *Src,
LT = GlobalValue::ExternalLinkage;
}
} else {
- assert((Dest->hasExternalLinkage() ||
- Dest->hasDLLImportLinkage() ||
- Dest->hasDLLExportLinkage() ||
- Dest->hasExternalWeakLinkage()) &&
- (Src->hasExternalLinkage() ||
- Src->hasDLLImportLinkage() ||
- Src->hasDLLExportLinkage() ||
- Src->hasExternalWeakLinkage()) &&
+ assert((Dest->hasExternalLinkage() || Dest->hasDLLImportLinkage() ||
+ Dest->hasDLLExportLinkage() || Dest->hasExternalWeakLinkage()) &&
+ (Src->hasExternalLinkage() || Src->hasDLLImportLinkage() ||
+ Src->hasDLLExportLinkage() || Src->hasExternalWeakLinkage()) &&
"Unexpected linkage type!");
- return Error(Err, "Linking globals named '" + Src->getName() +
+ return emitError("Linking globals named '" + Src->getName() +
"': symbol multiply defined!");
}
// Check visibility
- if (Dest && Src->getVisibility() != Dest->getVisibility() &&
- !Src->isDeclaration() && !Dest->isDeclaration() &&
+ if (Src->getVisibility() != Dest->getVisibility() &&
+ !SrcIsDeclaration && !DestIsDeclaration &&
!Src->hasAvailableExternallyLinkage() &&
!Dest->hasAvailableExternallyLinkage())
- return Error(Err, "Linking globals named '" + Src->getName() +
+ return emitError("Linking globals named '" + Src->getName() +
"': symbols have different visibilities!");
return false;
}
-// Insert all of the named mdnoes in Src into the Dest module.
-static void LinkNamedMDNodes(Module *Dest, Module *Src,
- ValueToValueMapTy &ValueMap) {
- for (Module::const_named_metadata_iterator I = Src->named_metadata_begin(),
- E = Src->named_metadata_end(); I != E; ++I) {
- const NamedMDNode *SrcNMD = I;
- NamedMDNode *DestNMD = Dest->getOrInsertNamedMetadata(SrcNMD->getName());
- // Add Src elements into Dest node.
- for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
- DestNMD->addOperand(cast<MDNode>(MapValue(SrcNMD->getOperand(i),
- ValueMap)));
+/// computeTypeMapping - Loop over all of the linked values to compute type
+/// mappings. For example, if we link "extern Foo *x" and "Foo *x = NULL", then
+/// we have two struct types 'Foo' but one got renamed when the module was
+/// loaded into the same LLVMContext.
+void ModuleLinker::computeTypeMapping() {
+ // Incorporate globals.
+ for (Module::global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I) {
+ GlobalValue *DGV = getLinkedToGlobal(I);
+ if (DGV == 0) continue;
+
+ if (!DGV->hasAppendingLinkage() || !I->hasAppendingLinkage()) {
+ TypeMap.addTypeMapping(DGV->getType(), I->getType());
+ continue;
+ }
+
+ // Unify the element type of appending arrays.
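+ // (E.g., a dest "[2 x %Ctor]" and a src "[3 x %Ctor.0]" should agree that
+ // %Ctor.0 maps to %Ctor before the arrays are merged.)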
+ ArrayType *DAT = cast<ArrayType>(DGV->getType()->getElementType());
+ ArrayType *SAT = cast<ArrayType>(I->getType()->getElementType());
+ TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType());
+ }
+
+ // Incorporate functions.
+ for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I) {
+ if (GlobalValue *DGV = getLinkedToGlobal(I))
+ TypeMap.addTypeMapping(DGV->getType(), I->getType());
}
+
+ // Don't bother incorporating aliases, they aren't generally typed well.
+
+ // Now that we have discovered all of the type equivalences, get a body for
+ // any 'opaque' types in the dest module that are now resolved.
+ TypeMap.linkDefinedTypeBodies();
}
-// LinkGlobals - Loop through the global variables in the src module and merge
-// them into the dest module.
-static bool LinkGlobals(Module *Dest, const Module *Src,
- ValueToValueMapTy &ValueMap,
- std::multimap<std::string, GlobalVariable *> &AppendingVars,
- std::string *Err) {
- ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
-
- // Loop over all of the globals in the src module, mapping them over as we go
- for (Module::const_global_iterator I = Src->global_begin(),
- E = Src->global_end(); I != E; ++I) {
- const GlobalVariable *SGV = I;
- GlobalValue *DGV = 0;
-
- // Check to see if may have to link the global with the global, alias or
- // function.
- if (SGV->hasName() && !SGV->hasLocalLinkage())
- DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SGV->getName()));
-
- // If we found a global with the same name in the dest module, but it has
- // internal linkage, we are really not doing any linkage here.
- if (DGV && DGV->hasLocalLinkage())
- DGV = 0;
-
- // If types don't agree due to opaque types, try to resolve them.
- if (DGV && DGV->getType() != SGV->getType())
- RecursiveResolveTypes(SGV->getType(), DGV->getType());
-
- assert((SGV->hasInitializer() || SGV->hasExternalWeakLinkage() ||
- SGV->hasExternalLinkage() || SGV->hasDLLImportLinkage()) &&
- "Global must either be external or have an initializer!");
+/// linkAppendingVarProto - If there were any appending global variables, link
+/// them together now. Return true on error.
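+/// For example, linking a destination "[2 x i8*]" appending global with a
+/// source "[3 x i8*]" one produces a new "[5 x i8*]" global (llvm.global_ctors
+/// is the classic case); the combined initializer is filled in later.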
+bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
+ GlobalVariable *SrcGV) {
+
+ if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage())
+ return emitError("Linking globals named '" + SrcGV->getName() +
+ "': can only link appending global with another appending global!");
+
+ ArrayType *DstTy = cast<ArrayType>(DstGV->getType()->getElementType());
+ ArrayType *SrcTy =
+ cast<ArrayType>(TypeMap.get(SrcGV->getType()->getElementType()));
+ Type *EltTy = DstTy->getElementType();
+
+ // Check to see that the two arrays agree on element type.
+ if (EltTy != SrcTy->getElementType())
+ return emitError("Appending variables with different element types!");
+ if (DstGV->isConstant() != SrcGV->isConstant())
+ return emitError("Appending variables linked with different const'ness!");
+
+ if (DstGV->getAlignment() != SrcGV->getAlignment())
+ return emitError(
+ "Appending variables with different alignment need to be linked!");
+
+ if (DstGV->getVisibility() != SrcGV->getVisibility())
+ return emitError(
+ "Appending variables with different visibility need to be linked!");
+
+ if (DstGV->getSection() != SrcGV->getSection())
+ return emitError(
+ "Appending variables with different section name need to be linked!");
+
+ uint64_t NewSize = DstTy->getNumElements() + SrcTy->getNumElements();
+ ArrayType *NewType = ArrayType::get(EltTy, NewSize);
+
+ // Create the new global variable.
+ GlobalVariable *NG =
+ new GlobalVariable(*DstGV->getParent(), NewType, SrcGV->isConstant(),
+ DstGV->getLinkage(), /*init*/0, /*name*/"", DstGV,
+ DstGV->isThreadLocal(),
+ DstGV->getType()->getAddressSpace());
+
+ // Propagate alignment, visibility and section info.
+ CopyGVAttributes(NG, DstGV);
+
+ AppendingVarInfo AVI;
+ AVI.NewGV = NG;
+ AVI.DstInit = DstGV->getInitializer();
+ AVI.SrcInit = SrcGV->getInitializer();
+ AppendingVars.push_back(AVI);
+
+ // Replace any uses of the two global variables with uses of the new
+ // global.
+ ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType()));
+
+ DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType()));
+ DstGV->eraseFromParent();
+
+ // Zap the initializer in the source variable so we don't try to link it.
+ SrcGV->setInitializer(0);
+ SrcGV->setLinkage(GlobalValue::ExternalLinkage);
+ return false;
+}
+/// linkGlobalProto - Loop through the global variables in the src module and
+/// merge them into the dest module.
+bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
+ GlobalValue *DGV = getLinkedToGlobal(SGV);
+
+ if (DGV) {
+ // Concatenation of appending linkage variables is magic and handled later.
+ if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage())
+ return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV);
+
+ // Determine whether linkage of these two globals follows the source
+ // module's definition or the destination module's definition.
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
bool LinkFromSrc = false;
- if (GetLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc, Err))
+ if (getLinkageResult(DGV, SGV, NewLinkage, LinkFromSrc))
return true;
- if (DGV == 0) {
- // No linking to be performed, simply create an identical version of the
- // symbol over in the dest module... the initializer will be filled in
- // later by LinkGlobalInits.
- GlobalVariable *NewDGV =
- new GlobalVariable(*Dest, SGV->getType()->getElementType(),
- SGV->isConstant(), SGV->getLinkage(), /*init*/0,
- SGV->getName(), 0, false,
- SGV->getType()->getAddressSpace());
- // Propagate alignment, visibility and section info.
- CopyGVAttributes(NewDGV, SGV);
- NewDGV->setUnnamedAddr(SGV->hasUnnamedAddr());
-
- // If the LLVM runtime renamed the global, but it is an externally visible
- // symbol, DGV must be an existing global with internal linkage. Rename
- // it.
- if (!NewDGV->hasLocalLinkage() && NewDGV->getName() != SGV->getName())
- ForceRenaming(NewDGV, SGV->getName());
-
- // Make sure to remember this mapping.
- ValueMap[SGV] = NewDGV;
-
- // Keep track that this is an appending variable.
- if (SGV->hasAppendingLinkage())
- AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV));
- continue;
- }
-
- bool HasUnnamedAddr = SGV->hasUnnamedAddr() && DGV->hasUnnamedAddr();
-
- // If the visibilities of the symbols disagree and the destination is a
- // prototype, take the visibility of its input.
- if (DGV->isDeclaration())
- DGV->setVisibility(SGV->getVisibility());
-
- if (DGV->hasAppendingLinkage()) {
- // No linking is performed yet. Just insert a new copy of the global, and
- // keep track of the fact that it is an appending variable in the
- // AppendingVars map. The name is cleared out so that no linkage is
- // performed.
- GlobalVariable *NewDGV =
- new GlobalVariable(*Dest, SGV->getType()->getElementType(),
- SGV->isConstant(), SGV->getLinkage(), /*init*/0,
- "", 0, false,
- SGV->getType()->getAddressSpace());
-
- // Set alignment allowing CopyGVAttributes merge it with alignment of SGV.
- NewDGV->setAlignment(DGV->getAlignment());
- // Propagate alignment, section and visibility info.
- CopyGVAttributes(NewDGV, SGV);
-
- // Make sure to remember this mapping...
- ValueMap[SGV] = NewDGV;
-
- // Keep track that this is an appending variable...
- AppendingVars.insert(std::make_pair(SGV->getName(), NewDGV));
- continue;
- }
-
- if (LinkFromSrc) {
- if (isa<GlobalAlias>(DGV))
- return Error(Err, "Global-Alias Collision on '" + SGV->getName() +
- "': symbol multiple defined");
-
- // If the types don't match, and if we are to link from the source, nuke
- // DGV and create a new one of the appropriate type. Note that the thing
- // we are replacing may be a function (if a prototype, weak, etc) or a
- // global variable.
- GlobalVariable *NewDGV =
- new GlobalVariable(*Dest, SGV->getType()->getElementType(),
- SGV->isConstant(), NewLinkage, /*init*/0,
- DGV->getName(), 0, false,
- SGV->getType()->getAddressSpace());
-
- // Set the unnamed_addr.
- NewDGV->setUnnamedAddr(HasUnnamedAddr);
-
- // Propagate alignment, section, and visibility info.
- CopyGVAttributes(NewDGV, SGV);
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV,
- DGV->getType()));
-
- // DGV will conflict with NewDGV because they both had the same
- // name. We must erase this now so ForceRenaming doesn't assert
- // because DGV might not have internal linkage.
- if (GlobalVariable *Var = dyn_cast<GlobalVariable>(DGV))
- Var->eraseFromParent();
- else
- cast<Function>(DGV)->eraseFromParent();
-
- // If the symbol table renamed the global, but it is an externally visible
- // symbol, DGV must be an existing global with internal linkage. Rename.
- if (NewDGV->getName() != SGV->getName() && !NewDGV->hasLocalLinkage())
- ForceRenaming(NewDGV, SGV->getName());
-
- // Inherit const as appropriate.
- NewDGV->setConstant(SGV->isConstant());
-
+ // If we're not linking from the source, then keep the definition that we
+ // have.
+ if (!LinkFromSrc) {
+ // Special case for const propagation.
+ if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
+ if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
+ DGVar->setConstant(true);
+
+ // Set calculated linkage.
+ DGV->setLinkage(NewLinkage);
+
// Make sure to remember this mapping.
- ValueMap[SGV] = NewDGV;
- continue;
+ ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
+
+ // Destroy the source global's initializer (and convert it to a prototype)
+ // so that we don't attempt to copy it over when processing global
+ // initializers.
+ SGV->setInitializer(0);
+ SGV->setLinkage(GlobalValue::ExternalLinkage);
+ return false;
}
-
- // Not "link from source", keep the one in the DestModule and remap the
- // input onto it.
-
- // Special case for const propagation.
- if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
- if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
- DGVar->setConstant(true);
-
- // SGV is global, but DGV is alias.
- if (isa<GlobalAlias>(DGV)) {
- // The only valid mappings are:
- // - SGV is external declaration, which is effectively a no-op.
- // - SGV is weak, when we just need to throw SGV out.
- if (!SGV->isDeclaration() && !SGV->isWeakForLinker())
- return Error(Err, "Global-Alias Collision on '" + SGV->getName() +
- "': symbol multiple defined");
- }
-
- // Set calculated linkage and unnamed_addr
- DGV->setLinkage(NewLinkage);
- DGV->setUnnamedAddr(HasUnnamedAddr);
-
- // Make sure to remember this mapping...
- ValueMap[SGV] = ConstantExpr::getBitCast(DGV, SGV->getType());
}
- return false;
-}
-
-static GlobalValue::LinkageTypes
-CalculateAliasLinkage(const GlobalValue *SGV, const GlobalValue *DGV) {
- GlobalValue::LinkageTypes SL = SGV->getLinkage();
- GlobalValue::LinkageTypes DL = DGV->getLinkage();
- if (SL == GlobalValue::ExternalLinkage || DL == GlobalValue::ExternalLinkage)
- return GlobalValue::ExternalLinkage;
- else if (SL == GlobalValue::WeakAnyLinkage ||
- DL == GlobalValue::WeakAnyLinkage)
- return GlobalValue::WeakAnyLinkage;
- else if (SL == GlobalValue::WeakODRLinkage ||
- DL == GlobalValue::WeakODRLinkage)
- return GlobalValue::WeakODRLinkage;
- else if (SL == GlobalValue::InternalLinkage &&
- DL == GlobalValue::InternalLinkage)
- return GlobalValue::InternalLinkage;
- else if (SL == GlobalValue::LinkerPrivateLinkage &&
- DL == GlobalValue::LinkerPrivateLinkage)
- return GlobalValue::LinkerPrivateLinkage;
- else if (SL == GlobalValue::LinkerPrivateWeakLinkage &&
- DL == GlobalValue::LinkerPrivateWeakLinkage)
- return GlobalValue::LinkerPrivateWeakLinkage;
- else if (SL == GlobalValue::LinkerPrivateWeakDefAutoLinkage &&
- DL == GlobalValue::LinkerPrivateWeakDefAutoLinkage)
- return GlobalValue::LinkerPrivateWeakDefAutoLinkage;
- else {
- assert (SL == GlobalValue::PrivateLinkage &&
- DL == GlobalValue::PrivateLinkage && "Unexpected linkage type");
- return GlobalValue::PrivateLinkage;
+
+ // No linking to be performed or linking from the source: simply create an
+ // identical version of the symbol over in the dest module... the
+ // initializer will be filled in later by LinkGlobalInits.
+ GlobalVariable *NewDGV =
+ new GlobalVariable(*DstM, TypeMap.get(SGV->getType()->getElementType()),
+ SGV->isConstant(), SGV->getLinkage(), /*init*/0,
+ SGV->getName(), /*insertbefore*/0,
+ SGV->isThreadLocal(),
+ SGV->getType()->getAddressSpace());
+ // Propagate alignment, visibility and section info.
+ CopyGVAttributes(NewDGV, SGV);
+
+ if (DGV) {
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
+ DGV->eraseFromParent();
}
-}
-
-// LinkAlias - Loop through the alias in the src module and link them into the
-// dest module. We're assuming, that all functions/global variables were already
-// linked in.
-static bool LinkAlias(Module *Dest, const Module *Src,
- ValueToValueMapTy &ValueMap,
- std::string *Err) {
- // Loop over all alias in the src module
- for (Module::const_alias_iterator I = Src->alias_begin(),
- E = Src->alias_end(); I != E; ++I) {
- const GlobalAlias *SGA = I;
- const GlobalValue *SAliasee = SGA->getAliasedGlobal();
- GlobalAlias *NewGA = NULL;
-
- // Globals were already linked, thus we can just query ValueMap for variant
- // of SAliasee in Dest.
- ValueToValueMapTy::const_iterator VMI = ValueMap.find(SAliasee);
- assert(VMI != ValueMap.end() && "Aliasee not linked");
- GlobalValue* DAliasee = cast<GlobalValue>(VMI->second);
- GlobalValue* DGV = NULL;
-
- // Fixup aliases to bitcasts. Note that aliases to GEPs are still broken
- // by this, but aliases to GEPs are broken to a lot of other things, so
- // it's less important.
- Constant *DAliaseeConst = DAliasee;
- if (SGA->getType() != DAliasee->getType())
- DAliaseeConst = ConstantExpr::getBitCast(DAliasee, SGA->getType());
-
- // Try to find something 'similar' to SGA in destination module.
- if (!DGV && !SGA->hasLocalLinkage()) {
- DGV = Dest->getNamedAlias(SGA->getName());
-
- // If types don't agree due to opaque types, try to resolve them.
- if (DGV && DGV->getType() != SGA->getType())
- RecursiveResolveTypes(SGA->getType(), DGV->getType());
- }
-
- if (!DGV && !SGA->hasLocalLinkage()) {
- DGV = Dest->getGlobalVariable(SGA->getName());
-
- // If types don't agree due to opaque types, try to resolve them.
- if (DGV && DGV->getType() != SGA->getType())
- RecursiveResolveTypes(SGA->getType(), DGV->getType());
- }
-
- if (!DGV && !SGA->hasLocalLinkage()) {
- DGV = Dest->getFunction(SGA->getName());
-
- // If types don't agree due to opaque types, try to resolve them.
- if (DGV && DGV->getType() != SGA->getType())
- RecursiveResolveTypes(SGA->getType(), DGV->getType());
- }
-
- // No linking to be performed on internal stuff.
- if (DGV && DGV->hasLocalLinkage())
- DGV = NULL;
-
- if (GlobalAlias *DGA = dyn_cast_or_null<GlobalAlias>(DGV)) {
- // Types are known to be the same, check whether aliasees equal. As
- // globals are already linked we just need query ValueMap to find the
- // mapping.
- if (DAliasee == DGA->getAliasedGlobal()) {
- // This is just two copies of the same alias. Propagate linkage, if
- // necessary.
- DGA->setLinkage(CalculateAliasLinkage(SGA, DGA));
-
- NewGA = DGA;
- // Proceed to 'common' steps
- } else
- return Error(Err, "Alias Collision on '" + SGA->getName()+
- "': aliases have different aliasees");
- } else if (GlobalVariable *DGVar = dyn_cast_or_null<GlobalVariable>(DGV)) {
- // The only allowed way is to link alias with external declaration or weak
- // symbol..
- if (DGVar->isDeclaration() || DGVar->isWeakForLinker()) {
- // But only if aliasee is global too...
- if (!isa<GlobalVariable>(DAliasee))
- return Error(Err, "Global-Alias Collision on '" + SGA->getName() +
- "': aliasee is not global variable");
-
- NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
- SGA->getName(), DAliaseeConst, Dest);
- CopyGVAttributes(NewGA, SGA);
-
- // Any uses of DGV need to change to NewGA, with cast, if needed.
- if (SGA->getType() != DGVar->getType())
- DGVar->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA,
- DGVar->getType()));
- else
- DGVar->replaceAllUsesWith(NewGA);
-
- // DGVar will conflict with NewGA because they both had the same
- // name. We must erase this now so ForceRenaming doesn't assert
- // because DGV might not have internal linkage.
- DGVar->eraseFromParent();
-
- // Proceed to 'common' steps
- } else
- return Error(Err, "Global-Alias Collision on '" + SGA->getName() +
- "': symbol multiple defined");
- } else if (Function *DF = dyn_cast_or_null<Function>(DGV)) {
- // The only allowed way is to link alias with external declaration or weak
- // symbol...
- if (DF->isDeclaration() || DF->isWeakForLinker()) {
- // But only if aliasee is function too...
- if (!isa<Function>(DAliasee))
- return Error(Err, "Function-Alias Collision on '" + SGA->getName() +
- "': aliasee is not function");
-
- NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
- SGA->getName(), DAliaseeConst, Dest);
- CopyGVAttributes(NewGA, SGA);
-
- // Any uses of DF need to change to NewGA, with cast, if needed.
- if (SGA->getType() != DF->getType())
- DF->replaceAllUsesWith(ConstantExpr::getBitCast(NewGA,
- DF->getType()));
- else
- DF->replaceAllUsesWith(NewGA);
-
- // DF will conflict with NewGA because they both had the same
- // name. We must erase this now so ForceRenaming doesn't assert
- // because DF might not have internal linkage.
- DF->eraseFromParent();
-
- // Proceed to 'common' steps
- } else
- return Error(Err, "Function-Alias Collision on '" + SGA->getName() +
-                   "': symbol multiply defined");
- } else {
- // No linking to be performed, simply create an identical version of the
- // alias over in the dest module...
- NewGA = new GlobalAlias(SGA->getType(), SGA->getLinkage(),
- SGA->getName(), DAliaseeConst, Dest);
- CopyGVAttributes(NewGA, SGA);
-
- // Proceed to 'common' steps
- }
-
- assert(NewGA && "No alias was created in destination module!");
-
- // If the symbol table renamed the alias, but it is an externally visible
-  // symbol, DGA must be a global value with internal linkage. Rename it.
- if (NewGA->getName() != SGA->getName() &&
- !NewGA->hasLocalLinkage())
- ForceRenaming(NewGA, SGA->getName());
-
- // Remember this mapping so uses in the source module get remapped
- // later by MapValue.
- ValueMap[SGA] = NewGA;
- }
-
+
+ // Make sure to remember this mapping.
+ ValueMap[SGV] = NewDGV;
return false;
}
+/// linkFunctionProto - Link the function in the source module into the
+/// destination module if needed, setting up mapping information.
+bool ModuleLinker::linkFunctionProto(Function *SF) {
+ GlobalValue *DGV = getLinkedToGlobal(SF);
-// LinkGlobalInits - Update the initializers in the Dest module now that all
-// globals that may be referenced are in Dest.
-static bool LinkGlobalInits(Module *Dest, const Module *Src,
- ValueToValueMapTy &ValueMap,
- std::string *Err) {
- // Loop over all of the globals in the src module, mapping them over as we go
- for (Module::const_global_iterator I = Src->global_begin(),
- E = Src->global_end(); I != E; ++I) {
- const GlobalVariable *SGV = I;
-
- if (SGV->hasInitializer()) { // Only process initialized GV's
- // Figure out what the initializer looks like in the dest module.
- Constant *SInit =
- cast<Constant>(MapValue(SGV->getInitializer(), ValueMap));
- // Grab destination global variable or alias.
- GlobalValue *DGV = cast<GlobalValue>(ValueMap[SGV]->stripPointerCasts());
-
-      // If dest is a global variable, check that initializers match.
- if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV)) {
- if (DGVar->hasInitializer()) {
- if (SGV->hasExternalLinkage()) {
- if (DGVar->getInitializer() != SInit)
- return Error(Err, "Global Variable Collision on '" +
- SGV->getName() +
- "': global variables have different initializers");
- } else if (DGVar->isWeakForLinker()) {
- // Nothing is required, mapped values will take the new global
- // automatically.
- } else if (SGV->isWeakForLinker()) {
- // Nothing is required, mapped values will take the new global
- // automatically.
- } else if (DGVar->hasAppendingLinkage()) {
- llvm_unreachable("Appending linkage unimplemented!");
- } else {
- llvm_unreachable("Unknown linkage!");
- }
- } else {
- // Copy the initializer over now...
- DGVar->setInitializer(SInit);
- }
- } else {
-        // Destination is an alias; the only valid situation is when the
-        // source is weak. Also note that we already checked linkage in
-        // LinkGlobals(), thus we assert here.
- // FIXME: Should we weaken this assumption, 'dereference' alias and
- // check for initializer of aliasee?
- assert(SGV->isWeakForLinker());
- }
+ if (DGV) {
+ GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
+ bool LinkFromSrc = false;
+ if (getLinkageResult(DGV, SF, NewLinkage, LinkFromSrc))
+ return true;
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
+
+ // Remove the body from the source module so we don't attempt to remap it.
+ SF->deleteBody();
+ return false;
}
}
+
+ // If there is no linkage to be performed or we are linking from the source,
+ // bring SF over.
+ Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
+ SF->getLinkage(), SF->getName(), DstM);
+ CopyGVAttributes(NewDF, SF);
+
+ if (DGV) {
+    // Any uses of DGV need to change to NewDF, with a cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ ValueMap[SF] = NewDF;
return false;
}
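The !LinkFromSrc path above maps the source function onto the existing destination global; the LinkFromSrc path does the reverse with a RAUW-then-erase. A condensed, self-contained sketch of that second move (the helper name is invented for illustration; it uses the same pre-3.0 C++ API as the patch itself):

#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
using namespace llvm;

// Sketch: replace a conflicting destination symbol DGV with a fresh copy of
// the source function's prototype, as linkFunctionProto does when linking
// from the source.
static Function *replaceProto(Module *DstM, Function *SF, GlobalValue *DGV) {
  Function *NewDF = Function::Create(SF->getFunctionType(), SF->getLinkage(),
                                     SF->getName(), DstM);
  if (DGV) {
    // Route old uses through a bitcast in case the pointer types differ.
    DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
    DGV->eraseFromParent();
  }
  return NewDF;
}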
-// LinkFunctionProtos - Link the functions together between the two modules,
-// without doing function bodies... this just adds external function prototypes
-// to the Dest function...
-//
-static bool LinkFunctionProtos(Module *Dest, const Module *Src,
- ValueToValueMapTy &ValueMap,
- std::string *Err) {
- ValueSymbolTable &DestSymTab = Dest->getValueSymbolTable();
-
- // Loop over all of the functions in the src module, mapping them over
- for (Module::const_iterator I = Src->begin(), E = Src->end(); I != E; ++I) {
- const Function *SF = I; // SrcFunction
- GlobalValue *DGV = 0;
-
-    // Check to see if we may have to link the function with a global, alias
-    // or function.
- if (SF->hasName() && !SF->hasLocalLinkage())
- DGV = cast_or_null<GlobalValue>(DestSymTab.lookup(SF->getName()));
-
- // If we found a global with the same name in the dest module, but it has
- // internal linkage, we are really not doing any linkage here.
- if (DGV && DGV->hasLocalLinkage())
- DGV = 0;
-
- // If types don't agree due to opaque types, try to resolve them.
- if (DGV && DGV->getType() != SF->getType())
- RecursiveResolveTypes(SF->getType(), DGV->getType());
-
+/// linkAliasProto - Set up prototypes for any aliases that come over from the
+/// source module.
+bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) {
+ GlobalValue *DGV = getLinkedToGlobal(SGA);
+
+ if (DGV) {
GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
bool LinkFromSrc = false;
- if (GetLinkageResult(DGV, SF, NewLinkage, LinkFromSrc, Err))
+ if (getLinkageResult(DGV, SGA, NewLinkage, LinkFromSrc))
return true;
-
- // If there is no linkage to be performed, just bring over SF without
- // modifying it.
- if (DGV == 0) {
- // Function does not already exist, simply insert an function signature
- // identical to SF into the dest module.
- Function *NewDF = Function::Create(SF->getFunctionType(),
- SF->getLinkage(),
- SF->getName(), Dest);
- CopyGVAttributes(NewDF, SF);
-
- // If the LLVM runtime renamed the function, but it is an externally
- // visible symbol, DF must be an existing function with internal linkage.
- // Rename it.
- if (!NewDF->hasLocalLinkage() && NewDF->getName() != SF->getName())
- ForceRenaming(NewDF, SF->getName());
-
- // ... and remember this mapping...
- ValueMap[SF] = NewDF;
- continue;
- }
-
- // If the visibilities of the symbols disagree and the destination is a
- // prototype, take the visibility of its input.
- if (DGV->isDeclaration())
- DGV->setVisibility(SF->getVisibility());
-
- if (LinkFromSrc) {
- if (isa<GlobalAlias>(DGV))
- return Error(Err, "Function-Alias Collision on '" + SF->getName() +
-                     "': symbol multiply defined");
-
- // We have a definition of the same name but different type in the
- // source module. Copy the prototype to the destination and replace
- // uses of the destination's prototype with the new prototype.
- Function *NewDF = Function::Create(SF->getFunctionType(), NewLinkage,
- SF->getName(), Dest);
- CopyGVAttributes(NewDF, SF);
-
- // Any uses of DF need to change to NewDF, with cast
- DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF,
- DGV->getType()));
-
-      // DF will conflict with NewDF because they both had the same name. We
-      // must erase this now so ForceRenaming doesn't assert because DF might
- // not have internal linkage.
- if (GlobalVariable *Var = dyn_cast<GlobalVariable>(DGV))
- Var->eraseFromParent();
- else
- cast<Function>(DGV)->eraseFromParent();
-
- // If the symbol table renamed the function, but it is an externally
- // visible symbol, DF must be an existing function with internal
- // linkage. Rename it.
- if (NewDF->getName() != SF->getName() && !NewDF->hasLocalLinkage())
- ForceRenaming(NewDF, SF->getName());
-
- // Remember this mapping so uses in the source module get remapped
- // later by MapValue.
- ValueMap[SF] = NewDF;
- continue;
+
+ if (!LinkFromSrc) {
+ // Set calculated linkage.
+ DGV->setLinkage(NewLinkage);
+
+ // Make sure to remember this mapping.
+ ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType()));
+
+      // Clear the aliasee in the source module so we don't attempt to remap it.
+ SGA->setAliasee(0);
+ return false;
}
+ }
+
+ // If there is no linkage to be performed or we're linking from the source,
+ // bring over SGA.
+ GlobalAlias *NewDA = new GlobalAlias(TypeMap.get(SGA->getType()),
+ SGA->getLinkage(), SGA->getName(),
+ /*aliasee*/0, DstM);
+ CopyGVAttributes(NewDA, SGA);
+
+ if (DGV) {
+ // Any uses of DGV need to change to NewDA, with cast.
+ DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType()));
+ DGV->eraseFromParent();
+ }
+
+ ValueMap[SGA] = NewDA;
+ return false;
+}
- // Not "link from source", keep the one in the DestModule and remap the
- // input onto it.
-
- if (isa<GlobalAlias>(DGV)) {
- // The only valid mappings are:
-      // - SF is an external declaration, which is effectively a no-op.
-      // - SF is weak, in which case we just need to throw SF out.
- if (!SF->isDeclaration() && !SF->isWeakForLinker())
- return Error(Err, "Function-Alias Collision on '" + SF->getName() +
-                     "': symbol multiply defined");
- }
+void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
+ // Merge the initializer.
+ SmallVector<Constant*, 16> Elements;
+ if (ConstantArray *I = dyn_cast<ConstantArray>(AVI.DstInit)) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ Elements.push_back(I->getOperand(i));
+ } else {
+ assert(isa<ConstantAggregateZero>(AVI.DstInit));
+ ArrayType *DstAT = cast<ArrayType>(AVI.DstInit->getType());
+ Type *EltTy = DstAT->getElementType();
+ Elements.append(DstAT->getNumElements(), Constant::getNullValue(EltTy));
+ }
+
+ Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap);
+ if (const ConstantArray *I = dyn_cast<ConstantArray>(SrcInit)) {
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ Elements.push_back(I->getOperand(i));
+ } else {
+ assert(isa<ConstantAggregateZero>(SrcInit));
+ ArrayType *SrcAT = cast<ArrayType>(SrcInit->getType());
+ Type *EltTy = SrcAT->getElementType();
+ Elements.append(SrcAT->getNumElements(), Constant::getNullValue(EltTy));
+ }
+ ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
+ AVI.NewGV->setInitializer(ConstantArray::get(NewType, Elements));
+}
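Appending linkage exists for arrays such as llvm.global_ctors whose per-module elements must be concatenated at link time. A minimal standalone sketch of just the concatenation step (the helper name is invented; the zeroinitializer expansion handled above is elided):

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;

// Sketch: build the [N1+N2 x T] initializer for a merged appending global
// out of two existing [N x T] constant arrays.
static Constant *concatInits(ConstantArray *A, ConstantArray *B) {
  SmallVector<Constant*, 16> Elts;
  for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i)
    Elts.push_back(A->getOperand(i));
  for (unsigned i = 0, e = B->getNumOperands(); i != e; ++i)
    Elts.push_back(B->getOperand(i));
  ArrayType *NewTy =
      ArrayType::get(A->getType()->getElementType(), Elts.size());
  return ConstantArray::get(NewTy, Elts);
}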
- // Set calculated linkage
- DGV->setLinkage(NewLinkage);
- // Make sure to remember this mapping.
- ValueMap[SF] = ConstantExpr::getBitCast(DGV, SF->getType());
+// linkGlobalInits - Update the initializers in the Dest module now that all
+// globals that may be referenced are in Dest.
+void ModuleLinker::linkGlobalInits() {
+ // Loop over all of the globals in the src module, mapping them over as we go
+ for (Module::const_global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I) {
+ if (!I->hasInitializer()) continue; // Only process initialized GV's.
+
+ // Grab destination global variable.
+ GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]);
+ // Figure out what the initializer looks like in the dest module.
+ DGV->setInitializer(MapValue(I->getInitializer(), ValueMap,
+ RF_None, &TypeMap));
}
- return false;
}
-// LinkFunctionBody - Copy the source function over into the dest function and
+// linkFunctionBody - Copy the source function over into the dest function and
// fix up references to values. At this point we know that Dst is an external
// function, and that Src is not.
-static bool LinkFunctionBody(Function *Dest, Function *Src,
- ValueToValueMapTy &ValueMap,
- std::string *Err) {
- assert(Src && Dest && Dest->isDeclaration() && !Src->isDeclaration());
+void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
+ assert(Src && Dst && Dst->isDeclaration() && !Src->isDeclaration());
// Go through and convert function arguments over, remembering the mapping.
- Function::arg_iterator DI = Dest->arg_begin();
+ Function::arg_iterator DI = Dst->arg_begin();
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
I != E; ++I, ++DI) {
- DI->setName(I->getName()); // Copy the name information over...
+ DI->setName(I->getName()); // Copy the name over.
- // Add a mapping to our local map
+    // Add a mapping to our value map.
ValueMap[I] = DI;
}
// Splice the body of the source function into the dest function.
- Dest->getBasicBlockList().splice(Dest->end(), Src->getBasicBlockList());
+ Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
// At this point, all of the instructions and values of the function are now
// copied over. The only problem is that they are still referencing values in
// the Source function as operands. Loop through all of the operands of the
// functions and patch them up to point to the local versions.
- for (Function::iterator BB = Dest->begin(), BE = Dest->end(); BB != BE; ++BB)
+ for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
- RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries);
+ RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap);
// There is no need to map the arguments anymore.
for (Function::arg_iterator I = Src->arg_begin(), E = Src->arg_end();
I != E; ++I)
ValueMap.erase(I);
-
- return false;
}
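Splicing moves the body rather than cloning it, which is cheap and leaves the source function empty. The same pattern in a self-contained sketch (helper name invented; the struct-type remapping done above is omitted):

#include "llvm/Function.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Sketch: transplant Src's body into the declaration Dst, then rewrite
// operands that still point at Src's arguments or other mapped values.
static void moveBody(Function *Dst, Function *Src, ValueToValueMapTy &VM) {
  Function::arg_iterator DI = Dst->arg_begin();
  for (Function::arg_iterator SI = Src->arg_begin(), E = Src->arg_end();
       SI != E; ++SI, ++DI)
    VM[SI] = DI;                              // old argument -> new argument
  Dst->getBasicBlockList().splice(Dst->end(), Src->getBasicBlockList());
  for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
      RemapInstruction(I, VM, RF_IgnoreMissingEntries);
}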
-// LinkFunctionBodies - Link in the function bodies that are defined in the
-// source module into the DestModule. This consists basically of copying the
-// function over and fixing up references to values.
-static bool LinkFunctionBodies(Module *Dest, Module *Src,
- ValueToValueMapTy &ValueMap,
- std::string *Err) {
-
- // Loop over all of the functions in the src module, mapping them over as we
- // go
- for (Module::iterator SF = Src->begin(), E = Src->end(); SF != E; ++SF) {
- if (!SF->isDeclaration()) { // No body if function is external
- Function *DF = dyn_cast<Function>(ValueMap[SF]); // Destination function
-
-      // DF not external, SF external?
- if (DF && DF->isDeclaration())
- // Only provide the function body if there isn't one already.
- if (LinkFunctionBody(DF, SF, ValueMap, Err))
- return true;
+void ModuleLinker::linkAliasBodies() {
+ for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end();
+ I != E; ++I)
+ if (Constant *Aliasee = I->getAliasee()) {
+ GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
+ DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap));
}
- }
- return false;
}
-// LinkAppendingVars - If there were any appending global variables, link them
-// together now. Return true on error.
-static bool LinkAppendingVars(Module *M,
- std::multimap<std::string, GlobalVariable *> &AppendingVars,
- std::string *ErrorMsg) {
- if (AppendingVars.empty()) return false; // Nothing to do.
-
- // Loop over the multimap of appending vars, processing any variables with the
- // same name, forming a new appending global variable with both of the
- // initializers merged together, then rewrite references to the old variables
- // and delete them.
- std::vector<Constant*> Inits;
- while (AppendingVars.size() > 1) {
- // Get the first two elements in the map...
- std::multimap<std::string,
- GlobalVariable*>::iterator Second = AppendingVars.begin(), First=Second++;
-
- // If the first two elements are for different names, there is no pair...
- // Otherwise there is a pair, so link them together...
- if (First->first == Second->first) {
- GlobalVariable *G1 = First->second, *G2 = Second->second;
- const ArrayType *T1 = cast<ArrayType>(G1->getType()->getElementType());
- const ArrayType *T2 = cast<ArrayType>(G2->getType()->getElementType());
-
-      // Check to see that the two arrays agree on type...
- if (T1->getElementType() != T2->getElementType())
- return Error(ErrorMsg,
- "Appending variables with different element types need to be linked!");
- if (G1->isConstant() != G2->isConstant())
- return Error(ErrorMsg,
- "Appending variables linked with different const'ness!");
-
- if (G1->getAlignment() != G2->getAlignment())
- return Error(ErrorMsg,
- "Appending variables with different alignment need to be linked!");
-
- if (G1->getVisibility() != G2->getVisibility())
- return Error(ErrorMsg,
- "Appending variables with different visibility need to be linked!");
-
- if (G1->getSection() != G2->getSection())
- return Error(ErrorMsg,
- "Appending variables with different section name need to be linked!");
-
- unsigned NewSize = T1->getNumElements() + T2->getNumElements();
- ArrayType *NewType = ArrayType::get(T1->getElementType(),
- NewSize);
-
- G1->setName(""); // Clear G1's name in case of a conflict!
-
- // Create the new global variable...
- GlobalVariable *NG =
- new GlobalVariable(*M, NewType, G1->isConstant(), G1->getLinkage(),
- /*init*/0, First->first, 0, G1->isThreadLocal(),
- G1->getType()->getAddressSpace());
-
- // Propagate alignment, visibility and section info.
- CopyGVAttributes(NG, G1);
-
- // Merge the initializer...
- Inits.reserve(NewSize);
- if (ConstantArray *I = dyn_cast<ConstantArray>(G1->getInitializer())) {
- for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
- Inits.push_back(I->getOperand(i));
- } else {
- assert(isa<ConstantAggregateZero>(G1->getInitializer()));
- Constant *CV = Constant::getNullValue(T1->getElementType());
- for (unsigned i = 0, e = T1->getNumElements(); i != e; ++i)
- Inits.push_back(CV);
- }
- if (ConstantArray *I = dyn_cast<ConstantArray>(G2->getInitializer())) {
- for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
- Inits.push_back(I->getOperand(i));
- } else {
- assert(isa<ConstantAggregateZero>(G2->getInitializer()));
- Constant *CV = Constant::getNullValue(T2->getElementType());
- for (unsigned i = 0, e = T2->getNumElements(); i != e; ++i)
- Inits.push_back(CV);
- }
- NG->setInitializer(ConstantArray::get(NewType, Inits));
- Inits.clear();
-
- // Replace any uses of the two global variables with uses of the new
- // global...
-
- // FIXME: This should rewrite simple/straight-forward uses such as
- // getelementptr instructions to not use the Cast!
- G1->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
- G1->getType()));
- G2->replaceAllUsesWith(ConstantExpr::getBitCast(NG,
- G2->getType()));
-
- // Remove the two globals from the module now...
- M->getGlobalList().erase(G1);
- M->getGlobalList().erase(G2);
-
- // Put the new global into the AppendingVars map so that we can handle
- // linking of more than two vars...
- Second->second = NG;
- }
- AppendingVars.erase(First);
+/// linkNamedMDNodes - Insert all of the named mdnodes in Src into the Dest
+/// module.
+void ModuleLinker::linkNamedMDNodes() {
+ for (Module::const_named_metadata_iterator I = SrcM->named_metadata_begin(),
+ E = SrcM->named_metadata_end(); I != E; ++I) {
+ NamedMDNode *DestNMD = DstM->getOrInsertNamedMetadata(I->getName());
+ // Add Src elements into Dest node.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ DestNMD->addOperand(MapValue(I->getOperand(i), ValueMap,
+ RF_None, &TypeMap));
}
-
- return false;
}
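Named metadata merging is purely additive: source operands are appended to the like-named node in the destination, remapping any global references. A sketch of the per-node step (helper name invented; type remapping omitted, and the MapValue overload taking an MDNode is assumed, as used above):

#include "llvm/Module.h"
#include "llvm/Metadata.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;

// Sketch: append one source module's named-metadata operands, remapping any
// references to already-linked globals through VM.
static void mergeNamedMD(Module *DstM, const NamedMDNode *SrcNMD,
                         ValueToValueMapTy &VM) {
  NamedMDNode *D = DstM->getOrInsertNamedMetadata(SrcNMD->getName());
  for (unsigned i = 0, e = SrcNMD->getNumOperands(); i != e; ++i)
    D->addOperand(MapValue(SrcNMD->getOperand(i), VM, RF_None, 0));
}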
+
+bool ModuleLinker::run() {
+ assert(DstM && "Null Destination module");
+ assert(SrcM && "Null Source Module");
-static bool ResolveAliases(Module *Dest) {
- for (Module::alias_iterator I = Dest->alias_begin(), E = Dest->alias_end();
- I != E; ++I)
-    // We can't use resolveGlobalAlias here because we need to preserve
- // bitcasts and GEPs.
- if (const Constant *C = I->getAliasee()) {
- while (dyn_cast<GlobalAlias>(C))
- C = cast<GlobalAlias>(C)->getAliasee();
- const GlobalValue *GV = dyn_cast<GlobalValue>(C);
- if (C != I && !(GV && GV->isDeclaration()))
- I->replaceAllUsesWith(const_cast<Constant*>(C));
- }
-
- return false;
-}
-
-// LinkModules - This function links two modules together, with the resulting
-// left module modified to be the composite of the two input modules. If an
-// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
-// the problem. Upon failure, the Dest module could be in a modified state, and
-// shouldn't be relied on to be consistent.
-bool
-Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
- assert(Dest != 0 && "Invalid Destination module");
- assert(Src != 0 && "Invalid Source Module");
-
- if (Dest->getDataLayout().empty()) {
- if (!Src->getDataLayout().empty()) {
- Dest->setDataLayout(Src->getDataLayout());
- } else {
- std::string DataLayout;
-
- if (Dest->getEndianness() == Module::AnyEndianness) {
- if (Src->getEndianness() == Module::BigEndian)
- DataLayout.append("E");
- else if (Src->getEndianness() == Module::LittleEndian)
- DataLayout.append("e");
- }
-
- if (Dest->getPointerSize() == Module::AnyPointerSize) {
- if (Src->getPointerSize() == Module::Pointer64)
- DataLayout.append(DataLayout.length() == 0 ? "p:64:64" : "-p:64:64");
- else if (Src->getPointerSize() == Module::Pointer32)
- DataLayout.append(DataLayout.length() == 0 ? "p:32:32" : "-p:32:32");
- }
- Dest->setDataLayout(DataLayout);
- }
- }
+ // Inherit the target data from the source module if the destination module
+ // doesn't have one already.
+ if (DstM->getDataLayout().empty() && !SrcM->getDataLayout().empty())
+ DstM->setDataLayout(SrcM->getDataLayout());
// Copy the target triple from the source to dest if the dest's is empty.
- if (Dest->getTargetTriple().empty() && !Src->getTargetTriple().empty())
- Dest->setTargetTriple(Src->getTargetTriple());
+ if (DstM->getTargetTriple().empty() && !SrcM->getTargetTriple().empty())
+ DstM->setTargetTriple(SrcM->getTargetTriple());
- if (!Src->getDataLayout().empty() && !Dest->getDataLayout().empty() &&
- Src->getDataLayout() != Dest->getDataLayout())
+ if (!SrcM->getDataLayout().empty() && !DstM->getDataLayout().empty() &&
+ SrcM->getDataLayout() != DstM->getDataLayout())
errs() << "WARNING: Linking two modules of different data layouts!\n";
- if (!Src->getTargetTriple().empty() &&
- Dest->getTargetTriple() != Src->getTargetTriple()) {
+ if (!SrcM->getTargetTriple().empty() &&
+ DstM->getTargetTriple() != SrcM->getTargetTriple()) {
errs() << "WARNING: Linking two modules of different target triples: ";
- if (!Src->getModuleIdentifier().empty())
- errs() << Src->getModuleIdentifier() << ": ";
- errs() << "'" << Src->getTargetTriple() << "' and '"
- << Dest->getTargetTriple() << "'\n";
+ if (!SrcM->getModuleIdentifier().empty())
+ errs() << SrcM->getModuleIdentifier() << ": ";
+ errs() << "'" << SrcM->getTargetTriple() << "' and '"
+ << DstM->getTargetTriple() << "'\n";
}
// Append the module inline asm string.
- if (!Src->getModuleInlineAsm().empty()) {
- if (Dest->getModuleInlineAsm().empty())
- Dest->setModuleInlineAsm(Src->getModuleInlineAsm());
+ if (!SrcM->getModuleInlineAsm().empty()) {
+ if (DstM->getModuleInlineAsm().empty())
+ DstM->setModuleInlineAsm(SrcM->getModuleInlineAsm());
else
- Dest->setModuleInlineAsm(Dest->getModuleInlineAsm()+"\n"+
- Src->getModuleInlineAsm());
+ DstM->setModuleInlineAsm(DstM->getModuleInlineAsm()+"\n"+
+ SrcM->getModuleInlineAsm());
}
// Update the destination module's dependent libraries list with the libraries
// from the source module. There's no opportunity for duplicates here as the
// Module ensures that duplicate insertions are discarded.
- for (Module::lib_iterator SI = Src->lib_begin(), SE = Src->lib_end();
+ for (Module::lib_iterator SI = SrcM->lib_begin(), SE = SrcM->lib_end();
SI != SE; ++SI)
- Dest->addLibrary(*SI);
+ DstM->addLibrary(*SI);
+
+ // If the source library's module id is in the dependent library list of the
+ // destination library, remove it since that module is now linked in.
+ StringRef ModuleId = SrcM->getModuleIdentifier();
+ if (!ModuleId.empty())
+ DstM->removeLibrary(sys::path::stem(ModuleId));
- // LinkTypes - Go through the symbol table of the Src module and see if any
- // types are named in the src module that are not named in the Dst module.
- // Make sure there are no type name conflicts.
- if (LinkTypes(Dest, Src, ErrorMsg))
- return true;
+
+ // Loop over all of the linked values to compute type mappings.
+ computeTypeMapping();
- // ValueMap - Mapping of values from what they used to be in Src, to what they
- // are now in Dest. ValueToValueMapTy is a ValueMap, which involves some
- // overhead due to the use of Value handles which the Linker doesn't actually
- // need, but this allows us to reuse the ValueMapper code.
- ValueToValueMapTy ValueMap;
-
- // AppendingVars - Keep track of global variables in the destination module
- // with appending linkage. After the module is linked together, they are
- // appended and the module is rewritten.
- std::multimap<std::string, GlobalVariable *> AppendingVars;
- for (Module::global_iterator I = Dest->global_begin(), E = Dest->global_end();
- I != E; ++I) {
- // Add all of the appending globals already in the Dest module to
- // AppendingVars.
- if (I->hasAppendingLinkage())
- AppendingVars.insert(std::make_pair(I->getName(), I));
- }
+  // Remap all of the named mdnodes in Src into the DstM module. We do this
+ // after linking GlobalValues so that MDNodes that reference GlobalValues
+ // are properly remapped.
+ linkNamedMDNodes();
- // Insert all of the globals in src into the Dest module... without linking
+ // Insert all of the globals in src into the DstM module... without linking
// initializers (which could refer to functions not yet mapped over).
- if (LinkGlobals(Dest, Src, ValueMap, AppendingVars, ErrorMsg))
- return true;
+ for (Module::global_iterator I = SrcM->global_begin(),
+ E = SrcM->global_end(); I != E; ++I)
+ if (linkGlobalProto(I))
+ return true;
// Link the functions together between the two modules, without doing function
- // bodies... this just adds external function prototypes to the Dest
+ // bodies... this just adds external function prototypes to the DstM
// module. We do this so that when we begin processing function bodies,
// all of the global values that may be referenced are available in our
// ValueMap.
- if (LinkFunctionProtos(Dest, Src, ValueMap, ErrorMsg))
- return true;
-
- // If there were any alias, link them now. We really need to do this now,
- // because all of the aliases that may be referenced need to be available in
- // ValueMap
- if (LinkAlias(Dest, Src, ValueMap, ErrorMsg)) return true;
-
- // Update the initializers in the Dest module now that all globals that may
- // be referenced are in Dest.
- if (LinkGlobalInits(Dest, Src, ValueMap, ErrorMsg)) return true;
+ for (Module::iterator I = SrcM->begin(), E = SrcM->end(); I != E; ++I)
+ if (linkFunctionProto(I))
+ return true;
- // Link in the function bodies that are defined in the source module into the
- // DestModule. This consists basically of copying the function over and
- // fixing up references to values.
- if (LinkFunctionBodies(Dest, Src, ValueMap, ErrorMsg)) return true;
+ // If there were any aliases, link them now.
+ for (Module::alias_iterator I = SrcM->alias_begin(),
+ E = SrcM->alias_end(); I != E; ++I)
+ if (linkAliasProto(I))
+ return true;
- // If there were any appending global variables, link them together now.
- if (LinkAppendingVars(Dest, AppendingVars, ErrorMsg)) return true;
+ for (unsigned i = 0, e = AppendingVars.size(); i != e; ++i)
+ linkAppendingVarInit(AppendingVars[i]);
+
+ // Update the initializers in the DstM module now that all globals that may
+ // be referenced are in DstM.
+ linkGlobalInits();
+
+ // Link in the function bodies that are defined in the source module into
+ // DstM.
+ for (Module::iterator SF = SrcM->begin(), E = SrcM->end(); SF != E; ++SF) {
+ if (SF->isDeclaration()) continue; // No body if function is external.
+
+ linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+ }
- // Resolve all uses of aliases with aliasees
- if (ResolveAliases(Dest)) return true;
+ // Resolve all uses of aliases with aliasees.
+ linkAliasBodies();
- // Remap all of the named mdnoes in Src into the Dest module. We do this
- // after linking GlobalValues so that MDNodes that reference GlobalValues
- // are properly remapped.
- LinkNamedMDNodes(Dest, Src, ValueMap);
+ // Now that all of the types from the source are used, resolve any structs
+ // copied over to the dest that didn't exist there.
+ TypeMap.linkDefinedTypeBodies();
+
+ return false;
+}
- // If the source library's module id is in the dependent library list of the
- // destination library, remove it since that module is now linked in.
- const std::string &modId = Src->getModuleIdentifier();
- if (!modId.empty())
- Dest->removeLibrary(sys::path::stem(modId));
+//===----------------------------------------------------------------------===//
+// LinkModules entrypoint.
+//===----------------------------------------------------------------------===//
+// LinkModules - This function links two modules together, with the resulting
+// left module modified to be the composite of the two input modules. If an
+// error occurs, true is returned and ErrorMsg (if not null) is set to indicate
+// the problem. Upon failure, the Dest module could be in a modified state, and
+// shouldn't be relied on to be consistent.
+bool Linker::LinkModules(Module *Dest, Module *Src, std::string *ErrorMsg) {
+ ModuleLinker TheLinker(Dest, Src);
+ if (TheLinker.run()) {
+ if (ErrorMsg) *ErrorMsg = TheLinker.ErrorMsg;
+ return true;
+ }
+
return false;
}
-
-// vim: sw=2
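For reference, a minimal driver for this entrypoint might look as follows; a sketch only, with placeholder file names and the pre-3.0 signatures used in this patch:

#include "llvm/LLVMContext.h"
#include "llvm/Linker.h"
#include "llvm/Module.h"
#include "llvm/Support/IRReader.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  Module *Dst = ParseIRFile("a.bc", Err, Ctx);  // becomes the composite
  Module *Src = ParseIRFile("b.bc", Err, Ctx);  // consumed by the link
  if (!Dst || !Src) return 1;

  std::string ErrorMsg;
  if (Linker::LinkModules(Dst, Src, &ErrorMsg)) {
    errs() << "link failed: " << ErrorMsg << '\n';
    return 1;
  }
  Dst->dump();  // the merged module
  return 0;
}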
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index a77ecd3..22afa7e 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -28,12 +28,14 @@ add_llvm_library(LLVMMC
MCSectionELF.cpp
MCSectionMachO.cpp
MCStreamer.cpp
+ MCSubtargetInfo.cpp
MCSymbol.cpp
MCValue.cpp
MCWin64EH.cpp
MachObjectWriter.cpp
WinCOFFStreamer.cpp
WinCOFFObjectWriter.cpp
+ SubtargetFeature.cpp
TargetAsmBackend.cpp
)
diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp
index 73b259e..502b60b 100644
--- a/lib/MC/MCAsmInfo.cpp
+++ b/lib/MC/MCAsmInfo.cpp
@@ -23,6 +23,9 @@
using namespace llvm;
MCAsmInfo::MCAsmInfo() {
+ PointerSize = 4;
+ IsLittleEndian = true;
+ StackGrowsUp = false;
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
@@ -78,6 +81,7 @@ MCAsmInfo::MCAsmInfo() {
DwarfRequiresRelocationForSectionOffset = true;
DwarfSectionOffsetDirective = 0;
DwarfUsesLabelOffsetForRanges = true;
+ DwarfRegNumForCFI = false;
HasMicrosoftFastStdCallMangling = false;
AsmTransCBE = 0;
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e8b09fc..d5d08e8 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -33,8 +34,10 @@ using namespace llvm;
namespace {
class MCAsmStreamer : public MCStreamer {
+protected:
formatted_raw_ostream &OS;
const MCAsmInfo &MAI;
+private:
OwningPtr<MCInstPrinter> InstPrinter;
OwningPtr<MCCodeEmitter> Emitter;
OwningPtr<TargetAsmBackend> AsmBackend;
@@ -134,7 +137,8 @@ public:
virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label);
+ const MCSymbol *Label,
+ unsigned PointerSize);
virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
const MCSymbol *Label);
@@ -361,9 +365,9 @@ void MCAsmStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {
void MCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label) {
- EmitDwarfSetLineAddr(LineDelta, Label,
- getContext().getTargetAsmInfo().getPointerSize());
+ const MCSymbol *Label,
+ unsigned PointerSize) {
+ EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
}
void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
@@ -600,7 +604,7 @@ void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
int64_t IntValue;
if (!Value->EvaluateAsAbsolute(IntValue))
report_fatal_error("Don't know how to emit this value.");
- if (getContext().getTargetAsmInfo().isLittleEndian()) {
+ if (getContext().getAsmInfo().isLittleEndian()) {
EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
} else {
@@ -822,9 +826,9 @@ void MCAsmStreamer::EmitCFIEndProc() {
}
void MCAsmStreamer::EmitRegisterName(int64_t Register) {
- if (InstPrinter) {
- const TargetAsmInfo &asmInfo = getContext().getTargetAsmInfo();
- unsigned LLVMRegister = asmInfo.getLLVMRegNum(Register, true);
+ if (InstPrinter && !MAI.useDwarfRegNumForCFI()) {
+ const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
+ unsigned LLVMRegister = TAI.getLLVMRegNum(Register, true);
InstPrinter->printRegName(OS, LLVMRegister);
} else {
OS << Register;
@@ -1085,7 +1089,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
}
}
- // FIXME: Node the fixup comments for Thumb2 are completely bogus since the
+ // FIXME: Note the fixup comments for Thumb2 are completely bogus since the
// high order halfword of a 32-bit Thumb2 instruction is emitted first.
OS << "encoding: [";
for (unsigned i = 0, e = Code.size(); i != e; ++i) {
@@ -1120,7 +1124,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
unsigned Bit = (Code[i] >> j) & 1;
unsigned FixupBit;
- if (getContext().getTargetAsmInfo().isLittleEndian())
+ if (getContext().getAsmInfo().isLittleEndian())
FixupBit = i * 8 + j;
else
FixupBit = i * 8 + (7-j);
@@ -1241,13 +1245,12 @@ void MCAsmStreamer::Finish() {
if (!UseCFI)
EmitFrames(false);
}
-
MCStreamer *llvm::createAsmStreamer(MCContext &Context,
formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc,
- bool useCFI,
- MCInstPrinter *IP, MCCodeEmitter *CE,
- TargetAsmBackend *TAB, bool ShowInst) {
+ bool useCFI, MCInstPrinter *IP,
+ MCCodeEmitter *CE, TargetAsmBackend *TAB,
+ bool ShowInst) {
return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
IP, CE, TAB, ShowInst);
}
diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp
index 6e636f0..5480b4b 100644
--- a/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/lib/MC/MCDisassembler/Disassembler.cpp
@@ -40,6 +40,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
llvm::InitializeAllTargetInfos();
// FIXME: We shouldn't need to initialize the Target(Machine)s.
llvm::InitializeAllTargets();
+ llvm::InitializeAllMCAsmInfos();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();
@@ -50,16 +51,18 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
assert(TheTarget && "Unable to create target!");
// Get the assembler info needed to setup the MCContext.
- const MCAsmInfo *MAI = TheTarget->createAsmInfo(TripleName);
+ const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(TripleName);
assert(MAI && "Unable to create target asm info!");
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
+ std::string CPU;
// FIXME: We shouldn't need to do this (and link in codegen).
// When we split this out, we should do it in a way that makes
// it straightforward to switch subtargets on the fly.
- TargetMachine *TM = TheTarget->createTargetMachine(TripleName, FeaturesStr);
+ TargetMachine *TM = TheTarget->createTargetMachine(TripleName, CPU,
+ FeaturesStr);
assert(TM && "Unable to create target machine!");
// Get the target assembler info needed to setup the context.
@@ -77,7 +80,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo,
// Set up the instruction printer.
int AsmPrinterVariant = MAI->getAssemblerDialect();
- MCInstPrinter *IP = TheTarget->createMCInstPrinter(*TM, AsmPrinterVariant,
+ MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant,
*MAI);
assert(IP && "Unable to create instruction printer!");
diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp
index 91c5284..bdd99af 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.cpp
+++ b/lib/MC/MCDisassembler/EDDisassembler.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
@@ -106,6 +107,7 @@ void EDDisassembler::initialize() {
InitializeAllTargetInfos();
InitializeAllTargets();
+ InitializeAllMCAsmInfos();
InitializeAllAsmPrinters();
InitializeAllAsmParsers();
InitializeAllDisassemblers();
@@ -167,11 +169,11 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
if (!Tgt)
return;
+ std::string CPU;
std::string featureString;
-
- TargetMachine.reset(Tgt->createTargetMachine(tripleString,
+ TargetMachine.reset(Tgt->createTargetMachine(tripleString, CPU,
featureString));
-
+
const TargetRegisterInfo *registerInfo = TargetMachine->getRegisterInfo();
if (!registerInfo)
@@ -179,11 +181,11 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
initMaps(*registerInfo);
- AsmInfo.reset(Tgt->createAsmInfo(tripleString));
+ AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
if (!AsmInfo)
return;
-
+
Disassembler.reset(Tgt->createMCDisassembler());
if (!Disassembler)
@@ -193,8 +195,7 @@ EDDisassembler::EDDisassembler(CPUKey &key) :
InstString.reset(new std::string);
InstStream.reset(new raw_string_ostream(*InstString));
- InstPrinter.reset(Tgt->createMCInstPrinter(*TargetMachine, LLVMSyntaxVariant,
- *AsmInfo));
+ InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo));
if (!InstPrinter)
return;
@@ -372,8 +373,11 @@ int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
OwningPtr<MCAsmParser> genericParser(createMCAsmParser(*Tgt, sourceMgr,
context, *streamer,
*AsmInfo));
- OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*genericParser,
- *TargetMachine));
+
+ StringRef triple = tripleFromArch(Key.Arch);
+ OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(triple, "", ""));
+ OwningPtr<TargetAsmParser> TargetParser(Tgt->createAsmParser(*STI,
+ *genericParser));
AsmToken OpcodeToken = genericParser->Lex();
AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h
index 2fcc09d..11d69c1 100644
--- a/lib/MC/MCDisassembler/EDDisassembler.h
+++ b/lib/MC/MCDisassembler/EDDisassembler.h
@@ -41,6 +41,7 @@ class MCInstPrinter;
class MCInst;
class MCParsedAsmOperand;
class MCStreamer;
+class MCSubtargetInfo;
template <typename T> class SmallVectorImpl;
class SourceMgr;
class Target;
diff --git a/lib/MC/MCDisassembler/EDInfo.h b/lib/MC/MCDisassembler/EDInfo.h
index ad57282..e43ad16 100644
--- a/lib/MC/MCDisassembler/EDInfo.h
+++ b/lib/MC/MCDisassembler/EDInfo.h
@@ -25,8 +25,11 @@ enum OperandTypes {
kOperandTypeARMBranchTarget,
kOperandTypeARMSoReg,
kOperandTypeARMSoImm,
+ kOperandTypeARMRotImm,
kOperandTypeARMSoImm2Part,
kOperandTypeARMPredicate,
+ kOperandTypeAddrModeImm12,
+ kOperandTypeLdStSOReg,
kOperandTypeARMAddrMode2,
kOperandTypeARMAddrMode2Offset,
kOperandTypeARMAddrMode3,
@@ -38,13 +41,20 @@ enum OperandTypes {
kOperandTypeARMAddrMode7,
kOperandTypeARMAddrModePC,
kOperandTypeARMRegisterList,
+ kOperandTypeARMDPRRegisterList,
+ kOperandTypeARMSPRRegisterList,
kOperandTypeARMTBAddrMode,
kOperandTypeThumbITMask,
- kOperandTypeThumbAddrModeS1,
- kOperandTypeThumbAddrModeS2,
- kOperandTypeThumbAddrModeS4,
+ kOperandTypeThumbAddrModeRegS1,
+ kOperandTypeThumbAddrModeRegS2,
+ kOperandTypeThumbAddrModeRegS4,
+ kOperandTypeThumbAddrModeImmS1,
+ kOperandTypeThumbAddrModeImmS2,
+ kOperandTypeThumbAddrModeImmS4,
kOperandTypeThumbAddrModeRR,
kOperandTypeThumbAddrModeSP,
+ kOperandTypeThumbAddrModePC,
+ kOperandTypeThumb2AddrModeReg,
kOperandTypeThumb2SoReg,
kOperandTypeThumb2SoImm,
kOperandTypeThumb2AddrModeImm8,
@@ -52,8 +62,7 @@ enum OperandTypes {
kOperandTypeThumb2AddrModeImm12,
kOperandTypeThumb2AddrModeSoReg,
kOperandTypeThumb2AddrModeImm8s4,
- kOperandTypeThumb2AddrModeImm8s4Offset,
- kOperandTypeThumb2AddrModeReg
+ kOperandTypeThumb2AddrModeImm8s4Offset
};
enum OperandFlags {
diff --git a/lib/MC/MCDisassembler/EDOperand.cpp b/lib/MC/MCDisassembler/EDOperand.cpp
index 492bb08..6a4e56f 100644
--- a/lib/MC/MCDisassembler/EDOperand.cpp
+++ b/lib/MC/MCDisassembler/EDOperand.cpp
@@ -61,11 +61,14 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
switch (operandType) {
default:
case kOperandTypeARMRegisterList:
+ case kOperandTypeARMDPRRegisterList:
+ case kOperandTypeARMSPRRegisterList:
break;
case kOperandTypeImmediate:
case kOperandTypeRegister:
case kOperandTypeARMBranchTarget:
case kOperandTypeARMSoImm:
+ case kOperandTypeARMRotImm:
case kOperandTypeThumb2SoImm:
case kOperandTypeARMSoImm2Part:
case kOperandTypeARMPredicate:
@@ -78,6 +81,7 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
numMCOperands = 1;
break;
case kOperandTypeThumb2SoReg:
+ case kOperandTypeAddrModeImm12:
case kOperandTypeARMAddrMode2Offset:
case kOperandTypeARMAddrMode3Offset:
case kOperandTypeARMAddrMode4:
@@ -86,17 +90,22 @@ EDOperand::EDOperand(const EDDisassembler &disassembler,
case kOperandTypeThumb2AddrModeImm8:
case kOperandTypeThumb2AddrModeImm12:
case kOperandTypeThumb2AddrModeImm8s4:
+ case kOperandTypeThumbAddrModeImmS1:
+ case kOperandTypeThumbAddrModeImmS2:
+ case kOperandTypeThumbAddrModeImmS4:
case kOperandTypeThumbAddrModeRR:
case kOperandTypeThumbAddrModeSP:
+ case kOperandTypeThumbAddrModePC:
numMCOperands = 2;
break;
case kOperandTypeARMSoReg:
+ case kOperandTypeLdStSOReg:
case kOperandTypeARMAddrMode2:
case kOperandTypeARMAddrMode3:
case kOperandTypeThumb2AddrModeSoReg:
- case kOperandTypeThumbAddrModeS1:
- case kOperandTypeThumbAddrModeS2:
- case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeThumbAddrModeRegS1:
+ case kOperandTypeThumbAddrModeRegS2:
+ case kOperandTypeThumbAddrModeRegS4:
case kOperandTypeARMAddrMode6Offset:
numMCOperands = 3;
break;
@@ -270,9 +279,9 @@ int EDOperand::isMemory() {
case kOperandTypeARMAddrMode7:
case kOperandTypeARMAddrModePC:
case kOperandTypeARMBranchTarget:
- case kOperandTypeThumbAddrModeS1:
- case kOperandTypeThumbAddrModeS2:
- case kOperandTypeThumbAddrModeS4:
+ case kOperandTypeThumbAddrModeRegS1:
+ case kOperandTypeThumbAddrModeRegS2:
+ case kOperandTypeThumbAddrModeRegS4:
case kOperandTypeThumbAddrModeRR:
case kOperandTypeThumbAddrModeSP:
case kOperandTypeThumb2SoImm:
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp
index 13cb81a..ad86db1 100644
--- a/lib/MC/MCDwarf.cpp
+++ b/lib/MC/MCDwarf.cpp
@@ -7,22 +7,21 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/FoldingSet.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
-#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCObjectWriter.h"
-#include "llvm/ADT/SmallString.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmBackend.h"
#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
using namespace llvm;
// Given a special op, return the address skip amount (in units of
@@ -30,28 +29,27 @@ using namespace llvm;
#define SPECIAL_ADDR(op) (((op) - DWARF2_LINE_OPCODE_BASE)/DWARF2_LINE_RANGE)
// The maximum address skip amount that can be encoded with a special op.
-#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255)
+#define MAX_SPECIAL_ADDR_DELTA SPECIAL_ADDR(255)
// First special line opcode - leave room for the standard opcodes.
// Note: If you want to change this, you'll have to update the
// "standard_opcode_lengths" table that is emitted in DwarfFileTable::Emit().
-#define DWARF2_LINE_OPCODE_BASE 13
+#define DWARF2_LINE_OPCODE_BASE 13
// Minimum line offset in a special line info. opcode. This value
// was chosen to give a reasonable range of values.
-#define DWARF2_LINE_BASE -5
+#define DWARF2_LINE_BASE -5
// Range of line offsets in a special line info. opcode.
-# define DWARF2_LINE_RANGE 14
+#define DWARF2_LINE_RANGE 14
// Define the architecture-dependent minimum instruction length (in bytes).
// If in doubt, this value should be too small rather than too big.
-# define DWARF2_LINE_MIN_INSN_LENGTH 1
+#define DWARF2_LINE_MIN_INSN_LENGTH 1
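A quick worked check of how these constants combine, using the standard DWARF line-program formula that MCDwarfLineAddr::Encode() below implements (address deltas here are already scaled to minimum-instruction-length units):

// Special opcode = (LineDelta - DWARF2_LINE_BASE)
//                + (DWARF2_LINE_RANGE * AddrDelta) + DWARF2_LINE_OPCODE_BASE
// e.g. LineDelta = +1, AddrDelta = 4:  (1 - (-5)) + 14*4 + 13 = 75.
// The largest opcode byte is 255, so the maximum encodable skip is
// MAX_SPECIAL_ADDR_DELTA = SPECIAL_ADDR(255) = (255 - 13) / 14 = 17.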
// Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 which is the current setting,
// this routine is a nop and will be optimized away.
-static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta)
-{
+static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) {
if (DWARF2_LINE_MIN_INSN_LENGTH == 1)
return AddrDelta;
if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) {
@@ -174,7 +172,9 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
// At this point we want to emit/create the sequence to encode the delta in
// line numbers and the increment of the address from the previous Label
// and the current Label.
- MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+ const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+ MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+ asmInfo.getPointerSize());
LastLine = it->getLine();
LastLabel = Label;
@@ -198,7 +198,9 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS,
// Switch back to the dwarf line section.
MCOS->SwitchSection(context.getTargetAsmInfo().getDwarfLineSection());
- MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd);
+ const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo();
+ MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd,
+ asmInfo.getPointerSize());
}
//
@@ -291,7 +293,7 @@ void MCDwarfFileTable::Emit(MCStreamer *MCOS) {
const std::vector<const MCSection *> &MCLineSectionOrder =
MCOS->getContext().getMCLineSectionOrder();
for (std::vector<const MCSection*>::const_iterator it =
- MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
+ MCLineSectionOrder.begin(), ie = MCLineSectionOrder.end(); it != ie;
++it) {
const MCSection *Sec = *it;
const MCLineSection *Line = MCLineSections.lookup(Sec);
@@ -354,10 +356,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta,
OS << char(dwarf::DW_LNS_const_add_pc);
else {
OS << char(dwarf::DW_LNS_advance_pc);
- SmallString<32> Tmp;
- raw_svector_ostream OSE(Tmp);
- MCObjectWriter::EncodeULEB128(AddrDelta, OSE);
- OS << OSE.str();
+ MCObjectWriter::EncodeULEB128(AddrDelta, OS);
}
OS << char(dwarf::DW_LNS_extended_op);
OS << char(1);
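EncodeULEB128 produces the variable-length unsigned LEB128 form used throughout the line program. For reference, a self-contained sketch of that encoding:

#include "llvm/Support/DataTypes.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Sketch: ULEB128 emits 7 data bits per byte, least-significant group first,
// with the high bit set on every byte except the last.
static void encodeULEB128(uint64_t Value, raw_ostream &OS) {
  do {
    unsigned char Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;  // more bytes follow
    OS << char(Byte);
  } while (Value != 0);
}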
@@ -432,25 +431,24 @@ void MCDwarfFile::dump() const {
static int getDataAlignmentFactor(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
- const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+ const MCAsmInfo &asmInfo = context.getAsmInfo();
int size = asmInfo.getPointerSize();
- if (asmInfo.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
+ if (asmInfo.isStackGrowthDirectionUp())
return size;
- else
- return -size;
+ else
+ return -size;
}
static unsigned getSizeForEncoding(MCStreamer &streamer,
unsigned symbolEncoding) {
MCContext &context = streamer.getContext();
- const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
unsigned format = symbolEncoding & 0x0f;
switch (format) {
default:
assert(0 && "Unknown Encoding");
case dwarf::DW_EH_PE_absptr:
case dwarf::DW_EH_PE_signed:
- return asmInfo.getPointerSize();
+ return context.getAsmInfo().getPointerSize();
case dwarf::DW_EH_PE_udata2:
case dwarf::DW_EH_PE_sdata2:
return 2;
@@ -464,13 +462,14 @@ static unsigned getSizeForEncoding(MCStreamer &streamer,
}
static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol,
- unsigned symbolEncoding) {
+ unsigned symbolEncoding, const char *comment = 0) {
MCContext &context = streamer.getContext();
const MCAsmInfo &asmInfo = context.getAsmInfo();
const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol,
symbolEncoding,
streamer);
unsigned size = getSizeForEncoding(streamer, symbolEncoding);
+ if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment);
streamer.EmitAbsValue(v, size);
}
@@ -486,11 +485,11 @@ static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol,
}
static const MachineLocation TranslateMachineLocation(
- const TargetAsmInfo &AsmInfo,
+ const TargetAsmInfo &TAI,
const MachineLocation &Loc) {
unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ?
MachineLocation::VirtualFP :
- unsigned(AsmInfo.getDwarfRegNum(Loc.getReg(), true));
+ unsigned(TAI.getDwarfRegNum(Loc.getReg(), true));
const MachineLocation &NewLoc = Loc.isReg() ?
MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset());
return NewLoc;
@@ -503,13 +502,18 @@ namespace {
bool UsingCFI;
bool IsEH;
const MCSymbol *SectionStart;
-
public:
FrameEmitterImpl(bool usingCFI, bool isEH, const MCSymbol *sectionStart) :
CFAOffset(0), CIENum(0), UsingCFI(usingCFI), IsEH(isEH),
SectionStart(sectionStart) {
}
+ /// EmitCompactUnwind - Emit the unwind information in a compact way. If
+  /// we're successful, return 'true'. Otherwise, return 'false' and the
+  /// caller will emit the normal CIE and FDE.
+ bool EmitCompactUnwind(MCStreamer &streamer,
+ const MCDwarfFrameInfo &frame);
+
const MCSymbol &EmitCIE(MCStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
@@ -524,11 +528,46 @@ namespace {
void EmitCFIInstruction(MCStreamer &Streamer,
const MCCFIInstruction &Instr);
};
+
+} // end anonymous namespace
+
+static void EmitEncodingByte(MCStreamer &Streamer, unsigned Encoding,
+ StringRef Prefix) {
+ if (Streamer.isVerboseAsm()) {
+ const char *EncStr = 0;
+ switch (Encoding) {
+    default: EncStr = "<unknown encoding>"; break;
+    case dwarf::DW_EH_PE_absptr: EncStr = "absptr"; break;
+    case dwarf::DW_EH_PE_omit:   EncStr = "omit"; break;
+    case dwarf::DW_EH_PE_pcrel:  EncStr = "pcrel"; break;
+    case dwarf::DW_EH_PE_udata4: EncStr = "udata4"; break;
+    case dwarf::DW_EH_PE_udata8: EncStr = "udata8"; break;
+    case dwarf::DW_EH_PE_sdata4: EncStr = "sdata4"; break;
+    case dwarf::DW_EH_PE_sdata8: EncStr = "sdata8"; break;
+    case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata4:
+      EncStr = "pcrel udata4"; break;
+    case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4:
+      EncStr = "pcrel sdata4"; break;
+    case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8:
+      EncStr = "pcrel udata8"; break;
+    case dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8:
+      EncStr = "pcrel sdata8"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata4:
+      EncStr = "indirect pcrel udata4"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata4:
+      EncStr = "indirect pcrel sdata4"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_udata8:
+      EncStr = "indirect pcrel udata8"; break;
+    case dwarf::DW_EH_PE_indirect |dwarf::DW_EH_PE_pcrel|dwarf::DW_EH_PE_sdata8:
+      EncStr = "indirect pcrel sdata8"; break;
+ }
+
+ Streamer.AddComment(Twine(Prefix) + " = " + EncStr);
+ }
+
+ Streamer.EmitIntValue(Encoding, 1);
}
void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
const MCCFIInstruction &Instr) {
int dataAlignmentFactor = getDataAlignmentFactor(Streamer);
+ bool VerboseAsm = Streamer.isVerboseAsm();
switch (Instr.getOperation()) {
case MCCFIInstruction::Move:
@@ -540,9 +579,13 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
// If advancing cfa.
if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
if (Src.getReg() == MachineLocation::VirtualFP) {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_offset");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_offset, 1);
} else {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") +
+ Twine(Src.getReg()));
Streamer.EmitULEB128IntValue(Src.getReg());
}
@@ -551,47 +594,62 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer,
else
CFAOffset = -Src.getOffset();
+      if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(CFAOffset));
Streamer.EmitULEB128IntValue(CFAOffset);
return;
}
if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
assert(Dst.isReg() && "Machine move not supported yet.");
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_def_cfa_register");
Streamer.EmitIntValue(dwarf::DW_CFA_def_cfa_register, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Dst.getReg()));
Streamer.EmitULEB128IntValue(Dst.getReg());
return;
}
unsigned Reg = Src.getReg();
-
int Offset = Dst.getOffset();
if (IsRelative)
Offset -= CFAOffset;
Offset = Offset / dataAlignmentFactor;
if (Offset < 0) {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended_sf");
Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended_sf, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitSLEB128IntValue(Offset);
} else if (Reg < 64) {
+ if (VerboseAsm) Streamer.AddComment(Twine("DW_CFA_offset + Reg(") +
+ Twine(Reg) + ")");
Streamer.EmitIntValue(dwarf::DW_CFA_offset + Reg, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitULEB128IntValue(Offset);
} else {
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_offset_extended");
Streamer.EmitIntValue(dwarf::DW_CFA_offset_extended, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
+ if (VerboseAsm) Streamer.AddComment(Twine("Offset ") + Twine(Offset));
Streamer.EmitULEB128IntValue(Offset);
}
return;
}
case MCCFIInstruction::Remember:
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_remember_state");
Streamer.EmitIntValue(dwarf::DW_CFA_remember_state, 1);
return;
case MCCFIInstruction::Restore:
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_restore_state");
Streamer.EmitIntValue(dwarf::DW_CFA_restore_state, 1);
return;
case MCCFIInstruction::SameValue: {
unsigned Reg = Instr.getDestination().getReg();
+ if (VerboseAsm) Streamer.AddComment("DW_CFA_same_value");
Streamer.EmitIntValue(dwarf::DW_CFA_same_value, 1);
+ if (VerboseAsm) Streamer.AddComment(Twine("Reg ") + Twine(Reg));
Streamer.EmitULEB128IntValue(Reg);
return;
}
@@ -614,6 +672,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
if (BaseLabel && Label) {
MCSymbol *ThisSym = Label;
if (ThisSym != BaseLabel) {
+ if (streamer.isVerboseAsm()) streamer.AddComment("DW_CFA_advance_loc4");
streamer.EmitDwarfAdvanceFrameAddr(BaseLabel, ThisSym);
BaseLabel = ThisSym;
}
@@ -623,40 +682,128 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer,
}
}
+/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're
+/// successful, return 'true'. Otherwise, return 'false' and the caller will
+/// emit the normal CIE and FDE.
+bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer,
+ const MCDwarfFrameInfo &Frame) {
+#if 1
+ return false;
+#else
+ MCContext &Context = Streamer.getContext();
+ const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+ bool VerboseAsm = Streamer.isVerboseAsm();
+
+ // range-start range-length compact-unwind-enc personality-func lsda
+ // _foo LfooEnd-_foo 0x00000023 0 0
+ // _bar LbarEnd-_bar 0x00000025 __gxx_personality except_tab1
+ //
+ // .section __LD,__compact_unwind,regular,debug
+ //
+ // # compact unwind for _foo
+ // .quad _foo
+ // .set L1,LfooEnd-_foo
+ // .long L1
+ // .long 0x01010001
+ // .quad 0
+ // .quad 0
+ //
+ // # compact unwind for _bar
+ // .quad _bar
+ // .set L2,LbarEnd-_bar
+ // .long L2
+ // .long 0x01020011
+ // .quad __gxx_personality
+ // .quad except_tab1
+
+ uint32_t Encoding =
+ TAI.getCompactUnwindEncoding(Frame.Instructions,
+ getDataAlignmentFactor(Streamer), IsEH);
+ if (!Encoding) return false;
+
+ // The encoding needs to know we have an LSDA.
+ if (Frame.Lsda)
+ Encoding |= 0x40000000;
+
+ Streamer.SwitchSection(TAI.getCompactUnwindSection());
+
+ // Range Start
+ unsigned FDEEncoding = TAI.getFDEEncoding(UsingCFI);
+ unsigned Size = getSizeForEncoding(Streamer, FDEEncoding);
+ if (VerboseAsm) Streamer.AddComment("Range Start");
+ Streamer.EmitSymbolValue(Frame.Function, Size);
+
+ // Range Length
+ const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin,
+ *Frame.End, 0);
+ if (VerboseAsm) Streamer.AddComment("Range Length");
+ Streamer.EmitAbsValue(Range, 4);
+
+ // Compact Encoding
+ Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4);
+ if (VerboseAsm) Streamer.AddComment(Twine("Compact Unwind Encoding: 0x") +
+ Twine(llvm::utohexstr(Encoding)));
+ Streamer.EmitIntValue(Encoding, Size);
+
+ // Personality Function
+ Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_absptr);
+ if (VerboseAsm) Streamer.AddComment("Personality Function");
+ if (Frame.Personality)
+ Streamer.EmitSymbolValue(Frame.Personality, Size);
+ else
+ Streamer.EmitIntValue(0, Size); // No personality fn
+
+ // LSDA
+ Size = getSizeForEncoding(Streamer, Frame.LsdaEncoding);
+ if (VerboseAsm) Streamer.AddComment("LSDA");
+ if (Frame.Lsda)
+ Streamer.EmitSymbolValue(Frame.Lsda, Size);
+ else
+ Streamer.EmitIntValue(0, Size); // No LSDA
+
+ return true;
+#endif
+}
+
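
The commented example above lays out each __compact_unwind row as five fields. The same layout as a plain struct, assuming 64-bit pointers; CompactUnwindEntry64, CU_HAS_LSDA, and withLsdaFlag are illustrative names, with the flag value taken from the Encoding |= 0x40000000 line in the (currently disabled) body:

    #include <cstdint>

    // One row of the __LD,__compact_unwind section, mirroring the commented
    // example: range start, range length, encoding, personality, LSDA.
    struct CompactUnwindEntry64 {
      uint64_t RangeStart;  // e.g. _foo
      uint32_t RangeLength; // e.g. LfooEnd-_foo
      uint32_t Encoding;    // compact unwind encoding word
      uint64_t Personality; // 0 when there is no personality function
      uint64_t LSDA;        // 0 when there is no LSDA
    };

    // The bit the emitter sets when the frame carries an LSDA.
    static const uint32_t CU_HAS_LSDA = 0x40000000;

    static uint32_t withLsdaFlag(uint32_t Encoding, bool HasLsda) {
      return HasLsda ? (Encoding | CU_HAS_LSDA) : Encoding;
    }
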
const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
const MCSymbol *personality,
unsigned personalityEncoding,
const MCSymbol *lsda,
unsigned lsdaEncoding) {
MCContext &context = streamer.getContext();
- const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ bool verboseAsm = streamer.isVerboseAsm();
MCSymbol *sectionStart;
- if (asmInfo.isFunctionEHFrameSymbolPrivate() || !IsEH)
+ if (TAI.isFunctionEHFrameSymbolPrivate() || !IsEH)
sectionStart = context.CreateTempSymbol();
else
sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum));
+ streamer.EmitLabel(sectionStart);
CIENum++;
- MCSymbol *sectionEnd = streamer.getContext().CreateTempSymbol();
+ MCSymbol *sectionEnd = context.CreateTempSymbol();
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *sectionStart,
*sectionEnd, 4);
- streamer.EmitLabel(sectionStart);
+ if (verboseAsm) streamer.AddComment("CIE Length");
streamer.EmitAbsValue(Length, 4);
// CIE ID
unsigned CIE_ID = IsEH ? 0 : -1;
+ if (verboseAsm) streamer.AddComment("CIE ID Tag");
streamer.EmitIntValue(CIE_ID, 4);
// Version
+ if (verboseAsm) streamer.AddComment("DW_CIE_VERSION");
streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1);
// Augmentation String
SmallString<8> Augmentation;
if (IsEH) {
+ if (verboseAsm) streamer.AddComment("CIE Augmentation");
Augmentation += "z";
if (personality)
Augmentation += "P";
@@ -668,13 +815,16 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
streamer.EmitIntValue(0, 1);
// Code Alignment Factor
+ if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor");
streamer.EmitULEB128IntValue(1);
// Data Alignment Factor
+ if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor");
streamer.EmitSLEB128IntValue(getDataAlignmentFactor(streamer));
// Return Address Register
- streamer.EmitULEB128IntValue(asmInfo.getDwarfRARegNum(true));
+ if (verboseAsm) streamer.AddComment("CIE Return Address Column");
+ streamer.EmitULEB128IntValue(TAI.getDwarfRARegNum(true));
// Augmentation Data Length (optional)
@@ -691,32 +841,38 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
// Encoding of the FDE pointers
augmentationLength += 1;
+ if (verboseAsm) streamer.AddComment("Augmentation Size");
streamer.EmitULEB128IntValue(augmentationLength);
// Augmentation Data (optional)
if (personality) {
// Personality Encoding
- streamer.EmitIntValue(personalityEncoding, 1);
+ EmitEncodingByte(streamer, personalityEncoding,
+ "Personality Encoding");
// Personality
+ if (verboseAsm) streamer.AddComment("Personality");
EmitPersonality(streamer, *personality, personalityEncoding);
}
+
if (lsda)
- streamer.EmitIntValue(lsdaEncoding, 1); // LSDA Encoding
+ EmitEncodingByte(streamer, lsdaEncoding, "LSDA Encoding");
+
// Encoding of the FDE pointers
- streamer.EmitIntValue(asmInfo.getFDEEncoding(UsingCFI), 1);
+ EmitEncodingByte(streamer, TAI.getFDEEncoding(UsingCFI),
+ "FDE Encoding");
}
// Initial Instructions
- const std::vector<MachineMove> Moves = asmInfo.getInitialFrameState();
+ const std::vector<MachineMove> &Moves = TAI.getInitialFrameState();
std::vector<MCCFIInstruction> Instructions;
for (int i = 0, n = Moves.size(); i != n; ++i) {
MCSymbol *Label = Moves[i].getLabel();
const MachineLocation &Dst =
- TranslateMachineLocation(asmInfo, Moves[i].getDestination());
+ TranslateMachineLocation(TAI, Moves[i].getDestination());
const MachineLocation &Src =
- TranslateMachineLocation(asmInfo, Moves[i].getSource());
+ TranslateMachineLocation(TAI, Moves[i].getSource());
MCCFIInstruction Inst(Label, Dst, Src);
Instructions.push_back(Inst);
}
@@ -724,7 +880,8 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer,
EmitCFIInstructions(streamer, Instructions, NULL);
// Padding
- streamer.EmitValueToAlignment(IsEH ? 4 : asmInfo.getPointerSize());
+ streamer.EmitValueToAlignment(IsEH
+ ? 4 : context.getAsmInfo().getPointerSize());
streamer.EmitLabel(sectionEnd);
return *sectionStart;
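
Only the 'z' and 'P' appends of the augmentation string are visible in this hunk; 'L' and 'R' are the standard DWARF EH letters matching the "LSDA Encoding" and "FDE Encoding" bytes EmitCIE writes further down, so the full string presumably comes out as "zR", "zPLR", and so on. A sketch of that assembly (buildAugmentation is an illustrative name):

    #include <string>

    // 'z' announces the augmentation-data-length field; each following
    // letter announces one optional datum in the augmentation data.
    static std::string buildAugmentation(bool HasPersonality, bool HasLsda) {
      std::string Aug = "z";
      if (HasPersonality)
        Aug += "P"; // personality encoding byte + personality pointer
      if (HasLsda)
        Aug += "L"; // LSDA encoding byte
      Aug += "R";   // FDE pointer encoding byte
      return Aug;   // e.g. "zPLR" or "zR"
    }
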
@@ -736,17 +893,19 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
MCContext &context = streamer.getContext();
MCSymbol *fdeStart = context.CreateTempSymbol();
MCSymbol *fdeEnd = context.CreateTempSymbol();
- const TargetAsmInfo &TAsmInfo = context.getTargetAsmInfo();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
+ bool verboseAsm = streamer.isVerboseAsm();
- if (!TAsmInfo.isFunctionEHFrameSymbolPrivate() && IsEH) {
- MCSymbol *EHSym = context.GetOrCreateSymbol(
- frame.Function->getName() + Twine(".eh"));
+ if (!TAI.isFunctionEHFrameSymbolPrivate() && IsEH) {
+ MCSymbol *EHSym =
+ context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh"));
streamer.EmitEHSymAttributes(frame.Function, EHSym);
streamer.EmitLabel(EHSym);
}
// Length
const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0);
+ if (verboseAsm) streamer.AddComment("FDE Length");
streamer.EmitAbsValue(Length, 4);
streamer.EmitLabel(fdeStart);
@@ -756,6 +915,7 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
if (IsEH) {
const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart,
0);
+ if (verboseAsm) streamer.AddComment("FDE CIE Offset");
streamer.EmitAbsValue(offset, 4);
} else if (!asmInfo.doesDwarfRequireRelocationForSectionOffset()) {
const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart,
@@ -764,18 +924,20 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
} else {
streamer.EmitSymbolValue(&cieStart, 4);
}
- unsigned fdeEncoding = TAsmInfo.getFDEEncoding(UsingCFI);
+
+ unsigned fdeEncoding = TAI.getFDEEncoding(UsingCFI);
unsigned size = getSizeForEncoding(streamer, fdeEncoding);
// PC Begin
unsigned PCBeginEncoding = IsEH ? fdeEncoding :
(unsigned)dwarf::DW_EH_PE_absptr;
unsigned PCBeginSize = getSizeForEncoding(streamer, PCBeginEncoding);
- EmitSymbol(streamer, *frame.Begin, PCBeginEncoding);
+ EmitSymbol(streamer, *frame.Begin, PCBeginEncoding, "FDE initial location");
// PC Range
const MCExpr *Range = MakeStartMinusEndExpr(streamer, *frame.Begin,
*frame.End, 0);
+ if (verboseAsm) streamer.AddComment("FDE address range");
streamer.EmitAbsValue(Range, size);
if (IsEH) {
@@ -785,11 +947,13 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer,
if (frame.Lsda)
augmentationLength += getSizeForEncoding(streamer, frame.LsdaEncoding);
+ if (verboseAsm) streamer.AddComment("Augmentation size");
streamer.EmitULEB128IntValue(augmentationLength);
// Augmentation Data
if (frame.Lsda)
- EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding);
+ EmitSymbol(streamer, *frame.Lsda, frame.LsdaEncoding,
+ "Language Specific Data Area");
}
// Call Frame Instructions
@@ -843,39 +1007,47 @@ namespace llvm {
};
}
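
Both records open with a length field built from a label difference. A DWARF initial-length field counts only the bytes that follow it, which appears to be why EmitCIE's MakeStartMinusEndExpr call passes 4 (sectionStart sits before the length field), while EmitFDE passes 0 (fdeStart is emitted after it). The same computation numerically, as a sketch (initialLength is an illustrative name):

    #include <cassert>
    #include <cstdint>

    // Value for a 4-byte DWARF initial-length field, given the byte offsets
    // of labels placed at the very start and end of the record.
    static uint32_t initialLength(uint64_t StartOffset, uint64_t EndOffset) {
      assert(EndOffset >= StartOffset + 4 &&
             "record must contain its length field");
      return static_cast<uint32_t>(EndOffset - StartOffset - 4);
    }
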
-void MCDwarfFrameEmitter::Emit(MCStreamer &streamer,
- bool usingCFI,
- bool isEH) {
- MCContext &context = streamer.getContext();
- const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
- const MCSection &section = isEH ?
- *asmInfo.getEHFrameSection() : *asmInfo.getDwarfFrameSection();
- streamer.SwitchSection(&section);
- MCSymbol *SectionStart = context.CreateTempSymbol();
- streamer.EmitLabel(SectionStart);
-
- MCSymbol *fdeEnd = NULL;
+void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer,
+ bool UsingCFI,
+ bool IsEH) {
+ MCContext &Context = Streamer.getContext();
+ const TargetAsmInfo &TAI = Context.getTargetAsmInfo();
+ const MCSection &Section = IsEH ? *TAI.getEHFrameSection() :
+ *TAI.getDwarfFrameSection();
+ Streamer.SwitchSection(&Section);
+ MCSymbol *SectionStart = Context.CreateTempSymbol();
+ Streamer.EmitLabel(SectionStart);
+
+ MCSymbol *FDEEnd = NULL;
DenseMap<CIEKey, const MCSymbol*> CIEStarts;
- FrameEmitterImpl Emitter(usingCFI, isEH, SectionStart);
+ FrameEmitterImpl Emitter(UsingCFI, IsEH, SectionStart);
const MCSymbol *DummyDebugKey = NULL;
- for (unsigned i = 0, n = streamer.getNumFrameInfos(); i < n; ++i) {
- const MCDwarfFrameInfo &frame = streamer.getFrameInfo(i);
- CIEKey key(frame.Personality, frame.PersonalityEncoding,
- frame.LsdaEncoding);
- const MCSymbol *&cieStart = isEH ? CIEStarts[key] : DummyDebugKey;
- if (!cieStart)
- cieStart = &Emitter.EmitCIE(streamer, frame.Personality,
- frame.PersonalityEncoding, frame.Lsda,
- frame.LsdaEncoding);
- fdeEnd = Emitter.EmitFDE(streamer, *cieStart, frame);
+ for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) {
+ const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i);
+ if (IsEH && TAI.getCompactUnwindSection() &&
+ Emitter.EmitCompactUnwind(Streamer, Frame)) {
+ FDEEnd = NULL;
+ continue;
+ }
+
+ CIEKey Key(Frame.Personality, Frame.PersonalityEncoding,
+ Frame.LsdaEncoding);
+ const MCSymbol *&CIEStart = IsEH ? CIEStarts[Key] : DummyDebugKey;
+ if (!CIEStart)
+ CIEStart = &Emitter.EmitCIE(Streamer, Frame.Personality,
+ Frame.PersonalityEncoding, Frame.Lsda,
+ Frame.LsdaEncoding);
+
+ FDEEnd = Emitter.EmitFDE(Streamer, *CIEStart, Frame);
+
if (i != n - 1)
- streamer.EmitLabel(fdeEnd);
+ Streamer.EmitLabel(FDEEnd);
}
- streamer.EmitValueToAlignment(asmInfo.getPointerSize());
- if (fdeEnd)
- streamer.EmitLabel(fdeEnd);
+ Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize());
+ if (FDEEnd)
+ Streamer.EmitLabel(FDEEnd);
}
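
The loop above emits at most one CIE per distinct (personality, personality encoding, LSDA encoding) triple and points every matching FDE at the shared copy. The same memoization with standard containers, as a sketch; CIECache and the label strings stand in for the DenseMap of MCSymbol pointers:

    #include <map>
    #include <string>
    #include <tuple>

    using CIEKeyT = std::tuple<const void *, unsigned, unsigned>;

    struct CIECache {
      std::map<CIEKeyT, std::string> Starts; // key -> CIE start label
      unsigned Counter = 0;

      const std::string &get(const void *Personality, unsigned PersonalityEnc,
                             unsigned LsdaEnc) {
        CIEKeyT Key(Personality, PersonalityEnc, LsdaEnc);
        auto It = Starts.find(Key);
        if (It == Starts.end()) // first frame with this triple: new CIE
          It = Starts.emplace(Key, "CIE" + std::to_string(Counter++)).first;
        return It->second;
      }
    };
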
void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer,
diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp
index bbb2789..49340ed 100644
--- a/lib/MC/MCELFStreamer.cpp
+++ b/lib/MC/MCELFStreamer.cpp
@@ -26,7 +26,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;
diff --git a/lib/MC/MCELFStreamer.h b/lib/MC/MCELFStreamer.h
index db34d58..855e7e9 100644
--- a/lib/MC/MCELFStreamer.h
+++ b/lib/MC/MCELFStreamer.h
@@ -138,137 +138,3 @@ private:
} // end llvm namespace
#endif
-//===- lib/MC/MCELFStreamer.h - ELF Object Output -------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file assembles .s files and emits ELF .o object files.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_MC_MCELFSTREAMER_H
-#define LLVM_MC_MCELFSTREAMER_H
-
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/MC/MCAssembler.h"
-#include "llvm/MC/MCContext.h"
-#include "llvm/MC/MCObjectStreamer.h"
-#include "llvm/MC/MCSectionELF.h"
-
-namespace llvm {
-
-class MCELFStreamer : public MCObjectStreamer {
-public:
- MCELFStreamer(MCContext &Context, TargetAsmBackend &TAB,
- raw_ostream &OS, MCCodeEmitter *Emitter)
- : MCObjectStreamer(Context, TAB, OS, Emitter) {}
-
- ~MCELFStreamer() {}
-
- /// @name MCStreamer Interface
- /// @{
-
- virtual void InitSections();
- virtual void ChangeSection(const MCSection *Section);
- virtual void EmitLabel(MCSymbol *Symbol);
- virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
- virtual void EmitThumbFunc(MCSymbol *Func);
- virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
- virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
- virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
- virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
- unsigned ByteAlignment);
- virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitCOFFSymbolStorageClass(int StorageClass) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitCOFFSymbolType(int Type) {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EndCOFFSymbolDef() {
- assert(0 && "ELF doesn't support this directive");
- }
-
- virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
- MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol);
- SD.setSize(Value);
- }
-
- virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size);
-
- virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
- unsigned Size = 0, unsigned ByteAlignment = 0) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
- uint64_t Size, unsigned ByteAlignment = 0) {
- assert(0 && "ELF doesn't support this directive");
- }
- virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
- virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
- unsigned ValueSize = 1,
- unsigned MaxBytesToEmit = 0);
- virtual void EmitCodeAlignment(unsigned ByteAlignment,
- unsigned MaxBytesToEmit = 0);
-
- virtual void EmitFileDirective(StringRef Filename);
-
- virtual void Finish();
-
-private:
- virtual void EmitInstToFragment(const MCInst &Inst);
- virtual void EmitInstToData(const MCInst &Inst);
-
- void fixSymbolsInTLSFixups(const MCExpr *expr);
-
- struct LocalCommon {
- MCSymbolData *SD;
- uint64_t Size;
- unsigned ByteAlignment;
- };
- std::vector<LocalCommon> LocalCommons;
-
- SmallPtrSet<MCSymbol *, 16> BindingExplicitlySet;
- /// @}
- void SetSection(StringRef Section, unsigned Type, unsigned Flags,
- SectionKind Kind) {
- SwitchSection(getContext().getELFSection(Section, Type, Flags, Kind));
- }
-
- void SetSectionData() {
- SetSection(".data", ELF::SHT_PROGBITS,
- ELF::SHF_WRITE |ELF::SHF_ALLOC,
- SectionKind::getDataRel());
- EmitCodeAlignment(4, 0);
- }
- void SetSectionText() {
- SetSection(".text", ELF::SHT_PROGBITS,
- ELF::SHF_EXECINSTR |
- ELF::SHF_ALLOC, SectionKind::getText());
- EmitCodeAlignment(4, 0);
- }
- void SetSectionBss() {
- SetSection(".bss", ELF::SHT_NOBITS,
- ELF::SHF_WRITE |
- ELF::SHF_ALLOC, SectionKind::getBSS());
- EmitCodeAlignment(4, 0);
- }
-};
-
-} // end llvm namespace
-
-#endif
diff --git a/lib/MC/MCLoggingStreamer.cpp b/lib/MC/MCLoggingStreamer.cpp
index 46ea9b8..309752e 100644
--- a/lib/MC/MCLoggingStreamer.cpp
+++ b/lib/MC/MCLoggingStreamer.cpp
@@ -85,9 +85,11 @@ public:
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label) {
+ const MCSymbol *Label,
+ unsigned PointerSize) {
LogCall("EmitDwarfAdvanceLineAddr");
- return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label);
+ return Child->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
+ PointerSize);
}
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) {
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 12aeb4f..1b21249 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -24,7 +24,6 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index f38b822..9577af0 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -44,7 +44,8 @@ namespace {
virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol){}
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label) {}
+ const MCSymbol *Label,
+ unsigned PointerSize) {}
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){}
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index e230c53..8635aac 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -18,7 +18,6 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetAsmBackend.h"
-#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;
MCObjectStreamer::MCObjectStreamer(MCContext &Context, TargetAsmBackend &TAB,
@@ -197,9 +196,9 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) {
void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label) {
+ const MCSymbol *Label,
+ unsigned PointerSize) {
if (!LastLabel) {
- int PointerSize = getContext().getTargetAsmInfo().getPointerSize();
EmitDwarfSetLineAddr(LineDelta, Label, PointerSize);
return;
}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 4f55cea..0c181f3 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
@@ -84,6 +85,7 @@ private:
AsmLexer Lexer;
MCContext &Ctx;
MCStreamer &Out;
+ const MCAsmInfo &MAI;
SourceMgr &SrcMgr;
MCAsmParserExtension *GenericParser;
MCAsmParserExtension *PlatformParser;
@@ -135,7 +137,7 @@ public:
virtual MCContext &getContext() { return Ctx; }
virtual MCStreamer &getStreamer() { return Out; }
- virtual bool Warning(SMLoc L, const Twine &Meg);
+ virtual bool Warning(SMLoc L, const Twine &Msg);
virtual bool Error(SMLoc L, const Twine &Msg);
const AsmToken &Lex();
@@ -160,8 +162,9 @@ private:
void HandleMacroExit();
void PrintMacroInstantiations();
- void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type) const {
- SrcMgr.PrintMessage(Loc, Msg, Type);
+ void PrintMessage(SMLoc Loc, const Twine &Msg, const char *Type,
+ bool ShowLine = true) const {
+ SrcMgr.PrintMessage(Loc, Msg, Type, ShowLine);
}
/// EnterIncludeFile - Enter the specified file. This returns true on failure.
@@ -337,7 +340,7 @@ enum { DEFAULT_ADDRSPACE = 0 };
AsmParser::AsmParser(const Target &T, SourceMgr &_SM, MCContext &_Ctx,
MCStreamer &_Out, const MCAsmInfo &_MAI)
- : Lexer(_MAI), Ctx(_Ctx), Out(_Out), SrcMgr(_SM),
+ : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM),
GenericParser(new GenericAsmParser), PlatformParser(0),
CurBuffer(0), MacrosEnabled(true) {
Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer));
@@ -466,6 +469,29 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
TokError("unassigned file number: " + Twine(i) + " for .file directives");
}
+ // Check to see that all assembler local symbols were actually defined.
+ // Targets that don't do subsections via symbols may not want this, so
+ // conservatively exclude them. Also, only do this if we're finalizing, as
+ // otherwise we won't necessarily have seen everything yet.
+ if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) {
+ const MCContext::SymbolTable &Symbols = getContext().getSymbols();
+ for (MCContext::SymbolTable::const_iterator i = Symbols.begin(),
+ e = Symbols.end();
+ i != e; ++i) {
+ MCSymbol *Sym = i->getValue();
+ // Variable symbols may not be marked as defined, so check those
+ // explicitly. If we know it's a variable, we have a definition for
+ // the purposes of this check.
+ if (Sym->isTemporary() && !Sym->isVariable() && !Sym->isDefined())
+ // FIXME: We would really like to refer back to where the symbol was
+ // first referenced for a source location. We need to add something
+ // to track that. Currently, we just point to the end of the file.
+ PrintMessage(getLexer().getLoc(), "assembler local symbol '" +
+ Sym->getName() + "' not defined", "error", false);
+ }
+ }
+
// Finalize the output stream if there are no errors and if the client wants
// us to.
if (!HadError && !NoFinalize)
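
Restating the shape of the new post-parse check: an assembler-temporary symbol that was referenced but is neither defined nor a variable (an assignment counts as a definition here) is an error. A self-contained sketch, with SymInfo standing in for MCSymbol and all names illustrative:

    #include <string>
    #include <vector>

    struct SymInfo {
      std::string Name;
      bool IsTemporary, IsVariable, IsDefined;
    };

    // Collect one diagnostic per undefined assembler-local symbol.
    static std::vector<std::string>
    undefinedLocals(const std::vector<SymInfo> &Symbols) {
      std::vector<std::string> Errs;
      for (const SymInfo &S : Symbols)
        if (S.IsTemporary && !S.IsVariable && !S.IsDefined)
          Errs.push_back("assembler local symbol '" + S.Name +
                         "' not defined");
      return Errs;
    }
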
@@ -1121,7 +1147,7 @@ bool AsmParser::ParseStatement() {
if (IDVal == ".weak_def_can_be_hidden")
return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate);
- if (IDVal == ".comm")
+ if (IDVal == ".comm" || IDVal == ".common")
return ParseDirectiveComm(/*IsLocal=*/false);
if (IDVal == ".lcomm")
return ParseDirectiveComm(/*IsLocal=*/true);
@@ -1168,7 +1194,7 @@ bool AsmParser::ParseStatement() {
for (unsigned i = 0; i != ParsedOperands.size(); ++i) {
if (i != 0)
OS << ", ";
- ParsedOperands[i]->dump(OS);
+ ParsedOperands[i]->print(OS);
}
OS << "]";
@@ -1587,13 +1613,18 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) {
for (;;) {
const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
if (ParseExpression(Value))
return true;
// Special case constant expressions to match code generator.
- if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value))
- getStreamer().EmitIntValue(MCE->getValue(), Size, DEFAULT_ADDRSPACE);
- else
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ getStreamer().EmitIntValue(IntValue, Size, DEFAULT_ADDRSPACE);
+ } else
getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE);
if (getLexer().is(AsmToken::EndOfStatement))
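
The new range check accepts a literal for an N-byte directive when it fits as either an unsigned or a signed N-byte value, mirroring the isUIntN/isIntN pair above. A sketch of the same predicate (fitsInDirective is an illustrative name; Size is 1..8):

    #include <cstdint>

    static bool fitsInDirective(uint64_t V, unsigned Size) {
      unsigned N = 8 * Size;
      if (N >= 64)
        return true; // an 8-byte directive can hold any 64-bit pattern
      uint64_t UMax = (uint64_t(1) << N) - 1;  // e.g. 255 for .byte
      int64_t SMin = -(int64_t(1) << (N - 1)); // e.g. -128 for .byte
      int64_t SMax = int64_t(UMax >> 1);       // e.g. 127 for .byte
      bool FitsUnsigned = V <= UMax;
      bool FitsSigned = int64_t(V) >= SMin && int64_t(V) <= SMax;
      return FitsUnsigned || FitsSigned;
    }

So ".byte 255" and ".byte -128" still pass, while ".byte 300" now fails with "literal value out of range for directive".
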
diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp
index 64f6355..66ad384 100644
--- a/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/lib/MC/MCParser/COFFAsmParser.cpp
@@ -401,14 +401,14 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) {
bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
SMLoc startLoc = getLexer().getLoc();
if (getLexer().is(AsmToken::Percent)) {
- const TargetAsmInfo &asmInfo = getContext().getTargetAsmInfo();
+ const TargetAsmInfo &TAI = getContext().getTargetAsmInfo();
SMLoc endLoc;
unsigned LLVMRegNo;
if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc))
return true;
// Check that this is a non-volatile register.
- const unsigned *NVRegs = asmInfo.getCalleeSavedRegs();
+ const unsigned *NVRegs = TAI.getCalleeSavedRegs();
unsigned i;
for (i = 0; NVRegs[i] != 0; ++i)
if (NVRegs[i] == LLVMRegNo)
@@ -416,7 +416,7 @@ bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) {
if (NVRegs[i] == 0)
return Error(startLoc, "expected non-volatile register");
- int SEHRegNo = asmInfo.getSEHRegNum(LLVMRegNo);
+ int SEHRegNo = TAI.getSEHRegNum(LLVMRegNo);
if (SEHRegNo < 0)
return Error(startLoc,"register can't be represented in SEH unwind info");
RegNo = SEHRegNo;
diff --git a/lib/MC/MCParser/MCAsmParser.cpp b/lib/MC/MCParser/MCAsmParser.cpp
index 70295ef..4030e41 100644
--- a/lib/MC/MCParser/MCAsmParser.cpp
+++ b/lib/MC/MCParser/MCAsmParser.cpp
@@ -12,6 +12,8 @@
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
@@ -41,4 +43,6 @@ bool MCAsmParser::ParseExpression(const MCExpr *&Res) {
return ParseExpression(Res, L);
}
-
+void MCParsedAsmOperand::dump() const {
+ dbgs() << " " << *this;
+}
diff --git a/lib/MC/MCParser/TargetAsmParser.cpp b/lib/MC/MCParser/TargetAsmParser.cpp
index 8d43c21..512f6b0 100644
--- a/lib/MC/MCParser/TargetAsmParser.cpp
+++ b/lib/MC/MCParser/TargetAsmParser.cpp
@@ -10,8 +10,8 @@
#include "llvm/Target/TargetAsmParser.h"
using namespace llvm;
-TargetAsmParser::TargetAsmParser(const Target &T)
- : TheTarget(T), AvailableFeatures(0)
+TargetAsmParser::TargetAsmParser()
+ : AvailableFeatures(0)
{
}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
index ae3ed0f..6e96b78 100644
--- a/lib/MC/MCStreamer.cpp
+++ b/lib/MC/MCStreamer.cpp
@@ -15,7 +15,6 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include <cstdlib>
@@ -81,7 +80,7 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size,
assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) &&
"Invalid size");
char buf[8];
- const bool isLittleEndian = Context.getTargetAsmInfo().isLittleEndian();
+ const bool isLittleEndian = Context.getAsmInfo().isLittleEndian();
for (unsigned i = 0; i != Size; ++i) {
unsigned index = isLittleEndian ? i : (Size - i - 1);
buf[i] = uint8_t(Value >> (index * 8));
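
The byte loop above, in isolation: the integer is serialized least-significant byte first on little-endian targets and most-significant byte first otherwise. A sketch (writeInt is an illustrative name):

    #include <cstdint>

    static void writeInt(uint8_t *Buf, uint64_t Value, unsigned Size,
                         bool IsLittleEndian) {
      for (unsigned i = 0; i != Size; ++i) {
        // Pick which byte of Value lands at position i.
        unsigned Index = IsLittleEndian ? i : (Size - i - 1);
        Buf[i] = uint8_t(Value >> (Index * 8));
      }
    }

    // writeInt(Buf, 0x11223344, 4, true)  -> 44 33 22 11
    // writeInt(Buf, 0x11223344, 4, false) -> 11 22 33 44
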
diff --git a/lib/MC/MCSubtargetInfo.cpp b/lib/MC/MCSubtargetInfo.cpp
new file mode 100644
index 0000000..86dc108
--- /dev/null
+++ b/lib/MC/MCSubtargetInfo.cpp
@@ -0,0 +1,96 @@
+//===-- MCSubtargetInfo.cpp - Subtarget Information -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+
+using namespace llvm;
+
+void
+MCSubtargetInfo::InitMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS,
+ const SubtargetFeatureKV *PF,
+ const SubtargetFeatureKV *PD,
+ const SubtargetInfoKV *PI,
+ const InstrStage *IS,
+ const unsigned *OC,
+ const unsigned *FP,
+ unsigned NF, unsigned NP) {
+ TargetTriple = TT;
+ ProcFeatures = PF;
+ ProcDesc = PD;
+ ProcItins = PI;
+ Stages = IS;
+ OperandCycles = OC;
+ ForwardingPathes = FP;
+ NumFeatures = NF;
+ NumProcs = NP;
+
+ SubtargetFeatures Features(FS);
+ FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+ ProcFeatures, NumFeatures);
+}
+
+
+/// ReInitMCSubtargetInfo - Change the CPU (optionally supplemented with a
+/// feature string) and recompute the feature bits.
+uint64_t MCSubtargetInfo::ReInitMCSubtargetInfo(StringRef CPU, StringRef FS) {
+ SubtargetFeatures Features(FS);
+ FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs,
+ ProcFeatures, NumFeatures);
+ return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and return the re-computed feature
+/// bits. This version does not change the implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(uint64_t FB) {
+ FeatureBits ^= FB;
+ return FeatureBits;
+}
+
+/// ToggleFeature - Toggle a feature and return the re-computed feature
+/// bits. This version will also change all implied bits.
+uint64_t MCSubtargetInfo::ToggleFeature(StringRef FS) {
+ SubtargetFeatures Features;
+ FeatureBits = Features.ToggleFeature(FeatureBits, FS,
+ ProcFeatures, NumFeatures);
+ return FeatureBits;
+}
+
+
+InstrItineraryData
+MCSubtargetInfo::getInstrItineraryForCPU(StringRef CPU) const {
+ assert(ProcItins && "Instruction itinerary information not available!");
+
+#ifndef NDEBUG
+ for (size_t i = 1; i < NumProcs; i++) {
+ assert(strcmp(ProcItins[i - 1].Key, ProcItins[i].Key) < 0 &&
+ "Itineraries table is not sorted");
+ }
+#endif
+
+ // Find entry
+ SubtargetInfoKV KV;
+ KV.Key = CPU.data();
+ const SubtargetInfoKV *Found =
+ std::lower_bound(ProcItins, ProcItins+NumProcs, KV);
+ if (Found == ProcItins+NumProcs || StringRef(Found->Key) != CPU) {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
+ return InstrItineraryData();
+ }
+
+ return InstrItineraryData(Stages, OperandCycles, ForwardingPathes,
+ (InstrItinerary *)Found->Value);
+}
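
getInstrItineraryForCPU is a binary search over a key-sorted table; the NDEBUG block asserts exactly the ordering that std::lower_bound relies on. A simplified sketch of the lookup, where KV and lookupCPU are illustrative and the real SubtargetInfoKV pairs a C-string key with an opaque value pointer:

    #include <algorithm>
    #include <cstddef>
    #include <cstring>
    #include <string>

    struct KV {
      const char *Key;
      const void *Value;
      bool operator<(const KV &RHS) const { return strcmp(Key, RHS.Key) < 0; }
    };

    static const void *lookupCPU(const KV *Table, size_t N,
                                 const std::string &CPU) {
      KV Probe = { CPU.c_str(), 0 };
      const KV *Found = std::lower_bound(Table, Table + N, Probe);
      if (Found == Table + N || CPU != Found->Key)
        return 0; // unrecognized processor: caller falls back to defaults
      return Found->Value;
    }
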
diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp
index 9453f5c..e698384 100644
--- a/lib/MC/MCWin64EH.cpp
+++ b/lib/MC/MCWin64EH.cpp
@@ -225,9 +225,9 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
// Switch sections (the static function above is meant to be called from
// here and from Emit()).
MCContext &context = streamer.getContext();
- const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
const MCSection *xdataSect =
- asmInfo.getWin64EHTableSection(GetSectionSuffix(info->Function));
+ TAI.getWin64EHTableSection(GetSectionSuffix(info->Function));
streamer.SwitchSection(xdataSect);
llvm::EmitUnwindInfo(streamer, info);
@@ -236,11 +236,11 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer,
void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
MCContext &context = streamer.getContext();
// Emit the unwind info structs first.
- const TargetAsmInfo &asmInfo = context.getTargetAsmInfo();
+ const TargetAsmInfo &TAI = context.getTargetAsmInfo();
for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
const MCSection *xdataSect =
- asmInfo.getWin64EHTableSection(GetSectionSuffix(info.Function));
+ TAI.getWin64EHTableSection(GetSectionSuffix(info.Function));
streamer.SwitchSection(xdataSect);
llvm::EmitUnwindInfo(streamer, &info);
}
@@ -248,7 +248,7 @@ void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) {
for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) {
MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i);
const MCSection *pdataSect =
- asmInfo.getWin64EHFuncTableSection(GetSectionSuffix(info.Function));
+ TAI.getWin64EHFuncTableSection(GetSectionSuffix(info.Function));
streamer.SwitchSection(pdataSect);
EmitRuntimeFunction(streamer, &info);
}
diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp
index f049b1c..69efe23 100644
--- a/lib/MC/MachObjectWriter.cpp
+++ b/lib/MC/MachObjectWriter.cpp
@@ -23,34 +23,12 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetAsmBackend.h"
-// FIXME: Gross.
-#include "../Target/ARM/ARMFixupKinds.h"
-#include "../Target/X86/X86FixupKinds.h"
-
#include <vector>
using namespace llvm;
using namespace llvm::object;
-// FIXME: this has been copied from (or to) X86AsmBackend.cpp
-static unsigned getFixupKindLog2Size(unsigned Kind) {
- switch (Kind) {
- default:
- llvm_unreachable("invalid fixup kind!");
- case FK_PCRel_1:
- case FK_Data_1: return 0;
- case FK_PCRel_2:
- case FK_Data_2: return 1;
- case FK_PCRel_4:
- // FIXME: Remove these!!!
- case X86::reloc_riprel_4byte:
- case X86::reloc_riprel_4byte_movq_load:
- case X86::reloc_signed_4byte:
- case FK_Data_4: return 2;
- case FK_Data_8: return 3;
- }
-}
-
-static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
+bool MachObjectWriter::
+doesSymbolRequireExternRelocation(const MCSymbolData *SD) {
// Undefined symbols are always extern.
if (SD->Symbol->isUndefined())
return true;
@@ -64,1557 +42,740 @@ static bool doesSymbolRequireExternRelocation(MCSymbolData *SD) {
return false;
}
-namespace {
-
-class MachObjectWriter : public MCObjectWriter {
- /// MachSymbolData - Helper struct for containing some precomputed information
- /// on symbols.
- struct MachSymbolData {
- MCSymbolData *SymbolData;
- uint64_t StringIndex;
- uint8_t SectionIndex;
+bool MachObjectWriter::
+MachSymbolData::operator<(const MachSymbolData &RHS) const {
+ return SymbolData->getSymbol().getName() <
+ RHS.SymbolData->getSymbol().getName();
+}
- // Support lexicographic sorting.
- bool operator<(const MachSymbolData &RHS) const {
- return SymbolData->getSymbol().getName() <
- RHS.SymbolData->getSymbol().getName();
- }
- };
+bool MachObjectWriter::isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
+ const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
+ (MCFixupKind) Kind);
- /// The target specific Mach-O writer instance.
- llvm::OwningPtr<MCMachObjectTargetWriter> TargetObjectWriter;
+ return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
+}
- /// @name Relocation Data
- /// @{
+uint64_t MachObjectWriter::getFragmentAddress(const MCFragment *Fragment,
+ const MCAsmLayout &Layout) const {
+ return getSectionAddress(Fragment->getParent()) +
+ Layout.getFragmentOffset(Fragment);
+}
- llvm::DenseMap<const MCSectionData*,
- std::vector<macho::RelocationEntry> > Relocations;
- llvm::DenseMap<const MCSectionData*, unsigned> IndirectSymBase;
+uint64_t MachObjectWriter::getSymbolAddress(const MCSymbolData* SD,
+ const MCAsmLayout &Layout) const {
+ const MCSymbol &S = SD->getSymbol();
+
+ // If this is a variable, then recursively evaluate now.
+ if (S.isVariable()) {
+ MCValue Target;
+ if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
+ report_fatal_error("unable to evaluate offset for variable '" +
+ S.getName() + "'");
+
+ // Verify that any used symbols are defined.
+ if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymA()->getSymbol().getName() + "'");
+ if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
+ report_fatal_error("unable to evaluate offset to undefined symbol '" +
+ Target.getSymB()->getSymbol().getName() + "'");
+
+ uint64_t Address = Target.getConstant();
+ if (Target.getSymA())
+ Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+ Target.getSymA()->getSymbol()), Layout);
+ if (Target.getSymB())
+ Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
+ Target.getSymB()->getSymbol()), Layout);
+ return Address;
+ }
- /// @}
- /// @name Symbol Table Data
- /// @{
+ return getSectionAddress(SD->getFragment()->getParent()) +
+ Layout.getSymbolOffset(SD);
+}
- SmallString<256> StringTable;
- std::vector<MachSymbolData> LocalSymbolData;
- std::vector<MachSymbolData> ExternalSymbolData;
- std::vector<MachSymbolData> UndefinedSymbolData;
+uint64_t MachObjectWriter::getPaddingSize(const MCSectionData *SD,
+ const MCAsmLayout &Layout) const {
+ uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
+ unsigned Next = SD->getLayoutOrder() + 1;
+ if (Next >= Layout.getSectionOrder().size())
+ return 0;
+
+ const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
+ if (NextSD.getSection().isVirtualSection())
+ return 0;
+ return OffsetToAlignment(EndAddr, NextSD.getAlignment());
+}
- /// @}
+void MachObjectWriter::WriteHeader(unsigned NumLoadCommands,
+ unsigned LoadCommandsSize,
+ bool SubsectionsViaSymbols) {
+ uint32_t Flags = 0;
-private:
- /// @name Utility Methods
- /// @{
+ if (SubsectionsViaSymbols)
+ Flags |= macho::HF_SubsectionsViaSymbols;
- bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind) {
- const MCFixupKindInfo &FKI = Asm.getBackend().getFixupKindInfo(
- (MCFixupKind) Kind);
+ // struct mach_header (28 bytes) or
+ // struct mach_header_64 (32 bytes)
- return FKI.Flags & MCFixupKindInfo::FKF_IsPCRel;
- }
+ uint64_t Start = OS.tell();
+ (void) Start;
- /// @}
+ Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
- SectionAddrMap SectionAddress;
- uint64_t getSectionAddress(const MCSectionData* SD) const {
- return SectionAddress.lookup(SD);
- }
- uint64_t getSymbolAddress(const MCSymbolData* SD,
- const MCAsmLayout &Layout) const {
- const MCSymbol &S = SD->getSymbol();
-
- // If this is a variable, then recursively evaluate now.
- if (S.isVariable()) {
- MCValue Target;
- if (!S.getVariableValue()->EvaluateAsRelocatable(Target, Layout))
- report_fatal_error("unable to evaluate offset for variable '" +
- S.getName() + "'");
-
- // Verify that any used symbols are defined.
- if (Target.getSymA() && Target.getSymA()->getSymbol().isUndefined())
- report_fatal_error("unable to evaluate offset to undefined symbol '" +
- Target.getSymA()->getSymbol().getName() + "'");
- if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined())
- report_fatal_error("unable to evaluate offset to undefined symbol '" +
- Target.getSymB()->getSymbol().getName() + "'");
-
- uint64_t Address = Target.getConstant();
- if (Target.getSymA())
- Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
- Target.getSymA()->getSymbol()), Layout);
- if (Target.getSymB())
- Address += getSymbolAddress(&Layout.getAssembler().getSymbolData(
- Target.getSymB()->getSymbol()), Layout);
- return Address;
- }
+ Write32(TargetObjectWriter->getCPUType());
+ Write32(TargetObjectWriter->getCPUSubtype());
- return getSectionAddress(SD->getFragment()->getParent()) +
- Layout.getSymbolOffset(SD);
- }
- uint64_t getFragmentAddress(const MCFragment *Fragment,
- const MCAsmLayout &Layout) const {
- return getSectionAddress(Fragment->getParent()) +
- Layout.getFragmentOffset(Fragment);
- }
+ Write32(macho::HFT_Object);
+ Write32(NumLoadCommands);
+ Write32(LoadCommandsSize);
+ Write32(Flags);
+ if (is64Bit())
+ Write32(0); // reserved
- uint64_t getPaddingSize(const MCSectionData *SD,
- const MCAsmLayout &Layout) const {
- uint64_t EndAddr = getSectionAddress(SD) + Layout.getSectionAddressSize(SD);
- unsigned Next = SD->getLayoutOrder() + 1;
- if (Next >= Layout.getSectionOrder().size())
- return 0;
-
- const MCSectionData &NextSD = *Layout.getSectionOrder()[Next];
- if (NextSD.getSection().isVirtualSection())
- return 0;
- return OffsetToAlignment(EndAddr, NextSD.getAlignment());
- }
+ assert(OS.tell() - Start ==
+ (is64Bit() ? macho::Header64Size : macho::Header32Size));
+}
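
A field-for-field mirror of what WriteHeader streams out: seven 32-bit words (28 bytes) for a 32-bit object, plus one reserved word (32 bytes total) for 64-bit, matching the size assertion at the end of the function. The struct below is a sketch, not the real <mach-o/loader.h> declaration:

    #include <cstdint>

    struct MachHeader64Sketch {
      uint32_t Magic;      // HM_Object64 (or HM_Object32 without Reserved)
      uint32_t CPUType;
      uint32_t CPUSubtype;
      uint32_t Filetype;   // HFT_Object for .o files
      uint32_t NCmds;      // NumLoadCommands
      uint32_t SizeOfCmds; // LoadCommandsSize
      uint32_t Flags;      // e.g. HF_SubsectionsViaSymbols
      uint32_t Reserved;   // present only in the 64-bit header
    };
    // 8 * 4 bytes; drop Reserved for the 28-byte 32-bit variant.
    static_assert(sizeof(MachHeader64Sketch) == 32, "mach_header_64 is 32 bytes");
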
-public:
- MachObjectWriter(MCMachObjectTargetWriter *MOTW, raw_ostream &_OS,
- bool _IsLittleEndian)
- : MCObjectWriter(_OS, _IsLittleEndian), TargetObjectWriter(MOTW) {
+/// WriteSegmentLoadCommand - Write a segment load command.
+///
+/// \arg NumSections - The number of sections in this segment.
+/// \arg VMSize - The size of the segment in memory.
+/// \arg SectionDataStartOffset - The file offset of the section data.
+/// \arg SectionDataSize - The total size of the sections.
+void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections,
+ uint64_t VMSize,
+ uint64_t SectionDataStartOffset,
+ uint64_t SectionDataSize) {
+ // struct segment_command (56 bytes) or
+ // struct segment_command_64 (72 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ unsigned SegmentLoadCommandSize =
+ is64Bit() ? macho::SegmentLoadCommand64Size:
+ macho::SegmentLoadCommand32Size;
+ Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
+ Write32(SegmentLoadCommandSize +
+ NumSections * (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
+
+ WriteBytes("", 16);
+ if (is64Bit()) {
+ Write64(0); // vmaddr
+ Write64(VMSize); // vmsize
+ Write64(SectionDataStartOffset); // file offset
+ Write64(SectionDataSize); // file size
+ } else {
+ Write32(0); // vmaddr
+ Write32(VMSize); // vmsize
+ Write32(SectionDataStartOffset); // file offset
+ Write32(SectionDataSize); // file size
}
+ Write32(0x7); // maxprot
+ Write32(0x7); // initprot
+ Write32(NumSections);
+ Write32(0); // flags
- /// @name Target Writer Proxy Accessors
- /// @{
+ assert(OS.tell() - Start == SegmentLoadCommandSize);
+}
- bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
- bool isARM() const {
- uint32_t CPUType = TargetObjectWriter->getCPUType() & ~mach::CTFM_ArchMask;
- return CPUType == mach::CTM_ARM;
+void MachObjectWriter::WriteSection(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCSectionData &SD,
+ uint64_t FileOffset,
+ uint64_t RelocationsStart,
+ unsigned NumRelocations) {
+ uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+
+ // The offset is unused for virtual sections.
+ if (SD.getSection().isVirtualSection()) {
+ assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
+ FileOffset = 0;
}
- /// @}
-
- void WriteHeader(unsigned NumLoadCommands, unsigned LoadCommandsSize,
- bool SubsectionsViaSymbols) {
- uint32_t Flags = 0;
-
- if (SubsectionsViaSymbols)
- Flags |= macho::HF_SubsectionsViaSymbols;
-
- // struct mach_header (28 bytes) or
- // struct mach_header_64 (32 bytes)
-
- uint64_t Start = OS.tell();
- (void) Start;
-
- Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32);
-
- Write32(TargetObjectWriter->getCPUType());
- Write32(TargetObjectWriter->getCPUSubtype());
-
- Write32(macho::HFT_Object);
- Write32(NumLoadCommands);
- Write32(LoadCommandsSize);
- Write32(Flags);
- if (is64Bit())
- Write32(0); // reserved
-
- assert(OS.tell() - Start ==
- (is64Bit() ? macho::Header64Size : macho::Header32Size));
+ // struct section (68 bytes) or
+ // struct section_64 (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
+ WriteBytes(Section.getSectionName(), 16);
+ WriteBytes(Section.getSegmentName(), 16);
+ if (is64Bit()) {
+ Write64(getSectionAddress(&SD)); // address
+ Write64(SectionSize); // size
+ } else {
+ Write32(getSectionAddress(&SD)); // address
+ Write32(SectionSize); // size
}
+ Write32(FileOffset);
+
+ unsigned Flags = Section.getTypeAndAttributes();
+ if (SD.hasInstructions())
+ Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
+
+ assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
+ Write32(Log2_32(SD.getAlignment()));
+ Write32(NumRelocations ? RelocationsStart : 0);
+ Write32(NumRelocations);
+ Write32(Flags);
+ Write32(IndirectSymBase.lookup(&SD)); // reserved1
+ Write32(Section.getStubSize()); // reserved2
+ if (is64Bit())
+ Write32(0); // reserved3
+
+ assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
+ macho::Section32Size));
+}
- /// WriteSegmentLoadCommand - Write a segment load command.
- ///
- /// \arg NumSections - The number of sections in this segment.
- /// \arg SectionDataSize - The total size of the sections.
- void WriteSegmentLoadCommand(unsigned NumSections,
- uint64_t VMSize,
- uint64_t SectionDataStartOffset,
- uint64_t SectionDataSize) {
- // struct segment_command (56 bytes) or
- // struct segment_command_64 (72 bytes)
-
- uint64_t Start = OS.tell();
- (void) Start;
-
- unsigned SegmentLoadCommandSize =
- is64Bit() ? macho::SegmentLoadCommand64Size:
- macho::SegmentLoadCommand32Size;
- Write32(is64Bit() ? macho::LCT_Segment64 : macho::LCT_Segment);
- Write32(SegmentLoadCommandSize +
- NumSections * (is64Bit() ? macho::Section64Size :
- macho::Section32Size));
-
- WriteBytes("", 16);
- if (is64Bit()) {
- Write64(0); // vmaddr
- Write64(VMSize); // vmsize
- Write64(SectionDataStartOffset); // file offset
- Write64(SectionDataSize); // file size
- } else {
- Write32(0); // vmaddr
- Write32(VMSize); // vmsize
- Write32(SectionDataStartOffset); // file offset
- Write32(SectionDataSize); // file size
- }
- Write32(0x7); // maxprot
- Write32(0x7); // initprot
- Write32(NumSections);
- Write32(0); // flags
+void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset,
+ uint32_t NumSymbols,
+ uint32_t StringTableOffset,
+ uint32_t StringTableSize) {
+ // struct symtab_command (24 bytes)
- assert(OS.tell() - Start == SegmentLoadCommandSize);
- }
+ uint64_t Start = OS.tell();
+ (void) Start;
- void WriteSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCSectionData &SD, uint64_t FileOffset,
- uint64_t RelocationsStart, unsigned NumRelocations) {
- uint64_t SectionSize = Layout.getSectionAddressSize(&SD);
+ Write32(macho::LCT_Symtab);
+ Write32(macho::SymtabLoadCommandSize);
+ Write32(SymbolOffset);
+ Write32(NumSymbols);
+ Write32(StringTableOffset);
+ Write32(StringTableSize);
- // The offset is unused for virtual sections.
- if (SD.getSection().isVirtualSection()) {
- assert(Layout.getSectionFileSize(&SD) == 0 && "Invalid file size!");
- FileOffset = 0;
- }
+ assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
+}
- // struct section (68 bytes) or
- // struct section_64 (80 bytes)
+void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
+ uint32_t NumLocalSymbols,
+ uint32_t FirstExternalSymbol,
+ uint32_t NumExternalSymbols,
+ uint32_t FirstUndefinedSymbol,
+ uint32_t NumUndefinedSymbols,
+ uint32_t IndirectSymbolOffset,
+ uint32_t NumIndirectSymbols) {
+ // struct dysymtab_command (80 bytes)
+
+ uint64_t Start = OS.tell();
+ (void) Start;
+
+ Write32(macho::LCT_Dysymtab);
+ Write32(macho::DysymtabLoadCommandSize);
+ Write32(FirstLocalSymbol);
+ Write32(NumLocalSymbols);
+ Write32(FirstExternalSymbol);
+ Write32(NumExternalSymbols);
+ Write32(FirstUndefinedSymbol);
+ Write32(NumUndefinedSymbols);
+ Write32(0); // tocoff
+ Write32(0); // ntoc
+ Write32(0); // modtaboff
+ Write32(0); // nmodtab
+ Write32(0); // extrefsymoff
+ Write32(0); // nextrefsyms
+ Write32(IndirectSymbolOffset);
+ Write32(NumIndirectSymbols);
+ Write32(0); // extreloff
+ Write32(0); // nextrel
+ Write32(0); // locreloff
+ Write32(0); // nlocrel
+
+ assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
+}
- uint64_t Start = OS.tell();
- (void) Start;
+void MachObjectWriter::WriteNlist(MachSymbolData &MSD,
+ const MCAsmLayout &Layout) {
+ MCSymbolData &Data = *MSD.SymbolData;
+ const MCSymbol &Symbol = Data.getSymbol();
+ uint8_t Type = 0;
+ uint16_t Flags = Data.getFlags();
+ uint32_t Address = 0;
- const MCSectionMachO &Section = cast<MCSectionMachO>(SD.getSection());
- WriteBytes(Section.getSectionName(), 16);
- WriteBytes(Section.getSegmentName(), 16);
- if (is64Bit()) {
- Write64(getSectionAddress(&SD)); // address
- Write64(SectionSize); // size
+ // Set the N_TYPE bits. See <mach-o/nlist.h>.
+ //
+ // FIXME: Are the prebound or indirect fields possible here?
+ if (Symbol.isUndefined())
+ Type = macho::STT_Undefined;
+ else if (Symbol.isAbsolute())
+ Type = macho::STT_Absolute;
+ else
+ Type = macho::STT_Section;
+
+ // FIXME: Set STAB bits.
+
+ if (Data.isPrivateExtern())
+ Type |= macho::STF_PrivateExtern;
+
+ // Set external bit.
+ if (Data.isExternal() || Symbol.isUndefined())
+ Type |= macho::STF_External;
+
+ // Compute the symbol address.
+ if (Symbol.isDefined()) {
+ if (Symbol.isAbsolute()) {
+ Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
} else {
- Write32(getSectionAddress(&SD)); // address
- Write32(SectionSize); // size
+ Address = getSymbolAddress(&Data, Layout);
+ }
+ } else if (Data.isCommon()) {
+ // Common symbols are encoded with the size in the address
+ // field, and their alignment in the flags.
+ Address = Data.getCommonSize();
+
+ // Common alignment is packed into the 'desc' bits.
+ if (unsigned Align = Data.getCommonAlignment()) {
+ unsigned Log2Size = Log2_32(Align);
+ assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
+ if (Log2Size > 15)
+ report_fatal_error("invalid 'common' alignment '" +
+ Twine(Align) + "'");
+ // FIXME: Keep this mask with the SymbolFlags enumeration.
+ Flags = (Flags & 0xF0FF) | (Log2Size << 8);
}
- Write32(FileOffset);
-
- unsigned Flags = Section.getTypeAndAttributes();
- if (SD.hasInstructions())
- Flags |= MCSectionMachO::S_ATTR_SOME_INSTRUCTIONS;
-
- assert(isPowerOf2_32(SD.getAlignment()) && "Invalid alignment!");
- Write32(Log2_32(SD.getAlignment()));
- Write32(NumRelocations ? RelocationsStart : 0);
- Write32(NumRelocations);
- Write32(Flags);
- Write32(IndirectSymBase.lookup(&SD)); // reserved1
- Write32(Section.getStubSize()); // reserved2
- if (is64Bit())
- Write32(0); // reserved3
-
- assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size :
- macho::Section32Size));
}
- void WriteSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
- uint32_t StringTableOffset,
- uint32_t StringTableSize) {
- // struct symtab_command (24 bytes)
+ // struct nlist (12 bytes)
- uint64_t Start = OS.tell();
- (void) Start;
+ Write32(MSD.StringIndex);
+ Write8(Type);
+ Write8(MSD.SectionIndex);
- Write32(macho::LCT_Symtab);
- Write32(macho::SymtabLoadCommandSize);
- Write32(SymbolOffset);
- Write32(NumSymbols);
- Write32(StringTableOffset);
- Write32(StringTableSize);
-
- assert(OS.tell() - Start == macho::SymtabLoadCommandSize);
- }
-
- void WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol,
- uint32_t NumLocalSymbols,
- uint32_t FirstExternalSymbol,
- uint32_t NumExternalSymbols,
- uint32_t FirstUndefinedSymbol,
- uint32_t NumUndefinedSymbols,
- uint32_t IndirectSymbolOffset,
- uint32_t NumIndirectSymbols) {
- // struct dysymtab_command (80 bytes)
-
- uint64_t Start = OS.tell();
- (void) Start;
-
- Write32(macho::LCT_Dysymtab);
- Write32(macho::DysymtabLoadCommandSize);
- Write32(FirstLocalSymbol);
- Write32(NumLocalSymbols);
- Write32(FirstExternalSymbol);
- Write32(NumExternalSymbols);
- Write32(FirstUndefinedSymbol);
- Write32(NumUndefinedSymbols);
- Write32(0); // tocoff
- Write32(0); // ntoc
- Write32(0); // modtaboff
- Write32(0); // nmodtab
- Write32(0); // extrefsymoff
- Write32(0); // nextrefsyms
- Write32(IndirectSymbolOffset);
- Write32(NumIndirectSymbols);
- Write32(0); // extreloff
- Write32(0); // nextrel
- Write32(0); // locreloff
- Write32(0); // nlocrel
-
- assert(OS.tell() - Start == macho::DysymtabLoadCommandSize);
- }
-
- void WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) {
- MCSymbolData &Data = *MSD.SymbolData;
- const MCSymbol &Symbol = Data.getSymbol();
- uint8_t Type = 0;
- uint16_t Flags = Data.getFlags();
- uint32_t Address = 0;
+ // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
+ // value.
+ Write16(Flags);
+ if (is64Bit())
+ Write64(Address);
+ else
+ Write32(Address);
+}
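
WriteNlist folds a common symbol's alignment into bits 8-11 of the 16-bit flags ('desc') word via (Flags & 0xF0FF) | (Log2Size << 8), which is why alignments above 2^15 are rejected. The same packing as a standalone sketch (packCommonAlignment is an illustrative name; Align must be a nonzero power of two):

    #include <cassert>
    #include <cstdint>

    static uint16_t packCommonAlignment(uint16_t Flags, unsigned Align) {
      unsigned Log2Size = 0;
      while ((1u << Log2Size) < Align)
        ++Log2Size;
      assert((1u << Log2Size) == Align && "alignment must be a power of two");
      assert(Log2Size <= 15 && "alignment too large for the 'desc' field");
      // Clear bits 8-11, then deposit log2(alignment) there.
      return uint16_t((Flags & 0xF0FF) | (Log2Size << 8));
    }
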
- // Set the N_TYPE bits. See <mach-o/nlist.h>.
- //
- // FIXME: Are the prebound or indirect fields possible here?
- if (Symbol.isUndefined())
- Type = macho::STT_Undefined;
- else if (Symbol.isAbsolute())
- Type = macho::STT_Absolute;
- else
- Type = macho::STT_Section;
-
- // FIXME: Set STAB bits.
-
- if (Data.isPrivateExtern())
- Type |= macho::STF_PrivateExtern;
-
- // Set external bit.
- if (Data.isExternal() || Symbol.isUndefined())
- Type |= macho::STF_External;
-
- // Compute the symbol address.
- if (Symbol.isDefined()) {
- if (Symbol.isAbsolute()) {
- Address = cast<MCConstantExpr>(Symbol.getVariableValue())->getValue();
- } else {
- Address = getSymbolAddress(&Data, Layout);
- }
- } else if (Data.isCommon()) {
- // Common symbols are encoded with the size in the address
- // field, and their alignment in the flags.
- Address = Data.getCommonSize();
-
- // Common alignment is packed into the 'desc' bits.
- if (unsigned Align = Data.getCommonAlignment()) {
- unsigned Log2Size = Log2_32(Align);
- assert((1U << Log2Size) == Align && "Invalid 'common' alignment!");
- if (Log2Size > 15)
- report_fatal_error("invalid 'common' alignment '" +
- Twine(Align) + "'");
- // FIXME: Keep this mask with the SymbolFlags enumeration.
- Flags = (Flags & 0xF0FF) | (Log2Size << 8);
- }
- }
+void MachObjectWriter::RecordRelocation(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ TargetObjectWriter->RecordRelocation(this, Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+}
- // struct nlist (12 bytes)
+void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) {
+ // This is the point where 'as' creates actual symbols for indirect symbols
+ // (in the following two passes). It would be easier for us to do this sooner
+ // when we see the attribute, but that makes getting the order in the symbol
+ // table much more complicated than it is worth.
+ //
+ // FIXME: Revisit this when the dust settles.
- Write32(MSD.StringIndex);
- Write8(Type);
- Write8(MSD.SectionIndex);
+ // Bind non lazy symbol pointers first.
+ unsigned IndirectIndex = 0;
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
- // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc'
- // value.
- Write16(Flags);
- if (is64Bit())
- Write64(Address);
- else
- Write32(Address);
- }
+ if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
+ continue;
- // FIXME: We really need to improve the relocation validation. Basically, we
- // want to implement a separate computation which evaluates the relocation
- // entry as the linker would, and verifies that the resultant fixup value is
- // exactly what the encoder wanted. This will catch several classes of
- // problems:
- //
- // - Relocation entry bugs, the two algorithms are unlikely to have the same
- // exact bug.
- //
- // - Relaxation issues, where we forget to relax something.
- //
- // - Input errors, where something cannot be correctly encoded. 'as' allows
- // these through in many cases.
+ // Initialize the section indirect symbol base, if necessary.
+ if (!IndirectSymBase.count(it->SectionData))
+ IndirectSymBase[it->SectionData] = IndirectIndex;
- static bool isFixupKindRIPRel(unsigned Kind) {
- return Kind == X86::reloc_riprel_4byte ||
- Kind == X86::reloc_riprel_4byte_movq_load;
+ Asm.getOrCreateSymbolData(*it->Symbol);
}
- void RecordX86_64Relocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
- unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
- unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-
- // See <reloc.h>.
- uint32_t FixupOffset =
- Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
- uint32_t FixupAddress =
- getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
- int64_t Value = 0;
- unsigned Index = 0;
- unsigned IsExtern = 0;
- unsigned Type = 0;
-
- Value = Target.getConstant();
-
- if (IsPCRel) {
- // Compensate for the relocation offset, Darwin x86_64 relocations only
- // have the addend and appear to have attempted to define it to be the
- // actual expression addend without the PCrel bias. However, instructions
- // with data following the relocation are not accommodated for (see comment
- // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
- Value += 1LL << Log2Size;
- }
- if (Target.isAbsolute()) { // constant
- // SymbolNum of 0 indicates the absolute section.
- Type = macho::RIT_X86_64_Unsigned;
- Index = 0;
-
- // FIXME: I believe this is broken, I don't think the linker can
- // understand it. I think it would require a local relocation, but I'm not
- // sure if that would work either. The official way to get an absolute
- // PCrel relocation is to use an absolute symbol (which we don't support
- // yet).
- if (IsPCRel) {
- IsExtern = 1;
- Type = macho::RIT_X86_64_Branch;
- }
- } else if (Target.getSymB()) { // A - B + constant
- const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData &A_SD = Asm.getSymbolData(*A);
- const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
-
- const MCSymbol *B = &Target.getSymB()->getSymbol();
- MCSymbolData &B_SD = Asm.getSymbolData(*B);
- const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
-
- // Neither symbol can be modified.
- if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
- Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported relocation of modified symbol");
-
- // We don't support PCrel relocations of differences. Darwin 'as' doesn't
- // implement most of these correctly.
- if (IsPCRel)
- report_fatal_error("unsupported pc-relative relocation of difference");
-
- // The support for the situation where one or both of the symbols would
- // require a local relocation is handled just like if the symbols were
- // external. This is certainly used in the case of debug sections where
- // the section has only temporary symbols and thus the symbols don't have
- // base symbols. This is encoded using the section ordinal and
- // non-extern relocation entries.
-
- // Darwin 'as' doesn't emit correct relocations for this (it ends up with
- // a single SIGNED relocation); reject it for now. Except the case where
- // both symbols don't have a base, equal but both NULL.
- if (A_Base == B_Base && A_Base)
- report_fatal_error("unsupported relocation with identical base");
-
- Value += getSymbolAddress(&A_SD, Layout) -
- (A_Base == NULL ? 0 : getSymbolAddress(A_Base, Layout));
- Value -= getSymbolAddress(&B_SD, Layout) -
- (B_Base == NULL ? 0 : getSymbolAddress(B_Base, Layout));
-
- if (A_Base) {
- Index = A_Base->getIndex();
- IsExtern = 1;
- }
- else {
- Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
- }
- Type = macho::RIT_X86_64_Unsigned;
-
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Relocations[Fragment->getParent()].push_back(MRE);
-
- if (B_Base) {
- Index = B_Base->getIndex();
- IsExtern = 1;
- }
- else {
- Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
- }
- Type = macho::RIT_X86_64_Subtractor;
- } else {
- const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
- MCSymbolData &SD = Asm.getSymbolData(*Symbol);
- const MCSymbolData *Base = Asm.getAtom(&SD);
-
- // Relocations inside debug sections always use local relocations when
- // possible. This seems to be done because the debugger doesn't fully
- // understand x86_64 relocation entries, and expects to find values that
- // have already been fixed up.
- if (Symbol->isInSection()) {
- const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
- Fragment->getParent()->getSection());
- if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
- Base = 0;
- }
+ // Then lazy symbol pointers and symbol stubs.
+ IndirectIndex = 0;
+ for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
+ const MCSectionMachO &Section =
+ cast<MCSectionMachO>(it->SectionData->getSection());
- // x86_64 almost always uses external relocations, except when there is no
- // symbol to use as a base address (a local symbol with no preceding
- // non-local symbol).
- if (Base) {
- Index = Base->getIndex();
- IsExtern = 1;
-
- // Add the local offset, if needed.
- if (Base != &SD)
- Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
- } else if (Symbol->isInSection() && !Symbol->isVariable()) {
- // The index is the section ordinal (1-based).
- Index = SD.getFragment()->getParent()->getOrdinal() + 1;
- IsExtern = 0;
- Value += getSymbolAddress(&SD, Layout);
-
- if (IsPCRel)
- Value -= FixupAddress + (1 << Log2Size);
- } else if (Symbol->isVariable()) {
- const MCExpr *Value = Symbol->getVariableValue();
- int64_t Res;
- bool isAbs = Value->EvaluateAsAbsolute(Res, Layout, SectionAddress);
- if (isAbs) {
- FixedValue = Res;
- return;
- } else {
- report_fatal_error("unsupported relocation of variable '" +
- Symbol->getName() + "'");
- }
- } else {
- report_fatal_error("unsupported relocation of undefined symbol '" +
- Symbol->getName() + "'");
- }
+ if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
+ Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
+ continue;
- MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
- if (IsPCRel) {
- if (IsRIPRel) {
- if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
- // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
- // rewrite the movq to an leaq at link time if the symbol ends up in
- // the same linkage unit.
- if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
- Type = macho::RIT_X86_64_GOTLoad;
- else
- Type = macho::RIT_X86_64_GOT;
- } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- Type = macho::RIT_X86_64_TLV;
- } else if (Modifier != MCSymbolRefExpr::VK_None) {
- report_fatal_error("unsupported symbol modifier in relocation");
- } else {
- Type = macho::RIT_X86_64_Signed;
-
- // The Darwin x86_64 relocation format has a problem where it cannot
- // encode an address (L<foo> + <constant>) which is outside the atom
- // containing L<foo>. Generally, this shouldn't occur but it does
- // happen when we have a RIPrel instruction with data following the
- // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
- // adjustment Darwin x86_64 uses, the offset is still negative and
- // the linker has no way to recognize this.
- //
- // To work around this, Darwin uses several special relocation types
- // to indicate the offsets. However, the specification or
- // implementation of these seems to also be incomplete; they should
- // adjust the addend as well based on the actual encoded instruction
- // (the additional bias), but instead appear to just look at the
- // final offset.
- switch (-(Target.getConstant() + (1LL << Log2Size))) {
- case 1: Type = macho::RIT_X86_64_Signed1; break;
- case 2: Type = macho::RIT_X86_64_Signed2; break;
- case 4: Type = macho::RIT_X86_64_Signed4; break;
- }
- }
- } else {
- if (Modifier != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported symbol modifier in branch "
- "relocation");
-
- Type = macho::RIT_X86_64_Branch;
- }
- } else {
- if (Modifier == MCSymbolRefExpr::VK_GOT) {
- Type = macho::RIT_X86_64_GOT;
- } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
- // GOTPCREL is allowed as a modifier on non-PCrel instructions, in
- // which case all we do is set the PCrel bit in the relocation entry;
- // this is used with exception handling, for example. The source is
- // required to include any necessary offset directly.
- Type = macho::RIT_X86_64_GOT;
- IsPCRel = 1;
- } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
- report_fatal_error("TLVP symbol modifier should have been rip-rel");
- } else if (Modifier != MCSymbolRefExpr::VK_None)
- report_fatal_error("unsupported symbol modifier in relocation");
- else
- Type = macho::RIT_X86_64_Unsigned;
- }
- }
+ // Initialize the section indirect symbol base, if necessary.
+ if (!IndirectSymBase.count(it->SectionData))
+ IndirectSymBase[it->SectionData] = IndirectIndex;
- // x86_64 always writes custom values into the fixups.
- FixedValue = Value;
-
- // struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Relocations[Fragment->getParent()].push_back(MRE);
+ // Set the symbol type to undefined lazy, but only on construction.
+ //
+ // FIXME: Do not hardcode.
+ bool Created;
+ MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
+ if (Created)
+ Entry.setFlags(Entry.getFlags() | 0x0001);
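+ // (The 0x0001 flag ends up in the nlist n_desc field when the symbol is
+ // written out; it presumably corresponds to REFERENCE_FLAG_UNDEFINED_LAZY
+ // from <mach-o/nlist.h>, which is what the FIXME above wants a named
+ // constant for.)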
}
+}
- void RecordScatteredRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- unsigned Log2Size,
- uint64_t &FixedValue) {
- uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_Vanilla;
-
- // See <reloc.h>.
- const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
- if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
-
- uint32_t Value = getSymbolAddress(A_SD, Layout);
- uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
- FixedValue += SecAddr;
- uint32_t Value2 = 0;
-
- if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
- if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression");
-
- // Select the appropriate difference relocation type.
- //
- // Note that there is no longer any semantic difference between these two
- // relocation types from the linker's point of view; this is done solely
- // for pedantic compatibility with 'as'.
- Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
- (unsigned)macho::RIT_Generic_LocalDifference;
- Value2 = getSymbolAddress(B_SD, Layout);
- FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
- }
-
- // Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == macho::RIT_Difference ||
- Type == macho::RIT_Generic_LocalDifference) {
- macho::RelocationEntry MRE;
- MRE.Word0 = ((0 << 0) |
- (macho::RIT_Pair << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
- Relocations[Fragment->getParent()].push_back(MRE);
+/// ComputeSymbolTable - Compute the symbol table data.
+///
+/// \param StringTable [out] - The string table data.
+/// \param LocalSymbolData [out] - The symbol data for section-local symbols.
+/// \param ExternalSymbolData [out] - The symbol data for external symbols.
+/// \param UndefinedSymbolData [out] - The symbol data for undefined symbols.
+void MachObjectWriter::
+ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
+ std::vector<MachSymbolData> &LocalSymbolData,
+ std::vector<MachSymbolData> &ExternalSymbolData,
+ std::vector<MachSymbolData> &UndefinedSymbolData) {
+ // Build section lookup table.
+ DenseMap<const MCSection*, uint8_t> SectionIndexMap;
+ unsigned Index = 1;
+ for (MCAssembler::iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it, ++Index)
+ SectionIndexMap[&it->getSection()] = Index;
+ assert(Index <= 256 && "Too many sections!");
+
+ // Index 0 is always the empty string.
+ StringMap<uint64_t> StringIndexMap;
+ StringTable += '\x00';
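+ // For illustration: interning "foo" and then "bar" leaves the table as
+ // "\0foo\0bar\0", with StringIndexMap["foo"] == 1 and
+ // StringIndexMap["bar"] == 5; a repeated name reuses its existing offset.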
+
+ // Build the symbol arrays and the string table, but only for non-local
+ // symbols.
+ //
+ // The particular order in which we collect the symbols, create the string
+ // table, and then sort the symbols is chosen to match 'as'. Even though it
+ // doesn't matter for correctness, this is important for letting us diff .o
+ // files.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ continue;
+
+ if (!it->isExternal() && !Symbol.isUndefined())
+ continue;
+
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
- Relocations[Fragment->getParent()].push_back(MRE);
- }
-
- void RecordARMScatteredRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- unsigned Log2Size,
- uint64_t &FixedValue) {
- uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_Vanilla;
-
- // See <reloc.h>.
- const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
- if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
-
- uint32_t Value = getSymbolAddress(A_SD, Layout);
- uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
- FixedValue += SecAddr;
- uint32_t Value2 = 0;
-
- if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
- if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression");
-
- // Select the appropriate difference relocation type.
- Type = macho::RIT_Difference;
- Value2 = getSymbolAddress(B_SD, Layout);
- FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
- }
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
- // Relocations are written out in reverse order, so the PAIR comes first.
- if (Type == macho::RIT_Difference ||
- Type == macho::RIT_Generic_LocalDifference) {
- macho::RelocationEntry MRE;
- MRE.Word0 = ((0 << 0) |
- (macho::RIT_Pair << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
- Relocations[Fragment->getParent()].push_back(MRE);
+ if (Symbol.isUndefined()) {
+ MSD.SectionIndex = 0;
+ UndefinedSymbolData.push_back(MSD);
+ } else if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ ExternalSymbolData.push_back(MSD);
+ } else {
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ ExternalSymbolData.push_back(MSD);
}
-
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (Log2Size << 28) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
- Relocations[Fragment->getParent()].push_back(MRE);
}
- void RecordARMMovwMovtRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
- uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Type = macho::RIT_ARM_Half;
-
- // See <reloc.h>.
- const MCSymbol *A = &Target.getSymA()->getSymbol();
- MCSymbolData *A_SD = &Asm.getSymbolData(*A);
-
- if (!A_SD->getFragment())
- report_fatal_error("symbol '" + A->getName() +
- "' can not be undefined in a subtraction expression");
-
- uint32_t Value = getSymbolAddress(A_SD, Layout);
- uint32_t Value2 = 0;
- uint64_t SecAddr = getSectionAddress(A_SD->getFragment()->getParent());
- FixedValue += SecAddr;
-
- if (const MCSymbolRefExpr *B = Target.getSymB()) {
- MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
-
- if (!B_SD->getFragment())
- report_fatal_error("symbol '" + B->getSymbol().getName() +
- "' can not be undefined in a subtraction expression");
-
- // Select the appropriate difference relocation type.
- Type = macho::RIT_ARM_HalfDifference;
- Value2 = getSymbolAddress(B_SD, Layout);
- FixedValue -= getSectionAddress(B_SD->getFragment()->getParent());
- }
-
- // Relocations are written out in reverse order, so the PAIR comes first.
- // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
- //
- // These two r_type relocations always have a pair following them, and
- // their r_length bits are used differently. The encoding of the
- // r_length is as follows:
- // low bit of r_length:
- // 0 - :lower16: for movw instructions
- // 1 - :upper16: for movt instructions
- // high bit of r_length:
- // 0 - arm instructions
- // 1 - thumb instructions
- // The other half of the relocated expression is in the following pair
- // relocation entry, in the low 16 bits of the r_address field.
- unsigned ThumbBit = 0;
- unsigned MovtBit = 0;
- switch ((unsigned)Fixup.getKind()) {
- default: break;
- case ARM::fixup_arm_movt_hi16:
- case ARM::fixup_arm_movt_hi16_pcrel:
- MovtBit = 1;
- break;
- case ARM::fixup_t2_movt_hi16:
- case ARM::fixup_t2_movt_hi16_pcrel:
- MovtBit = 1;
- // Fallthrough
- case ARM::fixup_t2_movw_lo16:
- case ARM::fixup_t2_movw_lo16_pcrel:
- ThumbBit = 1;
- break;
- }
+ // Now add the data for local symbols.
+ for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
+ ie = Asm.symbol_end(); it != ie; ++it) {
+ const MCSymbol &Symbol = it->getSymbol();
+ // Ignore non-linker visible symbols.
+ if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
+ continue;
- if (Type == macho::RIT_ARM_HalfDifference) {
- uint32_t OtherHalf = MovtBit
- ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+ if (it->isExternal() || Symbol.isUndefined())
+ continue;
- macho::RelocationEntry MRE;
- MRE.Word0 = ((OtherHalf << 0) |
- (macho::RIT_Pair << 24) |
- (MovtBit << 28) |
- (ThumbBit << 29) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value2;
- Relocations[Fragment->getParent()].push_back(MRE);
+ uint64_t &Entry = StringIndexMap[Symbol.getName()];
+ if (!Entry) {
+ Entry = StringTable.size();
+ StringTable += Symbol.getName();
+ StringTable += '\x00';
}
- macho::RelocationEntry MRE;
- MRE.Word0 = ((FixupOffset << 0) |
- (Type << 24) |
- (MovtBit << 28) |
- (ThumbBit << 29) |
- (IsPCRel << 30) |
- macho::RF_Scattered);
- MRE.Word1 = Value;
- Relocations[Fragment->getParent()].push_back(MRE);
- }
+ MachSymbolData MSD;
+ MSD.SymbolData = it;
+ MSD.StringIndex = Entry;
- void RecordTLVPRelocation(const MCAssembler &Asm,
- const MCAsmLayout &Layout,
- const MCFragment *Fragment,
- const MCFixup &Fixup, MCValue Target,
- uint64_t &FixedValue) {
- assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
- !is64Bit() &&
- "Should only be called with a 32-bit TLVP relocation!");
-
- unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
- uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- unsigned IsPCRel = 0;
-
- // Get the symbol data.
- MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
- unsigned Index = SD_A->getIndex();
-
- // We're only going to have a second symbol in pic mode and it'll be a
- // subtraction from the picbase. For 32-bit pic the addend is the difference
- // between the picbase and the next address. For 32-bit static the addend
- // is zero.
- if (Target.getSymB()) {
- // If this is a subtraction then we're pcrel.
- uint32_t FixupAddress =
- getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
- MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
- IsPCRel = 1;
- FixedValue = (FixupAddress - getSymbolAddress(SD_B, Layout) +
- Target.getConstant());
- FixedValue += 1ULL << Log2Size;
+ if (Symbol.isAbsolute()) {
+ MSD.SectionIndex = 0;
+ LocalSymbolData.push_back(MSD);
} else {
- FixedValue = 0;
+ MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
+ assert(MSD.SectionIndex && "Invalid section index!");
+ LocalSymbolData.push_back(MSD);
}
-
- // struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = Value;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (1 << 27) | // Extern
- (macho::RIT_Generic_TLV << 28)); // Type
- Relocations[Fragment->getParent()].push_back(MRE);
}
- static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
- unsigned &Log2Size) {
- RelocType = unsigned(macho::RIT_Vanilla);
- Log2Size = ~0U;
+ // External and undefined symbols are required to be in lexicographic order.
+ std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
+ std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
+
+ // Set the symbol indices.
+ Index = 0;
+ for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
+ LocalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
+ ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
+ UndefinedSymbolData[i].SymbolData->setIndex(Index++);
+
+ // The string table is padded to a multiple of 4.
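+ // For example, the 9-byte table "\0foo\0bar\0" grows to 12 bytes here.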
+ while (StringTable.size() % 4)
+ StringTable += '\x00';
+}
- switch (Kind) {
- default:
- return false;
+void MachObjectWriter::computeSectionAddresses(const MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ uint64_t StartAddress = 0;
+ const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
+ for (int i = 0, n = Order.size(); i != n; ++i) {
+ const MCSectionData *SD = Order[i];
+ StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
+ SectionAddress[SD] = StartAddress;
+ StartAddress += Layout.getSectionAddressSize(SD);
+
+ // Explicitly pad the section to match the alignment requirements of the
+ // following one. This is for 'gas' compatibility; it shouldn't
+ // strictly be necessary.
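+ // For example, a 13-byte section followed by a 16-byte-aligned one gets
+ // 3 bytes of padding, keeping file offsets in sync with the addresses
+ // computed above.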
+ StartAddress += getPaddingSize(SD, Layout);
+ }
+}
- case FK_Data_1:
- Log2Size = llvm::Log2_32(1);
- return true;
- case FK_Data_2:
- Log2Size = llvm::Log2_32(2);
- return true;
- case FK_Data_4:
- Log2Size = llvm::Log2_32(4);
- return true;
- case FK_Data_8:
- Log2Size = llvm::Log2_32(8);
- return true;
+void MachObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm,
+ const MCAsmLayout &Layout) {
+ computeSectionAddresses(Asm, Layout);
- // Handle 24-bit branch kinds.
- case ARM::fixup_arm_ldst_pcrel_12:
- case ARM::fixup_arm_pcrel_10:
- case ARM::fixup_arm_adr_pcrel_12:
- case ARM::fixup_arm_condbranch:
- case ARM::fixup_arm_uncondbranch:
- RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
- return true;
+ // Create symbol data for any indirect symbols.
+ BindIndirectSymbols(Asm);
- // Handle Thumb branches.
- case ARM::fixup_arm_thumb_br:
- RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
- Log2Size = llvm::Log2_32(2);
- return true;
-
- case ARM::fixup_arm_thumb_bl:
- case ARM::fixup_arm_thumb_blx:
- RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
- Log2Size = llvm::Log2_32(4);
- return true;
+ // Compute symbol table information and bind symbol indices.
+ ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
+ UndefinedSymbolData);
+}
- case ARM::fixup_arm_movt_hi16:
- case ARM::fixup_arm_movt_hi16_pcrel:
- case ARM::fixup_t2_movt_hi16:
- case ARM::fixup_t2_movt_hi16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_HalfDifference);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
- return true;
+bool MachObjectWriter::
+IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
+ const MCSymbolData &DataA,
+ const MCFragment &FB,
+ bool InSet,
+ bool IsPCRel) const {
+ if (InSet)
+ return true;
- case ARM::fixup_arm_movw_lo16:
- case ARM::fixup_arm_movw_lo16_pcrel:
- case ARM::fixup_t2_movw_lo16:
- case ARM::fixup_t2_movw_lo16_pcrel:
- RelocType = unsigned(macho::RIT_ARM_Half);
- // Report as 'long', even though that is not quite accurate.
- Log2Size = llvm::Log2_32(4);
+ // The effective address is
+ // addr(atom(A)) + offset(A)
+ // - addr(atom(B)) - offset(B)
+ // and the offsets are not relocatable, so the fixup is fully resolved when
+ // addr(atom(A)) - addr(atom(B)) == 0.
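+ // For instance, a fixup for ".long _a - _b" is fully resolved when _a
+ // and _b land in the same atom; across atoms the linker may still move
+ // them relative to each other, so a relocation must be kept.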
+ const MCSymbolData *A_Base = 0, *B_Base = 0;
+
+ const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
+ const MCSection &SecA = SA.getSection();
+ const MCSection &SecB = FB.getParent()->getSection();
+
+ if (IsPCRel) {
+ // The simple (Darwin, except on x86_64) way of dealing with this was to
+ // assume that any reference to a temporary symbol *must* be a temporary
+ // symbol in the same atom, unless the sections differ. Therefore, any PCrel
+ // relocation to a temporary symbol (in the same section) is fully
+ // resolved. This also works in conjunction with absolutized .set, which
+ // requires the compiler to use .set to absolutize the differences between
+ // symbols which the compiler knows to be assembly time constants, so we
+ // don't need to worry about considering symbol differences fully resolved.
+
+ if (!Asm.getBackend().hasReliableSymbolDifference()) {
+ if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
+ return false;
return true;
}
+ } else {
+ if (!TargetObjectWriter->useAggressiveSymbolFolding())
+ return false;
}
- void RecordARMRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {
- unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Log2Size;
- unsigned RelocType = macho::RIT_Vanilla;
- if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
- report_fatal_error("unknown ARM fixup kind!");
- return;
- }
- // If this is a difference or a defined symbol plus an offset, then we need
- // a scattered relocation entry. Differences always require scattered
- // relocations.
- if (Target.getSymB()) {
- if (RelocType == macho::RIT_ARM_Half ||
- RelocType == macho::RIT_ARM_HalfDifference)
- return RecordARMMovwMovtRelocation(Asm, Layout, Fragment, Fixup,
- Target, FixedValue);
- return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
- }
+ const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
- // Get the symbol data, if any.
- MCSymbolData *SD = 0;
- if (Target.getSymA())
- SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ A_Base = FA.getAtom();
+ if (!A_Base)
+ return false;
- // FIXME: For other platforms, we need to use scattered relocations for
- // internal relocations with offsets. If this is an internal relocation
- // with an offset, it also needs a scattered relocation entry.
- //
- // Is this right for ARM?
- uint32_t Offset = Target.getConstant();
- if (IsPCRel && RelocType == macho::RIT_Vanilla)
- Offset += 1 << Log2Size;
- if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
- return RecordARMScatteredRelocation(Asm, Layout, Fragment, Fixup, Target,
- Log2Size, FixedValue);
-
- // See <reloc.h>.
- uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- unsigned Index = 0;
- unsigned IsExtern = 0;
- unsigned Type = 0;
-
- if (Target.isAbsolute()) { // constant
- // FIXME!
- report_fatal_error("FIXME: relocations to absolute targets "
- "not yet implemented");
- } else {
- // Resolve constant variables.
- if (SD->getSymbol().isVariable()) {
- int64_t Res;
- if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
- Res, Layout, SectionAddress)) {
- FixedValue = Res;
- return;
- }
- }
+ B_Base = FB.getAtom();
+ if (!B_Base)
+ return false;
- // Check whether we need an external or internal relocation.
- if (doesSymbolRequireExternRelocation(SD)) {
- IsExtern = 1;
- Index = SD->getIndex();
- // For external relocations, make sure to offset the fixup value to
- // compensate for the addend of the symbol address, if it was
- // undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
- FixedValue -= Layout.getSymbolOffset(SD);
- } else {
- // The index is the section ordinal (1-based).
- const MCSectionData &SymSD = Asm.getSectionData(
- SD->getSymbol().getSection());
- Index = SymSD.getOrdinal() + 1;
- FixedValue += getSectionAddress(&SymSD);
- }
- if (IsPCRel)
- FixedValue -= getSectionAddress(Fragment->getParent());
+ // If the atoms are the same, they are guaranteed to have the same address.
+ if (A_Base == B_Base)
+ return true;
- // The type is determined by the fixup kind.
- Type = RelocType;
- }
+ // Otherwise, we can't prove this is fully resolved.
+ return false;
+}
- // struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Relocations[Fragment->getParent()].push_back(MRE);
+void MachObjectWriter::WriteObject(MCAssembler &Asm,
+                                   const MCAsmLayout &Layout) {
+ unsigned NumSections = Asm.size();
+
+ // The section data starts after the header, the segment load command (and
+ // section headers) and the symbol table.
+ unsigned NumLoadCommands = 1;
+ uint64_t LoadCommandsSize = is64Bit() ?
+ macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
+ macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
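+ // For example, a 64-bit object with 3 sections needs 72 + 3 * 80 = 312
+ // bytes of segment load command (sizes per <mach-o/loader.h>), plus the
+ // 24-byte symtab and 80-byte dysymtab commands added below if needed.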
+
+ // Add the symbol table load command sizes, if used.
+ unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
+ UndefinedSymbolData.size();
+ if (NumSymbols) {
+ NumLoadCommands += 2;
+ LoadCommandsSize += (macho::SymtabLoadCommandSize +
+ macho::DysymtabLoadCommandSize);
}
- void RecordRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, uint64_t &FixedValue) {
- // FIXME: These need to be factored into the target Mach-O writer.
- if (isARM()) {
- RecordARMRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
- return;
- }
- if (is64Bit()) {
- RecordX86_64Relocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
- return;
- }
-
- unsigned IsPCRel = isFixupKindPCRel(Asm, Fixup.getKind());
- unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
-
- // If this is a 32-bit TLVP reloc it's handled a bit differently.
- if (Target.getSymA() &&
- Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
- RecordTLVPRelocation(Asm, Layout, Fragment, Fixup, Target, FixedValue);
- return;
- }
-
- // If this is a difference or a defined symbol plus an offset, then we need
- // a scattered relocation entry.
- // Differences always require scattered relocations.
- if (Target.getSymB())
- return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
-
- // Get the symbol data, if any.
- MCSymbolData *SD = 0;
- if (Target.getSymA())
- SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
-
- // If this is an internal relocation with an offset, it also needs a
- // scattered relocation entry.
- uint32_t Offset = Target.getConstant();
- if (IsPCRel)
- Offset += 1 << Log2Size;
- if (Offset && SD && !doesSymbolRequireExternRelocation(SD))
- return RecordScatteredRelocation(Asm, Layout, Fragment, Fixup,
- Target, Log2Size, FixedValue);
-
- // See <reloc.h>.
- uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
- unsigned Index = 0;
- unsigned IsExtern = 0;
- unsigned Type = 0;
-
- if (Target.isAbsolute()) { // constant
- // SymbolNum of 0 indicates the absolute section.
- //
- // FIXME: Currently, these are never generated (see code below). I cannot
- // find a case where they are actually emitted.
- Type = macho::RIT_Vanilla;
- } else {
- // Resolve constant variables.
- if (SD->getSymbol().isVariable()) {
- int64_t Res;
- if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
- Res, Layout, SectionAddress)) {
- FixedValue = Res;
- return;
- }
- }
-
- // Check whether we need an external or internal relocation.
- if (doesSymbolRequireExternRelocation(SD)) {
- IsExtern = 1;
- Index = SD->getIndex();
- // For external relocations, make sure to offset the fixup value to
- // compensate for the addend of the symbol address, if it was
- // undefined. This occurs with weak definitions, for example.
- if (!SD->Symbol->isUndefined())
- FixedValue -= Layout.getSymbolOffset(SD);
- } else {
- // The index is the section ordinal (1-based).
- const MCSectionData &SymSD = Asm.getSectionData(
- SD->getSymbol().getSection());
- Index = SymSD.getOrdinal() + 1;
- FixedValue += getSectionAddress(&SymSD);
- }
- if (IsPCRel)
- FixedValue -= getSectionAddress(Fragment->getParent());
-
- Type = macho::RIT_Vanilla;
- }
-
- // struct relocation_info (8 bytes)
- macho::RelocationEntry MRE;
- MRE.Word0 = FixupOffset;
- MRE.Word1 = ((Index << 0) |
- (IsPCRel << 24) |
- (Log2Size << 25) |
- (IsExtern << 27) |
- (Type << 28));
- Relocations[Fragment->getParent()].push_back(MRE);
+ // Compute the total size of the section data, as well as its file size and vm
+ // size.
+ uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
+ macho::Header32Size) + LoadCommandsSize;
+ uint64_t SectionDataSize = 0;
+ uint64_t SectionDataFileSize = 0;
+ uint64_t VMSize = 0;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ const MCSectionData &SD = *it;
+ uint64_t Address = getSectionAddress(&SD);
+ uint64_t Size = Layout.getSectionAddressSize(&SD);
+ uint64_t FileSize = Layout.getSectionFileSize(&SD);
+ FileSize += getPaddingSize(&SD, Layout);
+
+ VMSize = std::max(VMSize, Address + Size);
+
+ if (SD.getSection().isVirtualSection())
+ continue;
+
+ SectionDataSize = std::max(SectionDataSize, Address + Size);
+ SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
}
- void BindIndirectSymbols(MCAssembler &Asm) {
- // This is the point where 'as' creates actual symbols for indirect symbols
- // (in the following two passes). It would be easier for us to do this
- // sooner when we see the attribute, but that makes getting the order in the
- // symbol table much more complicated than it is worth.
- //
- // FIXME: Revisit this when the dust settles.
+ // The section data is padded to 4 bytes.
+ //
+ // FIXME: Is this machine dependent?
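+ // (For instance, OffsetToAlignment(13, 4) == 3, so a 13-byte blob is
+ // followed by three zero bytes.)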
+ unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
+ SectionDataFileSize += SectionDataPadding;
+
+ // Write the prolog, starting with the header and load command...
+ WriteHeader(NumLoadCommands, LoadCommandsSize,
+ Asm.getSubsectionsViaSymbols());
+ WriteSegmentLoadCommand(NumSections, VMSize,
+ SectionDataStart, SectionDataSize);
+
+ // ... and then the section headers.
+ uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ unsigned NumRelocs = Relocs.size();
+ uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
+ WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
+ RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+ }
- // Bind non lazy symbol pointers first.
- unsigned IndirectIndex = 0;
- for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
- ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
- const MCSectionMachO &Section =
- cast<MCSectionMachO>(it->SectionData->getSection());
+ // Write the symbol table load command, if used.
+ if (NumSymbols) {
+ unsigned FirstLocalSymbol = 0;
+ unsigned NumLocalSymbols = LocalSymbolData.size();
+ unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
+ unsigned NumExternalSymbols = ExternalSymbolData.size();
+ unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
+ unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
+ unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
+ unsigned NumSymTabSymbols =
+ NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
+ uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
+ uint64_t IndirectSymbolOffset = 0;
+
+ // If used, the indirect symbols are written after the section data.
+ if (NumIndirectSymbols)
+ IndirectSymbolOffset = RelocTableEnd;
+
+ // The symbol table is written after the indirect symbol data.
+ uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
+
+ // The string table is written after the symbol table.
+ uint64_t StringTableOffset =
+ SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
+ macho::Nlist32Size);
+ WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
+ StringTableOffset, StringTable.size());
+
+ WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
+ FirstExternalSymbol, NumExternalSymbols,
+ FirstUndefinedSymbol, NumUndefinedSymbols,
+ IndirectSymbolOffset, NumIndirectSymbols);
+ }
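+ // At this point the file layout is fixed: header, load commands, section
+ // data, relocation entries, indirect symbol table, nlist symbol table,
+ // and finally the string table; the rest of this function streams the
+ // pieces out in exactly that order.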
- if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS)
- continue;
+ // Write the actual section data.
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ Asm.WriteSectionData(it, Layout);
- // Initialize the section indirect symbol base, if necessary.
- if (!IndirectSymBase.count(it->SectionData))
- IndirectSymBase[it->SectionData] = IndirectIndex;
+ uint64_t Pad = getPaddingSize(it, Layout);
+ for (unsigned int i = 0; i < Pad; ++i)
+ Write8(0);
+ }
- Asm.getOrCreateSymbolData(*it->Symbol);
- }
+ // Write the extra padding.
+ WriteZeros(SectionDataPadding);
- // Then lazy symbol pointers and symbol stubs.
- IndirectIndex = 0;
- for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(),
- ie = Asm.indirect_symbol_end(); it != ie; ++it, ++IndirectIndex) {
- const MCSectionMachO &Section =
- cast<MCSectionMachO>(it->SectionData->getSection());
-
- if (Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS &&
- Section.getType() != MCSectionMachO::S_SYMBOL_STUBS)
- continue;
-
- // Initialize the section indirect symbol base, if necessary.
- if (!IndirectSymBase.count(it->SectionData))
- IndirectSymBase[it->SectionData] = IndirectIndex;
-
- // Set the symbol type to undefined lazy, but only on construction.
- //
- // FIXME: Do not hardcode.
- bool Created;
- MCSymbolData &Entry = Asm.getOrCreateSymbolData(*it->Symbol, &Created);
- if (Created)
- Entry.setFlags(Entry.getFlags() | 0x0001);
+ // Write the relocation entries.
+ for (MCAssembler::const_iterator it = Asm.begin(),
+ ie = Asm.end(); it != ie; ++it) {
+ // Write the section relocation entries, in reverse order to match 'as'
+ // (approximately, the exact algorithm is more complicated than this).
+ std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+ for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
+ Write32(Relocs[e - i - 1].Word0);
+ Write32(Relocs[e - i - 1].Word1);
}
}
- /// ComputeSymbolTable - Compute the symbol table data
- ///
- /// \param StringTable [out] - The string table data.
- /// \param StringIndexMap [out] - Map from symbol names to offsets in the
- /// string table.
- void ComputeSymbolTable(MCAssembler &Asm, SmallString<256> &StringTable,
- std::vector<MachSymbolData> &LocalSymbolData,
- std::vector<MachSymbolData> &ExternalSymbolData,
- std::vector<MachSymbolData> &UndefinedSymbolData) {
- // Build section lookup table.
- DenseMap<const MCSection*, uint8_t> SectionIndexMap;
- unsigned Index = 1;
- for (MCAssembler::iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it, ++Index)
- SectionIndexMap[&it->getSection()] = Index;
- assert(Index <= 256 && "Too many sections!");
-
- // Index 0 is always the empty string.
- StringMap<uint64_t> StringIndexMap;
- StringTable += '\x00';
-
- // Build the symbol arrays and the string table, but only for non-local
- // symbols.
- //
- // The particular order in which we collect the symbols, create the string
- // table, and then sort the symbols is chosen to match 'as'. Even though it
- // doesn't matter for correctness, this is important for letting us diff .o
- // files.
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
- ie = Asm.symbol_end(); it != ie; ++it) {
- const MCSymbol &Symbol = it->getSymbol();
-
- // Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
- continue;
-
- if (!it->isExternal() && !Symbol.isUndefined())
- continue;
-
- uint64_t &Entry = StringIndexMap[Symbol.getName()];
- if (!Entry) {
- Entry = StringTable.size();
- StringTable += Symbol.getName();
- StringTable += '\x00';
+ // Write the symbol table data, if used.
+ if (NumSymbols) {
+ // Write the indirect symbol entries.
+ for (MCAssembler::const_indirect_symbol_iterator
+ it = Asm.indirect_symbol_begin(),
+ ie = Asm.indirect_symbol_end(); it != ie; ++it) {
+ // Indirect symbols in the non-lazy symbol pointer section have some
+ // special handling.
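+ // (ISF_Local and ISF_Absolute mirror INDIRECT_SYMBOL_LOCAL and
+ // INDIRECT_SYMBOL_ABS from <mach-o/loader.h>; such entries carry flag
+ // bits instead of a symbol table index.)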
+ const MCSectionMachO &Section =
+ static_cast<const MCSectionMachO&>(it->SectionData->getSection());
+ if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
+ // If this symbol is defined and internal, mark it as such.
+ if (it->Symbol->isDefined() &&
+ !Asm.getSymbolData(*it->Symbol).isExternal()) {
+ uint32_t Flags = macho::ISF_Local;
+ if (it->Symbol->isAbsolute())
+ Flags |= macho::ISF_Absolute;
+ Write32(Flags);
+ continue;
+ }
}
- MachSymbolData MSD;
- MSD.SymbolData = it;
- MSD.StringIndex = Entry;
-
- if (Symbol.isUndefined()) {
- MSD.SectionIndex = 0;
- UndefinedSymbolData.push_back(MSD);
- } else if (Symbol.isAbsolute()) {
- MSD.SectionIndex = 0;
- ExternalSymbolData.push_back(MSD);
- } else {
- MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
- assert(MSD.SectionIndex && "Invalid section index!");
- ExternalSymbolData.push_back(MSD);
- }
+ Write32(Asm.getSymbolData(*it->Symbol).getIndex());
}
- // Now add the data for local symbols.
- for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
- ie = Asm.symbol_end(); it != ie; ++it) {
- const MCSymbol &Symbol = it->getSymbol();
-
- // Ignore non-linker visible symbols.
- if (!Asm.isSymbolLinkerVisible(it->getSymbol()))
- continue;
-
- if (it->isExternal() || Symbol.isUndefined())
- continue;
-
- uint64_t &Entry = StringIndexMap[Symbol.getName()];
- if (!Entry) {
- Entry = StringTable.size();
- StringTable += Symbol.getName();
- StringTable += '\x00';
- }
-
- MachSymbolData MSD;
- MSD.SymbolData = it;
- MSD.StringIndex = Entry;
-
- if (Symbol.isAbsolute()) {
- MSD.SectionIndex = 0;
- LocalSymbolData.push_back(MSD);
- } else {
- MSD.SectionIndex = SectionIndexMap.lookup(&Symbol.getSection());
- assert(MSD.SectionIndex && "Invalid section index!");
- LocalSymbolData.push_back(MSD);
- }
- }
+ // FIXME: Check that offsets match computed ones.
- // External and undefined symbols are required to be in lexicographic order.
- std::sort(ExternalSymbolData.begin(), ExternalSymbolData.end());
- std::sort(UndefinedSymbolData.begin(), UndefinedSymbolData.end());
-
- // Set the symbol indices.
- Index = 0;
+ // Write the symbol table entries.
for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
- LocalSymbolData[i].SymbolData->setIndex(Index++);
+ WriteNlist(LocalSymbolData[i], Layout);
for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
- ExternalSymbolData[i].SymbolData->setIndex(Index++);
+ WriteNlist(ExternalSymbolData[i], Layout);
for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
- UndefinedSymbolData[i].SymbolData->setIndex(Index++);
-
- // The string table is padded to a multiple of 4.
- while (StringTable.size() % 4)
- StringTable += '\x00';
- }
-
- void computeSectionAddresses(const MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- uint64_t StartAddress = 0;
- const SmallVectorImpl<MCSectionData*> &Order = Layout.getSectionOrder();
- for (int i = 0, n = Order.size(); i != n ; ++i) {
- const MCSectionData *SD = Order[i];
- StartAddress = RoundUpToAlignment(StartAddress, SD->getAlignment());
- SectionAddress[SD] = StartAddress;
- StartAddress += Layout.getSectionAddressSize(SD);
- // Explicitly pad the section to match the alignment requirements of the
- // following one. This is for 'gas' compatibility; it shouldn't
- // strictly be necessary.
- StartAddress += getPaddingSize(SD, Layout);
- }
- }
-
- void ExecutePostLayoutBinding(MCAssembler &Asm, const MCAsmLayout &Layout) {
- computeSectionAddresses(Asm, Layout);
-
- // Create symbol data for any indirect symbols.
- BindIndirectSymbols(Asm);
-
- // Compute symbol table information and bind symbol indices.
- ComputeSymbolTable(Asm, StringTable, LocalSymbolData, ExternalSymbolData,
- UndefinedSymbolData);
- }
-
- virtual bool IsSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
- const MCSymbolData &DataA,
- const MCFragment &FB,
- bool InSet,
- bool IsPCRel) const {
- if (InSet)
- return true;
+ WriteNlist(UndefinedSymbolData[i], Layout);
- // The effective address is
- // addr(atom(A)) + offset(A)
- // - addr(atom(B)) - offset(B)
- // and the offsets are not relocatable, so the fixup is fully resolved when
- // addr(atom(A)) - addr(atom(B)) == 0.
- const MCSymbolData *A_Base = 0, *B_Base = 0;
-
- const MCSymbol &SA = DataA.getSymbol().AliasedSymbol();
- const MCSection &SecA = SA.getSection();
- const MCSection &SecB = FB.getParent()->getSection();
-
- if (IsPCRel) {
- // The simple (Darwin, except on x86_64) way of dealing with this was to
- // assume that any reference to a temporary symbol *must* be a temporary
- // symbol in the same atom, unless the sections differ. Therefore, any
- // PCrel relocation to a temporary symbol (in the same section) is fully
- // resolved. This also works in conjunction with absolutized .set, which
- // requires the compiler to use .set to absolutize the differences between
- // symbols which the compiler knows to be assembly time constants, so we
- // don't need to worry about considering symbol differences fully
- // resolved.
-
- if (!Asm.getBackend().hasReliableSymbolDifference()) {
- if (!SA.isTemporary() || !SA.isInSection() || &SecA != &SecB)
- return false;
- return true;
- }
- } else {
- if (!TargetObjectWriter->useAggressiveSymbolFolding())
- return false;
- }
-
- const MCFragment &FA = *Asm.getSymbolData(SA).getFragment();
-
- A_Base = FA.getAtom();
- if (!A_Base)
- return false;
-
- B_Base = FB.getAtom();
- if (!B_Base)
- return false;
-
- // If the atoms are the same, they are guaranteed to have the same address.
- if (A_Base == B_Base)
- return true;
-
- // Otherwise, we can't prove this is fully resolved.
- return false;
+ // Write the string table.
+ OS << StringTable.str();
}
-
- void WriteObject(MCAssembler &Asm, const MCAsmLayout &Layout) {
- unsigned NumSections = Asm.size();
-
- // The section data starts after the header, the segment load command (and
- // section headers) and the symbol table.
- unsigned NumLoadCommands = 1;
- uint64_t LoadCommandsSize = is64Bit() ?
- macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size :
- macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size;
-
- // Add the symbol table load command sizes, if used.
- unsigned NumSymbols = LocalSymbolData.size() + ExternalSymbolData.size() +
- UndefinedSymbolData.size();
- if (NumSymbols) {
- NumLoadCommands += 2;
- LoadCommandsSize += (macho::SymtabLoadCommandSize +
- macho::DysymtabLoadCommandSize);
- }
-
- // Compute the total size of the section data, as well as its file size and
- // vm size.
- uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
- macho::Header32Size) + LoadCommandsSize;
- uint64_t SectionDataSize = 0;
- uint64_t SectionDataFileSize = 0;
- uint64_t VMSize = 0;
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- const MCSectionData &SD = *it;
- uint64_t Address = getSectionAddress(&SD);
- uint64_t Size = Layout.getSectionAddressSize(&SD);
- uint64_t FileSize = Layout.getSectionFileSize(&SD);
- FileSize += getPaddingSize(&SD, Layout);
-
- VMSize = std::max(VMSize, Address + Size);
-
- if (SD.getSection().isVirtualSection())
- continue;
-
- SectionDataSize = std::max(SectionDataSize, Address + Size);
- SectionDataFileSize = std::max(SectionDataFileSize, Address + FileSize);
- }
-
- // The section data is padded to 4 bytes.
- //
- // FIXME: Is this machine dependent?
- unsigned SectionDataPadding = OffsetToAlignment(SectionDataFileSize, 4);
- SectionDataFileSize += SectionDataPadding;
-
- // Write the prolog, starting with the header and load command...
- WriteHeader(NumLoadCommands, LoadCommandsSize,
- Asm.getSubsectionsViaSymbols());
- WriteSegmentLoadCommand(NumSections, VMSize,
- SectionDataStart, SectionDataSize);
-
- // ... and then the section headers.
- uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
- unsigned NumRelocs = Relocs.size();
- uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
- WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
- RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
- }
-
- // Write the symbol table load command, if used.
- if (NumSymbols) {
- unsigned FirstLocalSymbol = 0;
- unsigned NumLocalSymbols = LocalSymbolData.size();
- unsigned FirstExternalSymbol = FirstLocalSymbol + NumLocalSymbols;
- unsigned NumExternalSymbols = ExternalSymbolData.size();
- unsigned FirstUndefinedSymbol = FirstExternalSymbol + NumExternalSymbols;
- unsigned NumUndefinedSymbols = UndefinedSymbolData.size();
- unsigned NumIndirectSymbols = Asm.indirect_symbol_size();
- unsigned NumSymTabSymbols =
- NumLocalSymbols + NumExternalSymbols + NumUndefinedSymbols;
- uint64_t IndirectSymbolSize = NumIndirectSymbols * 4;
- uint64_t IndirectSymbolOffset = 0;
-
- // If used, the indirect symbols are written after the section data.
- if (NumIndirectSymbols)
- IndirectSymbolOffset = RelocTableEnd;
-
- // The symbol table is written after the indirect symbol data.
- uint64_t SymbolTableOffset = RelocTableEnd + IndirectSymbolSize;
-
- // The string table is written after the symbol table.
- uint64_t StringTableOffset =
- SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
- macho::Nlist32Size);
- WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
- StringTableOffset, StringTable.size());
-
- WriteDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
- FirstExternalSymbol, NumExternalSymbols,
- FirstUndefinedSymbol, NumUndefinedSymbols,
- IndirectSymbolOffset, NumIndirectSymbols);
- }
-
- // Write the actual section data.
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- Asm.WriteSectionData(it, Layout);
-
- uint64_t Pad = getPaddingSize(it, Layout);
- for (unsigned int i = 0; i < Pad; ++i)
- Write8(0);
- }
-
- // Write the extra padding.
- WriteZeros(SectionDataPadding);
-
- // Write the relocation entries.
- for (MCAssembler::const_iterator it = Asm.begin(),
- ie = Asm.end(); it != ie; ++it) {
- // Write the section relocation entries, in reverse order to match 'as'
- // (approximately, the exact algorithm is more complicated than this).
- std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
- for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
- Write32(Relocs[e - i - 1].Word0);
- Write32(Relocs[e - i - 1].Word1);
- }
- }
-
- // Write the symbol table data, if used.
- if (NumSymbols) {
- // Write the indirect symbol entries.
- for (MCAssembler::const_indirect_symbol_iterator
- it = Asm.indirect_symbol_begin(),
- ie = Asm.indirect_symbol_end(); it != ie; ++it) {
- // Indirect symbols in the non-lazy symbol pointer section have some
- // special handling.
- const MCSectionMachO &Section =
- static_cast<const MCSectionMachO&>(it->SectionData->getSection());
- if (Section.getType() == MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS) {
- // If this symbol is defined and internal, mark it as such.
- if (it->Symbol->isDefined() &&
- !Asm.getSymbolData(*it->Symbol).isExternal()) {
- uint32_t Flags = macho::ISF_Local;
- if (it->Symbol->isAbsolute())
- Flags |= macho::ISF_Absolute;
- Write32(Flags);
- continue;
- }
- }
-
- Write32(Asm.getSymbolData(*it->Symbol).getIndex());
- }
-
- // FIXME: Check that offsets match computed ones.
-
- // Write the symbol table entries.
- for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
- WriteNlist(LocalSymbolData[i], Layout);
- for (unsigned i = 0, e = ExternalSymbolData.size(); i != e; ++i)
- WriteNlist(ExternalSymbolData[i], Layout);
- for (unsigned i = 0, e = UndefinedSymbolData.size(); i != e; ++i)
- WriteNlist(UndefinedSymbolData[i], Layout);
-
- // Write the string table.
- OS << StringTable.str();
- }
- }
-};
-
}
MCObjectWriter *llvm::createMachObjectWriter(MCMachObjectTargetWriter *MOTW,
diff --git a/lib/Target/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp
index e0a9de8..348cd4c 100644
--- a/lib/Target/SubtargetFeature.cpp
+++ b/lib/MC/SubtargetFeature.cpp
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/SubtargetFeature.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/StringExtras.h"
@@ -27,7 +27,7 @@ using namespace llvm;
/// hasFlag - Determine if a feature has a flag; '+' or '-'
///
-static inline bool hasFlag(const std::string &Feature) {
+static inline bool hasFlag(const StringRef Feature) {
assert(!Feature.empty() && "Empty string");
// Get first character
char Ch = Feature[0];
@@ -37,13 +37,13 @@ static inline bool hasFlag(const std::string &Feature) {
/// StripFlag - Return string stripped of flag.
///
-static inline std::string StripFlag(const std::string &Feature) {
+static inline std::string StripFlag(const StringRef Feature) {
return hasFlag(Feature) ? Feature.substr(1) : Feature;
}
/// isEnabled - Return true if enable flag; '+'.
///
-static inline bool isEnabled(const std::string &Feature) {
+static inline bool isEnabled(const StringRef Feature) {
assert(!Feature.empty() && "Empty string");
// Get first character
char Ch = Feature[0];
@@ -53,16 +53,22 @@ static inline bool isEnabled(const std::string &Feature) {
/// PrependFlag - Return a string with a prepended flag; '+' or '-'.
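+/// For example, PrependFlag("sse2", true) yields "+sse2", while an already
+/// flagged "-sse2" is returned unchanged.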
///
-static inline std::string PrependFlag(const std::string &Feature,
- bool IsEnabled) {
+static inline std::string PrependFlag(const StringRef Feature,
+ bool IsEnabled) {
assert(!Feature.empty() && "Empty string");
- if (hasFlag(Feature)) return Feature;
- return std::string(IsEnabled ? "+" : "-") + Feature;
+ if (hasFlag(Feature))
+ return Feature;
+ std::string Prefix = IsEnabled ? "+" : "-";
+ Prefix += Feature;
+ return Prefix;
}
/// Split - Splits a string of comma-separated items into a vector of strings.
///
-static void Split(std::vector<std::string> &V, const std::string &S) {
+static void Split(std::vector<std::string> &V, const StringRef S) {
+ if (S.empty())
+ return;
+
// Start at beginning of string.
size_t Pos = 0;
while (true) {
@@ -88,7 +94,7 @@ static std::string Join(const std::vector<std::string> &V) {
std::string Result;
// If the vector is not empty
if (!V.empty()) {
- // Start with the CPU feature
+ // Start with the first feature
Result = V[0];
// For each successive feature
for (size_t i = 1; i < V.size(); i++) {
@@ -103,7 +109,7 @@ static std::string Join(const std::vector<std::string> &V) {
}
/// Adding features.
-void SubtargetFeatures::AddFeature(const std::string &String,
+void SubtargetFeatures::AddFeature(const StringRef String,
bool IsEnabled) {
// Don't add empty features
if (!String.empty()) {
@@ -113,16 +119,16 @@ void SubtargetFeatures::AddFeature(const std::string &String,
}
/// Find KV in array using binary search.
-template<typename T> const T *Find(const std::string &S, const T *A, size_t L) {
+template<typename T> const T *Find(const StringRef S, const T *A, size_t L) {
// Make the lower bound element we're looking for
T KV;
- KV.Key = S.c_str();
+ KV.Key = S.data();
// Determine the end of the array
const T *Hi = A + L;
// Binary search the array
const T *F = std::lower_bound(A, Hi, KV);
// If not found then return NULL
- if (F == Hi || std::string(F->Key) != S) return NULL;
+ if (F == Hi || StringRef(F->Key) != S) return NULL;
// Return the found array item
return F;
}
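+// Note that the table must already be sorted by Key for std::lower_bound to
+// work; the NDEBUG-guarded loops in getFeatureBits and getItinerary assert
+// exactly that.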
@@ -170,7 +176,7 @@ static void Help(const SubtargetFeatureKV *CPUTable, size_t CPUTableSize,
// SubtargetFeatures Implementation
//===----------------------------------------------------------------------===//
-SubtargetFeatures::SubtargetFeatures(const std::string &Initial) {
+SubtargetFeatures::SubtargetFeatures(const StringRef Initial) {
// Break up string into separate features
Split(Features, Initial);
}
@@ -179,33 +185,6 @@ SubtargetFeatures::SubtargetFeatures(const std::string &Initial) {
std::string SubtargetFeatures::getString() const {
return Join(Features);
}
-void SubtargetFeatures::setString(const std::string &Initial) {
- // Throw out old features
- Features.clear();
- // Break up string into separate features
- Split(Features, LowercaseString(Initial));
-}
-
-
-/// setCPU - Set the CPU string. Replaces previous setting. Setting to ""
-/// clears CPU.
-void SubtargetFeatures::setCPU(const std::string &String) {
- Features[0] = LowercaseString(String);
-}
-
-
-/// setCPUIfNone - Setting CPU string only if no string is set.
-///
-void SubtargetFeatures::setCPUIfNone(const std::string &String) {
- if (Features[0].empty()) setCPU(String);
-}
-
-/// getCPU - Returns current CPU.
-///
-const std::string & SubtargetFeatures::getCPU() const {
- return Features[0];
-}
-
/// SetImpliedBits - For each feature that is (transitively) implied by this
/// feature, set it.
@@ -245,14 +224,48 @@ void ClearImpliedBits(uint64_t &Bits, const SubtargetFeatureKV *FeatureEntry,
}
}
-/// getBits - Get feature bits.
+/// ToggleFeature - Toggle a feature and return the newly updated feature
+/// bits.
+uint64_t
+SubtargetFeatures::ToggleFeature(uint64_t Bits, const StringRef Feature,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ // Find feature in table.
+ const SubtargetFeatureKV *FeatureEntry =
+ Find(StripFlag(Feature), FeatureTable, FeatureTableSize);
+ // If there is a match
+ if (FeatureEntry) {
+ if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+ Bits &= ~FeatureEntry->Value;
+
+ // For each feature that implies this, clear it.
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ } else {
+ Bits |= FeatureEntry->Value;
+
+ // For each feature that this implies, set it.
+ SetImpliedBits(Bits, FeatureEntry, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << Feature
+ << "' is not a recognized feature for this target"
+ << " (ignoring feature)\n";
+ }
+
+ return Bits;
+}
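+// A minimal usage sketch, assuming a hypothetical one-entry feature table
+// (fields are Key, Desc, Value, Implies):
+//
+//   static const SubtargetFeatureKV FT[] = {
+//     { "avx", "Enable AVX", 1ULL << 0, 0 }
+//   };
+//   SubtargetFeatures SF;
+//   uint64_t Bits = SF.ToggleFeature(0, "avx", FT, 1);  // sets bit 0
+//   Bits = SF.ToggleFeature(Bits, "avx", FT, 1);        // clears it again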
+
+
+/// getFeatureBits - Get the feature bits for a CPU.
///
-uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
- size_t CPUTableSize,
- const SubtargetFeatureKV *FeatureTable,
- size_t FeatureTableSize) {
- assert(CPUTable && "missing CPU table");
- assert(FeatureTable && "missing features table");
+uint64_t SubtargetFeatures::getFeatureBits(const StringRef CPU,
+ const SubtargetFeatureKV *CPUTable,
+ size_t CPUTableSize,
+ const SubtargetFeatureKV *FeatureTable,
+ size_t FeatureTableSize) {
+ if (!FeatureTableSize || !CPUTableSize)
+ return 0;
+
#ifndef NDEBUG
for (size_t i = 1; i < CPUTableSize; i++) {
assert(strcmp(CPUTable[i - 1].Key, CPUTable[i].Key) < 0 &&
@@ -266,31 +279,33 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
uint64_t Bits = 0; // Resulting bits
// Check if help is needed
- if (Features[0] == "help")
+ if (CPU == "help")
Help(CPUTable, CPUTableSize, FeatureTable, FeatureTableSize);
- // Find CPU entry
- const SubtargetFeatureKV *CPUEntry =
- Find(Features[0], CPUTable, CPUTableSize);
- // If there is a match
- if (CPUEntry) {
- // Set base feature bits
- Bits = CPUEntry->Value;
-
- // Set the feature implied by this CPU feature, if any.
- for (size_t i = 0; i < FeatureTableSize; ++i) {
- const SubtargetFeatureKV &FE = FeatureTable[i];
- if (CPUEntry->Value & FE.Value)
- SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ // Find CPU entry if CPU name is specified.
+ if (!CPU.empty()) {
+ const SubtargetFeatureKV *CPUEntry = Find(CPU, CPUTable, CPUTableSize);
+ // If there is a match
+ if (CPUEntry) {
+ // Set base feature bits
+ Bits = CPUEntry->Value;
+
+ // Set the feature implied by this CPU feature, if any.
+ for (size_t i = 0; i < FeatureTableSize; ++i) {
+ const SubtargetFeatureKV &FE = FeatureTable[i];
+ if (CPUEntry->Value & FE.Value)
+ SetImpliedBits(Bits, &FE, FeatureTable, FeatureTableSize);
+ }
+ } else {
+ errs() << "'" << CPU
+ << "' is not a recognized processor for this target"
+ << " (ignoring processor)\n";
}
- } else {
- errs() << "'" << Features[0]
- << "' is not a recognized processor for this target"
- << " (ignoring processor)\n";
}
+
// Iterate through each feature
- for (size_t i = 1; i < Features.size(); i++) {
- const std::string &Feature = Features[i];
+ for (size_t i = 0, E = Features.size(); i < E; i++) {
+ const StringRef Feature = Features[i];
// Check for help
if (Feature == "+help")
@@ -323,9 +338,10 @@ uint64_t SubtargetFeatures::getBits(const SubtargetFeatureKV *CPUTable,
return Bits;
}
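Building on the hypothetical tables above, resolving a CPU name plus a feature delta string is now a single call that takes the CPU separately; an empty CPU string simply skips the CPU lookup thanks to the new !CPU.empty() guard. A sketch:

    // Hypothetical CPU table, also sorted by Key as the NDEBUG checks require.
    static const SubtargetFeatureKV HypoCPUs[] = {
      { "generic", "Generic CPU",     0,         0 },
      { "hypo1",   "Hypothetical #1", 1ULL << 1, 0 }, // baseline has sse2
    };

    uint64_t resolve() {
      SubtargetFeatures SF("+sse,-sse2"); // comma-separated feature deltas
      // CPU baseline first (with implied bits), then each +/- delta.
      return SF.getFeatureBits("hypo1", HypoCPUs, 2, HypoFeatures, 2);
    }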
-/// Get info pointer
-void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table,
- size_t TableSize) {
+/// Get scheduling itinerary of a CPU.
+void *SubtargetFeatures::getItinerary(const StringRef CPU,
+ const SubtargetInfoKV *Table,
+ size_t TableSize) {
assert(Table && "missing table");
#ifndef NDEBUG
for (size_t i = 1; i < TableSize; i++) {
@@ -334,12 +350,12 @@ void *SubtargetFeatures::getInfo(const SubtargetInfoKV *Table,
#endif
// Find entry
- const SubtargetInfoKV *Entry = Find(Features[0], Table, TableSize);
+ const SubtargetInfoKV *Entry = Find(CPU, Table, TableSize);
if (Entry) {
return Entry->Value;
} else {
- errs() << "'" << Features[0]
+ errs() << "'" << CPU
<< "' is not a recognized processor for this target"
<< " (ignoring processor)\n";
return NULL;
@@ -367,10 +383,7 @@ void SubtargetFeatures::dump() const {
/// subtarget. It would be better if we could encode this information
/// into the IR. See <rdar://5972456>.
///
-void SubtargetFeatures::getDefaultSubtargetFeatures(const std::string &CPU,
- const Triple& Triple) {
- setCPU(CPU);
-
+void SubtargetFeatures::getDefaultSubtargetFeatures(const Triple& Triple) {
if (Triple.getVendor() == Triple::Apple) {
if (Triple.getArch() == Triple::ppc) {
// powerpc-apple-*
diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp
new file mode 100644
index 0000000..4b31c75
--- /dev/null
+++ b/lib/Object/Binary.cpp
@@ -0,0 +1,96 @@
+//===- Binary.cpp - A generic binary file -----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Binary class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Binary.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+
+// Include headers for createBinary.
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/COFF.h"
+
+using namespace llvm;
+using namespace object;
+
+Binary::~Binary() {
+ delete Data;
+}
+
+Binary::Binary(unsigned int Type, MemoryBuffer *Source)
+ : TypeID(Type)
+ , Data(Source) {}
+
+StringRef Binary::getData() const {
+ return Data->getBuffer();
+}
+
+StringRef Binary::getFileName() const {
+ return Data->getBufferIdentifier();
+}
+
+error_code object::createBinary(MemoryBuffer *Source,
+ OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> scopedSource(Source);
+ if (!Source)
+ return make_error_code(errc::invalid_argument);
+ if (Source->getBufferSize() < 64)
+ return object_error::invalid_file_type;
+ sys::LLVMFileType type = sys::IdentifyFileType(Source->getBufferStart(),
+ static_cast<unsigned>(Source->getBufferSize()));
+ error_code ec;
+ switch (type) {
+ case sys::ELF_Relocatable_FileType:
+ case sys::ELF_Executable_FileType:
+ case sys::ELF_SharedObject_FileType:
+ case sys::ELF_Core_FileType: {
+ OwningPtr<Binary> ret(
+ ObjectFile::createELFObjectFile(scopedSource.take()));
+ if (!ret)
+ return object_error::invalid_file_type;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::Mach_O_Object_FileType:
+ case sys::Mach_O_Executable_FileType:
+ case sys::Mach_O_FixedVirtualMemorySharedLib_FileType:
+ case sys::Mach_O_Core_FileType:
+ case sys::Mach_O_PreloadExecutable_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLib_FileType:
+ case sys::Mach_O_DynamicLinker_FileType:
+ case sys::Mach_O_Bundle_FileType:
+ case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: {
+ OwningPtr<Binary> ret(
+ ObjectFile::createMachOObjectFile(scopedSource.take()));
+ if (!ret)
+ return object_error::invalid_file_type;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ case sys::COFF_FileType: {
+ OwningPtr<Binary> ret(new COFFObjectFile(scopedSource.take(), ec));
+ if (ec) return ec;
+ Result.swap(ret);
+ return object_error::success;
+ }
+ default: // Unrecognized object file format.
+ return object_error::invalid_file_type;
+ }
+}
+
+error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
+ OwningPtr<MemoryBuffer> File;
+ if (error_code ec = MemoryBuffer::getFile(Path, File))
+ return ec;
+ return createBinary(File.take(), Result);
+}
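A usage sketch for the new factory (a hypothetical caller, not part of this commit): on failure the OwningPtr is left unset and the returned error_code says why; on success the Binary owns the MemoryBuffer.

    #include "llvm/Object/Binary.h"
    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/Support/raw_ostream.h"
    #include "llvm/Support/system_error.h"
    using namespace llvm;
    using namespace object;

    bool printIdentifier(StringRef Path) {
      OwningPtr<Binary> Bin;
      if (error_code ec = createBinary(Path, Bin)) {
        errs() << Path << ": " << ec.message() << "\n";
        return false;
      }
      outs() << Bin->getFileName() << "\n";
      return true;
    }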
diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt
index 642a8ec..68e5e94 100644
--- a/lib/Object/CMakeLists.txt
+++ b/lib/Object/CMakeLists.txt
@@ -1,8 +1,10 @@
add_llvm_library(LLVMObject
+ Binary.cpp
+ COFFObjectFile.cpp
+ ELFObjectFile.cpp
+ Error.cpp
MachOObject.cpp
MachOObjectFile.cpp
Object.cpp
ObjectFile.cpp
- COFFObjectFile.cpp
- ELFObjectFile.cpp
)
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 86bf44b..07de6bc 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -11,11 +11,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Object/COFF.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/COFF.h"
-#include "llvm/Support/Endian.h"
using namespace llvm;
using namespace object;
@@ -28,174 +26,164 @@ using support::little16_t;
}
namespace {
-struct coff_file_header {
- ulittle16_t Machine;
- ulittle16_t NumberOfSections;
- ulittle32_t TimeDateStamp;
- ulittle32_t PointerToSymbolTable;
- ulittle32_t NumberOfSymbols;
- ulittle16_t SizeOfOptionalHeader;
- ulittle16_t Characteristics;
-};
+// Returns false, and sets ec, if size is greater than the buffer size.
+bool checkSize(const MemoryBuffer *m, error_code &ec, uint64_t size) {
+ if (m->getBufferSize() < size) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
}
-extern char coff_file_header_layout_static_assert
- [sizeof(coff_file_header) == 20 ? 1 : -1];
-
-namespace {
-struct coff_symbol {
- struct StringTableOffset {
- ulittle32_t Zeroes;
- ulittle32_t Offset;
- };
-
- union {
- char ShortName[8];
- StringTableOffset Offset;
- } Name;
-
- ulittle32_t Value;
- little16_t SectionNumber;
-
- struct {
- ulittle8_t BaseType;
- ulittle8_t ComplexType;
- } Type;
-
- ulittle8_t StorageClass;
- ulittle8_t NumberOfAuxSymbols;
-};
+// Returns false if any bytes in [addr, addr + size) fall outside of m.
+bool checkAddr(const MemoryBuffer *m,
+ error_code &ec,
+ uintptr_t addr,
+ uint64_t size) {
+ if (addr + size < addr ||
+ addr + size < size ||
+ addr + size > uintptr_t(m->getBufferEnd())) {
+ ec = object_error::unexpected_eof;
+ return false;
+ }
+ return true;
+}
}
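The extra comparisons in checkAddr guard against integer wraparound: if addr + size overflows, the unsigned sum wraps and lands below each operand, so the "past the buffer end" test alone would falsely pass. A standalone illustration, assuming nothing beyond unsigned modular arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      uintptr_t addr = UINTPTR_MAX - 8; // 8 bytes below the top of the space
      uintptr_t size = 64;              // request runs past the end
      // Unsigned overflow wraps, so the sum is *smaller* than addr...
      assert(addr + size < addr);
      // ...which is exactly the case "addr + size < addr" rejects above.
      return 0;
    }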
-extern char coff_coff_symbol_layout_static_assert
- [sizeof(coff_symbol) == 18 ? 1 : -1];
+const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Symb) const {
+ const coff_symbol *addr = reinterpret_cast<const coff_symbol*>(Symb.p);
-namespace {
-struct coff_section {
- char Name[8];
- ulittle32_t VirtualSize;
- ulittle32_t VirtualAddress;
- ulittle32_t SizeOfRawData;
- ulittle32_t PointerToRawData;
- ulittle32_t PointerToRelocations;
- ulittle32_t PointerToLinenumbers;
- ulittle16_t NumberOfRelocations;
- ulittle16_t NumberOfLinenumbers;
- ulittle32_t Characteristics;
-};
+# ifndef NDEBUG
+ // Verify that the symbol points to a valid entry in the symbol table.
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(base());
+ if (offset < Header->PointerToSymbolTable
+ || offset >= Header->PointerToSymbolTable
+ + (Header->NumberOfSymbols * sizeof(coff_symbol)))
+ report_fatal_error("Symbol was outside of symbol table.");
+
+ assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol)
+ == 0 && "Symbol did not point to the beginning of a symbol");
+# endif
+
+ return addr;
}
-extern char coff_coff_section_layout_static_assert
- [sizeof(coff_section) == 40 ? 1 : -1];
+const coff_section *COFFObjectFile::toSec(DataRefImpl Sec) const {
+ const coff_section *addr = reinterpret_cast<const coff_section*>(Sec.p);
-namespace {
-class COFFObjectFile : public ObjectFile {
-private:
- uint64_t HeaderOff;
- const coff_file_header *Header;
- const coff_section *SectionTable;
- const coff_symbol *SymbolTable;
- const char *StringTable;
-
- const coff_section *getSection(std::size_t index) const;
- const char *getString(std::size_t offset) const;
-
-protected:
- virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
- virtual StringRef getSymbolName(DataRefImpl Symb) const;
- virtual uint64_t getSymbolAddress(DataRefImpl Symb) const;
- virtual uint64_t getSymbolSize(DataRefImpl Symb) const;
- virtual char getSymbolNMTypeChar(DataRefImpl Symb) const;
- virtual bool isSymbolInternal(DataRefImpl Symb) const;
-
- virtual SectionRef getSectionNext(DataRefImpl Sec) const;
- virtual StringRef getSectionName(DataRefImpl Sec) const;
- virtual uint64_t getSectionAddress(DataRefImpl Sec) const;
- virtual uint64_t getSectionSize(DataRefImpl Sec) const;
- virtual StringRef getSectionContents(DataRefImpl Sec) const;
- virtual bool isSectionText(DataRefImpl Sec) const;
-
-public:
- COFFObjectFile(MemoryBuffer *Object);
- virtual symbol_iterator begin_symbols() const;
- virtual symbol_iterator end_symbols() const;
- virtual section_iterator begin_sections() const;
- virtual section_iterator end_sections() const;
-
- virtual uint8_t getBytesInAddress() const;
- virtual StringRef getFileFormatName() const;
- virtual unsigned getArch() const;
-};
-} // end namespace
-
-SymbolRef COFFObjectFile::getSymbolNext(DataRefImpl Symb) const {
- const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+# ifndef NDEBUG
+ // Verify that the section points to a valid entry in the section table.
+ if (addr < SectionTable
+ || addr >= (SectionTable + Header->NumberOfSections))
+ report_fatal_error("Section was outside of section table.");
+
+ uintptr_t offset = uintptr_t(addr) - uintptr_t(SectionTable);
+ assert(offset % sizeof(coff_section) == 0 &&
+ "Section did not point to the beginning of a section");
+# endif
+
+ return addr;
+}
+
+error_code COFFObjectFile::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
symb += 1 + symb->NumberOfAuxSymbols;
- Symb.p = reinterpret_cast<intptr_t>(symb);
- return SymbolRef(Symb, this);
+ Symb.p = reinterpret_cast<uintptr_t>(symb);
+ Result = SymbolRef(Symb, this);
+ return object_error::success;
}
-StringRef COFFObjectFile::getSymbolName(DataRefImpl Symb) const {
- const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
+error_code COFFObjectFile::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
// Check for string table entry. First 4 bytes are 0.
if (symb->Name.Offset.Zeroes == 0) {
uint32_t Offset = symb->Name.Offset.Offset;
- return StringRef(getString(Offset));
+ if (error_code ec = getString(Offset, Result))
+ return ec;
+ return object_error::success;
}
if (symb->Name.ShortName[7] == 0)
// Null terminated, let ::strlen figure out the length.
- return StringRef(symb->Name.ShortName);
- // Not null terminated, use all 8 bytes.
- return StringRef(symb->Name.ShortName, 8);
+ Result = StringRef(symb->Name.ShortName);
+ else
+ // Not null terminated, use all 8 bytes.
+ Result = StringRef(symb->Name.ShortName, 8);
+ return object_error::success;
}
-uint64_t COFFObjectFile::getSymbolAddress(DataRefImpl Symb) const {
- const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
- const coff_section *Section = getSection(symb->SectionNumber);
- char Type = getSymbolNMTypeChar(Symb);
+error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
if (Type == 'U' || Type == 'w')
- return UnknownAddressOrSize;
- if (Section)
- return Section->VirtualAddress + symb->Value;
- return symb->Value;
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->VirtualAddress + symb->Value;
+ else
+ Result = symb->Value;
+ return object_error::success;
}
-uint64_t COFFObjectFile::getSymbolSize(DataRefImpl Symb) const {
+error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
// FIXME: Return the correct size. This requires looking at all the symbols
// in the same section as this symbol, and looking for either the next
// symbol, or the end of the section.
- const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
- const coff_section *Section = getSection(symb->SectionNumber);
- char Type = getSymbolNMTypeChar(Symb);
+ const coff_symbol *symb = toSymb(Symb);
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
+ char Type;
+ if (error_code ec = getSymbolNMTypeChar(Symb, Type))
+ return ec;
if (Type == 'U' || Type == 'w')
- return UnknownAddressOrSize;
- if (Section)
- return Section->SizeOfRawData - symb->Value;
- return 0;
+ Result = UnknownAddressOrSize;
+ else if (Section)
+ Result = Section->SizeOfRawData - symb->Value;
+ else
+ Result = 0;
+ return object_error::success;
}
-char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const {
- const coff_symbol *symb = reinterpret_cast<const coff_symbol*>(Symb.p);
- char ret = StringSwitch<char>(getSymbolName(Symb))
+error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
+ const coff_symbol *symb = toSymb(Symb);
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ char ret = StringSwitch<char>(name)
.StartsWith(".debug", 'N')
.StartsWith(".sxdata", 'N')
.Default('?');
- if (ret != '?')
- return ret;
+ if (ret != '?') {
+ Result = ret;
+ return object_error::success;
+ }
uint32_t Characteristics = 0;
- if (const coff_section *Section = getSection(symb->SectionNumber)) {
+ if (symb->SectionNumber > 0) {
+ const coff_section *Section = NULL;
+ if (error_code ec = getSection(symb->SectionNumber, Section))
+ return ec;
Characteristics = Section->Characteristics;
}
switch (symb->SectionNumber) {
case COFF::IMAGE_SYM_UNDEFINED:
// Check storage classes.
- if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL)
- return 'w'; // Don't do ::toupper.
- else
+ if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) {
+ Result = 'w';
+ return object_error::success; // Don't do ::toupper.
+ } else
ret = 'u';
break;
case COFF::IMAGE_SYM_ABSOLUTE:
@@ -227,22 +215,28 @@ char COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb) const {
if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL)
ret = ::toupper(ret);
- return ret;
+ Result = ret;
+ return object_error::success;
}
-bool COFFObjectFile::isSymbolInternal(DataRefImpl Symb) const {
- return false;
+error_code COFFObjectFile::isSymbolInternal(DataRefImpl Symb,
+ bool &Result) const {
+ Result = false;
+ return object_error::success;
}
-SectionRef COFFObjectFile::getSectionNext(DataRefImpl Sec) const {
- const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+error_code COFFObjectFile::getSectionNext(DataRefImpl Sec,
+ SectionRef &Result) const {
+ const coff_section *sec = toSec(Sec);
sec += 1;
- Sec.p = reinterpret_cast<intptr_t>(sec);
- return SectionRef(Sec, this);
+ Sec.p = reinterpret_cast<uintptr_t>(sec);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
}
-StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const {
- const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
+error_code COFFObjectFile::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
+ const coff_section *sec = toSec(Sec);
StringRef name;
if (sec->Name[7] == 0)
// Null terminated, let ::strlen figure out the length.
@@ -255,64 +249,124 @@ StringRef COFFObjectFile::getSectionName(DataRefImpl Sec) const {
if (name[0] == '/') {
uint32_t Offset;
name.substr(1).getAsInteger(10, Offset);
- return StringRef(getString(Offset));
+ if (error_code ec = getString(Offset, name))
+ return ec;
}
- // It's just a normal name.
- return name;
+ Result = name;
+ return object_error::success;
+}
+
+error_code COFFObjectFile::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->VirtualAddress;
+ return object_error::success;
}
-uint64_t COFFObjectFile::getSectionAddress(DataRefImpl Sec) const {
- const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
- return sec->VirtualAddress;
+error_code COFFObjectFile::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->SizeOfRawData;
+ return object_error::success;
}
-uint64_t COFFObjectFile::getSectionSize(DataRefImpl Sec) const {
- const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
- return sec->SizeOfRawData;
+error_code COFFObjectFile::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
+ const coff_section *sec = toSec(Sec);
+ // The only thing that we need to verify is that the contents are contained
+ // within the file bounds. We don't need to make sure they don't overlap
+ // other data, as nothing in the format forbids that.
+ uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData;
+ uintptr_t con_end = con_start + sec->SizeOfRawData;
+ if (con_end >= uintptr_t(Data->getBufferEnd()))
+ return object_error::parse_failed;
+ Result = StringRef(reinterpret_cast<const char*>(con_start),
+ sec->SizeOfRawData);
+ return object_error::success;
}
-StringRef COFFObjectFile::getSectionContents(DataRefImpl Sec) const {
- const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
- return StringRef(reinterpret_cast<const char *>(base + sec->PointerToRawData),
- sec->SizeOfRawData);
+error_code COFFObjectFile::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
+ const coff_section *sec = toSec(Sec);
+ Result = sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+ return object_error::success;
}
-bool COFFObjectFile::isSectionText(DataRefImpl Sec) const {
- const coff_section *sec = reinterpret_cast<const coff_section*>(Sec.p);
- return sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE;
+error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
}
-COFFObjectFile::COFFObjectFile(MemoryBuffer *Object)
- : ObjectFile(Object) {
+COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec)
+ : ObjectFile(Binary::isCOFF, Object, ec) {
+ // Check that we at least have enough room for a header.
+ if (!checkSize(Data, ec, sizeof(coff_file_header))) return;
- HeaderOff = 0;
+ // The actual starting location of the COFF header in the file. This can be
+ // non-zero in PE/COFF files.
+ uint64_t HeaderStart = 0;
- if (base[0] == 0x4d && base[1] == 0x5a) {
+ // Check if this is a PE/COFF file.
+ if (base()[0] == 0x4d && base()[1] == 0x5a) {
// PE/COFF, seek through MS-DOS compatibility stub and 4-byte
// PE signature to find 'normal' COFF header.
- HeaderOff += *reinterpret_cast<const ulittle32_t *>(base + 0x3c);
- HeaderOff += 4;
+ if (!checkSize(Data, ec, 0x3c + 8)) return;
+ HeaderStart += *reinterpret_cast<const ulittle32_t *>(base() + 0x3c);
+ // Check the PE header. ("PE\0\0")
+ if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) {
+ ec = object_error::parse_failed;
+ return;
+ }
+ HeaderStart += 4; // Skip the PE Header.
}
- Header = reinterpret_cast<const coff_file_header *>(base + HeaderOff);
+ Header = reinterpret_cast<const coff_file_header *>(base() + HeaderStart);
+ if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header)))
+ return;
+
SectionTable =
- reinterpret_cast<const coff_section *>( base
- + HeaderOff
+ reinterpret_cast<const coff_section *>( base()
+ + HeaderStart
+ sizeof(coff_file_header)
+ Header->SizeOfOptionalHeader);
+ if (!checkAddr(Data, ec, uintptr_t(SectionTable),
+ Header->NumberOfSections * sizeof(coff_section)))
+ return;
+
SymbolTable =
- reinterpret_cast<const coff_symbol *>(base + Header->PointerToSymbolTable);
+ reinterpret_cast<const coff_symbol *>(base()
+ + Header->PointerToSymbolTable);
+ if (!checkAddr(Data, ec, uintptr_t(SymbolTable),
+ Header->NumberOfSymbols * sizeof(coff_symbol)))
+ return;
// Find string table.
- StringTable = reinterpret_cast<const char *>(base)
- + Header->PointerToSymbolTable
- + Header->NumberOfSymbols * 18;
+ StringTable = reinterpret_cast<const char *>(base())
+ + Header->PointerToSymbolTable
+ + Header->NumberOfSymbols * sizeof(coff_symbol);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t)))
+ return;
+
+ StringTableSize = *reinterpret_cast<const ulittle32_t *>(StringTable);
+ if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize))
+ return;
+ // Check that the string table is null terminated if it has anything in it.
+ if (StringTableSize < 4
+ || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) {
+ ec = object_error::parse_failed;
+ return;
+ }
+
+ ec = object_error::success;
}
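For reference, the PE probe the constructor now performs: an "MZ" DOS stub stores the file offset of the "PE\0\0" signature at byte 0x3c, and the COFF header begins immediately after that signature. A simplified sketch over a raw buffer (the bounds checks and the endian-safe ulittle32_t load of the real code are elided; assumes a little-endian host):

    #include <cstdint>
    #include <cstring>

    // Returns false if an "MZ" file lacks the PE signature.
    bool findCOFFHeader(const uint8_t *buf, uint64_t &headerStart) {
      headerStart = 0;
      if (buf[0] == 0x4d && buf[1] == 0x5a) { // "MZ" DOS stub
        uint32_t e_lfanew;
        std::memcpy(&e_lfanew, buf + 0x3c, sizeof(e_lfanew));
        if (std::memcmp(buf + e_lfanew, "PE\0\0", 4) != 0)
          return false;
        headerStart = e_lfanew + 4; // coff_file_header follows "PE\0\0"
      }
      return true;
    }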
ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
DataRefImpl ret;
- memset(&ret, 0, sizeof(DataRefImpl));
+ std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SymbolTable);
return symbol_iterator(SymbolRef(ret, this));
}
@@ -320,21 +374,21 @@ ObjectFile::symbol_iterator COFFObjectFile::begin_symbols() const {
ObjectFile::symbol_iterator COFFObjectFile::end_symbols() const {
// The symbol table ends where the string table begins.
DataRefImpl ret;
- memset(&ret, 0, sizeof(DataRefImpl));
+ std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(StringTable);
return symbol_iterator(SymbolRef(ret, this));
}
ObjectFile::section_iterator COFFObjectFile::begin_sections() const {
DataRefImpl ret;
- memset(&ret, 0, sizeof(DataRefImpl));
+ std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SectionTable);
return section_iterator(SectionRef(ret, this));
}
ObjectFile::section_iterator COFFObjectFile::end_sections() const {
DataRefImpl ret;
- memset(&ret, 0, sizeof(DataRefImpl));
+ std::memset(&ret, 0, sizeof(DataRefImpl));
ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
return section_iterator(SectionRef(ret, this));
}
@@ -365,24 +419,37 @@ unsigned COFFObjectFile::getArch() const {
}
}
-const coff_section *COFFObjectFile::getSection(std::size_t index) const {
- if (index > 0 && index <= Header->NumberOfSections)
- return SectionTable + (index - 1);
- return 0;
+error_code COFFObjectFile::getSection(int32_t index,
+ const coff_section *&Result) const {
+ // Check for special index values.
+ if (index == COFF::IMAGE_SYM_UNDEFINED ||
+ index == COFF::IMAGE_SYM_ABSOLUTE ||
+ index == COFF::IMAGE_SYM_DEBUG)
+ Result = NULL;
+ else if (index > 0 && index <= Header->NumberOfSections)
+ // We already verified the section table data, so no need to check again.
+ Result = SectionTable + (index - 1);
+ else
+ return object_error::parse_failed;
+ return object_error::success;
}
-const char *COFFObjectFile::getString(std::size_t offset) const {
- const ulittle32_t *StringTableSize =
- reinterpret_cast<const ulittle32_t *>(StringTable);
- if (offset < *StringTableSize)
- return StringTable + offset;
- return 0;
+error_code COFFObjectFile::getString(uint32_t offset,
+ StringRef &Result) const {
+ if (StringTableSize <= 4)
+ // Tried to get a string from an empty string table.
+ return object_error::parse_failed;
+ if (offset >= StringTableSize)
+ return object_error::unexpected_eof;
+ Result = StringRef(StringTable + offset);
+ return object_error::success;
}
namespace llvm {
ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
- return new COFFObjectFile(Object);
+ error_code ec;
+ return new COFFObjectFile(Object, ec);
}
} // end namespace llvm
diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp
index d2a2726..e2ff4df 100644
--- a/lib/Object/ELFObjectFile.cpp
+++ b/lib/Object/ELFObjectFile.cpp
@@ -222,22 +222,24 @@ class ELFObjectFile : public ObjectFile {
const char *getString(const Elf_Shdr *section, uint32_t offset) const;
protected:
- virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
- virtual StringRef getSymbolName(DataRefImpl Symb) const;
- virtual uint64_t getSymbolAddress(DataRefImpl Symb) const;
- virtual uint64_t getSymbolSize(DataRefImpl Symb) const;
- virtual char getSymbolNMTypeChar(DataRefImpl Symb) const;
- virtual bool isSymbolInternal(DataRefImpl Symb) const;
-
- virtual SectionRef getSectionNext(DataRefImpl Sec) const;
- virtual StringRef getSectionName(DataRefImpl Sec) const;
- virtual uint64_t getSectionAddress(DataRefImpl Sec) const;
- virtual uint64_t getSectionSize(DataRefImpl Sec) const;
- virtual StringRef getSectionContents(DataRefImpl Sec) const;
- virtual bool isSectionText(DataRefImpl Sec) const;
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+ virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb,
+ bool &Result) const;
public:
- ELFObjectFile(MemoryBuffer *Object);
+ ELFObjectFile(MemoryBuffer *Object, error_code &ec);
virtual symbol_iterator begin_symbols() const;
virtual symbol_iterator end_symbols() const;
virtual section_iterator begin_sections() const;
@@ -259,9 +261,9 @@ void ELFObjectFile<target_endianness, is64Bits>
// an error object around.
if (!( symb
&& SymbolTableSection
- && symb >= (const Elf_Sym*)(base
+ && symb >= (const Elf_Sym*)(base()
+ SymbolTableSection->sh_offset)
- && symb < (const Elf_Sym*)(base
+ && symb < (const Elf_Sym*)(base()
+ SymbolTableSection->sh_offset
+ SymbolTableSection->sh_size)))
// FIXME: Proper error handling.
@@ -269,8 +271,9 @@ void ELFObjectFile<target_endianness, is64Bits>
}
template<support::endianness target_endianness, bool is64Bits>
-SymbolRef ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNext(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNext(DataRefImpl Symb,
+ SymbolRef &Result) const {
validateSymbol(Symb);
const Elf_Shdr *SymbolTableSection = SymbolTableSections[Symb.d.b];
@@ -287,63 +290,80 @@ SymbolRef ELFObjectFile<target_endianness, is64Bits>
}
}
- return SymbolRef(Symb, this);
+ Result = SymbolRef(Symb, this);
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolName(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolName(DataRefImpl Symb,
+ StringRef &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
if (symb->st_name == 0) {
const Elf_Shdr *section = getSection(symb->st_shndx);
if (!section)
- return "";
- return getString(dot_shstrtab_sec, section->sh_name);
+ Result = "";
+ else
+ Result = getString(dot_shstrtab_sec, section->sh_name);
+ return object_error::success;
}
// Use the default symbol table name section.
- return getString(dot_strtab_sec, symb->st_name);
+ Result = getString(dot_strtab_sec, symb->st_name);
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolAddress(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolAddress(DataRefImpl Symb,
+ uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section;
switch (symb->st_shndx) {
case ELF::SHN_COMMON:
// Undefined symbols have no address yet.
- case ELF::SHN_UNDEF: return UnknownAddressOrSize;
- case ELF::SHN_ABS: return symb->st_value;
+ case ELF::SHN_UNDEF:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
+ case ELF::SHN_ABS:
+ Result = symb->st_value;
+ return object_error::success;
default: Section = getSection(symb->st_shndx);
}
switch (symb->getType()) {
- case ELF::STT_SECTION: return Section ? Section->sh_addr
- : UnknownAddressOrSize;
+ case ELF::STT_SECTION:
+ Result = Section ? Section->sh_addr : UnknownAddressOrSize;
+ return object_error::success;
case ELF::STT_FUNC:
case ELF::STT_OBJECT:
case ELF::STT_NOTYPE:
- return symb->st_value;
- default: return UnknownAddressOrSize;
+ Result = symb->st_value;
+ return object_error::success;
+ default:
+ Result = UnknownAddressOrSize;
+ return object_error::success;
}
}
template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolSize(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolSize(DataRefImpl Symb,
+ uint64_t &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
if (symb->st_size == 0)
- return UnknownAddressOrSize;
- return symb->st_size;
+ Result = UnknownAddressOrSize;
+ else
+ Result = symb->st_size;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-char ELFObjectFile<target_endianness, is64Bits>
- ::getSymbolNMTypeChar(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSymbolNMTypeChar(DataRefImpl Symb,
+ char &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
const Elf_Shdr *Section = getSection(symb->st_shndx);
@@ -390,89 +410,120 @@ char ELFObjectFile<target_endianness, is64Bits>
ret = 'W';
}
- if (ret == '?' && symb->getType() == ELF::STT_SECTION)
- return StringSwitch<char>(getSymbolName(Symb))
+ if (ret == '?' && symb->getType() == ELF::STT_SECTION) {
+ StringRef name;
+ if (error_code ec = getSymbolName(Symb, name))
+ return ec;
+ Result = StringSwitch<char>(name)
.StartsWith(".debug", 'N')
.StartsWith(".note", 'n');
+ return object_error::success;
+ }
- return ret;
+ Result = ret;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-bool ELFObjectFile<target_endianness, is64Bits>
- ::isSymbolInternal(DataRefImpl Symb) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSymbolInternal(DataRefImpl Symb,
+ bool &Result) const {
validateSymbol(Symb);
const Elf_Sym *symb = getSymbol(Symb);
if ( symb->getType() == ELF::STT_FILE
|| symb->getType() == ELF::STT_SECTION)
- return true;
- return false;
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-SectionRef ELFObjectFile<target_endianness, is64Bits>
- ::getSectionNext(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionNext(DataRefImpl Sec, SectionRef &Result) const {
const uint8_t *sec = reinterpret_cast<const uint8_t *>(Sec.p);
sec += Header->e_shentsize;
Sec.p = reinterpret_cast<intptr_t>(sec);
- return SectionRef(Sec, this);
+ Result = SectionRef(Sec, this);
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
- ::getSectionName(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionName(DataRefImpl Sec,
+ StringRef &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- return StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+ Result = StringRef(getString(dot_shstrtab_sec, sec->sh_name));
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
- ::getSectionAddress(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionAddress(DataRefImpl Sec,
+ uint64_t &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- return sec->sh_addr;
+ Result = sec->sh_addr;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-uint64_t ELFObjectFile<target_endianness, is64Bits>
- ::getSectionSize(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionSize(DataRefImpl Sec,
+ uint64_t &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- return sec->sh_size;
+ Result = sec->sh_size;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-StringRef ELFObjectFile<target_endianness, is64Bits>
- ::getSectionContents(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::getSectionContents(DataRefImpl Sec,
+ StringRef &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
- const char *start = (char*)base + sec->sh_offset;
- return StringRef(start, sec->sh_size);
+ const char *start = (const char*)base() + sec->sh_offset;
+ Result = StringRef(start, sec->sh_size);
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-bool ELFObjectFile<target_endianness, is64Bits>
- ::isSectionText(DataRefImpl Sec) const {
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::isSectionText(DataRefImpl Sec,
+ bool &Result) const {
const Elf_Shdr *sec = reinterpret_cast<const Elf_Shdr *>(Sec.p);
if (sec->sh_flags & ELF::SHF_EXECINSTR)
- return true;
- return false;
+ Result = true;
+ else
+ Result = false;
+ return object_error::success;
+}
+
+template<support::endianness target_endianness, bool is64Bits>
+error_code ELFObjectFile<target_endianness, is64Bits>
+ ::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ // FIXME: Unimplemented.
+ Result = false;
+ return object_error::success;
}
template<support::endianness target_endianness, bool is64Bits>
-ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
- : ObjectFile(Object)
+ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object
+ , error_code &ec)
+ : ObjectFile(Binary::isELF, Object, ec)
, SectionHeaderTable(0)
, dot_shstrtab_sec(0)
, dot_strtab_sec(0) {
- Header = reinterpret_cast<const Elf_Ehdr *>(base);
+ Header = reinterpret_cast<const Elf_Ehdr *>(base());
if (Header->e_shoff == 0)
return;
SectionHeaderTable =
- reinterpret_cast<const Elf_Shdr *>(base + Header->e_shoff);
+ reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
uint32_t SectionTableSize = Header->e_shnum * Header->e_shentsize;
if (!( (const uint8_t *)SectionHeaderTable + SectionTableSize
- <= base + MapFile->getBufferSize()))
+ <= base() + Data->getBufferSize()))
// FIXME: Proper error handling.
report_fatal_error("Section table goes past end of file!");
@@ -491,7 +542,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
dot_shstrtab_sec = getSection(Header->e_shstrndx);
if (dot_shstrtab_sec) {
// Verify that the last byte in the string table is a null.
- if (((const char*)base + dot_shstrtab_sec->sh_offset)
+ if (((const char*)base() + dot_shstrtab_sec->sh_offset)
[dot_shstrtab_sec->sh_size - 1] != 0)
// FIXME: Proper error handling.
report_fatal_error("String table must end with a null terminator!");
@@ -509,7 +560,7 @@ ELFObjectFile<target_endianness, is64Bits>::ELFObjectFile(MemoryBuffer *Object)
// FIXME: Proper error handling.
report_fatal_error("Already found section named .strtab!");
dot_strtab_sec = sh;
- const char *dot_strtab = (const char*)base + sh->sh_offset;
+ const char *dot_strtab = (const char*)base() + sh->sh_offset;
if (dot_strtab[sh->sh_size - 1] != 0)
// FIXME: Proper error handling.
report_fatal_error("String table must end with a null terminator!");
@@ -548,7 +599,7 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
::begin_sections() const {
DataRefImpl ret;
memset(&ret, 0, sizeof(DataRefImpl));
- ret.p = reinterpret_cast<intptr_t>(base + Header->e_shoff);
+ ret.p = reinterpret_cast<intptr_t>(base() + Header->e_shoff);
return section_iterator(SectionRef(ret, this));
}
@@ -557,7 +608,7 @@ ObjectFile::section_iterator ELFObjectFile<target_endianness, is64Bits>
::end_sections() const {
DataRefImpl ret;
memset(&ret, 0, sizeof(DataRefImpl));
- ret.p = reinterpret_cast<intptr_t>(base
+ ret.p = reinterpret_cast<intptr_t>(base()
+ Header->e_shoff
+ (Header->e_shentsize * Header->e_shnum));
return section_iterator(SectionRef(ret, this));
@@ -613,7 +664,7 @@ const typename ELFObjectFile<target_endianness, is64Bits>::Elf_Sym *
ELFObjectFile<target_endianness, is64Bits>::getSymbol(DataRefImpl Symb) const {
const Elf_Shdr *sec = SymbolTableSections[Symb.d.b];
return reinterpret_cast<const Elf_Sym *>(
- base
+ base()
+ sec->sh_offset
+ (Symb.d.a * sec->sh_entsize));
}
@@ -656,8 +707,8 @@ const char *ELFObjectFile<target_endianness, is64Bits>
assert(section && section->sh_type == ELF::SHT_STRTAB && "Invalid section!");
if (offset >= section->sh_size)
// FIXME: Proper error handling.
- report_fatal_error("Sybol name offset outside of string table!");
- return (const char *)base + section->sh_offset + offset;
+ report_fatal_error("Symbol name offset outside of string table!");
+ return (const char *)base() + section->sh_offset + offset;
}
// EI_CLASS, EI_DATA.
@@ -673,14 +724,15 @@ namespace llvm {
ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
std::pair<unsigned char, unsigned char> Ident = getElfArchType(Object);
+ error_code ec;
if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
- return new ELFObjectFile<support::little, false>(Object);
+ return new ELFObjectFile<support::little, false>(Object, ec);
else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB)
- return new ELFObjectFile<support::big, false>(Object);
+ return new ELFObjectFile<support::big, false>(Object, ec);
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB)
- return new ELFObjectFile<support::little, true>(Object);
+ return new ELFObjectFile<support::little, true>(Object, ec);
else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB)
- return new ELFObjectFile<support::big, true>(Object);
+ return new ELFObjectFile<support::big, true>(Object, ec);
// FIXME: Proper error handling.
report_fatal_error("Not an ELF object file!");
}
diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp
new file mode 100644
index 0000000..2594625
--- /dev/null
+++ b/lib/Object/Error.cpp
@@ -0,0 +1,57 @@
+//===- Error.cpp - system_error extensions for Object -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This defines a new error_category for the Object library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+using namespace object;
+
+namespace {
+class _object_error_category : public _do_message {
+public:
+ virtual const char* name() const;
+ virtual std::string message(int ev) const;
+ virtual error_condition default_error_condition(int ev) const;
+};
+}
+
+const char *_object_error_category::name() const {
+ return "llvm.object";
+}
+
+std::string _object_error_category::message(int ev) const {
+ switch (ev) {
+ case object_error::success: return "Success";
+ case object_error::invalid_file_type:
+ return "The file was not recognized as a valid object file";
+ case object_error::parse_failed:
+ return "Invalid data was encountered while parsing the file";
+ case object_error::unexpected_eof:
+ return "The end of the file was unexpectedly encountered";
+ default:
+ llvm_unreachable("An enumerator of object_error does not have a message "
+ "defined.");
+ }
+}
+
+error_condition _object_error_category::default_error_condition(int ev) const {
+ if (ev == object_error::success)
+ return errc::success;
+ return errc::invalid_argument;
+}
+
+const error_category &object::object_category() {
+ static _object_error_category o;
+ return o;
+}
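A sketch of how callers consume the new category (hypothetical helper; it relies on the object_error-to-error_code conversion declared in llvm/Object/Error.h, so the enum comparison below is an assumption about that header):

    #include "llvm/Object/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace object;

    void check(error_code ec) {
      if (ec == object_error::success)
        return;
      // message() dispatches to _object_error_category::message() above.
      errs() << "object error: " << ec.message() << "\n";
    }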
diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp
index 877cbfb..26a6e13 100644
--- a/lib/Object/MachOObjectFile.cpp
+++ b/lib/Object/MachOObjectFile.cpp
@@ -32,8 +32,8 @@ typedef MachOObject::LoadCommandInfo LoadCommandInfo;
class MachOObjectFile : public ObjectFile {
public:
- MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO)
- : ObjectFile(Object),
+ MachOObjectFile(MemoryBuffer *Object, MachOObject *MOO, error_code &ec)
+ : ObjectFile(Binary::isMachO, Object, ec),
MachOObj(MOO),
RegisteredStringTable(std::numeric_limits<uint32_t>::max()) {}
@@ -47,19 +47,21 @@ public:
virtual unsigned getArch() const;
protected:
- virtual SymbolRef getSymbolNext(DataRefImpl Symb) const;
- virtual StringRef getSymbolName(DataRefImpl Symb) const;
- virtual uint64_t getSymbolAddress(DataRefImpl Symb) const;
- virtual uint64_t getSymbolSize(DataRefImpl Symb) const;
- virtual char getSymbolNMTypeChar(DataRefImpl Symb) const;
- virtual bool isSymbolInternal(DataRefImpl Symb) const;
-
- virtual SectionRef getSectionNext(DataRefImpl Sec) const;
- virtual StringRef getSectionName(DataRefImpl Sec) const;
- virtual uint64_t getSectionAddress(DataRefImpl Sec) const;
- virtual uint64_t getSectionSize(DataRefImpl Sec) const;
- virtual StringRef getSectionContents(DataRefImpl Sec) const;
- virtual bool isSectionText(DataRefImpl Sec) const;
+ virtual error_code getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const;
+ virtual error_code getSymbolName(DataRefImpl Symb, StringRef &Res) const;
+ virtual error_code getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolSize(DataRefImpl Symb, uint64_t &Res) const;
+ virtual error_code getSymbolNMTypeChar(DataRefImpl Symb, char &Res) const;
+ virtual error_code isSymbolInternal(DataRefImpl Symb, bool &Res) const;
+
+ virtual error_code getSectionNext(DataRefImpl Sec, SectionRef &Res) const;
+ virtual error_code getSectionName(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code getSectionAddress(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionSize(DataRefImpl Sec, uint64_t &Res) const;
+ virtual error_code getSectionContents(DataRefImpl Sec, StringRef &Res) const;
+ virtual error_code isSectionText(DataRefImpl Sec, bool &Res) const;
+ virtual error_code sectionContainsSymbol(DataRefImpl DRI, DataRefImpl S,
+ bool &Result) const;
private:
MachOObject *MachOObj;
@@ -68,16 +70,21 @@ private:
void moveToNextSection(DataRefImpl &DRI) const;
void getSymbolTableEntry(DataRefImpl DRI,
InMemoryStruct<macho::SymbolTableEntry> &Res) const;
+ void getSymbol64TableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const;
void moveToNextSymbol(DataRefImpl &DRI) const;
void getSection(DataRefImpl DRI, InMemoryStruct<macho::Section> &Res) const;
+ void getSection64(DataRefImpl DRI,
+ InMemoryStruct<macho::Section64> &Res) const;
};
ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) {
+ error_code ec;
std::string Err;
MachOObject *MachOObj = MachOObject::LoadFromBuffer(Buffer, &Err);
if (!MachOObj)
return NULL;
- return new MachOObjectFile(Buffer, MachOObj);
+ return new MachOObjectFile(Buffer, MachOObj, ec);
}
/*===-- Symbols -----------------------------------------------------------===*/
@@ -113,35 +120,81 @@ void MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI,
Res);
}
+void MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI,
+ InMemoryStruct<macho::Symbol64TableEntry> &Res) const {
+ InMemoryStruct<macho::SymtabLoadCommand> SymtabLoadCmd;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSymtabLoadCommand(LCI, SymtabLoadCmd);
+
+ if (RegisteredStringTable != DRI.d.a) {
+ MachOObj->RegisterStringTable(*SymtabLoadCmd);
+ RegisteredStringTable = DRI.d.a;
+ }
+
+ MachOObj->ReadSymbol64TableEntry(SymtabLoadCmd->SymbolTableOffset, DRI.d.b,
+ Res);
+}
-SymbolRef MachOObjectFile::getSymbolNext(DataRefImpl DRI) const {
+
+error_code MachOObjectFile::getSymbolNext(DataRefImpl DRI,
+ SymbolRef &Result) const {
DRI.d.b++;
moveToNextSymbol(DRI);
- return SymbolRef(DRI, this);
+ Result = SymbolRef(DRI, this);
+ return object_error::success;
}
-StringRef MachOObjectFile::getSymbolName(DataRefImpl DRI) const {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- return MachOObj->getStringAtIndex(Entry->StringIndex);
+error_code MachOObjectFile::getSymbolName(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = MachOObj->getStringAtIndex(Entry->StringIndex);
+ }
+ return object_error::success;
}
-uint64_t MachOObjectFile::getSymbolAddress(DataRefImpl DRI) const {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- return Entry->Value;
+error_code MachOObjectFile::getSymbolAddress(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Value;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Value;
+ }
+ return object_error::success;
}
-uint64_t MachOObjectFile::getSymbolSize(DataRefImpl DRI) const {
- return UnknownAddressOrSize;
+error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI,
+ uint64_t &Result) const {
+ Result = UnknownAddressOrSize;
+ return object_error::success;
}
-char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
+error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI,
+ char &Result) const {
+ uint8_t Type, Flags;
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Type = Entry->Type;
+ Flags = Entry->Flags;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Type = Entry->Type;
+ Flags = Entry->Flags;
+ }
char Char;
- switch (Entry->Type & macho::STF_TypeMask) {
+ switch (Type & macho::STF_TypeMask) {
case macho::STT_Undefined:
Char = 'u';
break;
@@ -154,15 +207,24 @@ char MachOObjectFile::getSymbolNMTypeChar(DataRefImpl DRI) const {
break;
}
- if (Entry->Flags & (macho::STF_External | macho::STF_PrivateExtern))
+ if (Flags & (macho::STF_External | macho::STF_PrivateExtern))
Char = toupper(Char);
- return Char;
+ Result = Char;
+ return object_error::success;
}
-bool MachOObjectFile::isSymbolInternal(DataRefImpl DRI) const {
- InMemoryStruct<macho::SymbolTableEntry> Entry;
- getSymbolTableEntry(DRI, Entry);
- return Entry->Flags & macho::STF_StabsEntryMask;
+error_code MachOObjectFile::isSymbolInternal(DataRefImpl DRI,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(DRI, Entry);
+ Result = Entry->Flags & macho::STF_StabsEntryMask;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(DRI, Entry);
+ Result = Entry->Flags & macho::STF_StabsEntryMask;
+ }
+ return object_error::success;
}
ObjectFile::symbol_iterator MachOObjectFile::begin_symbols() const {
@@ -204,10 +266,12 @@ void MachOObjectFile::moveToNextSection(DataRefImpl &DRI) const {
}
}
-SectionRef MachOObjectFile::getSectionNext(DataRefImpl DRI) const {
+error_code MachOObjectFile::getSectionNext(DataRefImpl DRI,
+ SectionRef &Result) const {
DRI.d.b++;
moveToNextSection(DRI);
- return SectionRef(DRI, this);
+ Result = SectionRef(DRI, this);
+ return object_error::success;
}
void
@@ -219,43 +283,121 @@ MachOObjectFile::getSection(DataRefImpl DRI,
MachOObj->ReadSection(LCI, DRI.d.b, Res);
}
-StringRef MachOObjectFile::getSectionName(DataRefImpl DRI) const {
- InMemoryStruct<macho::SegmentLoadCommand> SLC;
+void
+MachOObjectFile::getSection64(DataRefImpl DRI,
+ InMemoryStruct<macho::Section64> &Res) const {
+ InMemoryStruct<macho::Segment64LoadCommand> SLC;
LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSegmentLoadCommand(LCI, SLC);
- InMemoryStruct<macho::Section> Sect;
- MachOObj->ReadSection(LCI, DRI.d.b, Sect);
-
- static char Result[34];
- strcpy(Result, SLC->Name);
- strcat(Result, ",");
- strcat(Result, Sect->Name);
- return StringRef(Result);
+ MachOObj->ReadSegment64LoadCommand(LCI, SLC);
+ MachOObj->ReadSection64(LCI, DRI.d.b, Res);
}
-uint64_t MachOObjectFile::getSectionAddress(DataRefImpl DRI) const {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- return Sect->Address;
+static bool is64BitLoadCommand(const MachOObject *MachOObj, DataRefImpl DRI) {
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ if (LCI.Command.Type == macho::LCT_Segment64)
+ return true;
+ assert(LCI.Command.Type == macho::LCT_Segment && "Unexpected Type.");
+ return false;
}
-uint64_t MachOObjectFile::getSectionSize(DataRefImpl DRI) const {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- return Sect->Size;
+error_code MachOObjectFile::getSectionName(DataRefImpl DRI,
+ StringRef &Result) const {
+ // FIXME: thread safety.
+ static char result[34];
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Segment64LoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegment64LoadCommand(LCI, SLC);
+ InMemoryStruct<macho::Section64> Sect;
+ MachOObj->ReadSection64(LCI, DRI.d.b, Sect);
+
+ strcpy(result, Sect->SegmentName);
+ strcat(result, ",");
+ strcat(result, Sect->Name);
+ } else {
+ InMemoryStruct<macho::SegmentLoadCommand> SLC;
+ LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
+ MachOObj->ReadSegmentLoadCommand(LCI, SLC);
+ InMemoryStruct<macho::Section> Sect;
+ MachOObj->ReadSection(LCI, DRI.d.b, Sect);
+
+ strcpy(result, Sect->SegmentName);
+ strcat(result, ",");
+ strcat(result, Sect->Name);
+ }
+ Result = StringRef(result);
+ return object_error::success;
}
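The static 34-byte buffer keeps the previous behavior but, as the FIXME says, is not reentrant, and strcpy/strcat assume both fixed-width name fields are null-terminated, which the Mach-O format does not guarantee for full 16-character names. A reentrant sketch of the same "segment,section" formatting, returning an owned string instead of a StringRef into a static:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    #include <string>

    static std::string formatSectionName(StringRef Segment, StringRef Section) {
      // Single-expression Twine use is safe; no shared mutable state.
      return (Segment + "," + Section).str();
    }

Callers could build the StringRefs with an explicit length to cope with unterminated 16-byte fields.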
-StringRef MachOObjectFile::getSectionContents(DataRefImpl DRI) const {
- InMemoryStruct<macho::Section> Sect;
- getSection(DRI, Sect);
- return MachOObj->getData(Sect->Offset, Sect->Size);
+error_code MachOObjectFile::getSectionAddress(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Address;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Address;
+ }
+ return object_error::success;
}
-bool MachOObjectFile::isSectionText(DataRefImpl DRI) const {
- InMemoryStruct<macho::SegmentLoadCommand> SLC;
- LoadCommandInfo LCI = MachOObj->getLoadCommandInfo(DRI.d.a);
- MachOObj->ReadSegmentLoadCommand(LCI, SLC);
- return !strcmp(SLC->Name, "__TEXT");
+error_code MachOObjectFile::getSectionSize(DataRefImpl DRI,
+ uint64_t &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = Sect->Size;
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = Sect->Size;
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::getSectionContents(DataRefImpl DRI,
+ StringRef &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = MachOObj->getData(Sect->Offset, Sect->Size);
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = MachOObj->getData(Sect->Offset, Sect->Size);
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::isSectionText(DataRefImpl DRI,
+ bool &Result) const {
+ if (is64BitLoadCommand(MachOObj, DRI)) {
+ InMemoryStruct<macho::Section64> Sect;
+ getSection64(DRI, Sect);
+ Result = !strcmp(Sect->Name, "__text");
+ } else {
+ InMemoryStruct<macho::Section> Sect;
+ getSection(DRI, Sect);
+ Result = !strcmp(Sect->Name, "__text");
+ }
+ return object_error::success;
+}
+
+error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec,
+ DataRefImpl Symb,
+ bool &Result) const {
+ if (MachOObj->is64Bit()) {
+ InMemoryStruct<macho::Symbol64TableEntry> Entry;
+ getSymbol64TableEntry(Symb, Entry);
+ Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+ } else {
+ InMemoryStruct<macho::SymbolTableEntry> Entry;
+ getSymbolTableEntry(Symb, Entry);
+ Result = Entry->SectionIndex == 1 + Sec.d.a + Sec.d.b;
+ }
+ return object_error::success;
}
ObjectFile::section_iterator MachOObjectFile::begin_sections() const {
diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp
index 603b23c..9a373ad 100644
--- a/lib/Object/Object.cpp
+++ b/lib/Object/Object.cpp
@@ -41,19 +41,28 @@ LLVMBool LLVMIsSectionIteratorAtEnd(LLVMObjectFileRef ObjectFile,
}
void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) {
- // We can't use unwrap() here because the argument to ++ must be an lvalue.
- ++*reinterpret_cast<ObjectFile::section_iterator*>(SI);
+ error_code ec;
+ unwrap(SI)->increment(ec);
+ if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message());
}
const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) {
- return (*unwrap(SI))->getName().data();
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getName(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
}
uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) {
- return (*unwrap(SI))->getSize();
+ uint64_t ret;
+ if (error_code ec = (*unwrap(SI))->getSize(ret))
+ report_fatal_error(ec.message());
+ return ret;
}
const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) {
- return (*unwrap(SI))->getContents().data();
+ StringRef ret;
+ if (error_code ec = (*unwrap(SI))->getContents(ret))
+ report_fatal_error(ec.message());
+ return ret.data();
}
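A hypothetical caller of the C bindings above (written as C++ to keep a single language in these sketches); after this change the wrappers call report_fatal_error when the underlying error_code-returning methods fail, so the loop itself stays unchanged for clients:

    #include "llvm-c/Object.h"
    #include <cstdio>

    void printSections(LLVMObjectFileRef OF) {
      LLVMSectionIteratorRef SI = LLVMGetSections(OF);
      while (!LLVMIsSectionIteratorAtEnd(OF, SI)) {
        std::printf("%s: %llu bytes\n", LLVMGetSectionName(SI),
                    (unsigned long long)LLVMGetSectionSize(SI));
        LLVMMoveToNextSection(SI);
      }
      LLVMDisposeSectionIterator(SI);
    }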
-
diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp
index 47b6311..a7798df 100644
--- a/lib/Object/ObjectFile.cpp
+++ b/lib/Object/ObjectFile.cpp
@@ -21,18 +21,8 @@
using namespace llvm;
using namespace object;
-ObjectFile::ObjectFile(MemoryBuffer *Object)
- : MapFile(Object) {
- assert(MapFile && "Must be a valid MemoryBuffer!");
- base = reinterpret_cast<const uint8_t *>(MapFile->getBufferStart());
-}
-
-ObjectFile::~ObjectFile() {
- delete MapFile;
-}
-
-StringRef ObjectFile::getFilename() const {
- return MapFile->getBufferIdentifier();
+ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *source, error_code &ec)
+ : Binary(Type, source) {
}
ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) {
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index c3169ac..c64da6e 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/ErrorHandling.h"
@@ -2084,6 +2085,23 @@ APFloat::convertToInteger(integerPart *parts, unsigned int width,
return fs;
}
+/* Same as convertToInteger(integerPart*, ...), except the result is returned in
+   an APSInt, whose initial bit-width and signedness are used to determine the
+ precision of the conversion.
+ */
+APFloat::opStatus
+APFloat::convertToInteger(APSInt &result,
+ roundingMode rounding_mode, bool *isExact) const
+{
+ unsigned bitWidth = result.getBitWidth();
+ SmallVector<uint64_t, 4> parts(result.getNumWords());
+ opStatus status = convertToInteger(
+ parts.data(), bitWidth, result.isSigned(), rounding_mode, isExact);
+  // Keeps the original signedness.
+ result = APInt(bitWidth, (unsigned)parts.size(), parts.data());
+ return status;
+}
+
/* Convert an unsigned integer SRC to a floating point number,
rounding according to ROUNDING_MODE. The sign of the floating
point number is not modified. */
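The new convertToInteger overload above lets the destination APSInt carry the width and signedness instead of threading them through as separate arguments. A minimal sketch with illustrative values:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APSInt.h"
    using namespace llvm;

    void example() {
      APFloat F(2.75);
      APSInt Result(/*BitWidth=*/32, /*isUnsigned=*/false); // signed i32 target
      bool isExact;
      APFloat::opStatus S =
          F.convertToInteger(Result, APFloat::rmNearestTiesToEven, &isExact);
      // S == APFloat::opInexact, isExact == false, Result == 3.
    }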
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 74d61c1..76265d4 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2164,12 +2164,33 @@ void APInt::fromString(unsigned numbits, StringRef str, uint8_t radix) {
}
void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
- bool Signed) const {
+ bool Signed, bool formatAsCLiteral) const {
assert((Radix == 10 || Radix == 8 || Radix == 16 || Radix == 2) &&
"Radix should be 2, 8, 10, or 16!");
+ const char *Prefix = "";
+ if (formatAsCLiteral) {
+ switch (Radix) {
+ case 2:
+ // Binary literals are a non-standard extension added in gcc 4.3:
+ // http://gcc.gnu.org/onlinedocs/gcc-4.3.0/gcc/Binary-constants.html
+ Prefix = "0b";
+ break;
+ case 8:
+ Prefix = "0";
+ break;
+ case 16:
+ Prefix = "0x";
+ break;
+ }
+ }
+
// First, check for a zero value and just short circuit the logic below.
if (*this == 0) {
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+    }
Str.push_back('0');
return;
}
@@ -2193,6 +2214,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
}
}
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+  }
+
while (N) {
*--BufPtr = Digits[N % Radix];
N /= Radix;
@@ -2212,6 +2238,11 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
Str.push_back('-');
}
+ while (*Prefix) {
+ Str.push_back(*Prefix);
+ ++Prefix;
+  }
+
// We insert the digits backward, then reverse them to get the right order.
unsigned StartDig = Str.size();
@@ -2251,7 +2282,7 @@ void APInt::toString(SmallVectorImpl<char> &Str, unsigned Radix,
/// to the methods above.
std::string APInt::toString(unsigned Radix = 10, bool Signed = true) const {
SmallString<40> S;
- toString(S, Radix, Signed);
+ toString(S, Radix, Signed, /* formatAsCLiteral = */false);
return S.str();
}
@@ -2266,7 +2297,7 @@ void APInt::dump() const {
void APInt::print(raw_ostream &OS, bool isSigned) const {
SmallString<40> S;
- this->toString(S, 10, isSigned);
+ this->toString(S, 10, isSigned, /* formatAsCLiteral = */false);
OS << S.str();
}
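With formatAsCLiteral set, the radix prefix is emitted ahead of the digits, including on the all-zero fast path above. A short sketch of the new flag:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/SmallString.h"
    using namespace llvm;

    void example() {
      APInt V(16, 255);
      SmallString<16> S;
      V.toString(S, /*Radix=*/16, /*Signed=*/false, /*formatAsCLiteral=*/true);
      // S == "0xFF"; radix 8 would give "0377", radix 2 "0b11111111" (the gcc
      // extension noted above), and radix 10 stays unprefixed.
    }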
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 7f1c0d3..2914337 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -911,8 +911,8 @@ size_t alias::getOptionWidth() const {
// Print out the option for the alias.
void alias::printOptionInfo(size_t GlobalWidth) const {
size_t L = std::strlen(ArgStr);
- errs() << " -" << ArgStr;
- errs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
+ outs() << " -" << ArgStr;
+ outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n";
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 493f708..81382d0 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -529,8 +529,8 @@ ConstantRange::sub(const ConstantRange &Other) const {
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
APInt Spread_X = getSetSize(), Spread_Y = Other.getSetSize();
- APInt NewLower = getLower() - Other.getLower();
- APInt NewUpper = getUpper() - Other.getUpper() + 1;
+ APInt NewLower = getLower() - Other.getUpper() + 1;
+ APInt NewUpper = getUpper() - Other.getLower();
if (NewLower == NewUpper)
return ConstantRange(getBitWidth(), /*isFullSet=*/true);
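Worked check of the corrected bounds: for X = [5, 10) and Y = [1, 3), x - y ranges from 5 - 2 = 3 up to 9 - 1 = 8, i.e. the half-open range [3, 9). The new code computes NewLower = 5 - 3 + 1 = 3 and NewUpper = 10 - 1 = 9, while the old code produced [5 - 1, 10 - 3 + 1) = [4, 8), wrongly excluding both 3 and 8.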
diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
index 4299aa4..c525a12 100644
--- a/lib/Support/Host.cpp
+++ b/lib/Support/Host.cpp
@@ -214,7 +214,12 @@ std::string sys::getHostCPUName() {
// As found in a Summer 2010 model iMac.
case 37: // Intel Core i7, laptop version.
return "corei7";
- case 42: // SandyBridge
+
+ // SandyBridge:
+ case 42: // Intel Core i7 processor. All processors are manufactured
+ // using the 32 nm process.
+ case 44: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 32 nm process.
case 45:
return "corei7-avx";
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index dbdb303..7e094ee 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -113,6 +113,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case Win32: return "win32";
case Haiku: return "haiku";
case Minix: return "minix";
+ case RTEMS: return "rtems";
}
return "<invalid>";
@@ -281,7 +282,8 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) {
return cellspu;
else if (ArchName == "msp430")
return msp430;
- else if (ArchName == "mips" || ArchName == "mipsallegrex")
+ else if (ArchName == "mips" || ArchName == "mipseb" ||
+ ArchName == "mipsallegrex")
return mips;
else if (ArchName == "mipsel" || ArchName == "mipsallegrexel" ||
ArchName == "psp")
@@ -350,6 +352,8 @@ Triple::OSType Triple::ParseOS(StringRef OSName) {
return Haiku;
else if (OSName.startswith("minix"))
return Minix;
+ else if (OSName.startswith("rtems"))
+ return RTEMS;
else
return UnknownOS;
}
diff --git a/lib/Support/Twine.cpp b/lib/Support/Twine.cpp
index 75cea29..d62123c 100644
--- a/lib/Support/Twine.cpp
+++ b/lib/Support/Twine.cpp
@@ -14,6 +14,11 @@
using namespace llvm;
std::string Twine::str() const {
+ // If we're storing only a std::string, just return it.
+ if (LHSKind == StdStringKind && RHSKind == EmptyKind)
+ return *static_cast<const std::string*>(LHS);
+
+ // Otherwise, flatten and copy the contents first.
SmallString<256> Vec;
return toStringRef(Vec).str();
}
@@ -37,9 +42,9 @@ StringRef Twine::toNullTerminatedStringRef(SmallVectorImpl<char> &Out) const {
// Already null terminated, yay!
return StringRef(static_cast<const char*>(LHS));
case StdStringKind: {
- const std::string *str = static_cast<const std::string*>(LHS);
- return StringRef(str->c_str(), str->size());
- }
+ const std::string *str = static_cast<const std::string*>(LHS);
+ return StringRef(str->c_str(), str->size());
+ }
default:
break;
}
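The str() fast path matters because a Twine built from a single std::string previously still flattened through a SmallString<256> temporary. A sketch of the case it short-circuits:

    #include "llvm/ADT/Twine.h"
    #include <string>
    using namespace llvm;

    std::string roundTrip(const std::string &Name) {
      Twine T(Name);   // LHSKind == StdStringKind, RHSKind == EmptyKind
      return T.str();  // now copies *Name directly, no temporary buffer
    }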
diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc
index 430cf2e..f295b92 100644
--- a/lib/Support/Unix/Path.inc
+++ b/lib/Support/Unix/Path.inc
@@ -842,6 +842,9 @@ Path::makeUnique(bool reuse_current, std::string* ErrMsg) {
// Save the name
path = FNBuffer;
+
+ // By default mkstemp sets the mode to 0600, so update mode bits now.
+ AddPermissionBits (*this, 0666);
#elif defined(HAVE_MKTEMP)
// If we don't have mkstemp, use the old and obsolete mktemp function.
if (mktemp(FNBuffer) == 0)
diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc
index 4227844..fc5f580 100644
--- a/lib/Support/Windows/DynamicLibrary.inc
+++ b/lib/Support/Windows/DynamicLibrary.inc
@@ -115,7 +115,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
E = OpenedHandles.end(); I != E; ++I) {
FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
if (ptr) {
- return (void *) ptr;
+ return (void *)(intptr_t)ptr;
}
}
diff --git a/lib/Support/Windows/explicit_symbols.inc b/lib/Support/Windows/explicit_symbols.inc
index 84862d6..379645d 100644
--- a/lib/Support/Windows/explicit_symbols.inc
+++ b/lib/Support/Windows/explicit_symbols.inc
@@ -2,7 +2,7 @@
#ifdef HAVE__ALLOCA
EXPLICIT_SYMBOL(_alloca)
- EXPLICIT_SYMBOL2(alloca, _alloca);
+ EXPLICIT_SYMBOL2(alloca, _alloca)
#endif
#ifdef HAVE___ALLOCA
EXPLICIT_SYMBOL(__alloca)
@@ -62,5 +62,5 @@
/* msvcrt */
#if defined(_MSC_VER)
- EXPLICIT_SYMBOL2(alloca, _alloca_probe);
+ EXPLICIT_SYMBOL2(alloca, _alloca_probe)
#endif
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 4679f74..08dc340 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -16,24 +16,29 @@
#define TARGET_ARM_H
#include "ARMBaseInfo.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
namespace llvm {
+class ARMAsmPrinter;
class ARMBaseTargetMachine;
class FunctionPass;
class JITCodeEmitter;
-class formatted_raw_ostream;
-class MCCodeEmitter;
-class TargetAsmBackend;
class MachineInstr;
-class ARMAsmPrinter;
+class MCCodeEmitter;
class MCInst;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCSubtargetInfo;
+class TargetAsmBackend;
+class formatted_raw_ostream;
-MCCodeEmitter *createARMMCCodeEmitter(const Target &,
- TargetMachine &TM,
+MCCodeEmitter *createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
MCContext &Ctx);
TargetAsmBackend *createARMAsmBackend(const Target &, const std::string &);
@@ -53,11 +58,15 @@ FunctionPass *createMLxExpansionPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
-extern Target TheARMTarget, TheThumbTarget;
-
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
+/// createARMMachObjectWriter - Construct an ARM Mach-O object writer.
+MCObjectWriter *createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype);
+
} // end namespace llvm;
#endif
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 6af5f85..cf333cc 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -16,18 +16,26 @@
include "llvm/Target/Target.td"
+//===----------------------------------------------------------------------===//
+// ARM Subtarget state.
+//
+
+def ModeThumb : SubtargetFeature<"thumb-mode", "InThumbMode", "true",
+ "Thumb mode">;
//===----------------------------------------------------------------------===//
// ARM Subtarget features.
//
-def FeatureVFP2 : SubtargetFeature<"vfp2", "ARMFPUType", "VFPv2",
+def FeatureVFP2 : SubtargetFeature<"vfp2", "HasVFPv2", "true",
"Enable VFP2 instructions">;
-def FeatureVFP3 : SubtargetFeature<"vfp3", "ARMFPUType", "VFPv3",
- "Enable VFP3 instructions">;
-def FeatureNEON : SubtargetFeature<"neon", "ARMFPUType", "NEON",
- "Enable NEON instructions">;
-def FeatureThumb2 : SubtargetFeature<"thumb2", "ThumbMode", "Thumb2",
+def FeatureVFP3 : SubtargetFeature<"vfp3", "HasVFPv3", "true",
+ "Enable VFP3 instructions",
+ [FeatureVFP2]>;
+def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
+ "Enable NEON instructions",
+ [FeatureVFP3]>;
+def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true",
"Enable Thumb2 instructions">;
def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true",
"Does not support ARM mode execution">;
@@ -75,32 +83,32 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+/// Some M architectures don't have the DSP extension (v7E-M vs. v7M)
+def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true",
+ "Supports v7 DSP instructions in Thumb2.">;
+
// Multiprocessing extension.
def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true",
"Supports Multiprocessing extension">;
-// ARM architectures.
-def ArchV4T : SubtargetFeature<"v4t", "ARMArchVersion", "V4T",
- "ARM v4T">;
-def ArchV5T : SubtargetFeature<"v5t", "ARMArchVersion", "V5T",
- "ARM v5T">;
-def ArchV5TE : SubtargetFeature<"v5te", "ARMArchVersion", "V5TE",
- "ARM v5TE, v5TEj, v5TExp">;
-def ArchV6 : SubtargetFeature<"v6", "ARMArchVersion", "V6",
- "ARM v6">;
-def ArchV6M : SubtargetFeature<"v6m", "ARMArchVersion", "V6M",
- "ARM v6m",
- [FeatureNoARM, FeatureDB]>;
-def ArchV6T2 : SubtargetFeature<"v6t2", "ARMArchVersion", "V6T2",
- "ARM v6t2",
- [FeatureThumb2]>;
-def ArchV7A : SubtargetFeature<"v7a", "ARMArchVersion", "V7A",
- "ARM v7A",
- [FeatureThumb2, FeatureNEON, FeatureDB]>;
-def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
- "ARM v7M",
- [FeatureThumb2, FeatureNoARM, FeatureDB,
- FeatureHWDiv]>;
+// ARM ISAs.
+def HasV4TOps : SubtargetFeature<"v4t", "HasV4TOps", "true",
+ "Support ARM v4T instructions">;
+def HasV5TOps : SubtargetFeature<"v5t", "HasV5TOps", "true",
+ "Support ARM v5T instructions",
+ [HasV4TOps]>;
+def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true",
+ "Support ARM v5TE, v5TEj, and v5TExp instructions",
+ [HasV5TOps]>;
+def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true",
+ "Support ARM v6 instructions",
+ [HasV5TEOps]>;
+def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true",
+ "Support ARM v6t2 instructions",
+ [HasV6Ops, FeatureThumb2, FeatureDSPThumb2]>;
+def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
+ "Support ARM v7 instructions",
+ [HasV6T2Ops]>;
//===----------------------------------------------------------------------===//
// ARM Processors supported.
@@ -109,8 +117,6 @@ def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",
include "ARMSchedule.td"
// ARM processor families.
-def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others",
- "One of the other ARM processor families">;
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
[FeatureSlowFPBrcc, FeatureNEONForFP,
@@ -135,64 +141,76 @@ def : ProcNoItin<"strongarm1100", []>;
def : ProcNoItin<"strongarm1110", []>;
// V4T Processors.
-def : ProcNoItin<"arm7tdmi", [ArchV4T]>;
-def : ProcNoItin<"arm7tdmi-s", [ArchV4T]>;
-def : ProcNoItin<"arm710t", [ArchV4T]>;
-def : ProcNoItin<"arm720t", [ArchV4T]>;
-def : ProcNoItin<"arm9", [ArchV4T]>;
-def : ProcNoItin<"arm9tdmi", [ArchV4T]>;
-def : ProcNoItin<"arm920", [ArchV4T]>;
-def : ProcNoItin<"arm920t", [ArchV4T]>;
-def : ProcNoItin<"arm922t", [ArchV4T]>;
-def : ProcNoItin<"arm940t", [ArchV4T]>;
-def : ProcNoItin<"ep9312", [ArchV4T]>;
+def : ProcNoItin<"arm7tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm7tdmi-s", [HasV4TOps]>;
+def : ProcNoItin<"arm710t", [HasV4TOps]>;
+def : ProcNoItin<"arm720t", [HasV4TOps]>;
+def : ProcNoItin<"arm9", [HasV4TOps]>;
+def : ProcNoItin<"arm9tdmi", [HasV4TOps]>;
+def : ProcNoItin<"arm920", [HasV4TOps]>;
+def : ProcNoItin<"arm920t", [HasV4TOps]>;
+def : ProcNoItin<"arm922t", [HasV4TOps]>;
+def : ProcNoItin<"arm940t", [HasV4TOps]>;
+def : ProcNoItin<"ep9312", [HasV4TOps]>;
// V5T Processors.
-def : ProcNoItin<"arm10tdmi", [ArchV5T]>;
-def : ProcNoItin<"arm1020t", [ArchV5T]>;
+def : ProcNoItin<"arm10tdmi", [HasV5TOps]>;
+def : ProcNoItin<"arm1020t", [HasV5TOps]>;
// V5TE Processors.
-def : ProcNoItin<"arm9e", [ArchV5TE]>;
-def : ProcNoItin<"arm926ej-s", [ArchV5TE]>;
-def : ProcNoItin<"arm946e-s", [ArchV5TE]>;
-def : ProcNoItin<"arm966e-s", [ArchV5TE]>;
-def : ProcNoItin<"arm968e-s", [ArchV5TE]>;
-def : ProcNoItin<"arm10e", [ArchV5TE]>;
-def : ProcNoItin<"arm1020e", [ArchV5TE]>;
-def : ProcNoItin<"arm1022e", [ArchV5TE]>;
-def : ProcNoItin<"xscale", [ArchV5TE]>;
-def : ProcNoItin<"iwmmxt", [ArchV5TE]>;
+def : ProcNoItin<"arm9e", [HasV5TEOps]>;
+def : ProcNoItin<"arm926ej-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm946e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm966e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm968e-s", [HasV5TEOps]>;
+def : ProcNoItin<"arm10e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1020e", [HasV5TEOps]>;
+def : ProcNoItin<"arm1022e", [HasV5TEOps]>;
+def : ProcNoItin<"xscale", [HasV5TEOps]>;
+def : ProcNoItin<"iwmmxt", [HasV5TEOps]>;
// V6 Processors.
-def : Processor<"arm1136j-s", ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1136jf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+def : Processor<"arm1136j-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1136jf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-def : Processor<"arm1176jz-s", ARMV6Itineraries, [ArchV6]>;
-def : Processor<"arm1176jzf-s", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+def : Processor<"arm1176jz-s", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"arm1176jzf-s", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-def : Processor<"mpcorenovfp", ARMV6Itineraries, [ArchV6]>;
-def : Processor<"mpcore", ARMV6Itineraries, [ArchV6, FeatureVFP2,
+def : Processor<"mpcorenovfp", ARMV6Itineraries, [HasV6Ops]>;
+def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
// V6M Processors.
-def : Processor<"cortex-m0", ARMV6Itineraries, [ArchV6M]>;
+def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM,
+ FeatureDB]>;
// V6T2 Processors.
-def : Processor<"arm1156t2-s", ARMV6Itineraries, [ArchV6T2]>;
-def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2,
+def : Processor<"arm1156t2-s", ARMV6Itineraries, [HasV6T2Ops]>;
+def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2,
FeatureHasSlowFPVMLx]>;
-// V7 Processors.
+// V7a Processors.
def : Processor<"cortex-a8", CortexA8Itineraries,
- [ArchV7A, ProcA8]>;
+ [ProcA8, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2]>;
def : Processor<"cortex-a9", CortexA9Itineraries,
- [ArchV7A, ProcA9]>;
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2]>;
def : Processor<"cortex-a9-mp", CortexA9Itineraries,
- [ArchV7A, ProcA9, FeatureMP]>;
+ [ProcA9, HasV7Ops, FeatureNEON, FeatureDB,
+ FeatureDSPThumb2, FeatureMP]>;
// V7M Processors.
-def : ProcNoItin<"cortex-m3", [ArchV7M]>;
-def : ProcNoItin<"cortex-m4", [ArchV7M, FeatureVFP2, FeatureVFPOnlySP]>;
+def : ProcNoItin<"cortex-m3", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv]>;
+
+// V7EM Processors.
+def : ProcNoItin<"cortex-m4", [HasV7Ops,
+ FeatureThumb2, FeatureNoARM, FeatureDB,
+ FeatureHWDiv, FeatureDSPThumb2,
+ FeatureT2XtPk, FeatureVFP2,
+ FeatureVFPOnlySP]>;
//===----------------------------------------------------------------------===//
// Register File Description
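A note on the restructuring above: the ARMArchVersion/ARMFPUType enums are replaced by independent boolean features that chain through implied-feature lists (FeatureNEON pulls in FeatureVFP3, which pulls in FeatureVFP2; HasV7Ops implies HasV6T2Ops and so on down to HasV4TOps). Each processor now names only its top-level ISA feature plus genuinely orthogonal extras and inherits the rest, which is why cortex-a8 lists just [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2].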
diff --git a/lib/Target/ARM/ARMAsmBackend.cpp b/lib/Target/ARM/ARMAsmBackend.cpp
index 618a2b5..5e438a9 100644
--- a/lib/Target/ARM/ARMAsmBackend.cpp
+++ b/lib/Target/ARM/ARMAsmBackend.cpp
@@ -28,14 +28,6 @@
using namespace llvm;
namespace {
-class ARMMachObjectWriter : public MCMachObjectTargetWriter {
-public:
- ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
- uint32_t CPUSubtype)
- : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
- /*UseAggressiveSymbolFolding=*/true) {}
-};
-
class ARMELFObjectWriter : public MCELFObjectTargetWriter {
public:
ARMELFObjectWriter(Triple::OSType OSType)
@@ -182,7 +174,8 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
Value >>= 16;
// Fallthrough
case ARM::fixup_t2_movw_lo16:
- case ARM::fixup_t2_movt_hi16_pcrel:
+      case ARM::fixup_t2_movt_hi16_pcrel: // FIXME: Shouldn't this be shifted like
+ // the other hi16 fixup?
case ARM::fixup_t2_movw_lo16_pcrel: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned i = (Value & 0x800) >> 11;
@@ -192,8 +185,10 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
// inst{26} = i;
// inst{14-12} = Mid3;
// inst{7-0} = Lo8;
- assert ((((int64_t)Value) >= -0x8000) && (((int64_t)Value) <= 0x7fff) &&
- "Out of range pc-relative fixup value!");
+ // The value comes in as the whole thing, not just the portion required
+ // for this fixup, so we need to mask off the bits not handled by this
+ // portion (lo vs. hi).
+ Value &= 0xffff;
Value = (Hi4 << 16) | (i << 26) | (Mid3 << 12) | (Lo8);
uint64_t swapped = (Value & 0xFFFF0000) >> 16;
swapped |= (Value & 0x0000FFFF) << 16;
@@ -423,12 +418,9 @@ public:
: ARMAsmBackend(T), Subtype(st) { }
MCObjectWriter *createObjectWriter(raw_ostream &OS) const {
- return createMachObjectWriter(new ARMMachObjectWriter(
- /*Is64Bit=*/false,
- object::mach::CTM_ARM,
- Subtype),
- OS,
- /*IsLittleEndian=*/true);
+ return createARMMachObjectWriter(OS, /*Is64Bit=*/false,
+ object::mach::CTM_ARM,
+ Subtype);
}
void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
@@ -505,7 +497,13 @@ TargetAsmBackend *llvm::createARMAsmBackend(const Target &T,
Triple TheTriple(TT);
if (TheTriple.isOSDarwin()) {
- if (TheTriple.getArchName() == "armv6" ||
+ if (TheTriple.getArchName() == "armv4t" ||
+ TheTriple.getArchName() == "thumbv4t")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V4T);
+ else if (TheTriple.getArchName() == "armv5e" ||
+ TheTriple.getArchName() == "thumbv5e")
+ return new DarwinARMAsmBackend(T, object::mach::CSARM_V5TEJ);
+ else if (TheTriple.getArchName() == "armv6" ||
TheTriple.getArchName() == "thumbv6")
return new DarwinARMAsmBackend(T, object::mach::CSARM_V6);
return new DarwinARMAsmBackend(T, object::mach::CSARM_V7);
diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp
index eb73902..dbc3ee4 100644
--- a/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -654,7 +654,7 @@ void ARMAsmPrinter::emitAttributes() {
}
/* TODO: ARMBuildAttrs::Allowed is not completely accurate,
- * since NEON can have 1 (allowed) or 2 (fused MAC operations) */
+ * since NEON can have 1 (allowed) or 2 (MAC operations) */
if (Subtarget->hasNEON()) {
AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch,
ARMBuildAttrs::Allowed);
@@ -1010,19 +1010,16 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
MI->dump();
assert(0 && "Unsupported opcode for unwinding information");
case ARM::MOVr:
- case ARM::tMOVgpr2gpr:
- case ARM::tMOVgpr2tgpr:
Offset = 0;
break;
case ARM::ADDri:
Offset = -MI->getOperand(2).getImm();
break;
case ARM::SUBri:
- case ARM::t2SUBrSPi:
- Offset = MI->getOperand(2).getImm();
+ Offset = MI->getOperand(2).getImm();
break;
case ARM::tSUBspi:
- Offset = MI->getOperand(2).getImm()*4;
+ Offset = MI->getOperand(2).getImm()*4;
break;
case ARM::tADDspi:
case ARM::tADDrSPi:
@@ -1072,39 +1069,18 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) {
extern cl::opt<bool> EnableARMEHABI;
+// Simple pseudo-instructions have their lowering (with expansion to real
+// instructions) auto-generated.
+#include "ARMGenMCPseudoLowering.inc"
+
void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
- unsigned Opc = MI->getOpcode();
- switch (Opc) {
- default: break;
- case ARM::B: {
- // B is just a Bcc with an 'always' predicate.
- MCInst TmpInst;
- LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
- TmpInst.setOpcode(ARM::Bcc);
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- return;
- }
- case ARM::LDMIA_RET: {
- // LDMIA_RET is just a normal LDMIA_UPD instruction that targets PC and as
- // such has additional code-gen properties and scheduling information.
- // To emit it, we just construct as normal and set the opcode to LDMIA_UPD.
- MCInst TmpInst;
- LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
- TmpInst.setOpcode(ARM::LDMIA_UPD);
- OutStreamer.EmitInstruction(TmpInst);
+ // Do any auto-generated pseudo lowerings.
+ if (emitPseudoExpansionLowering(OutStreamer, MI))
return;
- }
- case ARM::t2ADDrSPi:
- case ARM::t2ADDrSPi12:
- case ARM::t2SUBrSPi:
- case ARM::t2SUBrSPi12:
- assert ((MI->getOperand(1).getReg() == ARM::SP) &&
- "Unexpected source register!");
- break;
+ // Check for manual lowerings.
+ unsigned Opc = MI->getOpcode();
+ switch (Opc) {
case ARM::t2MOVi32imm: assert(0 && "Should be lowered by thumb2it pass");
case ARM::DBG_VALUE: {
if (isVerbose() && OutStreamer.hasRawTextSupport()) {
@@ -1115,14 +1091,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- case ARM::tBfar: {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::tBL);
- TmpInst.addOperand(MCOperand::CreateExpr(MCSymbolRefExpr::Create(
- MI->getOperand(0).getMBB()->getSymbol(), OutContext)));
- OutStreamer.EmitInstruction(TmpInst);
- return;
- }
case ARM::LEApcrel:
case ARM::tLEApcrel:
case ARM::t2LEApcrel: {
@@ -1153,39 +1121,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(TmpInst);
return;
}
- case ARM::MOVPCRX: {
- MCInst TmpInst;
- TmpInst.setOpcode(ARM::MOVr);
- TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- // Add 's' bit operand (always reg0 for this)
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.EmitInstruction(TmpInst);
- return;
- }
// Darwin call instructions are just normal call instructions with different
// clobber semantics (they clobber R9).
- case ARM::BLr9:
- case ARM::BLr9_pred:
- case ARM::BLXr9:
- case ARM::BLXr9_pred: {
- unsigned newOpc;
- switch (Opc) {
- default: assert(0);
- case ARM::BLr9: newOpc = ARM::BL; break;
- case ARM::BLr9_pred: newOpc = ARM::BL_pred; break;
- case ARM::BLXr9: newOpc = ARM::BLX; break;
- case ARM::BLXr9_pred: newOpc = ARM::BLX_pred; break;
- }
- MCInst TmpInst;
- LowerARMMachineInstrToMCInst(MI, TmpInst, *this);
- TmpInst.setOpcode(newOpc);
- OutStreamer.EmitInstruction(TmpInst);
- return;
- }
case ARM::BXr9_CALL:
case ARM::BX_CALL: {
{
@@ -1215,6 +1152,9 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
TmpInst.setOpcode(ARM::tMOVr);
TmpInst.addOperand(MCOperand::CreateReg(ARM::LR));
TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
+ // Add predicate operands.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.EmitInstruction(TmpInst);
}
{
@@ -1445,7 +1385,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case ARM::t2BR_JT: {
// Lower and emit the instruction itself, then the jump table following it.
MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVgpr2gpr);
+ TmpInst.setOpcode(ARM::tMOVr);
TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
// Add predicate operands.
@@ -1494,7 +1434,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// mov pc, target
MCInst TmpInst;
unsigned Opc = MI->getOpcode() == ARM::BR_JTr ?
- ARM::MOVr : ARM::tMOVgpr2gpr;
+ ARM::MOVr : ARM::tMOVr;
TmpInst.setOpcode(Opc);
TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
@@ -1507,7 +1447,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(TmpInst);
// Make sure the Thumb jump table is 4-byte aligned.
- if (Opc == ARM::tMOVgpr2gpr)
+ if (Opc == ARM::tMOVr)
EmitAlignment(2);
// Output the data for the jump table itself
@@ -1599,11 +1539,12 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCSymbol *Label = GetARMSJLJEHLabel();
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVgpr2tgpr);
+ TmpInst.setOpcode(ARM::tMOVr);
TmpInst.addOperand(MCOperand::CreateReg(ValReg));
TmpInst.addOperand(MCOperand::CreateReg(ARM::PC));
- // 's' bit operand
- TmpInst.addOperand(MCOperand::CreateReg(ARM::CPSR));
+ // Predicate.
+ TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
+ TmpInst.addOperand(MCOperand::CreateReg(0));
OutStreamer.AddComment("eh_setjmp begin");
OutStreamer.EmitInstruction(TmpInst);
}
@@ -1817,7 +1758,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
{
MCInst TmpInst;
- TmpInst.setOpcode(ARM::tMOVtgpr2gpr);
+ TmpInst.setOpcode(ARM::tMOVr);
TmpInst.addOperand(MCOperand::CreateReg(ARM::SP));
TmpInst.addOperand(MCOperand::CreateReg(ScratchReg));
// Predicate.
@@ -1858,75 +1799,6 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
return;
}
- // Tail jump branches are really just branch instructions with additional
- // code-gen attributes. Convert them to the canonical form here.
- case ARM::TAILJMPd:
- case ARM::TAILJMPdND: {
- MCInst TmpInst, TmpInst2;
- // Lower the instruction as-is to get the operands properly converted.
- LowerARMMachineInstrToMCInst(MI, TmpInst2, *this);
- TmpInst.setOpcode(ARM::Bcc);
- TmpInst.addOperand(TmpInst2.getOperand(0));
- // Add predicate operands.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("TAILCALL");
- OutStreamer.EmitInstruction(TmpInst);
- return;
- }
- case ARM::tTAILJMPd:
- case ARM::tTAILJMPdND: {
- MCInst TmpInst, TmpInst2;
- LowerARMMachineInstrToMCInst(MI, TmpInst2, *this);
- // The Darwin toolchain doesn't support tail call relocations of 16-bit
- // branches.
- TmpInst.setOpcode(Opc == ARM::tTAILJMPd ? ARM::t2B : ARM::tB);
- TmpInst.addOperand(TmpInst2.getOperand(0));
- OutStreamer.AddComment("TAILCALL");
- OutStreamer.EmitInstruction(TmpInst);
- return;
- }
- case ARM::TAILJMPrND:
- case ARM::tTAILJMPrND:
- case ARM::TAILJMPr:
- case ARM::tTAILJMPr: {
- unsigned newOpc = (Opc == ARM::TAILJMPr || Opc == ARM::TAILJMPrND)
- ? ARM::BX : ARM::tBX;
- MCInst TmpInst;
- TmpInst.setOpcode(newOpc);
- TmpInst.addOperand(MCOperand::CreateReg(MI->getOperand(0).getReg()));
- // Predicate.
- TmpInst.addOperand(MCOperand::CreateImm(ARMCC::AL));
- TmpInst.addOperand(MCOperand::CreateReg(0));
- OutStreamer.AddComment("TAILCALL");
- OutStreamer.EmitInstruction(TmpInst);
- return;
- }
-
- // These are the pseudos created to comply with stricter operand restrictions
- // on ARMv5. Lower them now to "normal" instructions, since all the
- // restrictions are already satisfied.
- case ARM::MULv5:
- EmitPatchedInstruction(MI, ARM::MUL);
- return;
- case ARM::MLAv5:
- EmitPatchedInstruction(MI, ARM::MLA);
- return;
- case ARM::SMULLv5:
- EmitPatchedInstruction(MI, ARM::SMULL);
- return;
- case ARM::UMULLv5:
- EmitPatchedInstruction(MI, ARM::UMULL);
- return;
- case ARM::SMLALv5:
- EmitPatchedInstruction(MI, ARM::SMLAL);
- return;
- case ARM::UMLALv5:
- EmitPatchedInstruction(MI, ARM::UMLAL);
- return;
- case ARM::UMAALv5:
- EmitPatchedInstruction(MI, ARM::UMAAL);
- return;
}
MCInst TmpInst;
@@ -1944,11 +1816,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
//===----------------------------------------------------------------------===//
static MCInstPrinter *createARMMCInstPrinter(const Target &T,
- TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new ARMInstPrinter(TM, MAI);
+ return new ARMInstPrinter(MAI);
return 0;
}
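The EmitInstruction() cases deleted above (ARM::B, LDMIA_RET, tBfar, MOVPCRX, the BLr9 family, the TAILJMP pseudos, and the v5 MUL/MLA pseudos) are presumably covered by the auto-generated emitPseudoExpansionLowering() included from ARMGenMCPseudoLowering.inc near the top of the function, per the comment introduced there; only lowerings that need custom logic remain open-coded.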
diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h
index 5f9169e..7741fc4 100644
--- a/lib/Target/ARM/ARMAsmPrinter.h
+++ b/lib/Target/ARM/ARMAsmPrinter.h
@@ -21,6 +21,8 @@
namespace llvm {
+class MCOperand;
+
namespace ARM {
enum DW_ISA {
DW_ISA_ARM_thumb = 1,
@@ -72,6 +74,9 @@ public:
void EmitStartOfAsmFile(Module &M);
void EmitEndOfAsmFile(Module &M);
+ // lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+
private:
// Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
void emitAttributes();
@@ -84,6 +89,10 @@ private:
void EmitUnwindingInstruction(const MachineInstr *MI);
+ // emitPseudoExpansionLowering - tblgen'erated.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
+
public:
void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
@@ -100,6 +109,7 @@ public:
llvm::ARM::DW_ISA_ARM_thumb : llvm::ARM::DW_ISA_ARM_arm;
}
+ MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
MCSymbol *GetARMSetPICJumpTableLabel2(unsigned uid, unsigned uid2,
const MachineBasicBlock *MBB) const;
MCSymbol *GetARMJTIPICJumpTableLabel2(unsigned uid, unsigned uid2) const;
@@ -107,7 +117,7 @@ public:
MCSymbol *GetARMSJLJEHLabel(void) const;
MCSymbol *GetARMGVSymbol(const GlobalValue *GV);
-
+
/// EmitMachineConstantPoolValue - Print a machine constantpool value to
/// the .s file.
virtual void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV);
diff --git a/lib/Target/ARM/ARMBaseInfo.h b/lib/Target/ARM/ARMBaseInfo.h
index 36edbad..458f7dd 100644
--- a/lib/Target/ARM/ARMBaseInfo.h
+++ b/lib/Target/ARM/ARMBaseInfo.h
@@ -17,20 +17,12 @@
#ifndef ARMBASEINFO_H
#define ARMBASEINFO_H
+#include "MCTargetDesc/ARMMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
// Note that the following auto-generated files only defined enum types, and
// so are safe to include here.
-// Defines symbolic names for ARM registers. This defines a mapping from
-// register name to register number.
-//
-#include "ARMGenRegisterNames.inc"
-
-// Defines symbolic names for the ARM instructions.
-//
-#include "ARMGenInstrNames.inc"
-
namespace llvm {
// Enums corresponding to ARM condition codes
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 44a3976..649bd7d 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -18,7 +18,6 @@
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
-#include "ARMGenInstrInfo.inc"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
@@ -31,10 +30,15 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "ARMGenInstrInfo.inc"
+
using namespace llvm;
static cl::opt<bool>
@@ -74,7 +78,7 @@ static const ARM_MLxEntry ARM_MLxTable[] = {
};
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
- : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
+ : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
Subtarget(STI) {
for (unsigned i = 0, e = array_lengthof(ARM_MLxTable); i != e; ++i) {
if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
@@ -136,9 +140,9 @@ ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *UpdateMI = NULL;
MachineInstr *MemMI = NULL;
unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
- const TargetInstrDesc &TID = MI->getDesc();
- unsigned NumOps = TID.getNumOperands();
- bool isLoad = !TID.mayStore();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ bool isLoad = !MCID.mayStore();
const MachineOperand &WB = isLoad ? MI->getOperand(1) : MI->getOperand(0);
const MachineOperand &Base = MI->getOperand(2);
const MachineOperand &Offset = MI->getOperand(NumOps-3);
@@ -475,8 +479,8 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// FIXME: This confuses implicit_def with optional CPSR def.
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.getImplicitDefs() && !MCID.hasOptionalDef())
return false;
bool Found = false;
@@ -495,11 +499,11 @@ bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isPredicable())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isPredicable())
return false;
- if ((TID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
+ if ((MCID.TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) {
ARMFunctionInfo *AFI =
MI->getParent()->getParent()->getInfo<ARMFunctionInfo>();
return AFI->isThumb2Function();
@@ -524,35 +528,23 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
- // Basic size info comes from the TSFlags field.
- const TargetInstrDesc &TID = MI->getDesc();
- uint64_t TSFlags = TID.TSFlags;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.getSize())
+ return MCID.getSize();
- unsigned Opc = MI->getOpcode();
- switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
- default: {
// If this machine instr is an inline asm, measure it.
if (MI->getOpcode() == ARM::INLINEASM)
return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI);
if (MI->isLabel())
return 0;
+ unsigned Opc = MI->getOpcode();
switch (Opc) {
- default:
- llvm_unreachable("Unknown or unset size field for instr!");
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
case TargetOpcode::PROLOG_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::DBG_VALUE:
return 0;
- }
- break;
- }
- case ARMII::Size8Bytes: return 8; // ARM instruction x 2.
- case ARMII::Size4Bytes: return 4; // ARM / Thumb2 instruction.
- case ARMII::Size2Bytes: return 2; // Thumb1 instruction.
- case ARMII::SizeSpecial: {
- switch (Opc) {
case ARM::MOVi16_ga_pcrel:
case ARM::MOVTi16_ga_pcrel:
case ARM::t2MOVi16_ga_pcrel:
@@ -588,9 +580,9 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// entry is one byte; TBH two byte each.
unsigned EntrySize = (Opc == ARM::t2TBB_JT)
? 1 : ((Opc == ARM::t2TBH_JT) ? 2 : 4);
- unsigned NumOps = TID.getNumOperands();
+ unsigned NumOps = MCID.getNumOperands();
MachineOperand JTOP =
- MI->getOperand(NumOps - (TID.isPredicable() ? 3 : 2));
+ MI->getOperand(NumOps - (MCID.isPredicable() ? 3 : 2));
unsigned JTI = JTOP.getIndex();
const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
assert(MJTI != 0);
@@ -616,8 +608,6 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
// Otherwise, pseudo-instruction sizes are zero.
return 0;
}
- }
- }
return 0; // Not reached
}
@@ -647,7 +637,7 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else if (ARM::DPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVD;
else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
- Opc = ARM::VMOVQ;
+ Opc = ARM::VORRq;
else if (ARM::QQPRRegClass.contains(DestReg, SrcReg))
Opc = ARM::VMOVQQ;
else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg))
@@ -657,6 +647,8 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
MIB.addReg(SrcReg, getKillRegState(KillSrc));
+ if (Opc == ARM::VORRq)
+ MIB.addReg(SrcReg, getKillRegState(KillSrc));
if (Opc != ARM::VMOVQQ && Opc != ARM::VMOVQQQQ)
AddDefaultPred(MIB);
}
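Why the extra operand above: unlike VMOVD, a Q-register copy is emitted as vorr Qd, Qm, Qm, so when Opc == ARM::VORRq the source register has to be added a second time (both VORR source operands are SrcReg), which is exactly what the duplicated addReg call does.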
@@ -788,7 +780,7 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
break;
case ARM::STRi12:
case ARM::t2STRi12:
- case ARM::tSpill:
+ case ARM::tSTRspi:
case ARM::VSTRD:
case ARM::VSTRS:
if (MI->getOperand(1).isFI() &&
@@ -923,7 +915,7 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
break;
case ARM::LDRi12:
case ARM::t2LDRi12:
- case ARM::tRestore:
+ case ARM::tLDRspi:
case ARM::VLDRD:
case ARM::VLDRS:
if (MI->getOperand(1).isFI() &&
@@ -1269,20 +1261,20 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
-bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
- unsigned NumCycles,
- unsigned ExtraPredCycles,
- float Probability,
- float Confidence) const {
+bool ARMBaseInstrInfo::
+isProfitableToIfCvt(MachineBasicBlock &MBB,
+ unsigned NumCycles, unsigned ExtraPredCycles,
+ const BranchProbability &Probability) const {
if (!NumCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
- float UnpredCost = Probability * NumCycles;
- UnpredCost += 1.0; // The branch itself
- UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
+ unsigned UnpredCost = Probability.getNumerator() * NumCycles;
+ UnpredCost /= Probability.getDenominator();
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
- return (float)(NumCycles + ExtraPredCycles) < UnpredCost;
+ return (NumCycles + ExtraPredCycles) <= UnpredCost;
}
bool ARMBaseInstrInfo::
@@ -1290,16 +1282,23 @@ isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned TCycles, unsigned TExtra,
MachineBasicBlock &FMBB,
unsigned FCycles, unsigned FExtra,
- float Probability, float Confidence) const {
+ const BranchProbability &Probability) const {
if (!TCycles || !FCycles)
return false;
// Attempt to estimate the relative costs of predication versus branching.
- float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
- UnpredCost += 1.0; // The branch itself
- UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
-
- return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
+ unsigned TUnpredCost = Probability.getNumerator() * TCycles;
+ TUnpredCost /= Probability.getDenominator();
+
+ uint32_t Comp = Probability.getDenominator() - Probability.getNumerator();
+ unsigned FUnpredCost = Comp * FCycles;
+ FUnpredCost /= Probability.getDenominator();
+
+ unsigned UnpredCost = TUnpredCost + FUnpredCost;
+ UnpredCost += 1; // The branch itself
+ UnpredCost += Subtarget.getMispredictionPenalty() / 10;
+
+ return (TCycles + FCycles + TExtra + FExtra) <= UnpredCost;
}
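Worked example of the integer cost model above, with illustrative numbers: for Probability = 1/2, TCycles = 4, FCycles = 2, and a misprediction penalty of 10, TUnpredCost = (1 * 4) / 2 = 2 and FUnpredCost = ((2 - 1) * 2) / 2 = 1, so UnpredCost = 2 + 1 + 1 (the branch) + 10/10 = 5. If-conversion is taken only when TCycles + FCycles + TExtra + FExtra <= 5, so this candidate (4 + 2 = 6 with no extra predication cycles) is rejected. The integer division truncates, which is the price of dropping the old floating-point confidence heuristic.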
/// getInstrPredicate - If instruction is predicated, returns its predicate
@@ -1363,7 +1362,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = MI.getDesc();
+ const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
bool isSub = false;
@@ -1803,7 +1802,7 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
- const TargetInstrDesc &Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI->getDesc();
unsigned Class = Desc.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
if (UOps)
@@ -1906,10 +1905,10 @@ ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
int
ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &DefTID,
+ const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
- int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
@@ -1924,7 +1923,7 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
DefCycle = RegNo;
bool isSLoad = false;
- switch (DefTID.getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLDMSIA:
case ARM::VLDMSIA_UPD:
@@ -1947,10 +1946,10 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &DefTID,
+ const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const {
- int RegNo = (int)(DefIdx+1) - DefTID.getNumOperands() + 1;
+ int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;
if (RegNo <= 0)
// Def is the address writeback.
return ItinData->getOperandCycle(DefClass, DefIdx);
@@ -1982,10 +1981,10 @@ ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
int
ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
- int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
@@ -1999,7 +1998,7 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
UseCycle = RegNo;
bool isSStore = false;
- switch (UseTID.getOpcode()) {
+ switch (UseMCID.getOpcode()) {
default: break;
case ARM::VSTMSIA:
case ARM::VSTMSIA_UPD:
@@ -2022,10 +2021,10 @@ ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,
int
ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const {
- int RegNo = (int)(UseIdx+1) - UseTID.getNumOperands() + 1;
+ int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;
if (RegNo <= 0)
return ItinData->getOperandCycle(UseClass, UseIdx);
@@ -2051,14 +2050,14 @@ ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,
int
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
- const TargetInstrDesc &DefTID,
+ const MCInstrDesc &DefMCID,
unsigned DefIdx, unsigned DefAlign,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const {
- unsigned DefClass = DefTID.getSchedClass();
- unsigned UseClass = UseTID.getSchedClass();
+ unsigned DefClass = DefMCID.getSchedClass();
+ unsigned UseClass = UseMCID.getSchedClass();
- if (DefIdx < DefTID.getNumDefs() && UseIdx < UseTID.getNumOperands())
+ if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
// This may be a def / use of a variable_ops instruction, the operand
@@ -2066,7 +2065,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
// figure it out.
int DefCycle = -1;
bool LdmBypass = false;
- switch (DefTID.getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default:
DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);
break;
@@ -2077,7 +2076,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VLDMSIA:
case ARM::VLDMSIA_UPD:
case ARM::VLDMSDB_UPD:
- DefCycle = getVLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+ DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
break;
case ARM::LDMIA_RET:
@@ -2098,7 +2097,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2LDMIA_UPD:
case ARM::t2LDMDB_UPD:
LdmBypass = 1;
- DefCycle = getLDMDefCycle(ItinData, DefTID, DefClass, DefIdx, DefAlign);
+ DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);
break;
}
@@ -2107,7 +2106,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefCycle = 2;
int UseCycle = -1;
- switch (UseTID.getOpcode()) {
+ switch (UseMCID.getOpcode()) {
default:
UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);
break;
@@ -2118,7 +2117,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::VSTMSIA:
case ARM::VSTMSIA_UPD:
case ARM::VSTMSDB_UPD:
- UseCycle = getVSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+ UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
break;
case ARM::STMIA:
@@ -2137,7 +2136,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
case ARM::t2STMDB:
case ARM::t2STMIA_UPD:
case ARM::t2STMDB_UPD:
- UseCycle = getSTMUseCycle(ItinData, UseTID, UseClass, UseIdx, UseAlign);
+ UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);
break;
}
@@ -2150,7 +2149,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (LdmBypass) {
// It's a variable_ops instruction so we can't use DefIdx here. Just use
// first def operand.
- if (ItinData->hasPipelineForwarding(DefClass, DefTID.getNumOperands()-1,
+ if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,
UseClass, UseIdx))
--UseCycle;
} else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,
@@ -2170,11 +2169,11 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
DefMI->isRegSequence() || DefMI->isImplicitDef())
return 1;
- const TargetInstrDesc &DefTID = DefMI->getDesc();
+ const MCInstrDesc &DefMCID = DefMI->getDesc();
if (!ItinData || ItinData->isEmpty())
- return DefTID.mayLoad() ? 3 : 1;
+ return DefMCID.mayLoad() ? 3 : 1;
- const TargetInstrDesc &UseTID = UseMI->getDesc();
+ const MCInstrDesc &UseMCID = UseMI->getDesc();
const MachineOperand &DefMO = DefMI->getOperand(DefIdx);
if (DefMO.getReg() == ARM::CPSR) {
if (DefMI->getOpcode() == ARM::FMSTAT) {
@@ -2183,7 +2182,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
// CPSR set and branch can be paired in the same cycle.
- if (UseTID.isBranch())
+ if (UseMCID.isBranch())
return 0;
}
@@ -2191,14 +2190,14 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
? (*DefMI->memoperands_begin())->getAlignment() : 0;
unsigned UseAlign = UseMI->hasOneMemOperand()
? (*UseMI->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
- UseTID, UseIdx, UseAlign);
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefTID.getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2223,7 +2222,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefTID.getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8:
case ARM::VLD1q16:
@@ -2327,37 +2326,37 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
if (!DefNode->isMachineOpcode())
return 1;
- const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode());
+ const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());
- if (isZeroCost(DefTID.Opcode))
+ if (isZeroCost(DefMCID.Opcode))
return 0;
if (!ItinData || ItinData->isEmpty())
- return DefTID.mayLoad() ? 3 : 1;
+ return DefMCID.mayLoad() ? 3 : 1;
if (!UseNode->isMachineOpcode()) {
- int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx);
+ int Latency = ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);
if (Subtarget.isCortexA9())
return Latency <= 2 ? 1 : Latency - 1;
else
return Latency <= 3 ? 1 : Latency - 2;
}
- const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode());
+ const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());
const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode);
unsigned DefAlign = !DefMN->memoperands_empty()
? (*DefMN->memoperands_begin())->getAlignment() : 0;
const MachineSDNode *UseMN = dyn_cast<MachineSDNode>(UseNode);
unsigned UseAlign = !UseMN->memoperands_empty()
? (*UseMN->memoperands_begin())->getAlignment() : 0;
- int Latency = getOperandLatency(ItinData, DefTID, DefIdx, DefAlign,
- UseTID, UseIdx, UseAlign);
+ int Latency = getOperandLatency(ItinData, DefMCID, DefIdx, DefAlign,
+ UseMCID, UseIdx, UseAlign);
if (Latency > 1 &&
(Subtarget.isCortexA8() || Subtarget.isCortexA9())) {
// FIXME: Shifter op hack: no shift (i.e. [r +/- r]) or [r + r << 2]
// variants are one cycle cheaper.
- switch (DefTID.getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::LDRrs:
case ARM::LDRBrs: {
@@ -2384,7 +2383,7 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
}
if (DefAlign < 8 && Subtarget.isCortexA9())
- switch (DefTID.getOpcode()) {
+ switch (DefMCID.getOpcode()) {
default: break;
case ARM::VLD1q8Pseudo:
case ARM::VLD1q16Pseudo:
@@ -2503,10 +2502,10 @@ int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
if (!ItinData || ItinData->isEmpty())
return 1;
- const TargetInstrDesc &TID = MI->getDesc();
- unsigned Class = TID.getSchedClass();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Class = MCID.getSchedClass();
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
- if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
+ if (PredCost && MCID.hasImplicitDefOfPhysReg(ARM::CPSR))
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 9a2faf8..507e897 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -20,6 +20,9 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
+#define GET_INSTRINFO_HEADER
+#include "ARMGenInstrInfo.inc"
+
namespace llvm {
class ARMSubtarget;
class ARMBaseRegisterInfo;
@@ -36,24 +39,16 @@ namespace ARMII {
// This four-bit field describes the addressing mode used.
AddrModeMask = 0x1f, // The AddrMode enums are declared in ARMBaseInfo.h
- // Size* - Flags to keep track of the size of an instruction.
- SizeShift = 5,
- SizeMask = 7 << SizeShift,
- SizeSpecial = 1, // 0 byte pseudo or special case.
- Size8Bytes = 2,
- Size4Bytes = 3,
- Size2Bytes = 4,
-
// IndexMode - Unindex, pre-indexed, or post-indexed are valid for load
// and store ops only. Generic "updating" flag is used for ld/st multiple.
// The index mode enums are declared in ARMBaseInfo.h
- IndexModeShift = 8,
+ IndexModeShift = 5,
IndexModeMask = 3 << IndexModeShift,
//===------------------------------------------------------------------===//
// Instruction encoding formats.
//
- FormShift = 10,
+ FormShift = 7,
FormMask = 0x3f << FormShift,
// Pseudo instructions
@@ -126,15 +121,15 @@ namespace ARMII {
// UnaryDP - Indicates this is a unary data processing instruction, i.e.
// it doesn't have a Rn operand.
- UnaryDP = 1 << 16,
+ UnaryDP = 1 << 13,
// Xform16Bit - Indicates this Thumb2 instruction may be transformed into
// a 16-bit Thumb instruction if certain conditions are met.
- Xform16Bit = 1 << 17,
+ Xform16Bit = 1 << 14,
//===------------------------------------------------------------------===//
// Code domain.
- DomainShift = 18,
+ DomainShift = 15,
DomainMask = 7 << DomainShift,
DomainGeneral = 0 << DomainShift,
DomainVFP = 1 << DomainShift,
@@ -172,7 +167,7 @@ namespace ARMII {
};
}
-class ARMBaseInstrInfo : public TargetInstrInfoImpl {
+class ARMBaseInstrInfo : public ARMGenInstrInfo {
const ARMSubtarget &Subtarget;
protected:
@@ -291,8 +286,8 @@ public:
int64_t &Offset1, int64_t &Offset2)const;
 /// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
- /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
- /// be scheduled togther. On some targets if two loads are loading from
+ /// determine (in conjunction with areLoadsFromSameBasePtr) if two loads
+  /// should be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
@@ -308,18 +303,18 @@ public:
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
- float Prob, float Confidence) const;
+ const BranchProbability &Probability) const;
virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
unsigned NumT, unsigned ExtraT,
MachineBasicBlock &FMBB,
unsigned NumF, unsigned ExtraF,
- float Probability, float Confidence) const;
+ const BranchProbability &Probability) const;
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles,
- float Probability,
- float Confidence) const {
+ const BranchProbability
+ &Probability) const {
return NumCycles == 1;
}
@@ -353,25 +348,25 @@ public:
SDNode *UseNode, unsigned UseIdx) const;
private:
int getVLDMDefCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &DefTID,
+ const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const;
int getLDMDefCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &DefTID,
+ const MCInstrDesc &DefMCID,
unsigned DefClass,
unsigned DefIdx, unsigned DefAlign) const;
int getVSTMUseCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const;
int getSTMUseCycle(const InstrItineraryData *ItinData,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseClass,
unsigned UseIdx, unsigned UseAlign) const;
int getOperandLatency(const InstrItineraryData *ItinData,
- const TargetInstrDesc &DefTID,
+ const MCInstrDesc &DefMCID,
unsigned DefIdx, unsigned DefAlign,
- const TargetInstrDesc &UseTID,
+ const MCInstrDesc &UseMCID,
unsigned UseIdx, unsigned UseAlign) const;
int getInstrLatency(const InstrItineraryData *ItinData,
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 4ab37f6..ba42295 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -40,6 +40,9 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/CommandLine.h"
+#define GET_REGINFO_TARGET_DESC
+#include "ARMGenRegisterInfo.inc"
+
using namespace llvm;
static cl::opt<bool>
@@ -54,8 +57,7 @@ EnableBasePointer("arm-use-base-pointer", cl::Hidden, cl::init(true),
ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti)
- : ARMGenRegisterInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
- TII(tii), STI(sti),
+ : ARMGenRegisterInfo(), TII(tii), STI(sti),
FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11),
BasePtr(ARM::R6) {
}
@@ -100,6 +102,12 @@ getReservedRegs(const MachineFunction &MF) const {
// Some targets reserve R9.
if (STI.isR9Reserved())
Reserved.set(ARM::R9);
+ // Reserve D16-D31 if the subtarget doesn't support them.
+ if (!STI.hasVFP3() || STI.hasD16()) {
+ assert(ARM::D31 == ARM::D16 + 15);
+ for (unsigned i = 0; i != 16; ++i)
+ Reserved.set(ARM::D16 + i);
+ }
return Reserved;
}
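// Sketch (illustrative, not part of this patch): the BitVector built above
// is what later passes consult when picking registers, e.g.:
//
//   llvm::BitVector Reserved = TRI->getReservedRegs(MF);  // TRI assumed to
//   if (Reserved.test(ARM::D16)) {                        // be this class
//     // D16-D31 are off limits on a VFP2-only or d16 subtarget.
//   }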
@@ -387,12 +395,12 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
}
}
-/// getAllocationOrder - Returns the register allocation order for a specified
-/// register class in the form of a pair of TargetRegisterClass iterators.
-std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
-ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const {
+/// getRawAllocationOrder - Returns the register allocation order for a
+/// specified register class with a target-dependent hint.
+ArrayRef<unsigned>
+ARMBaseRegisterInfo::getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
// Alternative register allocation orders when favoring even / odd registers
// of register pairs.
@@ -469,70 +477,54 @@ ARMBaseRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC,
// We only support even/odd hints for GPR and rGPR.
if (RC != ARM::GPRRegisterClass && RC != ARM::rGPRRegisterClass)
- return std::make_pair(RC->allocation_order_begin(MF),
- RC->allocation_order_end(MF));
+ return RC->getRawAllocationOrder(MF);
if (HintType == ARMRI::RegPairEven) {
if (isPhysicalRegister(HintReg) && getRegisterPairEven(HintReg, MF) == 0)
// It's no longer possible to fulfill this hint. Return the default
// allocation order.
- return std::make_pair(RC->allocation_order_begin(MF),
- RC->allocation_order_end(MF));
+ return RC->getRawAllocationOrder(MF);
if (!TFI->hasFP(MF)) {
if (!STI.isR9Reserved())
- return std::make_pair(GPREven1,
- GPREven1 + (sizeof(GPREven1)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPREven1);
else
- return std::make_pair(GPREven4,
- GPREven4 + (sizeof(GPREven4)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPREven4);
} else if (FramePtr == ARM::R7) {
if (!STI.isR9Reserved())
- return std::make_pair(GPREven2,
- GPREven2 + (sizeof(GPREven2)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPREven2);
else
- return std::make_pair(GPREven5,
- GPREven5 + (sizeof(GPREven5)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPREven5);
} else { // FramePtr == ARM::R11
if (!STI.isR9Reserved())
- return std::make_pair(GPREven3,
- GPREven3 + (sizeof(GPREven3)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPREven3);
else
- return std::make_pair(GPREven6,
- GPREven6 + (sizeof(GPREven6)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPREven6);
}
} else if (HintType == ARMRI::RegPairOdd) {
if (isPhysicalRegister(HintReg) && getRegisterPairOdd(HintReg, MF) == 0)
// It's no longer possible to fulfill this hint. Return the default
// allocation order.
- return std::make_pair(RC->allocation_order_begin(MF),
- RC->allocation_order_end(MF));
+ return RC->getRawAllocationOrder(MF);
if (!TFI->hasFP(MF)) {
if (!STI.isR9Reserved())
- return std::make_pair(GPROdd1,
- GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPROdd1);
else
- return std::make_pair(GPROdd4,
- GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPROdd4);
} else if (FramePtr == ARM::R7) {
if (!STI.isR9Reserved())
- return std::make_pair(GPROdd2,
- GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPROdd2);
else
- return std::make_pair(GPROdd5,
- GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPROdd5);
} else { // FramePtr == ARM::R11
if (!STI.isR9Reserved())
- return std::make_pair(GPROdd3,
- GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPROdd3);
else
- return std::make_pair(GPROdd6,
- GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned)));
+ return ArrayRef<unsigned>(GPROdd6);
}
}
- return std::make_pair(RC->allocation_order_begin(MF),
- RC->allocation_order_end(MF));
+ return RC->getRawAllocationOrder(MF);
}
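// Sketch (illustrative, not part of this patch): ArrayRef<unsigned> deduces
// its length from a fixed-size C array through a template constructor,
// which is why the sizeof(Table)/sizeof(unsigned) arithmetic above could be
// dropped. The Hints table below is hypothetical:
//
//   static const unsigned Hints[] = { ARM::R0, ARM::R2, ARM::R4 };
//   llvm::ArrayRef<unsigned> Order(Hints);     // size() == 3, deduced
//   for (unsigned i = 0, e = Order.size(); i != e; ++i)
//     (void)Order[i];                          // indexes like a plain array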
/// ResolveRegAllocHint - Resolves the specified register allocation hint
@@ -965,7 +957,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
int64_t ARMBaseRegisterInfo::
getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const {
- const TargetInstrDesc &Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
int64_t InstrOffs = 0;
int Scale = 1;
@@ -1115,11 +1107,11 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB,
if (Ins != MBB->end())
DL = Ins->getDebugLoc();
- const TargetInstrDesc &TID = TII.get(ADDriOpc);
+ const MCInstrDesc &MCID = TII.get(ADDriOpc);
MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
- MRI.constrainRegClass(BaseReg, TID.OpInfo[0].getRegClass(this));
+ MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this));
- MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, TID, BaseReg)
+ MachineInstrBuilder MIB = BuildMI(*MBB, Ins, DL, MCID, BaseReg)
.addFrameIndex(FrameIdx).addImm(Offset);
if (!AFI->isThumb1OnlyFunction())
@@ -1155,7 +1147,7 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I,
bool ARMBaseRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
int64_t Offset) const {
- const TargetInstrDesc &Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI->getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
unsigned i = 0;
@@ -1291,11 +1283,5 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Update the original instruction to use the scratch register.
MI.getOperand(i).ChangeToRegister(ScratchReg, false, false, true);
- if (MI.getOpcode() == ARM::t2ADDrSPi)
- MI.setDesc(TII.get(ARM::t2ADDri));
- else if (MI.getOpcode() == ARM::t2SUBrSPi)
- MI.setDesc(TII.get(ARM::t2SUBri));
}
}
-
-#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index c60d75a..b4b4059 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -16,7 +16,9 @@
#include "ARM.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "ARMGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "ARMGenRegisterInfo.inc"
namespace llvm {
class ARMSubtarget;
@@ -134,10 +136,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const;
- std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>
- getAllocationOrder(const TargetRegisterClass *RC,
- unsigned HintType, unsigned HintReg,
- const MachineFunction &MF) const;
+ ArrayRef<unsigned> getRawAllocationOrder(const TargetRegisterClass *RC,
+ unsigned HintType, unsigned HintReg,
+ const MachineFunction &MF) const;
unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg,
const MachineFunction &MF) const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 16d4ca5..d6fca62 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -96,13 +96,13 @@ namespace {
void addPCLabel(unsigned LabelID);
void emitPseudoInstruction(const MachineInstr &MI);
unsigned getMachineSoRegOpValue(const MachineInstr &MI,
- const TargetInstrDesc &TID,
+ const MCInstrDesc &MCID,
const MachineOperand &MO,
unsigned OpIdx);
unsigned getMachineSoImmOpValue(unsigned SoImm);
unsigned getAddrModeSBit(const MachineInstr &MI,
- const TargetInstrDesc &TID) const;
+ const MCInstrDesc &MCID) const;
void emitDataProcessingInstruction(const MachineInstr &MI,
unsigned ImplicitRd = 0,
@@ -443,9 +443,9 @@ unsigned ARMCodeEmitter::getMachineOpValue(const MachineInstr &MI,
else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), ARM::reloc_arm_branch);
else if (MO.isCPI()) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// For VFP load, the immediate offset is multiplied by 4.
- unsigned Reloc = ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
+ unsigned Reloc = ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPLdStFrm)
? ARM::reloc_arm_vfp_cp_entry : ARM::reloc_arm_cp_entry;
emitConstPoolAddress(MO.getIndex(), Reloc);
} else if (MO.isJTI())
@@ -757,7 +757,7 @@ void ARMCodeEmitter::emitMOVi2piecesInstruction(const MachineInstr &MI) {
void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
// It's basically add r, pc, (LJTI - $+8)
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// Emit the 'add' instruction.
unsigned Binary = 0x4 << 21; // add: Insts{24-21} = 0b0100
@@ -766,7 +766,7 @@ void ARMCodeEmitter::emitLEApcrelJTInstruction(const MachineInstr &MI) {
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
// Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, TID);
+ Binary |= getAddrModeSBit(MI, MCID);
// Encode Rd.
Binary |= getMachineOpValue(MI, 0) << ARMII::RegRdShift;
@@ -912,7 +912,7 @@ void ARMCodeEmitter::emitPseudoInstruction(const MachineInstr &MI) {
}
unsigned ARMCodeEmitter::getMachineSoRegOpValue(const MachineInstr &MI,
- const TargetInstrDesc &TID,
+ const MCInstrDesc &MCID,
const MachineOperand &MO,
unsigned OpIdx) {
unsigned Binary = getMachineOpValue(MI, MO);
@@ -982,8 +982,8 @@ unsigned ARMCodeEmitter::getMachineSoImmOpValue(unsigned SoImm) {
}
unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
- const TargetInstrDesc &TID) const {
- for (unsigned i = MI.getNumOperands(), e = TID.getNumOperands(); i >= e; --i){
+ const MCInstrDesc &MCID) const {
+ for (unsigned i = MI.getNumOperands(), e = MCID.getNumOperands(); i >= e; --i){
const MachineOperand &MO = MI.getOperand(i-1);
if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR)
return 1 << ARMII::S_BitShift;
@@ -994,7 +994,7 @@ unsigned ARMCodeEmitter::getAddrModeSBit(const MachineInstr &MI,
void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
unsigned ImplicitRd,
unsigned ImplicitRn) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1003,10 +1003,10 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
// Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, TID);
+ Binary |= getAddrModeSBit(MI, MCID);
// Encode register def if there is one.
- unsigned NumDefs = TID.getNumDefs();
+ unsigned NumDefs = MCID.getNumDefs();
unsigned OpIdx = 0;
if (NumDefs)
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
@@ -1014,7 +1014,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
// Special handling for implicit use (e.g. PC).
Binary |= (getARMRegisterNumbering(ImplicitRd) << ARMII::RegRdShift);
- if (TID.Opcode == ARM::MOVi16) {
+ if (MCID.Opcode == ARM::MOVi16) {
// Get immediate from MI.
unsigned Lo16 = getMovi32Value(MI, MI.getOperand(OpIdx),
ARM::reloc_arm_movw);
@@ -1023,14 +1023,14 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= ((Lo16 >> 12) & 0xF) << 16;
emitWordLE(Binary);
return;
- } else if(TID.Opcode == ARM::MOVTi16) {
+ } else if(MCID.Opcode == ARM::MOVTi16) {
unsigned Hi16 = (getMovi32Value(MI, MI.getOperand(OpIdx),
ARM::reloc_arm_movt) >> 16);
Binary |= Hi16 & 0xFFF;
Binary |= ((Hi16 >> 12) & 0xF) << 16;
emitWordLE(Binary);
return;
- } else if ((TID.Opcode == ARM::BFC) || (TID.Opcode == ARM::BFI)) {
+ } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) {
uint32_t v = ~MI.getOperand(2).getImm();
int32_t lsb = CountTrailingZeros_32(v);
int32_t msb = (32 - CountLeadingZeros_32(v)) - 1;
@@ -1039,7 +1039,7 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
Binary |= (lsb & 0x1F) << 7;
emitWordLE(Binary);
return;
- } else if ((TID.Opcode == ARM::UBFX) || (TID.Opcode == ARM::SBFX)) {
+ } else if ((MCID.Opcode == ARM::UBFX) || (MCID.Opcode == ARM::SBFX)) {
// Encode Rn in Instr{0-3}
Binary |= getMachineOpValue(MI, OpIdx++);
@@ -1054,11 +1054,11 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
}
// If this is a two-address operand, skip it. e.g. MOVCCr operand 1.
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
++OpIdx;
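// Annotation (explanatory, not patch code): getOperandConstraint(OpIdx,
// MCOI::TIED_TO) returns the index of the operand that OpIdx is tied to,
// or -1 when it is unconstrained, so "!= -1" identifies the repeated
// register of a two-address instruction, which has no encoding of its own:
//
//   int Tied = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO);
//   if (Tied != -1)   // OpIdx must reuse the register of operand 'Tied'
//     ++OpIdx;        // nothing separate to encode; move on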
// Encode first non-shifter register operand if there is one.
- bool isUnary = TID.TSFlags & ARMII::UnaryDP;
+ bool isUnary = MCID.TSFlags & ARMII::UnaryDP;
if (!isUnary) {
if (ImplicitRn)
// Special handling for implicit use (e.g. PC).
@@ -1071,9 +1071,9 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
// Encode shifter operand.
const MachineOperand &MO = MI.getOperand(OpIdx);
- if ((TID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::DPSoRegFrm) {
// Encode SoReg.
- emitWordLE(Binary | getMachineSoRegOpValue(MI, TID, MO, OpIdx));
+ emitWordLE(Binary | getMachineSoRegOpValue(MI, MCID, MO, OpIdx));
return;
}
@@ -1092,9 +1092,9 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI,
void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
unsigned ImplicitRd,
unsigned ImplicitRn) {
- const TargetInstrDesc &TID = MI.getDesc();
- unsigned Form = TID.TSFlags & ARMII::FormMask;
- bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1134,7 +1134,7 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
// If this is a two-address operand, skip it. e.g. LDR_PRE.
- if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
++OpIdx;
const MachineOperand &MO2 = MI.getOperand(OpIdx);
@@ -1170,9 +1170,9 @@ void ARMCodeEmitter::emitLoadStoreInstruction(const MachineInstr &MI,
void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
unsigned ImplicitRn) {
- const TargetInstrDesc &TID = MI.getDesc();
- unsigned Form = TID.TSFlags & ARMII::FormMask;
- bool IsPrePost = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
+ bool IsPrePost = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1194,7 +1194,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
// Skip LDRD and STRD's second operand.
- if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD)
+ if (MCID.Opcode == ARM::LDRD || MCID.Opcode == ARM::STRD)
++OpIdx;
// Set second operand
@@ -1205,7 +1205,7 @@ void ARMCodeEmitter::emitMiscLoadStoreInstruction(const MachineInstr &MI,
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRnShift;
// If this is a two-address operand, skip it. e.g. LDRH_POST.
- if (!Skipped && TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ if (!Skipped && MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
++OpIdx;
const MachineOperand &MO2 = MI.getOperand(OpIdx);
@@ -1255,8 +1255,8 @@ static unsigned getAddrModeUPBits(unsigned Mode) {
}
void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
- bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1295,7 +1295,7 @@ void ARMCodeEmitter::emitLoadStoreMultipleInstruction(const MachineInstr &MI) {
}
void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1304,12 +1304,12 @@ void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
// Encode S bit if MI modifies CPSR.
- Binary |= getAddrModeSBit(MI, TID);
+ Binary |= getAddrModeSBit(MI, MCID);
// 32x32->64bit operations have two destination registers. The number
// of register definitions will tell us if that's what we're dealing with.
unsigned OpIdx = 0;
- if (TID.getNumDefs() == 2)
+ if (MCID.getNumDefs() == 2)
Binary |= getMachineOpValue (MI, OpIdx++) << ARMII::RegRdLoShift;
// Encode Rd
@@ -1323,16 +1323,16 @@ void ARMCodeEmitter::emitMulFrmInstruction(const MachineInstr &MI) {
// Many multiply instructions (e.g. MLA) have three src operands. Encode
// it as Rn (for multiply, that's in the same offset as RdLo).
- if (TID.getNumOperands() > OpIdx &&
- !TID.OpInfo[OpIdx].isPredicate() &&
- !TID.OpInfo[OpIdx].isOptionalDef())
+ if (MCID.getNumOperands() > OpIdx &&
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
Binary |= getMachineOpValue(MI, OpIdx) << ARMII::RegRdLoShift;
emitWordLE(Binary);
}
void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1361,15 +1361,15 @@ void ARMCodeEmitter::emitExtendInstruction(const MachineInstr &MI) {
// Encode rot imm (0, 8, 16, or 24) if it has a rotate immediate operand.
if (MI.getOperand(OpIdx).isImm() &&
- !TID.OpInfo[OpIdx].isPredicate() &&
- !TID.OpInfo[OpIdx].isOptionalDef())
+ !MCID.OpInfo[OpIdx].isPredicate() &&
+ !MCID.OpInfo[OpIdx].isOptionalDef())
Binary |= (getMachineOpValue(MI, OpIdx) / 8) << ARMII::ExtRotImmShift;
emitWordLE(Binary);
}
void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1378,7 +1378,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
// PKH instructions are finished at this point
- if (TID.Opcode == ARM::PKHBT || TID.Opcode == ARM::PKHTB) {
+ if (MCID.Opcode == ARM::PKHBT || MCID.Opcode == ARM::PKHTB) {
emitWordLE(Binary);
return;
}
@@ -1389,9 +1389,9 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift;
const MachineOperand &MO = MI.getOperand(OpIdx++);
- if (OpIdx == TID.getNumOperands() ||
- TID.OpInfo[OpIdx].isPredicate() ||
- TID.OpInfo[OpIdx].isOptionalDef()) {
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
// Encode Rm and it's done.
Binary |= getMachineOpValue(MI, MO);
emitWordLE(Binary);
@@ -1406,7 +1406,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
// Encode shift_imm.
unsigned ShiftAmt = MI.getOperand(OpIdx).getImm();
- if (TID.Opcode == ARM::PKHTB) {
+ if (MCID.Opcode == ARM::PKHTB) {
assert(ShiftAmt != 0 && "PKHTB shift_imm is 0!");
if (ShiftAmt == 32)
ShiftAmt = 0;
@@ -1418,7 +1418,7 @@ void ARMCodeEmitter::emitMiscArithInstruction(const MachineInstr &MI) {
}
void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1431,11 +1431,11 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
// Encode saturate bit position.
unsigned Pos = MI.getOperand(1).getImm();
- if (TID.Opcode == ARM::SSAT || TID.Opcode == ARM::SSAT16)
+ if (MCID.Opcode == ARM::SSAT || MCID.Opcode == ARM::SSAT16)
Pos -= 1;
assert((Pos < 16 || (Pos < 32 &&
- TID.Opcode != ARM::SSAT16 &&
- TID.Opcode != ARM::USAT16)) &&
+ MCID.Opcode != ARM::SSAT16 &&
+ MCID.Opcode != ARM::USAT16)) &&
"saturate bit position out of range");
Binary |= Pos << 16;
@@ -1443,7 +1443,7 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
Binary |= getMachineOpValue(MI, 2);
// Encode shift_imm.
- if (TID.getNumOperands() == 4) {
+ if (MCID.getNumOperands() == 4) {
unsigned ShiftOp = MI.getOperand(3).getImm();
ARM_AM::ShiftOpc Opc = ARM_AM::getSORegShOp(ShiftOp);
if (Opc == ARM_AM::asr)
@@ -1459,9 +1459,9 @@ void ARMCodeEmitter::emitSaturateInstruction(const MachineInstr &MI) {
}
void ARMCodeEmitter::emitBranchInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
- if (TID.Opcode == ARM::TPsoft) {
+ if (MCID.Opcode == ARM::TPsoft) {
llvm_unreachable("ARM::TPsoft FIXME"); // FIXME
}
@@ -1498,20 +1498,20 @@ void ARMCodeEmitter::emitInlineJumpTable(unsigned JTIndex) {
}
void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// Handle jump tables.
- if (TID.Opcode == ARM::BR_JTr || TID.Opcode == ARM::BR_JTadd) {
+ if (MCID.Opcode == ARM::BR_JTr || MCID.Opcode == ARM::BR_JTadd) {
// First emit a ldr pc, [] instruction.
emitDataProcessingInstruction(MI, ARM::PC);
// Then emit the inline jump table.
unsigned JTIndex =
- (TID.Opcode == ARM::BR_JTr)
+ (MCID.Opcode == ARM::BR_JTr)
? MI.getOperand(1).getIndex() : MI.getOperand(2).getIndex();
emitInlineJumpTable(JTIndex);
return;
- } else if (TID.Opcode == ARM::BR_JTm) {
+ } else if (MCID.Opcode == ARM::BR_JTm) {
// First emit a ldr pc, [] instruction.
emitLoadStoreInstruction(MI, ARM::PC);
@@ -1526,7 +1526,7 @@ void ARMCodeEmitter::emitMiscBranchInstruction(const MachineInstr &MI) {
// Set the conditional execution predicate
Binary |= II->getPredicate(&MI) << ARMII::CondShift;
- if (TID.Opcode == ARM::BX_RET || TID.Opcode == ARM::MOVPCLR)
+ if (MCID.Opcode == ARM::BX_RET || MCID.Opcode == ARM::MOVPCLR)
// The return register is LR.
Binary |= getARMRegisterNumbering(ARM::LR);
else
@@ -1579,7 +1579,7 @@ static unsigned encodeVFPRm(const MachineInstr &MI, unsigned OpIdx) {
}
void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1596,16 +1596,16 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
Binary |= encodeVFPRd(MI, OpIdx++);
// If this is a two-address operand, skip it, e.g. FMACD.
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
++OpIdx;
// Encode Dn / Sn.
- if ((TID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::VFPBinaryFrm)
Binary |= encodeVFPRn(MI, OpIdx++);
- if (OpIdx == TID.getNumOperands() ||
- TID.OpInfo[OpIdx].isPredicate() ||
- TID.OpInfo[OpIdx].isOptionalDef()) {
+ if (OpIdx == MCID.getNumOperands() ||
+ MCID.OpInfo[OpIdx].isPredicate() ||
+ MCID.OpInfo[OpIdx].isOptionalDef()) {
// FCMPEZD etc. has only one operand.
emitWordLE(Binary);
return;
@@ -1618,8 +1618,8 @@ void ARMCodeEmitter::emitVFPArithInstruction(const MachineInstr &MI) {
}
void ARMCodeEmitter::emitVFPConversionInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
- unsigned Form = TID.TSFlags & ARMII::FormMask;
+ const MCInstrDesc &MCID = MI.getDesc();
+ unsigned Form = MCID.TSFlags & ARMII::FormMask;
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1709,8 +1709,8 @@ void ARMCodeEmitter::emitVFPLoadStoreInstruction(const MachineInstr &MI) {
void
ARMCodeEmitter::emitVFPLoadStoreMultipleInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
- bool IsUpdating = (TID.TSFlags & ARMII::IndexModeMask) != 0;
+ const MCInstrDesc &MCID = MI.getDesc();
+ bool IsUpdating = (MCID.TSFlags & ARMII::IndexModeMask) != 0;
// Part of binary is determined by TableGen.
unsigned Binary = getBinaryCodeForInstr(MI);
@@ -1795,8 +1795,8 @@ void ARMCodeEmitter::emitNEONLaneInstruction(const MachineInstr &MI) {
unsigned Binary = getBinaryCodeForInstr(MI);
unsigned RegTOpIdx, RegNOpIdx, LnOpIdx;
- const TargetInstrDesc &TID = MI.getDesc();
- if ((TID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
+ const MCInstrDesc &MCID = MI.getDesc();
+ if ((MCID.TSFlags & ARMII::FormMask) == ARMII::NGetLnFrm) {
RegTOpIdx = 0;
RegNOpIdx = 1;
LnOpIdx = 2;
@@ -1863,12 +1863,12 @@ void ARMCodeEmitter::emitNEON1RegModImmInstruction(const MachineInstr &MI) {
}
void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
unsigned Binary = getBinaryCodeForInstr(MI);
// Destination register is encoded in Dd; source register in Dm.
unsigned OpIdx = 0;
Binary |= encodeNEONRd(MI, OpIdx++);
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
++OpIdx;
Binary |= encodeNEONRm(MI, OpIdx);
if (IsThumb)
@@ -1878,15 +1878,15 @@ void ARMCodeEmitter::emitNEON2RegInstruction(const MachineInstr &MI) {
}
void ARMCodeEmitter::emitNEON3RegInstruction(const MachineInstr &MI) {
- const TargetInstrDesc &TID = MI.getDesc();
+ const MCInstrDesc &MCID = MI.getDesc();
unsigned Binary = getBinaryCodeForInstr(MI);
// Destination register is encoded in Dd; source registers in Dn and Dm.
unsigned OpIdx = 0;
Binary |= encodeNEONRd(MI, OpIdx++);
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
++OpIdx;
Binary |= encodeNEONRn(MI, OpIdx++);
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
++OpIdx;
Binary |= encodeNEONRm(MI, OpIdx);
if (IsThumb)
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index baf95a3..f45ebdc 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -1538,7 +1538,10 @@ bool ARMConstantIslands::UndoLRSpillRestore() {
if (MI->getOpcode() == ARM::tPOP_RET &&
MI->getOperand(2).getReg() == ARM::PC &&
MI->getNumExplicitOperands() == 3) {
- BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET));
+ // Create the new insn and copy the predicate from the old.
+ BuildMI(MI->getParent(), MI->getDebugLoc(), TII->get(ARM::tBX_RET))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1));
MI->eraseFromParent();
MadeChange = true;
}
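// Annotation (an assumption about the convention, not part of the patch):
// ARM predicates are carried as two trailing operands, a condition-code
// immediate followed by the CPSR register (noreg when unconditional), so
// copying operands 0 and 1 above preserves e.g. a "popeq" condition; MBB
// and DL below are placeholders for the insertion point:
//
//   BuildMI(MBB, DL, TII->get(ARM::tBX_RET))
//       .addImm(ARMCC::EQ)      // predicate condition copied from the POP
//       .addReg(ARM::CPSR);     // predicate register operand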
@@ -1692,9 +1695,9 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
- const TargetInstrDesc &TID = MI->getDesc();
- unsigned NumOps = TID.getNumOperands();
- unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
@@ -1815,9 +1818,9 @@ bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
- const TargetInstrDesc &TID = MI->getDesc();
- unsigned NumOps = TID.getNumOperands();
- unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MCID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (MCID.isPredicable() ? 3 : 2);
MachineOperand JTOP = MI->getOperand(JTOpIdx);
unsigned JTI = JTOP.getIndex();
assert(JTI < JT.size());
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index b6b3c75..94b72fd 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -68,7 +68,7 @@ namespace {
void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
MachineInstrBuilder &UseMI,
MachineInstrBuilder &DefMI) {
- const TargetInstrDesc &Desc = OldMI.getDesc();
+ const MCInstrDesc &Desc = OldMI.getDesc();
for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
i != e; ++i) {
const MachineOperand &MO = OldMI.getOperand(i);
@@ -727,8 +727,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case ARM::t2MOVCCr:
case ARM::MOVCCr: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVr),
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
.addReg(MI.getOperand(2).getReg(),
getKillRegState(MI.getOperand(2).isKill()))
@@ -764,8 +766,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case ARM::t2MOVCCi:
case ARM::MOVCCi: {
- BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi),
+ unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
MI.getOperand(1).getReg())
.addImm(MI.getOperand(2).getImm())
.addImm(MI.getOperand(3).getImm()) // 'pred'
@@ -837,8 +841,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.getOperand(0).getReg())
.addOperand(MI.getOperand(1))
.addReg(0)
- .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr
- : ARM_AM::asr), 1)))
+ .addImm(ARM_AM::getSORegOpc((Opcode == ARM::MOVsrl_flag ?
+ ARM_AM::lsr : ARM_AM::asr),
+ 1)))
.addReg(ARM::CPSR, RegState::Define);
MI.eraseFromParent();
return true;
@@ -856,10 +861,11 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
MI.eraseFromParent();
return true;
}
+ case ARM::tTPsoft:
case ARM::TPsoft: {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::BL))
+ TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL))
.addExternalSymbol("__aeabi_read_tp", 0);
MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
@@ -900,10 +906,10 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
const MachineOperand &MO1 = MI.getOperand(1);
const GlobalValue *GV = MO1.getGlobal();
unsigned TF = MO1.getTargetFlags();
- bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode != ARM::t2MOV_ga_dyn);
+ bool isARM = (Opcode != ARM::t2MOV_ga_pcrel && Opcode!=ARM::t2MOV_ga_dyn);
bool isPIC = (Opcode != ARM::MOV_ga_dyn && Opcode != ARM::t2MOV_ga_dyn);
unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
- unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel : ARM::t2MOVTi16_ga_pcrel;
+ unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
unsigned LO16TF = isPIC
? ARMII::MO_LO16_NONLAZY_PIC : ARMII::MO_LO16_NONLAZY;
unsigned HI16TF = isPIC
@@ -958,15 +964,17 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
unsigned OddSrc = TRI->getSubReg(SrcReg, ARM::qsub_1);
MachineInstrBuilder Even =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::VMOVQ))
+ TII->get(ARM::VORRq))
.addReg(EvenDst,
RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(EvenSrc, getKillRegState(SrcIsKill))
.addReg(EvenSrc, getKillRegState(SrcIsKill)));
MachineInstrBuilder Odd =
AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
- TII->get(ARM::VMOVQ))
+ TII->get(ARM::VORRq))
.addReg(OddDst,
RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(OddSrc, getKillRegState(SrcIsKill))
.addReg(OddSrc, getKillRegState(SrcIsKill)));
TransferImpOps(MI, Even, Odd);
MI.eraseFromParent();
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 5cf73c4..f469d7e 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -219,8 +219,8 @@ class ARMFastISel : public FastISel {
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.hasOptionalDef())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.hasOptionalDef())
return false;
// Look to see if our OptionalDef is defining CPSR or CCR.
@@ -234,15 +234,15 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
}
bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
- const TargetInstrDesc &TID = MI->getDesc();
+ const MCInstrDesc &MCID = MI->getDesc();
// If this is a Thumb2 function or the instruction is not NEON, it was
// handled via isPredicable.
- if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
+ if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
AFI->isThumb2Function())
return false;
- for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
- if (TID.OpInfo[i].isPredicate())
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
+ if (MCID.OpInfo[i].isPredicate())
return true;
return false;
@@ -278,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
const TargetRegisterClass* RC) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
return ResultReg;
@@ -288,7 +288,7 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -308,7 +308,7 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
unsigned Op1, bool Op1IsKill) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -331,7 +331,7 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
unsigned Op1, bool Op1IsKill,
unsigned Op2, bool Op2IsKill) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -355,7 +355,7 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -377,7 +377,7 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
unsigned Op0, bool Op0IsKill,
const ConstantFP *FPImm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -400,7 +400,7 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
unsigned Op1, bool Op1IsKill,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -423,7 +423,7 @@ unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -442,7 +442,7 @@ unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm1, uint64_t Imm2) {
unsigned ResultReg = createResultReg(RC);
- const TargetInstrDesc &II = TII.get(MachineInstOpcode);
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
if (II.getNumDefs() >= 1)
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
@@ -1549,7 +1549,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START
- unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(AdjStackDown))
.addImm(NumBytes));
@@ -1647,7 +1647,7 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
const Instruction *I, CallingConv::ID CC,
unsigned &NumBytes) {
// Issue CALLSEQ_END
- unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(AdjStackUp))
.addImm(NumBytes).addImm(0));
diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp
index e2e95d4..381b404 100644
--- a/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/lib/Target/ARM/ARMFrameLowering.cpp
@@ -268,14 +268,14 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
// bic r4, r4, MaxAlign
// mov sp, r4
// FIXME: It will be better just to find spare register here.
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4)
- .addReg(ARM::SP, RegState::Kill);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill));
AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
TII.get(ARM::t2BICri), ARM::R4)
.addReg(ARM::R4, RegState::Kill)
.addImm(MaxAlign-1)));
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
- .addReg(ARM::R4, RegState::Kill);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill));
}
AFI->setShouldRestoreSPFromFP(true);
@@ -293,9 +293,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const {
.addReg(ARM::SP)
.addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
else
- BuildMI(MBB, MBBI, dl,
- TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister())
- .addReg(ARM::SP);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ RegInfo->getBaseRegister())
+ .addReg(ARM::SP));
}
// If the frame has variable sized objects then the epilogue must restore
@@ -364,8 +364,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(ARM::R4);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
}
} else {
// Thumb2 or ARM.
@@ -373,8 +374,9 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
.addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0);
else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP)
- .addReg(FramePtr);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
}
} else if (NumBytes)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
@@ -427,6 +429,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// Delete the pseudo instruction TCRETURN.
MBB.erase(MBBI);
+ MBBI = NewMI;
}
if (VARegSaveSize)
@@ -736,20 +739,52 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
/// estimateStackSize - Estimate and return the size of the frame.
/// FIXME: Make generic?
static unsigned estimateStackSize(MachineFunction &MF) {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
+ const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo();
+ unsigned MaxAlign = MFI->getMaxAlignment();
int Offset = 0;
- for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
- int FixedOff = -FFI->getObjectOffset(i);
+
+ // This code is very, very similar to PEI::calculateFrameObjectOffsets().
+ // It really should be refactored to share code. Until then, changes
+ // should keep in mind that there's tight coupling between the two.
+
+ for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ int FixedOff = -MFI->getObjectOffset(i);
if (FixedOff > Offset) Offset = FixedOff;
}
- for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
+ for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
+ if (MFI->isDeadObjectIndex(i))
continue;
- Offset += FFI->getObjectSize(i);
- unsigned Align = FFI->getObjectAlignment(i);
+ Offset += MFI->getObjectSize(i);
+ unsigned Align = MFI->getObjectAlignment(i);
// Adjust to alignment boundary
Offset = (Offset+Align-1)/Align*Align;
+
+ MaxAlign = std::max(Align, MaxAlign);
}
+
+ if (MFI->adjustsStack() && TFI->hasReservedCallFrame(MF))
+ Offset += MFI->getMaxCallFrameSize();
+
+ // Round up the size to a multiple of the alignment. If the function has
+ // any calls or alloca's, align to the target's StackAlignment value to
+ // ensure that the callee's frame or the alloca data is suitably aligned;
+ // otherwise, for leaf functions, align to the TransientStackAlignment
+ // value.
+ unsigned StackAlign;
+ if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(MF) && MFI->getObjectIndexEnd() != 0))
+ StackAlign = TFI->getStackAlignment();
+ else
+ StackAlign = TFI->getTransientStackAlignment();
+
+ // If the frame pointer is eliminated, all frame offsets will be relative to
+ // SP not FP. Align to MaxAlign so this works.
+ StackAlign = std::max(StackAlign, MaxAlign);
+ unsigned AlignMask = StackAlign - 1;
+ Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
+
return (unsigned)Offset;
}
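// Worked example of the rounding above (illustrative numbers only): with
// Offset = 20 and StackAlign = 8,
//
//   unsigned AlignMask = 8 - 1;                         // 0b0111
//   Offset = (20 + AlignMask) & ~uint64_t(AlignMask);   // 27 & ~7 == 24
//
// so the estimate is always rounded up to the next multiple of the
// alignment, never down.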
@@ -841,9 +876,14 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (AFI->getVarArgsRegSaveSize() > 0)
MF.getRegInfo().setPhysRegUsed(ARM::LR);
- // Spill R4 if Thumb1 epilogue has to restore SP from FP since
+ // Spill R4 if Thumb1 epilogue has to restore SP from FP. We don't know
+ // for sure what the stack size will be, but for this, an estimate is good
+ // enough. If anything changes it, it'll be a spill, which implies
+ // we've used all the registers and so R4 is already used, so not marking
+ // it here will be OK.
// FIXME: It will be better just to find spare register here.
- if (MFI->hasVarSizedObjects())
+ unsigned StackSize = estimateStackSize(MF);
+ if (MFI->hasVarSizedObjects() || StackSize > 508)
MF.getRegInfo().setPhysRegUsed(ARM::R4);
}
diff --git a/lib/Target/ARM/ARMGlobalMerge.cpp b/lib/Target/ARM/ARMGlobalMerge.cpp
index 3f02383..8d77b2d 100644
--- a/lib/Target/ARM/ARMGlobalMerge.cpp
+++ b/lib/Target/ARM/ARMGlobalMerge.cpp
@@ -128,10 +128,10 @@ bool ARMGlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
for (size_t i = 0, e = Globals.size(); i != e; ) {
size_t j = 0;
uint64_t MergedSize = 0;
- std::vector<const Type*> Tys;
+ std::vector<Type*> Tys;
std::vector<Constant*> Inits;
for (j = i; j != e; ++j) {
- const Type *Ty = Globals[j]->getType()->getElementType();
+ Type *Ty = Globals[j]->getType()->getElementType();
MergedSize += TD->getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
break;
@@ -175,7 +175,9 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
continue;
// Ignore fancy-aligned globals for now.
- if (I->getAlignment() != 0)
+ unsigned Alignment = I->getAlignment();
+ const Type *Ty = I->getType()->getElementType();
+ if (Alignment > TD->getABITypeAlignment(Ty))
continue;
// Ignore all 'special' globals.
@@ -183,7 +185,7 @@ bool ARMGlobalMerge::doInitialization(Module &M) {
I->getName().startswith(".llvm."))
continue;
- if (TD->getTypeAllocSize(I->getType()->getElementType()) < MaxOffset) {
+ if (TD->getTypeAllocSize(Ty) < MaxOffset) {
const TargetLoweringObjectFile &TLOF = TLI->getObjFileLowering();
if (TLOF.getKindForGlobal(I, TLI->getTargetMachine()).isBSSLocal())
BSSGlobals.push_back(I);
diff --git a/lib/Target/ARM/ARMHazardRecognizer.cpp b/lib/Target/ARM/ARMHazardRecognizer.cpp
index 517bba8..787f6a2 100644
--- a/lib/Target/ARM/ARMHazardRecognizer.cpp
+++ b/lib/Target/ARM/ARMHazardRecognizer.cpp
@@ -19,11 +19,11 @@ using namespace llvm;
static bool hasRAWHazard(MachineInstr *DefMI, MachineInstr *MI,
const TargetRegisterInfo &TRI) {
// FIXME: Detect integer instructions properly.
- const TargetInstrDesc &TID = MI->getDesc();
- unsigned Domain = TID.TSFlags & ARMII::DomainMask;
- if (TID.mayStore())
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MCID.mayStore())
return false;
- unsigned Opcode = TID.getOpcode();
+ unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
return false;
if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
@@ -43,15 +43,15 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
// Look for special VMLA / VMLS hazards. A VMUL / VADD / VSUB following
// a VMLA / VMLS will cause a 4-cycle stall.
- const TargetInstrDesc &TID = MI->getDesc();
- if (LastMI && (TID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) {
MachineInstr *DefMI = LastMI;
- const TargetInstrDesc &LastTID = LastMI->getDesc();
+ const MCInstrDesc &LastMCID = LastMI->getDesc();
// Skip over one non-VFP / NEON instruction.
- if (!LastTID.isBarrier() &&
+ if (!LastMCID.isBarrier() &&
// On A9, AGU and NEON/FPU are muxed.
- !(STI.isCortexA9() && (LastTID.mayLoad() || LastTID.mayStore())) &&
- (LastTID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
+ !(STI.isCortexA9() && (LastMCID.mayLoad() || LastMCID.mayStore())) &&
+ (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) {
MachineBasicBlock::iterator I = LastMI;
if (I != LastMI->getParent()->begin()) {
I = llvm::prior(I);
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9ad516d..2c9481b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -329,10 +329,10 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
if (Use->getOpcode() == ISD::CopyToReg)
return true;
if (Use->isMachineOpcode()) {
- const TargetInstrDesc &TID = TII->get(Use->getMachineOpcode());
- if (TID.mayStore())
+ const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());
+ if (MCID.mayStore())
return true;
- unsigned Opcode = TID.getOpcode();
+ unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
return true;
// vmlx feeding into another vmlx. We actually want to unfold
@@ -1354,30 +1354,34 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) {
///
SDNode *ARMDAGToDAGISel::PairSRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
/// PairDRegs - Form a quad register from a pair of D registers.
///
SDNode *ARMDAGToDAGISel::PairDRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
/// PairQRegs - Form 4 consecutive D registers from a pair of Q registers.
///
SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 4);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
}
/// QuadSRegs - Form 4 consecutive S registers.
@@ -1385,12 +1389,15 @@ SDNode *ARMDAGToDAGISel::PairQRegs(EVT VT, SDValue V0, SDValue V1) {
SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass =
+ CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, MVT::i32);
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
/// QuadDRegs - Form 4 consecutive D registers.
@@ -1398,12 +1405,14 @@ SDNode *ARMDAGToDAGISel::QuadSRegs(EVT VT, SDValue V0, SDValue V1,
SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, MVT::i32);
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
/// QuadQRegs - Form 4 consecutive Q registers.
@@ -1411,12 +1420,14 @@ SDNode *ARMDAGToDAGISel::QuadDRegs(EVT VT, SDValue V0, SDValue V1,
SDNode *ARMDAGToDAGISel::QuadQRegs(EVT VT, SDValue V0, SDValue V1,
SDValue V2, SDValue V3) {
DebugLoc dl = V0.getNode()->getDebugLoc();
+ SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32);
SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32);
SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32);
SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, MVT::i32);
SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, MVT::i32);
- const SDValue Ops[] = { V0, SubReg0, V1, SubReg1, V2, SubReg2, V3, SubReg3 };
- return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 8);
+ const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
+ V2, SubReg2, V3, SubReg3 };
+ return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 9);
}
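// Annotation (the rationale is an assumption; the operand layout is from
// the patch): every REG_SEQUENCE above now takes the register-class ID of
// its result as a leading immediate, e.g. two D registers forming a Q:
//
//   SDValue RC = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32);
//   const SDValue Ops[] = { RC, V0, SubReg0, V1, SubReg1 };
//   CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops, 5);
//
// which presumably lets target-independent code constrain the destination
// register class without re-deriving it from the sub-register operands.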
/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 339c858..cf8c5ba 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -506,6 +506,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::FP_TO_SINT);
+ setTargetDAGCombine(ISD::FP_TO_UINT);
+ setTargetDAGCombine(ISD::FDIV);
}
computeRegisterProperties();
@@ -538,7 +541,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
}
- if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops())
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
+ || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
setOperationAction(ISD::MULHS, MVT::i32, Expand);
setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
@@ -704,6 +708,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FPOW, MVT::f64, Expand);
setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
// Various VFP goodness
if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
// int <-> fp are custom expanded into bit_convert + ARMISD ops.
@@ -974,12 +981,12 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
// Loads are scheduled for latency even if the instruction itinerary
// is not available.
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
- const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
- if (TID.getNumDefs() == 0)
+ if (MCID.getNumDefs() == 0)
return Sched::RegPressure;
if (!Itins->isEmpty() &&
- Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
+ Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
return Sched::Latency;
return Sched::RegPressure;
@@ -1633,7 +1640,11 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
return false;
// FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
- // emitEpilogue is not ready for them.
+ // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as
+ // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation
+ // support in the assembler and linker to be used. This would need to be
+ // fixed to fully support tail calls in Thumb1.
+ //
// Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
// LR. This means if we need to reload LR, it takes an extra instruction,
// which outweighs the value of the tail call; but here we don't know yet
@@ -2281,12 +2292,13 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
// ARMv7 with MP extension has PLDW.
return Op.getOperand(0);
- if (Subtarget->isThumb())
+ unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
+ if (Subtarget->isThumb()) {
// Invert the bits.
isRead = ~isRead & 1;
- unsigned isData = Subtarget->isThumb() ? 0 : 1;
+ isData = ~isData & 1;
+ }
- // Currently there is no intrinsic that matches pli.
return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
Op.getOperand(1), DAG.getConstant(isRead, MVT::i32),
DAG.getConstant(isData, MVT::i32));
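The Thumb adjustment is a plain flip of both flag bits; a standalone sketch of the mapping (the helper name is illustrative only):

    // Sketch: Thumb PLD/PLDW/PLI invert the read/data senses relative
    // to the generic prefetch intrinsic operands.
    static void adjustPrefetchFlagsForThumb(unsigned &isRead, unsigned &isData) {
      isRead = ~isRead & 1; // read (1) <-> write (0)
      isData = ~isData & 1; // data (1) <-> instruction (0)
    }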
@@ -2742,7 +2754,7 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMcc;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
- return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
+ return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
}
ARMCC::CondCodes CondCode, CondCode2;
@@ -5522,12 +5534,108 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
return SDValue();
}
+// AddCombineToVPADDL - For a pairwise add on NEON, use the vpaddl instruction
+// (only after legalization).
+static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+
+ // Only perform the optimization after legalization and if NEON is available.
+ // We also expect both operands to be BUILD_VECTORs.
+ if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
+ || N0.getOpcode() != ISD::BUILD_VECTOR
+ || N1.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // Check output type since VPADDL operand elements can only be 8, 16, or 32.
+ EVT VT = N->getValueType(0);
+ if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
+ return SDValue();
+
+ // Check that the vector operands are of the right form.
+ // N0 and N1 are BUILD_VECTOR nodes with N EXTRACT_VECTOR_ELT operands,
+ // where N is the number of elements in the formed vector.
+ // Each EXTRACT_VECTOR_ELT should reference the same input vector with an
+ // odd or even index such that we have a pairwise add pattern.
+
+ // Grab the vector that all EXTRACT_VECTOR nodes should be referencing.
+ if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
+ return SDValue();
+ SDValue Vec = N0->getOperand(0)->getOperand(0);
+ SDNode *V = Vec.getNode();
+ unsigned nextIndex = 0;
+
+ // For each operand of the ADD (both are BUILD_VECTORs), check that each
+ // of its operands is an EXTRACT_VECTOR_ELT with the same vector and the
+ // appropriate index.
+ for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
+ if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
+ && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+
+ SDValue ExtVec0 = N0->getOperand(i);
+ SDValue ExtVec1 = N1->getOperand(i);
+
+ // The first operand is the vector; verify it's the same.
+ if (V != ExtVec0->getOperand(0).getNode() ||
+ V != ExtVec1->getOperand(0).getNode())
+ return SDValue();
+
+ // The second is the constant; verify it's correct.
+ ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
+ ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));
+
+ // For the constants, we expect all the even indices from N0 and all the
+ // odd indices from N1.
+ if (!C0 || !C1 || C0->getZExtValue() != nextIndex
+ || C1->getZExtValue() != nextIndex+1)
+ return SDValue();
+
+ // Increment index.
+ nextIndex+=2;
+ } else
+ return SDValue();
+ }
+
+ // Create VPADDL node.
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Build operand list.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
+ TLI.getPointerTy()));
+
+ // Input is the vector.
+ Ops.push_back(Vec);
+
+ // Get widened type and narrowed type.
+ MVT widenType;
+ unsigned numElem = VT.getVectorNumElements();
+ switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
+ case MVT::i8: widenType = MVT::getVectorVT(MVT::i16, numElem); break;
+ case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
+ case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
+ default:
+ assert(0 && "Invalid vector element type for padd optimization.");
+ }
+
+ SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ widenType, &Ops[0], Ops.size());
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp);
+}
+
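A scalar reference model of what the matched DAG computes, assuming a v8i8 source and v4i8 result (the vpaddl widens to i16 and the TRUNCATE narrows back):

    // out[i] = (int8_t)(in[2*i] + in[2*i+1]) -- pairwise add of adjacent
    // lanes, with the widened sum truncated back to the element width.
    static void pairwiseAddRef(const int8_t in[8], int8_t out[4]) {
      for (unsigned i = 0; i != 4; ++i)
        out[i] = (int8_t)(in[2 * i] + in[2 * i + 1]);
    }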
/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
/// operands N0 and N1. This is a helper for PerformADDCombine that is
/// called with the default operands, and if that fails, with commuted
/// operands.
static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget){
+
+ // Attempt to create vpaddl for this add.
+ SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget);
+ if (Result.getNode())
+ return Result;
+
// fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
@@ -5539,17 +5647,18 @@ static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
///
static SDValue PerformADDCombine(SDNode *N,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
// First try with the default operand order.
- SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
+ SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget);
if (Result.getNode())
return Result;
// If that didn't work, try again with the operands commuted.
- return PerformADDCombineWithOperands(N, N1, N0, DCI);
+ return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget);
}
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
@@ -5588,7 +5697,7 @@ static SDValue PerformVMULCombine(SDNode *N,
unsigned Opcode = N0.getOpcode();
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
Opcode != ISD::FADD && Opcode != ISD::FSUB) {
- Opcode = N0.getOpcode();
+ Opcode = N1.getOpcode();
if (Opcode != ISD::ADD && Opcode != ISD::SUB &&
Opcode != ISD::FADD && Opcode != ISD::FSUB)
return SDValue();
@@ -5874,8 +5983,8 @@ static SDValue PerformORCombine(SDNode *N,
return SDValue();
}
-/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff
-/// C1 & C2 == C1.
+/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff
+/// the bits being cleared by the AND are not demanded by the BFI.
static SDValue PerformBFICombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
SDValue N1 = N->getOperand(1);
@@ -5883,9 +5992,12 @@ static SDValue PerformBFICombine(SDNode *N,
ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
if (!N11C)
return SDValue();
- unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+ unsigned LSB = CountTrailingZeros_32(~InvMask);
+ unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB;
+ unsigned Mask = (1 << Width)-1;
unsigned Mask2 = N11C->getZExtValue();
- if ((Mask & Mask2) == Mask2)
+ if ((Mask & (~Mask2)) == 0)
return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0),
N->getOperand(0), N1.getOperand(0),
N->getOperand(2));
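The mask-recovery arithmetic above can be checked in isolation; a self-contained sketch (ctz32/clz32 stand in for CountTrailingZeros_32/CountLeadingZeros_32):

    #include <cassert>
    #include <cstdint>

    static unsigned ctz32(uint32_t x) { unsigned n = 0; for (; !(x & 1); x >>= 1) ++n; return n; }
    static unsigned clz32(uint32_t x) { unsigned n = 0; for (; !(x & 0x80000000u); x <<= 1) ++n; return n; }

    int main() {
      uint32_t InvMask = 0xFFFFF00F;                 // BFI preserving all but bits [11:4]
      unsigned LSB   = ctz32(~InvMask);              // 4
      unsigned Width = (32 - clz32(~InvMask)) - LSB; // 8
      uint32_t Mask  = (1u << Width) - 1;            // 0xFF
      assert(LSB == 4 && Width == 8 && Mask == 0xFF);
      uint32_t Mask2 = 0x1FF;          // AND mask on the inserted value
      assert((Mask & ~Mask2) == 0);    // AND keeps every demanded bit: combine fires
      return 0;
    }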
@@ -6378,7 +6490,105 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op);
}
-/// getVShiftImm - Check if this is a valid build_vector for the immediate
+// isConstVecPow2 - Return true if each vector element is a power of 2, all
+// elements are the same constant, C, and Log2(C) ranges from 1 to 32.
+static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C)
+{
+ integerPart cN;
+ integerPart c0 = 0;
+ for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements();
+ I != E; I++) {
+ ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I));
+ if (!C)
+ return false;
+
+ bool isExact;
+ APFloat APF = C->getValueAPF();
+ if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact)
+ != APFloat::opOK || !isExact)
+ return false;
+
+ c0 = (I == 0) ? cN : c0;
+ if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32)
+ return false;
+ }
+ C = c0;
+ return true;
+}
+
+/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD)
+/// can replace combinations of VMUL and VCVT (floating-point to integer)
+/// when the VMUL has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vmul.f32 d16, d17, d16
+/// vcvt.s32.f32 d16, d16
+/// becomes:
+/// vcvt.s32.f32 d16, d16, #3
+static SDValue PerformVCVTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+
+ if (!Subtarget->hasNEON() || !Op.getValueType().isVector() ||
+ Op.getOpcode() != ISD::FMUL)
+ return SDValue();
+
+ uint64_t C;
+ SDValue N0 = Op->getOperand(0);
+ SDValue ConstVec = Op->getOperand(1);
+ bool isSigned = N->getOpcode() == ISD::FP_TO_SINT;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs :
+ Intrinsic::arm_neon_vcvtfp2fxu;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ N->getValueType(0),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32), N0,
+ DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
+/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD)
+/// can replace combinations of VCVT (integer to floating-point) and VDIV
+/// when the VDIV has a constant operand that is a power of 2.
+///
+/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>):
+/// vcvt.f32.s32 d16, d16
+/// vdiv.f32 d16, d16, d17
+/// becomes:
+/// vcvt.f32.s32 d16, d16, #3
+static SDValue PerformVDIVCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+ unsigned OpOpcode = Op.getNode()->getOpcode();
+
+ if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() ||
+ (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP))
+ return SDValue();
+
+ uint64_t C;
+ SDValue ConstVec = N->getOperand(1);
+ bool isSigned = OpOpcode == ISD::SINT_TO_FP;
+
+ if (ConstVec.getOpcode() != ISD::BUILD_VECTOR ||
+ !isConstVecPow2(ConstVec, isSigned, C))
+ return SDValue();
+
+ unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp :
+ Intrinsic::arm_neon_vcvtfxu2fp;
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(),
+ Op.getValueType(),
+ DAG.getConstant(IntrinsicOpcode, MVT::i32),
+ Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32));
+}
+
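Both combines lean on the same identity: a fixed-point convert with #n fractional bits is the float multiply (or divide) by 2^n fused into the conversion. A scalar sanity check under the power-of-two assumption:

    #include <cassert>
    #include <cstdint>

    // Scalar models of the NEON fixed-point vcvt forms used above.
    static int32_t toFixed(float x, unsigned n)     { return (int32_t)(x * (float)(1u << n)); }
    static float   fromFixed(int32_t x, unsigned n) { return (float)x / (float)(1u << n); }

    int main() {
      // vmul by 8.0 + vcvt.s32.f32  ==  vcvt.s32.f32 ..., #3
      assert(toFixed(2.5f, 3) == (int32_t)(2.5f * 8.0f));   // both 20
      // vcvt.f32.s32 + vdiv by 8.0  ==  vcvt.f32.s32 ..., #3
      assert(fromFixed(20, 3) == 20.0f / 8.0f);             // both 2.5
      return 0;
    }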
+/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
@@ -6750,11 +6960,75 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
}
+/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
+SDValue
+ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const {
+ SDValue Cmp = N->getOperand(4);
+ if (Cmp.getOpcode() != ARMISD::CMPZ)
+ // Only looking at EQ and NE cases.
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ DebugLoc dl = N->getDebugLoc();
+ SDValue LHS = Cmp.getOperand(0);
+ SDValue RHS = Cmp.getOperand(1);
+ SDValue FalseVal = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
+ // Simplify
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, y
+ // moveq r0, x
+ // to
+ // cmp r0, x
+ // movne r0, y
+ //
+ // mov r1, r0
+ // cmp r1, x
+ // mov r0, x
+ // movne r0, y
+ // to
+ // cmp r0, x
+ // movne r0, y
+ /// FIXME: Turn this into a target neutral optimization?
+ SDValue Res;
+ if (CC == ARMCC::NE && FalseVal == RHS) {
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc,
+ N->getOperand(3), Cmp);
+ } else if (CC == ARMCC::EQ && TrueVal == RHS) {
+ SDValue ARMcc;
+ SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl);
+ Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc,
+ N->getOperand(3), NewCmp);
+ }
+
+ if (Res.getNode()) {
+ APInt KnownZero, KnownOne;
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
+ DAG.ComputeMaskedBits(SDValue(N,0), Mask, KnownZero, KnownOne);
+ // Capture demanded bits information that would be otherwise lost.
+ if (KnownZero == 0xfffffffe)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i1));
+ else if (KnownZero == 0xffffff00)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i8));
+ else if (KnownZero == 0xffff0000)
+ Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res,
+ DAG.getValueType(MVT::i16));
+ }
+
+ return Res;
+}
+
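The AssertZext tail preserves range facts the rewritten CMOV would otherwise lose; a sketch of the mask-to-type mapping (enum names are illustrative):

    #include <cstdint>

    enum NarrowTy { NoAssert, I1, I8, I16 };

    // Mirror the three KnownZero cases above: if every bit outside the
    // low 1/8/16 is known zero, the value provably fits the narrow type.
    static NarrowTy narrowTypeFor(uint32_t KnownZero) {
      switch (KnownZero) {
      case 0xFFFFFFFEu: return I1;
      case 0xFFFFFF00u: return I8;
      case 0xFFFF0000u: return I16;
      default:          return NoAssert;
      }
    }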
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
@@ -6767,6 +7041,9 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget);
+ case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
@@ -6775,6 +7052,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
+ case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
@@ -7277,10 +7555,17 @@ ARMTargetLowering::getConstraintType(const std::string &Constraint) const {
default: break;
case 'l': return C_RegisterClass;
case 'w': return C_RegisterClass;
+ case 'h': return C_RegisterClass;
+ case 'x': return C_RegisterClass;
+ case 't': return C_RegisterClass;
+ case 'j': return C_Other; // Constant for movw.
+ }
+ } else if (Constraint.size() == 2) {
+ switch (Constraint[0]) {
+ default: break;
+ // All constraints beginning with 'U' are addresses.
+ case 'U': return C_Memory;
}
- } else {
- if (Constraint == "Uv")
- return C_Memory;
}
return TargetLowering::getConstraintType(Constraint);
}
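User-visible effect of the new classification, as hypothetical GCC-style inline asm (requires v6T2 for movw, per the range check added to LowerAsmOperandForConstraint below):

    // Hypothetical use of the new 'j' constraint: a 16-bit immediate
    // materialized directly with movw.
    unsigned loadConst(void) {
      unsigned r;
      asm("movw %0, %1" : "=r"(r) : "j"(0xABCD));
      return r;
    }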
@@ -7319,26 +7604,43 @@ ARMTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
-std::pair<unsigned, const TargetRegisterClass*>
+typedef std::pair<unsigned, const TargetRegisterClass*> RCPair;
+RCPair
ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
if (Constraint.size() == 1) {
// GCC ARM Constraint Letters
switch (Constraint[0]) {
- case 'l':
+ case 'l': // Low regs or general regs.
if (Subtarget->isThumb())
- return std::make_pair(0U, ARM::tGPRRegisterClass);
+ return RCPair(0U, ARM::tGPRRegisterClass);
else
- return std::make_pair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, ARM::GPRRegisterClass);
+ case 'h': // High regs or no regs.
+ if (Subtarget->isThumb())
+ return RCPair(0U, ARM::hGPRRegisterClass);
+ break;
case 'r':
- return std::make_pair(0U, ARM::GPRRegisterClass);
+ return RCPair(0U, ARM::GPRRegisterClass);
case 'w':
if (VT == MVT::f32)
- return std::make_pair(0U, ARM::SPRRegisterClass);
+ return RCPair(0U, ARM::SPRRegisterClass);
if (VT.getSizeInBits() == 64)
- return std::make_pair(0U, ARM::DPRRegisterClass);
+ return RCPair(0U, ARM::DPRRegisterClass);
if (VT.getSizeInBits() == 128)
- return std::make_pair(0U, ARM::QPRRegisterClass);
+ return RCPair(0U, ARM::QPRRegisterClass);
+ break;
+ case 'x':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPR_8RegisterClass);
+ if (VT.getSizeInBits() == 64)
+ return RCPair(0U, ARM::DPR_8RegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return RCPair(0U, ARM::QPR_8RegisterClass);
+ break;
+ case 't':
+ if (VT == MVT::f32)
+ return RCPair(0U, ARM::SPRRegisterClass);
break;
}
}
@@ -7348,47 +7650,6 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-std::vector<unsigned> ARMTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
- if (Constraint.size() != 1)
- return std::vector<unsigned>();
-
- switch (Constraint[0]) { // GCC ARM Constraint Letters
- default: break;
- case 'l':
- return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- 0);
- case 'r':
- return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10, ARM::R11,
- ARM::R12, ARM::LR, 0);
- case 'w':
- if (VT == MVT::f32)
- return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3,
- ARM::S4, ARM::S5, ARM::S6, ARM::S7,
- ARM::S8, ARM::S9, ARM::S10, ARM::S11,
- ARM::S12,ARM::S13,ARM::S14,ARM::S15,
- ARM::S16,ARM::S17,ARM::S18,ARM::S19,
- ARM::S20,ARM::S21,ARM::S22,ARM::S23,
- ARM::S24,ARM::S25,ARM::S26,ARM::S27,
- ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0);
- if (VT.getSizeInBits() == 64)
- return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10,ARM::D11,
- ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
- if (VT.getSizeInBits() == 128)
- return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
- ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
- break;
- }
-
- return std::vector<unsigned>();
-}
-
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -7403,6 +7664,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
char ConstraintLetter = Constraint[0];
switch (ConstraintLetter) {
default: break;
+ case 'j':
case 'I': case 'J': case 'K': case 'L':
case 'M': case 'N': case 'O':
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
@@ -7417,6 +7679,13 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
switch (ConstraintLetter) {
+ case 'j':
+ // Constant suitable for movw; must be between 0 and 65535.
+ if (Subtarget->hasV6T2Ops())
+ if (CVal >= 0 && CVal <= 65535)
+ break;
+ return;
case 'I':
if (Subtarget->isThumb1Only()) {
// This must be a constant between 0 and 255, for ADD
@@ -7685,7 +7954,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(2);
Info.offset = 0;
Info.align = 8;
- Info.vol = false;
+ Info.vol = true;
Info.readMem = false;
Info.writeMem = true;
return true;
@@ -7696,7 +7965,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.align = 8;
- Info.vol = false;
+ Info.vol = true;
Info.readMem = true;
Info.writeMem = false;
return true;
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 21a9a3a..980fb40 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -244,6 +244,7 @@ namespace llvm {
EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB) const;
+ SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const;
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isDesirableToTransformToIntegerOp(unsigned Opc, EVT VT) const;
@@ -306,9 +307,6 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops. If hasMemory is
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index 897d8a5..3ccf22f 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -107,16 +107,6 @@ def AddrModeT2_pc : AddrMode<14>;
def AddrModeT2_i8s4 : AddrMode<15>;
def AddrMode_i12 : AddrMode<16>;
-// Instruction size.
-class SizeFlagVal<bits<3> val> {
- bits<3> Value = val;
-}
-def SizeInvalid : SizeFlagVal<0>; // Unset.
-def SizeSpecial : SizeFlagVal<1>; // Pseudo or special.
-def Size8Bytes : SizeFlagVal<2>;
-def Size4Bytes : SizeFlagVal<3>;
-def Size2Bytes : SizeFlagVal<4>;
-
// Load / store index mode.
class IndexMode<bits<2> val> {
bits<2> Value = val;
@@ -236,13 +226,13 @@ def shr_imm64 : Operand<i32> {
// ARM Instruction templates.
//
-class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
+class InstTemplate<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
: Instruction {
let Namespace = "ARM";
AddrMode AM = am;
- SizeFlagVal SZ = sz;
+ int Size = sz;
IndexMode IM = im;
bits<2> IndexModeBits = IM.Value;
Format F = f;
@@ -256,12 +246,11 @@ class InstTemplate<AddrMode am, SizeFlagVal sz, IndexMode im,
// The layout of TSFlags should be kept in sync with ARMBaseInstrInfo.h.
let TSFlags{4-0} = AM.Value;
- let TSFlags{7-5} = SZ.Value;
- let TSFlags{9-8} = IndexModeBits;
- let TSFlags{15-10} = Form;
- let TSFlags{16} = isUnaryDataProc;
- let TSFlags{17} = canXformTo16Bit;
- let TSFlags{20-18} = D.Value;
+ let TSFlags{6-5} = IndexModeBits;
+ let TSFlags{12-7} = Form;
+ let TSFlags{13} = isUnaryDataProc;
+ let TSFlags{14} = canXformTo16Bit;
+ let TSFlags{17-15} = D.Value;
let Constraints = cstr;
let Itinerary = itin;
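Dropping the 3-bit size field shifts every later TSFlags field down; a sketch of the resulting layout on the C++ side (shift names are assumptions; ARMBaseInstrInfo.h is the authoritative copy):

    // Post-patch TSFlags layout, per the 'let TSFlags{...}' lines above.
    enum {
      AddrModeShift  = 0,   // TSFlags{4-0}   : addressing mode
      IndexModeShift = 5,   // TSFlags{6-5}   : load/store index mode
      FormShift      = 7,   // TSFlags{12-7}  : instruction format
      UnaryDPShift   = 13,  // TSFlags{13}    : isUnaryDataProc
      Xform16Shift   = 14,  // TSFlags{14}    : canXformTo16Bit
      DomainShift    = 15,  // TSFlags{17-15} : execution domain
    };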
@@ -271,53 +260,70 @@ class Encoding {
field bits<32> Inst;
}
-class InstARM<AddrMode am, SizeFlagVal sz, IndexMode im,
+class InstARM<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
: InstTemplate<am, sz, im, f, d, cstr, itin>, Encoding;
// This Encoding-less class is used by Thumb1 to specify the encoding bits later
// on by adding flavors to specific instructions.
-class InstThumb<AddrMode am, SizeFlagVal sz, IndexMode im,
+class InstThumb<AddrMode am, int sz, IndexMode im,
Format f, Domain d, string cstr, InstrItinClass itin>
: InstTemplate<am, sz, im, f, d, cstr, itin>;
class PseudoInst<dag oops, dag iops, InstrItinClass itin, list<dag> pattern>
- // FIXME: This really should derive from InstTemplate instead, as pseudos
- // don't need encoding information. TableGen doesn't like that
- // currently. Need to figure out why and fix it.
- : InstARM<AddrModeNone, SizeSpecial, IndexModeNone, Pseudo, GenericDomain,
- "", itin> {
+ : InstTemplate<AddrModeNone, 0, IndexModeNone, Pseudo,
+ GenericDomain, "", itin> {
let OutOperandList = oops;
let InOperandList = iops;
let Pattern = pattern;
let isCodeGenOnly = 1;
+ let isPseudo = 1;
}
// PseudoInst that's ARM-mode only.
-class ARMPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+class ARMPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
list<dag> pattern>
: PseudoInst<oops, iops, itin, pattern> {
- let SZ = sz;
+ let Size = sz;
list<Predicate> Predicates = [IsARM];
}
// PseudoInst that's Thumb-mode only.
-class tPseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
list<dag> pattern>
: PseudoInst<oops, iops, itin, pattern> {
- let SZ = sz;
+ let Size = sz;
list<Predicate> Predicates = [IsThumb];
}
// PseudoInst that's Thumb2-mode only.
-class t2PseudoInst<dag oops, dag iops, SizeFlagVal sz, InstrItinClass itin,
+class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
list<dag> pattern>
: PseudoInst<oops, iops, itin, pattern> {
- let SZ = sz;
+ let Size = sz;
list<Predicate> Predicates = [IsThumb2];
}
+
+class ARMPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : ARMPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class tPseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : tPseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
+class t2PseudoExpand<dag oops, dag iops, int sz,
+ InstrItinClass itin, list<dag> pattern,
+ dag Result>
+ : t2PseudoInst<oops, iops, sz, itin, pattern>,
+ PseudoInstExpansion<Result>;
+
// Almost all ARM instructions are predicable.
-class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class I<dag oops, dag iops, AddrMode am, int sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
@@ -332,7 +338,7 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
// A few are not predicable
-class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class InoP<dag oops, dag iops, AddrMode am, int sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
@@ -348,7 +354,7 @@ class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
// Same as I except it can optionally modify CPSR. Note it's modeled as an input
// operand since by default it's a zero register. It will become an implicit def
// once it's "flipped".
-class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class sI<dag oops, dag iops, AddrMode am, int sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr,
list<dag> pattern>
@@ -366,7 +372,7 @@ class sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
// Special cases
-class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class XI<dag oops, dag iops, AddrMode am, int sz,
IndexMode im, Format f, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
@@ -379,31 +385,31 @@ class XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class AI<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
opc, asm, "", pattern>;
class AsI<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
opc, asm, "", pattern>;
class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
asm, "", pattern>;
class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ : InoP<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
opc, asm, "", pattern>;
// Ctrl flow instructions
class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
opc, asm, "", pattern> {
let Inst{27-24} = opcod;
}
class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, itin,
+ : XI<oops, iops, AddrModeNone, 4, IndexModeNone, BrFrm, itin,
asm, "", pattern> {
let Inst{27-24} = opcod;
}
@@ -411,13 +417,13 @@ class ABXI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
// BR_JT instructions
class JTI<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
+ : XI<oops, iops, AddrModeNone, 0, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
// Atomic load/store instructions
class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
bits<4> Rt;
bits<4> Rn;
@@ -430,7 +436,7 @@ class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
}
class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, LdStExFrm, itin,
opc, asm, "", pattern> {
bits<4> Rd;
bits<4> Rt;
@@ -460,21 +466,21 @@ class AIswp<bit b, dag oops, dag iops, string opc, list<dag> pattern>
// addrmode1 instructions
class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ : I<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
let Inst{27-26} = 0b00;
}
class AsI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : sI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ : sI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let Inst{24-21} = opcod;
let Inst{27-26} = 0b00;
}
class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
- : XI<oops, iops, AddrMode1, Size4Bytes, IndexModeNone, f, itin,
+ : XI<oops, iops, AddrMode1, 4, IndexModeNone, f, itin,
asm, "", pattern> {
let Inst{24-21} = opcod;
let Inst{27-26} = 0b00;
@@ -486,7 +492,7 @@ class AXI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
Format f, InstrItinClass itin, string opc, string asm,
list<dag> pattern>
- : I<oops, iops, am, Size4Bytes, IndexModeNone, f, itin, opc, asm,
+ : I<oops, iops, am, 4, IndexModeNone, f, itin, opc, asm,
"", pattern> {
let Inst{27-25} = op;
let Inst{24} = 1; // 24 == P
@@ -499,7 +505,7 @@ class AI2ldst<bits<3> op, bit isLd, bit isByte, dag oops, dag iops, AddrMode am,
class AI2ldstidx<bit isLd, bit isByte, bit isPre, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode2, Size4Bytes, im, f, itin,
+ : I<oops, iops, AddrMode2, 4, im, f, itin,
opc, asm, cstr, pattern> {
bits<4> Rt;
let Inst{27-26} = 0b01;
@@ -547,7 +553,7 @@ class AI2stridxT<bit isByte, bit isPre, dag oops, dag iops,
// addrmode3 instructions
class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
opc, asm, "", pattern> {
bits<14> addr;
bits<4> Rt;
@@ -567,7 +573,7 @@ class AI3ld<bits<4> op, bit op20, dag oops, dag iops, Format f,
class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
opc, asm, cstr, pattern> {
bits<4> Rt;
let Inst{27-25} = 0b000;
@@ -583,7 +589,7 @@ class AI3ldstidx<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
class AI3ldstidxT<bits<4> op, bit op20, bit isLd, bit isPre, dag oops, dag iops,
IndexMode im, Format f, InstrItinClass itin, string opc,
string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, im, f, itin,
+ : I<oops, iops, AddrMode3, 4, im, f, itin,
opc, asm, cstr, pattern> {
// {13} 1 == imm8, 0 == Rm
// {12-9} Rn
@@ -627,7 +633,7 @@ class AI3stridx<bits<4> op, bit isByte, bit isPre, dag oops, dag iops,
// stores
class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModeNone, f, itin,
+ : I<oops, iops, AddrMode3, 4, IndexModeNone, f, itin,
opc, asm, "", pattern> {
bits<14> addr;
bits<4> Rt;
@@ -647,7 +653,7 @@ class AI3str<bits<4> op, dag oops, dag iops, Format f, InstrItinClass itin,
// Pre-indexed stores
class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
@@ -660,7 +666,7 @@ class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
}
class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ : I<oops, iops, AddrMode3, 4, IndexModePre, f, itin,
opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
@@ -675,7 +681,7 @@ class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
// Post-indexed stores
class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
opc, asm, cstr,pattern> {
// {13} 1 == imm8, 0 == Rm
// {12-9} Rn
@@ -701,7 +707,7 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
}
class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ : I<oops, iops, AddrMode3, 4, IndexModePost, f, itin,
opc, asm, cstr, pattern> {
let Inst{4} = 1;
let Inst{5} = 1; // H bit
@@ -716,7 +722,7 @@ class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
// addrmode4 instructions
class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : XI<oops, iops, AddrMode4, Size4Bytes, im, f, itin, asm, cstr, pattern> {
+ : XI<oops, iops, AddrMode4, 4, im, f, itin, asm, cstr, pattern> {
bits<4> p;
bits<16> regs;
bits<4> Rn;
@@ -730,7 +736,7 @@ class AXI4<dag oops, dag iops, IndexMode im, Format f, InstrItinClass itin,
// Unsigned multiply, multiply-accumulate instructions.
class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
opc, asm, "", pattern> {
let Inst{7-4} = 0b1001;
let Inst{20} = 0; // S bit
@@ -738,7 +744,7 @@ class AMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
}
class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : sI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ : sI<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
opc, asm, "", pattern> {
let Inst{7-4} = 0b1001;
let Inst{27-21} = opcod;
@@ -747,7 +753,7 @@ class AsMul1I<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
// Most significant word multiply
class AMul2I<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
opc, asm, "", pattern> {
bits<4> Rd;
bits<4> Rn;
@@ -770,7 +776,7 @@ class AMul2Ia<bits<7> opcod, bits<4> opc7_4, dag oops, dag iops,
// SMUL<x><y> / SMULW<y> / SMLA<x><y> / SMLAW<x><y>
class AMulxyIbase<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, MulFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, MulFrm, itin,
opc, asm, "", pattern> {
bits<4> Rn;
bits<4> Rm;
@@ -809,7 +815,7 @@ class AMulxyI64<bits<7> opcod, bits<2> bit6_5, dag oops, dag iops,
// Extend instructions.
class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ExtFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ExtFrm, itin,
opc, asm, "", pattern> {
// All AExtI instructions have Rd and Rm register operands.
bits<4> Rd;
@@ -824,7 +830,7 @@ class AExtI<bits<8> opcod, dag oops, dag iops, InstrItinClass itin,
// Misc Arithmetic instructions.
class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
opc, asm, "", pattern> {
bits<4> Rd;
bits<4> Rm;
@@ -839,7 +845,7 @@ class AMiscA1I<bits<8> opcod, bits<4> opc7_4, dag oops, dag iops,
// PKH instructions
class APKHI<bits<8> opcod, bit tb, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, ArithMiscFrm, itin,
+ : I<oops, iops, AddrModeNone, 4, IndexModeNone, ArithMiscFrm, itin,
opc, asm, "", pattern> {
bits<4> Rd;
bits<4> Rn;
@@ -874,7 +880,7 @@ class ARMV6Pat<dag pattern, dag result> : Pat<pattern, result> {
// Thumb Instruction Format Definitions.
//
-class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class ThumbI<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
@@ -886,39 +892,32 @@ class ThumbI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
// TI - Thumb instruction.
class TI<dag oops, dag iops, InstrItinClass itin, string asm, list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
// Two-address instructions
class TIt<dag oops, dag iops, InstrItinClass itin, string asm,
list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "$lhs = $dst",
+ : ThumbI<oops, iops, AddrModeNone, 2, itin, asm, "$lhs = $dst",
pattern>;
// tBL, tBX 32-bit instructions
class TIx2<bits<5> opcod1, bits<2> opcod2, bit opcod3,
dag oops, dag iops, InstrItinClass itin, string asm,
list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>,
+ : ThumbI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>,
Encoding {
let Inst{31-27} = opcod1;
let Inst{15-14} = opcod2;
let Inst{12} = opcod3;
}
-// Move to/from coprocessor instructions
-class T1Cop<dag oops, dag iops, string asm, list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, Size4Bytes, NoItinerary, asm, "", pattern>,
- Encoding, Requires<[IsThumb, HasV6]> {
- let Inst{31-28} = 0b1110;
-}
-
// BR_JT instructions
class TJTI<dag oops, dag iops, InstrItinClass itin, string asm,
list<dag> pattern>
- : ThumbI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
+ : ThumbI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
// Thumb1 only
-class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb1I<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
@@ -930,19 +929,19 @@ class Thumb1I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class T1I<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin, asm, "", pattern>;
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin, asm, "", pattern>;
class T1Ix2<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : Thumb1I<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+ : Thumb1I<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
// Two-address instructions
class T1It<dag oops, dag iops, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : Thumb1I<oops, iops, AddrModeNone, Size2Bytes, itin,
+ : Thumb1I<oops, iops, AddrModeNone, 2, itin,
asm, cstr, pattern>;
// Thumb1 instruction that can either be predicated or set CPSR.
-class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb1sI<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -955,16 +954,16 @@ class Thumb1sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class T1sI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>;
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
// Two-address instructions
class T1sIt<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb1sI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+ : Thumb1sI<oops, iops, AddrModeNone, 2, itin, opc, asm,
"$Rn = $Rdn", pattern>;
// Thumb1 instruction that can be predicated.
-class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb1pI<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstThumb<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -977,17 +976,17 @@ class Thumb1pI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class T1pI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm, "", pattern>;
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm, "", pattern>;
// Two-address instructions
class T1pIt<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb1pI<oops, iops, AddrModeNone, Size2Bytes, itin, opc, asm,
+ : Thumb1pI<oops, iops, AddrModeNone, 2, itin, opc, asm,
"$Rn = $Rdn", pattern>;
class T1pIs<dag oops, dag iops,
InstrItinClass itin, string opc, string asm, list<dag> pattern>
- : Thumb1pI<oops, iops, AddrModeT1_s, Size2Bytes, itin, opc, asm, "", pattern>;
+ : Thumb1pI<oops, iops, AddrModeT1_s, 2, itin, opc, asm, "", pattern>;
class Encoding16 : Encoding {
let Inst{31-16} = 0x0000;
@@ -1036,7 +1035,7 @@ class T1BranchCond<bits<4> opcode> : Encoding16 {
class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
InstrItinClass itin, string opc, string asm,
list<dag> pattern>
- : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
T1LoadStore<0b0101, opcode> {
bits<3> Rt;
bits<8> addr;
@@ -1047,7 +1046,7 @@ class T1pILdStEncode<bits<3> opcode, dag oops, dag iops, AddrMode am,
class T1pILdStEncodeImm<bits<4> opA, bit opB, dag oops, dag iops, AddrMode am,
InstrItinClass itin, string opc, string asm,
list<dag> pattern>
- : Thumb1pI<oops, iops, am, Size2Bytes, itin, opc, asm, "", pattern>,
+ : Thumb1pI<oops, iops, am, 2, itin, opc, asm, "", pattern>,
T1LoadStore<opA, {opB,?,?}> {
bits<3> Rt;
bits<8> addr;
@@ -1063,7 +1062,7 @@ class T1Misc<bits<7> opcode> : Encoding16 {
}
// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
-class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb2I<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -1080,7 +1079,7 @@ class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
//
// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
// more consistent.
-class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb2sI<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -1095,7 +1094,7 @@ class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
// Special cases
-class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class Thumb2XI<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -1106,7 +1105,7 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb2];
}
-class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class ThumbXI<dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
@@ -1119,22 +1118,22 @@ class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class T2I<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+ : Thumb2I<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
class T2Ii12<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i12, Size4Bytes, itin, opc, asm, "",pattern>;
+ : Thumb2I<oops, iops, AddrModeT2_i12, 4, itin, opc, asm, "",pattern>;
class T2Ii8<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i8, Size4Bytes, itin, opc, asm, "", pattern>;
+ : Thumb2I<oops, iops, AddrModeT2_i8, 4, itin, opc, asm, "", pattern>;
class T2Iso<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_so, Size4Bytes, itin, opc, asm, "", pattern>;
+ : Thumb2I<oops, iops, AddrModeT2_so, 4, itin, opc, asm, "", pattern>;
class T2Ipc<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_pc, Size4Bytes, itin, opc, asm, "", pattern>;
+ : Thumb2I<oops, iops, AddrModeT2_pc, 4, itin, opc, asm, "", pattern>;
class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2I<oops, iops, AddrModeT2_i8s4, Size4Bytes, itin, opc, asm, "",
+ : Thumb2I<oops, iops, AddrModeT2_i8s4, 4, itin, opc, asm, "",
pattern> {
bits<4> Rt;
bits<4> Rt2;
@@ -1153,32 +1152,32 @@ class T2Ii8s4<bit P, bit W, bit isLoad, dag oops, dag iops, InstrItinClass itin,
class T2sI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
+ : Thumb2sI<oops, iops, AddrModeNone, 4, itin, opc, asm, "", pattern>;
class T2XI<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, "", pattern>;
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, "", pattern>;
class T2JTI<dag oops, dag iops, InstrItinClass itin,
string asm, list<dag> pattern>
- : Thumb2XI<oops, iops, AddrModeNone, SizeSpecial, itin, asm, "", pattern>;
+ : Thumb2XI<oops, iops, AddrModeNone, 0, itin, asm, "", pattern>;
// Move to/from coprocessor instructions
-class T2Cop<dag oops, dag iops, string asm, list<dag> pattern>
- : T2XI<oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2, HasV6]> {
- let Inst{31-28} = 0b1111;
+class T2Cop<bits<4> opc, dag oops, dag iops, string asm, list<dag> pattern>
+ : T2XI <oops, iops, NoItinerary, asm, pattern>, Requires<[IsThumb2]> {
+ let Inst{31-28} = opc;
}
// Two-address instructions
class T2XIt<dag oops, dag iops, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, itin, asm, cstr, pattern>;
+ : Thumb2XI<oops, iops, AddrModeNone, 4, itin, asm, cstr, pattern>;
// T2Iidxldst - Thumb2 indexed load / store instructions.
class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
dag oops, dag iops,
AddrMode am, IndexMode im, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
- : InstARM<am, Size4Bytes, im, ThumbFrm, GenericDomain, cstr, itin> {
+ : InstARM<am, 4, im, ThumbFrm, GenericDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${p}", asm);
@@ -1232,7 +1231,7 @@ class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
//
// Almost all VFP instructions are predicable.
-class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class VFPI<dag oops, dag iops, AddrMode am, int sz,
IndexMode im, Format f, InstrItinClass itin,
string opc, string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
@@ -1247,7 +1246,7 @@ class VFPI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
// Special cases
-class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class VFPXI<dag oops, dag iops, AddrMode am, int sz,
IndexMode im, Format f, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
: InstARM<am, sz, im, f, VFPDomain, cstr, itin> {
@@ -1263,7 +1262,7 @@ class VFPXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : VFPI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ : VFPI<oops, iops, AddrModeNone, 4, IndexModeNone, f, itin,
opc, asm, "", pattern> {
let PostEncoderMethod = "VFPThumb2PostEncoder";
}
@@ -1272,7 +1271,7 @@ class VFPAI<dag oops, dag iops, Format f, InstrItinClass itin,
class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
VFPLdStFrm, itin, opc, asm, "", pattern> {
// Instruction operands.
bits<5> Dd;
@@ -1298,7 +1297,7 @@ class ADI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
InstrItinClass itin,
string opc, string asm, list<dag> pattern>
- : VFPI<oops, iops, AddrMode5, Size4Bytes, IndexModeNone,
+ : VFPI<oops, iops, AddrMode5, 4, IndexModeNone,
VFPLdStFrm, itin, opc, asm, "", pattern> {
// Instruction operands.
bits<5> Sd;
@@ -1324,7 +1323,7 @@ class ASI5<bits<4> opcod1, bits<2> opcod2, dag oops, dag iops,
// VFP Load / store multiple pseudo instructions.
class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
list<dag> pattern>
- : InstARM<AddrMode4, Size4Bytes, IndexModeNone, Pseudo, VFPNeonDomain,
+ : InstARM<AddrMode4, 4, IndexModeNone, Pseudo, VFPNeonDomain,
cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
@@ -1335,7 +1334,7 @@ class PseudoVFPLdStM<dag oops, dag iops, InstrItinClass itin, string cstr,
// Load / store multiple
class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, 4, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// Instruction operands.
bits<4> Rn;
@@ -1355,7 +1354,7 @@ class AXDI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
class AXSI4<dag oops, dag iops, IndexMode im, InstrItinClass itin,
string asm, string cstr, list<dag> pattern>
- : VFPXI<oops, iops, AddrMode4, Size4Bytes, im,
+ : VFPXI<oops, iops, AddrMode4, 4, im,
VFPLdStMulFrm, itin, asm, cstr, pattern> {
// Instruction operands.
bits<4> Rn;
@@ -1569,7 +1568,7 @@ class AVConv5I<bits<8> opcod1, bits<4> opcod2, dag oops, dag iops,
class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
InstrItinClass itin, string opc, string dt, string asm, string cstr,
list<dag> pattern>
- : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${p}", ".", dt, "\t", asm);
@@ -1581,7 +1580,7 @@ class NeonI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
class NeonXI<dag oops, dag iops, AddrMode am, IndexMode im, Format f,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern>
- : InstARM<am, Size4Bytes, im, f, NeonDomain, cstr, itin> {
+ : InstARM<am, 4, im, f, NeonDomain, cstr, itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
let AsmString = !strconcat(opc, "${p}", "\t", asm);
@@ -1621,7 +1620,7 @@ class NLdStLn<bit op23, bits<2> op21_20, bits<4> op11_8, bits<4> op7_4,
}
class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
- : InstARM<AddrMode6, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ : InstARM<AddrMode6, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
@@ -1630,7 +1629,7 @@ class PseudoNLdSt<dag oops, dag iops, InstrItinClass itin, string cstr>
class PseudoNeonI<dag oops, dag iops, InstrItinClass itin, string cstr,
list<dag> pattern>
- : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, Pseudo, NeonDomain, cstr,
+ : InstARM<AddrModeNone, 4, IndexModeNone, Pseudo, NeonDomain, cstr,
itin> {
let OutOperandList = oops;
let InOperandList = !con(iops, (ins pred:$p));
@@ -1859,7 +1858,7 @@ class N3VX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op6,
class NVLaneOp<bits<8> opcod1, bits<4> opcod2, bits<2> opcod3,
dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string dt, string asm, list<dag> pattern>
- : InstARM<AddrModeNone, Size4Bytes, IndexModeNone, f, NeonDomain,
+ : InstARM<AddrModeNone, 4, IndexModeNone, f, NeonDomain,
"", itin> {
let Inst{27-20} = opcod1;
let Inst{11-8} = opcod2;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 6f48d96..adcbf18 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -14,7 +14,6 @@
#include "ARMInstrInfo.h"
#include "ARM.h"
#include "ARMAddressingModes.h"
-#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 9af76df..a42dd1a 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -62,6 +62,9 @@ def SDT_ARMEH_SJLJ_DispatchSetup: SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMMEMBARRIER : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMPREFETCH : SDTypeProfile<0, 3, [SDTCisPtrTy<0>, SDTCisSameAs<1, 2>,
+ SDTCisInt<1>]>;
+
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
@@ -130,7 +133,7 @@ def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER,
[SDNPHasChain]>;
-def ARMPreload : SDNode<"ARMISD::PRELOAD", SDTPrefetch,
+def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH,
[SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
@@ -144,33 +147,48 @@ def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
-def HasV4T : Predicate<"Subtarget->hasV4TOps()">, AssemblerPredicate;
+def HasV4T : Predicate<"Subtarget->hasV4TOps()">,
+ AssemblerPredicate<"HasV4TOps">;
def NoV4T : Predicate<"!Subtarget->hasV4TOps()">;
def HasV5T : Predicate<"Subtarget->hasV5TOps()">;
-def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, AssemblerPredicate;
-def HasV6 : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate;
+def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">,
+ AssemblerPredicate<"HasV5TEOps">;
+def HasV6 : Predicate<"Subtarget->hasV6Ops()">,
+ AssemblerPredicate<"HasV6Ops">;
def NoV6 : Predicate<"!Subtarget->hasV6Ops()">;
-def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate;
+def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">,
+ AssemblerPredicate<"HasV6T2Ops">;
def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">;
-def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate;
+def HasV7 : Predicate<"Subtarget->hasV7Ops()">,
+ AssemblerPredicate<"HasV7Ops">;
def NoVFP : Predicate<"!Subtarget->hasVFP2()">;
-def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate;
-def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate;
-def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate;
-def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate;
-def HasDivide : Predicate<"Subtarget->hasDivide()">, AssemblerPredicate;
+def HasVFP2 : Predicate<"Subtarget->hasVFP2()">,
+ AssemblerPredicate<"FeatureVFP2">;
+def HasVFP3 : Predicate<"Subtarget->hasVFP3()">,
+ AssemblerPredicate<"FeatureVFP3">;
+def HasNEON : Predicate<"Subtarget->hasNEON()">,
+ AssemblerPredicate<"FeatureNEON">;
+def HasFP16 : Predicate<"Subtarget->hasFP16()">,
+ AssemblerPredicate<"FeatureFP16">;
+def HasDivide : Predicate<"Subtarget->hasDivide()">,
+ AssemblerPredicate<"FeatureHWDiv">;
def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">,
- AssemblerPredicate;
+ AssemblerPredicate<"FeatureT2XtPk">;
+def HasThumb2DSP : Predicate<"Subtarget->hasThumb2DSP()">,
+ AssemblerPredicate<"FeatureDSPThumb2">;
def HasDB : Predicate<"Subtarget->hasDataBarrier()">,
- AssemblerPredicate;
+ AssemblerPredicate<"FeatureDB">;
def HasMP : Predicate<"Subtarget->hasMPExtension()">,
- AssemblerPredicate;
+ AssemblerPredicate<"FeatureMP">;
def UseNEONForFP : Predicate<"Subtarget->useNEONForSinglePrecisionFP()">;
def DontUseNEONForFP : Predicate<"!Subtarget->useNEONForSinglePrecisionFP()">;
-def IsThumb : Predicate<"Subtarget->isThumb()">, AssemblerPredicate;
+def IsThumb : Predicate<"Subtarget->isThumb()">,
+ AssemblerPredicate<"ModeThumb">;
def IsThumb1Only : Predicate<"Subtarget->isThumb1Only()">;
-def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate;
-def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate;
+def IsThumb2 : Predicate<"Subtarget->isThumb2()">,
+ AssemblerPredicate<"ModeThumb,FeatureThumb2">;
+def IsARM : Predicate<"!Subtarget->isThumb()">,
+ AssemblerPredicate<"!ModeThumb">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
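
The predicate hunk above moves from the bare AssemblerPredicate marker to the parameterized AssemblerPredicate<"FeatureFoo"> form, naming the subtarget feature or mode the MC-level assembly matcher should test (including negated and comma-joined combinations such as "ModeThumb,FeatureThumb2"). The codegen-time C++ condition and the assembler-time feature check now live side by side in one record. A standalone analogue with invented class and field names:

    class CondPredicate<string cpp, string feature> {
      string CondString         = cpp;     // evaluated by codegen (C++)
      string AssemblerCondition = feature; // evaluated by the asm matcher (feature bits)
    }
    def HasFancyDSP : CondPredicate<"Subtarget->hasFancyDSP()", "FeatureFancyDSP">;
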
@@ -237,11 +255,13 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
-/// imm0_65535 predicate - True if the 32-bit immediate is in the range
-/// [0.65535].
-def imm0_65535 : ImmLeaf<i32, [{
+/// imm0_65535 - An immediate is in the range [0,65535].
+def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; }
+def imm0_65535 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 65536;
-}]>;
+}]> {
+ let ParserMatchClass = Imm0_65535AsmOperand;
+}
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
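
imm0_65535 above shows the pattern this patch applies to several immediates: a single def that is both an Operand<i32> (so instructions can list it in their ins) and an ImmLeaf (so instruction selection can test the range), with ParserMatchClass hooking it to an AsmOperandClass for the assembly parser. A sketch of the same shape for a hypothetical imm0_3, assuming the standard Operand/ImmLeaf/AsmOperandClass definitions this file already pulls in:

    def Imm0_3AsmOperand : AsmOperandClass { let Name = "Imm0_3"; }
    def imm0_3 : Operand<i32>, ImmLeaf<i32, [{
      return Imm >= 0 && Imm < 4;               // ISel-time range check
    }]> {
      let ParserMatchClass = Imm0_3AsmOperand;  // asm-parser hook
    }
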
@@ -294,16 +314,19 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{
// FIXME: rename brtarget to t2_brtarget
def brtarget : Operand<OtherVT> {
let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
// FIXME: get rid of this one?
def uncondbrtarget : Operand<OtherVT> {
let EncoderMethod = "getUnconditionalBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
// Branch target for ARM. Handles conditional/unconditional
def br_target : Operand<OtherVT> {
let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
// Call target.
@@ -311,6 +334,7 @@ def br_target : Operand<OtherVT> {
def bltarget : Operand<i32> {
// Encoded the same as branch targets.
let EncoderMethod = "getBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
// Call target for ARM. Handles conditional/unconditional
@@ -318,6 +342,7 @@ def bltarget : Operand<i32> {
def bl_target : Operand<i32> {
// Encoded the same as branch targets.
let EncoderMethod = "getARMBranchTargetOpValue";
+ let OperandType = "OPERAND_PCREL";
}
@@ -394,14 +419,20 @@ def shift_imm : Operand<i32> {
let ParserMatchClass = ShifterAsmOperand;
}
+def ShiftedRegAsmOperand : AsmOperandClass {
+ let Name = "ShiftedReg";
+}
+
// shifter_operand operands: so_reg and so_imm.
def so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShifterOperandReg",
[shl,srl,sra,rotr]> {
let EncoderMethod = "getSORegOpValue";
let PrintMethod = "printSORegOperand";
+ let ParserMatchClass = ShiftedRegAsmOperand;
let MIOperandInfo = (ops GPR, GPR, shift_imm);
}
+// FIXME: Does this need to be distinct from so_reg?
def shift_so_reg : Operand<i32>, // reg reg imm
ComplexPattern<i32, 3, "SelectShiftShifterOperandReg",
[shl,srl,sra,rotr]> {
@@ -416,7 +447,6 @@ def so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getSOImmVal(Imm) != -1;
}]> {
let EncoderMethod = "getSOImmOpValue";
- let PrintMethod = "printSOImmOperand";
}
// Break so_imm's up into two pieces. This handles immediates with up to 16
@@ -434,6 +464,22 @@ def arm_i32imm : PatLeaf<(imm), [{
return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue());
}]>;
+/// imm0_7 predicate - Immediate in the range [0,7].
+def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; }
+def imm0_7 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 8;
+}]> {
+ let ParserMatchClass = Imm0_7AsmOperand;
+}
+
+/// imm0_15 predicate - Immediate in the range [0,15].
+def Imm0_15AsmOperand: AsmOperandClass { let Name = "Imm0_15"; }
+def imm0_15 : Operand<i32>, ImmLeaf<i32, [{
+ return Imm >= 0 && Imm < 16;
+}]> {
+ let ParserMatchClass = Imm0_15AsmOperand;
+}
+
/// imm0_31 predicate - True if the 32-bit immediate is in the range [0,31].
def imm0_31 : Operand<i32>, ImmLeaf<i32, [{
return Imm >= 0 && Imm < 32;
@@ -673,7 +719,7 @@ include "ARMInstrFormats.td"
/// binop that produces a value.
multiclass AsI1_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> {
+ PatFrag opnode, string baseOpc, bit Commutable = 0> {
// The register-immediate version is re-materializable. This is useful
// in particular for taking the address of a local.
let isReMaterializable = 1 in {
@@ -713,6 +759,24 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc,
let Inst{15-12} = Rd;
let Inst{11-0} = shift;
}
+
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+ so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
}
/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
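
The aliases added to AsI1_bin_irs accept the two-operand assembly shorthand (add r0, #1 for add r0, r0, #1): $Rdn is fed to both the destination and the first source of the result instruction. Because InstAlias needs a concrete instruction record, the multiclass now takes a baseOpc string and rebuilds the record names with !strconcat, resolved at TableGen time by !cast<Instruction>. A minimal standalone illustration of the name-resolution trick (invented records; parses with llvm-tblgen by itself):

    class Inst<string asm> { string AsmString = asm; }
    def ADDri_sketch : Inst<"add $Rd, $Rn, $imm">;
    def lookup {
      // Builds the string "ADDri_sketch", then resolves it to the record.
      Inst I = !cast<Inst>(!strconcat("ADDri", "_sketch"));
    }
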
@@ -909,9 +973,9 @@ multiclass AI_exta_rrot_np<bits<8> opcod, string opc> {
}
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
-let Uses = [CPSR] in {
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
- bit Commutable = 0> {
+ string baseOpc, bit Commutable = 0> {
+ let Uses = [CPSR] in {
def ri : AsI1<opcod, (outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
DPFrm, IIC_iALUi, opc, "\t$Rd, $Rn, $imm",
[(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>,
@@ -950,7 +1014,24 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{15-12} = Rd;
let Inst{19-16} = Rn;
}
-}
+ }
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) GPR:$Rdn, GPR:$Rdn,
+ so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) GPR:$Rdn, GPR:$Rdn,
+ GPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) GPR:$Rdn, GPR:$Rdn,
+ so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsARM]>;
}
// Carry setting variants
@@ -958,15 +1039,15 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let usesCustomInserter = 1 in {
multiclass AI1_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
def ri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- Size4Bytes, IIC_iALUi,
+ 4, IIC_iALUi,
[(set GPR:$Rd, (opnode GPR:$Rn, so_imm:$imm))]>;
def rr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- Size4Bytes, IIC_iALUr,
+ 4, IIC_iALUr,
[(set GPR:$Rd, (opnode GPR:$Rn, GPR:$Rm))]> {
let isCommutable = Commutable;
}
def rs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- Size4Bytes, IIC_iALUsr,
+ 4, IIC_iALUsr,
[(set GPR:$Rd, (opnode GPR:$Rn, so_reg:$shift))]>;
}
}
@@ -1116,9 +1197,8 @@ def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
// The i32imm operand $val can be used by a debugger to store more information
// about the breakpoint.
-def BKPT : AI<(outs), (ins i32imm:$val), MiscFrm, NoItinerary, "bkpt", "\t$val",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM]> {
+def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary,
+ "bkpt", "\t$val", []>, Requires<[IsARM]> {
bits<16> val;
let Inst{3-0} = val{3-0};
let Inst{19-8} = val{15-4};
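
Narrowing BKPT's operand from i32imm to imm0_65535 matches what the encoding can hold: a 16-bit immediate split across two fields. Worked example for bkpt #0xABCD:

    val        = 0xABCD
    Inst{19-8} = val{15-4} = 0xABC
    Inst{3-0}  = val{3-0}  = 0xD

The operand change also lets the assembler range-check the value up front instead of accepting an immediate the encoding cannot represent.
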
@@ -1208,9 +1288,8 @@ def SETEND : AXI<(outs),(ins setend_op:$end), MiscFrm, NoItinerary,
let Inst{8-0} = 0;
}
-def DBG : AI<(outs), (ins i32imm:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsARM, HasV7]> {
+def DBG : AI<(outs), (ins imm0_15:$opt), MiscFrm, NoItinerary, "dbg", "\t$opt",
+ []>, Requires<[IsARM, HasV7]> {
bits<4> opt;
let Inst{27-4} = 0b001100100000111100001111;
let Inst{3-0} = opt;
@@ -1227,40 +1306,40 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary,
// Address computation and loads and stores in PIC mode.
let isNotDuplicable = 1 in {
def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p),
- Size4Bytes, IIC_iALUr,
+ 4, IIC_iALUr,
[(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>;
let AddedComplexity = 10 in {
def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p),
- Size4Bytes, IIC_iLoad_r,
+ 4, IIC_iLoad_r,
[(set GPR:$dst, (load addrmodepc:$addr))]>;
def PICLDRH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
- Size4Bytes, IIC_iLoad_bh_r,
+ 4, IIC_iLoad_bh_r,
[(set GPR:$Rt, (zextloadi16 addrmodepc:$addr))]>;
def PICLDRB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
- Size4Bytes, IIC_iLoad_bh_r,
+ 4, IIC_iLoad_bh_r,
[(set GPR:$Rt, (zextloadi8 addrmodepc:$addr))]>;
def PICLDRSH : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
- Size4Bytes, IIC_iLoad_bh_r,
+ 4, IIC_iLoad_bh_r,
[(set GPR:$Rt, (sextloadi16 addrmodepc:$addr))]>;
def PICLDRSB : ARMPseudoInst<(outs GPR:$Rt), (ins addrmodepc:$addr, pred:$p),
- Size4Bytes, IIC_iLoad_bh_r,
+ 4, IIC_iLoad_bh_r,
[(set GPR:$Rt, (sextloadi8 addrmodepc:$addr))]>;
}
let AddedComplexity = 10 in {
def PICSTR : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Size4Bytes, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
+ 4, IIC_iStore_r, [(store GPR:$src, addrmodepc:$addr)]>;
def PICSTRH : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Size4Bytes, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
+ 4, IIC_iStore_bh_r, [(truncstorei16 GPR:$src,
addrmodepc:$addr)]>;
def PICSTRB : ARMPseudoInst<(outs), (ins GPR:$src, addrmodepc:$addr, pred:$p),
- Size4Bytes, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
+ 4, IIC_iStore_bh_r, [(truncstorei8 GPR:$src, addrmodepc:$addr)]>;
}
} // isNotDuplicable = 1
@@ -1282,11 +1361,11 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label),
let Inst{11-0} = label;
}
def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p),
- Size4Bytes, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>;
def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- Size4Bytes, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>;
//===----------------------------------------------------------------------===//
// Control Flow Instructions.
@@ -1319,22 +1398,13 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let Inst{3-0} = dst;
}
- // For disassembly only.
- def BX_pred : AXI<(outs), (ins GPR:$dst, pred:$p), BrMiscFrm, IIC_Br,
- "bx$p\t$dst", [/* pattern left blank */]>,
+ def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br,
+ "bx", "\t$dst", [/* pattern left blank */]>,
Requires<[IsARM, HasV4T]> {
bits<4> dst;
let Inst{27-4} = 0b000100101111111111110001;
let Inst{3-0} = dst;
}
-
- // ARMV4 only
- // FIXME: We would really like to define this as a vanilla ARMPat like:
- // ARMPat<(brind GPR:$dst), (MOVr PC, GPR:$dst)>
- // With that, however, we can't set isBranch, isTerminator, etc..
- def MOVPCRX : ARMPseudoInst<(outs), (ins GPR:$dst),
- Size4Bytes, IIC_Br, [(brind GPR:$dst)]>,
- Requires<[IsARM, NoV4T]>;
}
// All calls clobber the non-callee saved registers. SP is marked as
@@ -1386,12 +1456,12 @@ let isCall = 1,
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, HasV4T, IsNotDarwin]>;
// ARMv4
def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, NoV4T, IsNotDarwin]>;
}
@@ -1401,131 +1471,82 @@ let isCall = 1,
// moved above / below calls.
Defs = [R0, R1, R2, R3, R9, R12, LR, QQQQ0, QQQQ2, QQQQ3, CPSR, FPSCR],
Uses = [R7, SP] in {
- def BLr9 : ARMPseudoInst<(outs), (ins bltarget:$func, variable_ops),
- Size4Bytes, IIC_Br,
- [(ARMcall tglobaladdr:$func)]>, Requires<[IsARM, IsDarwin]>;
-
- def BLr9_pred : ARMPseudoInst<(outs),
- (ins bltarget:$func, pred:$p, variable_ops),
- Size4Bytes, IIC_Br,
- [(ARMcall_pred tglobaladdr:$func)]>,
+ def BLr9 : ARMPseudoExpand<(outs), (ins bl_target:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall tglobaladdr:$func)], (BL bl_target:$func)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def BLr9_pred : ARMPseudoExpand<(outs),
+ (ins bl_target:$func, pred:$p, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_pred tglobaladdr:$func)],
+ (BL_pred bl_target:$func, pred:$p)>,
Requires<[IsARM, IsDarwin]>;
// ARMv5T and above
- def BLXr9 : ARMPseudoInst<(outs), (ins GPR:$func, variable_ops),
- Size4Bytes, IIC_Br,
- [(ARMcall GPR:$func)]>, Requires<[IsARM, HasV5T, IsDarwin]>;
-
- def BLXr9_pred: ARMPseudoInst<(outs), (ins GPR:$func, pred:$p, variable_ops),
- Size4Bytes, IIC_Br,
- [(ARMcall_pred GPR:$func)]>,
+ def BLXr9 : ARMPseudoExpand<(outs), (ins GPR:$func, variable_ops),
+ 4, IIC_Br,
+ [(ARMcall GPR:$func)],
+ (BLX GPR:$func)>,
+ Requires<[IsARM, HasV5T, IsDarwin]>;
+
+ def BLXr9_pred: ARMPseudoExpand<(outs), (ins GPR:$func, pred:$p,variable_ops),
+ 4, IIC_Br,
+ [(ARMcall_pred GPR:$func)],
+ (BLX_pred GPR:$func, pred:$p)>,
Requires<[IsARM, HasV5T, IsDarwin]>;
// ARMv4T
// Note: Restrict $func to the tGPR regclass to prevent it being in LR.
def BXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, HasV4T, IsDarwin]>;
// ARMv4
def BMOVPCRXr9_CALL : ARMPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- Size8Bytes, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
+ 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, NoV4T, IsDarwin]>;
}
-// Tail calls.
-
-// FIXME: The Thumb versions of these should live in ARMInstrThumb.td
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
- // Darwin versions.
- let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsDarwin]>;
-
- def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsDarwin]>;
-
- def TAILJMPd : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
- Size4Bytes, IIC_Br,
- []>, Requires<[IsARM, IsDarwin]>;
-
- def tTAILJMPd: tPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
- Size4Bytes, IIC_Br,
- []>, Requires<[IsThumb, IsDarwin]>;
-
- def TAILJMPr : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- Size4Bytes, IIC_Br,
- []>, Requires<[IsARM, IsDarwin]>;
-
- def tTAILJMPr : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- Size4Bytes, IIC_Br,
- []>, Requires<[IsThumb, IsDarwin]>;
- }
-
- // Non-Darwin versions (the difference is R9).
- let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
- Uses = [SP] in {
- def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotDarwin]>;
-
- def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- IIC_Br, []>, Requires<[IsNotDarwin]>;
-
- def TAILJMPdND : ARMPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
- Size4Bytes, IIC_Br,
- []>, Requires<[IsARM, IsNotDarwin]>;
-
- def tTAILJMPdND : tPseudoInst<(outs), (ins brtarget:$dst, variable_ops),
- Size4Bytes, IIC_Br,
- []>, Requires<[IsThumb, IsNotDarwin]>;
-
- def TAILJMPrND : ARMPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- Size4Bytes, IIC_Br,
- []>, Requires<[IsARM, IsNotDarwin]>;
- def tTAILJMPrND : tPseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
- Size4Bytes, IIC_Br,
- []>, Requires<[IsThumb, IsNotDarwin]>;
+let isBranch = 1, isTerminator = 1 in {
+ // FIXME: should be able to write a pattern for ARMBrcond, but can't use
+ // a two-value operand where a dag node expects two operands. :(
+ def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
+ IIC_Br, "b", "\t$target",
+ [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
+ bits<24> target;
+ let Inst{23-0} = target;
}
-}
-let isBranch = 1, isTerminator = 1 in {
- // B is "predicable" since it's just a Bcc with an 'always' condition.
let isBarrier = 1 in {
+ // B is "predicable" since it's just a Bcc with an 'always' condition.
let isPredicable = 1 in
// FIXME: We shouldn't need this pseudo at all. Just using Bcc directly
// should be sufficient.
- def B : ARMPseudoInst<(outs), (ins brtarget:$target), Size4Bytes, IIC_Br,
- [(br bb:$target)]>;
+ // FIXME: Is B really a Barrier? That doesn't seem right.
+ def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br,
+ [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>;
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def BR_JTr : ARMPseudoInst<(outs),
(ins GPR:$target, i32imm:$jt, i32imm:$id),
- SizeSpecial, IIC_Br,
+ 0, IIC_Br,
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>;
// FIXME: This shouldn't use the generic "addrmode2," but rather be split
// into i12 and rs suffixed versions.
def BR_JTm : ARMPseudoInst<(outs),
(ins addrmode2:$target, i32imm:$jt, i32imm:$id),
- SizeSpecial, IIC_Br,
+ 0, IIC_Br,
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]>;
def BR_JTadd : ARMPseudoInst<(outs),
(ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id),
- SizeSpecial, IIC_Br,
+ 0, IIC_Br,
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]>;
} // isNotDuplicable = 1, isIndirectBranch = 1
} // isBarrier = 1
- // FIXME: should be able to write a pattern for ARMBrcond, but can't use
- // a two-value operand where a dag node expects two operands. :(
- def Bcc : ABI<0b1010, (outs), (ins br_target:$target),
- IIC_Br, "b", "\t$target",
- [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> {
- bits<24> target;
- let Inst{23-0} = target;
- }
}
// BLX (immediate) -- for disassembly only
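
BLr9 and friends move from ARMPseudoInst to ARMPseudoExpand, which takes one extra argument: a result dag, e.g. (BL bl_target:$func), recording the real instruction the pseudo becomes, so the expansion can be table-generated instead of hand-written. A standalone sketch of the idea with invented names:

    class RealInst;
    def BL_sketch : RealInst;
    class PseudoExpand<dag result> {
      dag ResultInst = result;   // the instruction this pseudo lowers to
    }
    def BLr9_sketch : PseudoExpand<(BL_sketch)>;
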
@@ -1538,14 +1559,65 @@ def BLXi : AXI<(outs), (ins br_target:$target), BrMiscFrm, NoItinerary,
let Inst{24} = target{0};
}
-// Branch and Exchange Jazelle -- for disassembly only
+// Branch and Exchange Jazelle
def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
- [/* For disassembly only; pattern left blank */]> {
+ [/* pattern left blank */]> {
+ bits<4> func;
let Inst{23-20} = 0b0010;
- //let Inst{19-8} = 0xfff;
+ let Inst{19-8} = 0xfff;
let Inst{7-4} = 0b0010;
+ let Inst{3-0} = func;
+}
+
+// Tail calls.
+
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsDarwin]>;
+
+ def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsDarwin]>;
+
+ def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsDarwin]>;
+
+ }
+
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def TCRETURNdiND : PseudoInst<(outs), (ins i32imm:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TCRETURNriND : PseudoInst<(outs), (ins tcGPR:$dst, variable_ops),
+ IIC_Br, []>, Requires<[IsNotDarwin]>;
+
+ def TAILJMPdND : ARMPseudoExpand<(outs), (ins brtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (Bcc br_target:$dst, (ops 14, zero_reg))>,
+ Requires<[IsARM, IsNotDarwin]>;
+
+ def TAILJMPrND : ARMPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (BX GPR:$dst)>,
+ Requires<[IsARM, IsNotDarwin]>;
+ }
}
+
+
+
+
// Secure Monitor Call is a system instruction -- for disassembly only
def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
[/* For disassembly only; pattern left blank */]> {
@@ -1562,7 +1634,6 @@ def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
let Inst{23-0} = svc;
}
}
-def : MnemonicAlias<"swi", "svc">;
// Store Return State is a system instruction -- for disassembly only
let isCodeGenOnly = 1 in { // FIXME: This should not use submode!
@@ -1908,10 +1979,12 @@ def STRHT: AI3sthpo<(outs GPR:$base_wb), (ins GPR:$Rt, addrmode3:$addr),
multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
InstrItinClass itin, InstrItinClass itin_upd> {
+ // IA is the default, so no need for an explicit suffix on the
+ // mnemonic here. The suffix-less form is the canonical spelling.
def IA :
AXI4<(outs), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeNone, f, itin,
- !strconcat(asm, "ia${p}\t$Rn, $regs"), "", []> {
+ !strconcat(asm, "${p}\t$Rn, $regs"), "", []> {
let Inst{24-23} = 0b01; // Increment After
let Inst{21} = 0; // No writeback
let Inst{20} = L_bit;
@@ -1919,7 +1992,7 @@ multiclass arm_ldst_mult<string asm, bit L_bit, Format f,
def IA_UPD :
AXI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, reglist:$regs, variable_ops),
IndexModeUpd, f, itin_upd,
- !strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
+ !strconcat(asm, "${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
let Inst{21} = 1; // Writeback
let Inst{20} = L_bit;
@@ -1984,17 +2057,14 @@ defm STM : arm_ldst_mult<"stm", 0, LdStMulFrm, IIC_iStore_m, IIC_iStore_mu>;
} // neverHasSideEffects
-// Load / Store Multiple Mnemonic Aliases
-def : MnemonicAlias<"ldm", "ldmia">;
-def : MnemonicAlias<"stm", "stmia">;
-
// FIXME: remove when we have a way to marking a MI with these properties.
// FIXME: Should pc be an implicit operand like PICADD, etc?
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
-def LDMIA_RET : ARMPseudoInst<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
- reglist:$regs, variable_ops),
- Size4Bytes, IIC_iLoad_mBr, []>,
+def LDMIA_RET : ARMPseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
RegConstraint<"$Rn = $wb">;
//===----------------------------------------------------------------------===//
@@ -2164,7 +2234,7 @@ defm UXTAB16 : AI_exta_rrot_np<0b01101100, "uxtab16">;
def SBFX : I<(outs GPR:$Rd),
(ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"sbfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
bits<4> Rd;
@@ -2181,7 +2251,7 @@ def SBFX : I<(outs GPR:$Rd),
def UBFX : I<(outs GPR:$Rd),
(ins GPR:$Rn, imm0_31:$lsb, imm0_31_m1:$width),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>,
Requires<[IsARM, HasV6T2]> {
bits<4> Rd;
@@ -2202,10 +2272,10 @@ def UBFX : I<(outs GPR:$Rd),
defm ADD : AsI1_bin_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(add node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(add node:$LHS, node:$RHS)>, "ADD", 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
IIC_iALUi, IIC_iALUr, IIC_iALUsr,
- BinOpFrag<(sub node:$LHS, node:$RHS)>>;
+ BinOpFrag<(sub node:$LHS, node:$RHS)>, "SUB">;
// ADD and SUB with 's' bit set.
defm ADDS : AI1_bin_s_irs<0b0100, "adds",
@@ -2216,9 +2286,11 @@ defm SUBS : AI1_bin_s_irs<0b0010, "subs",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>,
+ "ADC", 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>,
+ "SBC">;
// ADC and SUBC with 's' bit set.
let usesCustomInserter = 1 in {
@@ -2271,13 +2343,13 @@ def RSBrs : AsI1<0b0011, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
// NOTE: CPSR def omitted because it will be handled by the custom inserter.
let usesCustomInserter = 1 in {
def RSBSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- Size4Bytes, IIC_iALUi,
+ 4, IIC_iALUi,
[(set GPR:$Rd, (subc so_imm:$imm, GPR:$Rn))]>;
def RSBSrr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- Size4Bytes, IIC_iALUr,
+ 4, IIC_iALUr,
[/* For disassembly only; pattern left blank */]>;
def RSBSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- Size4Bytes, IIC_iALUsr,
+ 4, IIC_iALUsr,
[(set GPR:$Rd, (subc so_reg:$shift, GPR:$Rn))]>;
}
@@ -2325,10 +2397,10 @@ def RSCrs : AsI1<0b0111, (outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
// NOTE: CPSR def omitted because it will be handled by the custom inserter.
let usesCustomInserter = 1, Uses = [CPSR] in {
def RSCSri : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_imm:$imm),
- Size4Bytes, IIC_iALUi,
+ 4, IIC_iALUi,
[(set GPR:$Rd, (sube_dead_carry so_imm:$imm, GPR:$Rn))]>;
def RSCSrs : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, so_reg:$shift),
- Size4Bytes, IIC_iALUsr,
+ 4, IIC_iALUsr,
[(set GPR:$Rd, (sube_dead_carry so_reg:$shift, GPR:$Rn))]>;
}
@@ -2528,19 +2600,19 @@ def : ARMV6Pat<(int_arm_usat GPR:$a, imm:$pos), (USAT imm:$pos, GPR:$a, 0)>;
defm AND : AsI1_bin_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "AND", 1>;
defm ORR : AsI1_bin_irs<0b1100, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "ORR", 1>;
defm EOR : AsI1_bin_irs<0b0001, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "EOR", 1>;
defm BIC : AsI1_bin_irs<0b1110, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsr,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>, "BIC">;
def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"bfc", "\t$Rd, $imm", "$src = $Rd",
[(set GPR:$Rd, (and GPR:$src, bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
@@ -2555,7 +2627,7 @@ def BFC : I<(outs GPR:$Rd), (ins GPR:$src, bf_inv_mask_imm:$imm),
// A8.6.18 BFI - Bitfield insert (Encoding A1)
def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"bfi", "\t$Rd, $Rn, $imm", "$src = $Rd",
[(set GPR:$Rd, (ARMbfi GPR:$src, GPR:$Rn,
bf_inv_mask_imm:$imm))]>,
@@ -2575,7 +2647,7 @@ def BFI : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn, bf_inv_mask_imm:$imm),
let isAsmParserOnly = 1 in
def BFI4p : I<(outs GPR:$Rd), (ins GPR:$src, GPR:$Rn,
lsb_pos_imm:$lsb, width_imm:$width),
- AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi,
"bfi", "\t$Rd, $Rn, $lsb, $width", "$src = $Rd",
[]>, Requires<[IsARM, HasV6T2]> {
bits<4> Rd;
@@ -2652,31 +2724,26 @@ class AsMul1I64<bits<7> opcod, dag oops, dag iops, InstrItinClass itin,
let Inst{3-0} = Rn;
}
+// FIXME: The v5 pseudos are only necessary for the additional Constraint
+// property. Remove them when it's possible to add those properties
+// on an individual MachineInstr, not just an instruction description.
let isCommutable = 1 in {
-let Constraints = "@earlyclobber $Rd" in
-def MULv5: ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
- pred:$p, cc_out:$s),
- Size4Bytes, IIC_iMUL32,
- [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
- Requires<[IsARM, NoV6]>;
-
def MUL : AsMul1I32<0b0000000, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
[(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))]>,
Requires<[IsARM, HasV6]> {
let Inst{15-12} = 0b0000;
}
-}
let Constraints = "@earlyclobber $Rd" in
-def MLAv5: ARMPseudoInst<(outs GPR:$Rd),
- (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
- Size4Bytes, IIC_iMAC32,
- [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
- Requires<[IsARM, NoV6]> {
- bits<4> Ra;
- let Inst{15-12} = Ra;
+def MULv5: ARMPseudoExpand<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm,
+ pred:$p, cc_out:$s),
+ 4, IIC_iMUL32,
+ [(set GPR:$Rd, (mul GPR:$Rn, GPR:$Rm))],
+ (MUL GPR:$Rd, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
}
+
def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))]>,
@@ -2685,6 +2752,14 @@ def MLA : AsMul1I32<0b0000001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
let Inst{15-12} = Ra;
}
+let Constraints = "@earlyclobber $Rd" in
+def MLAv5: ARMPseudoExpand<(outs GPR:$Rd),
+ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s),
+ 4, IIC_iMAC32,
+ [(set GPR:$Rd, (add (mul GPR:$Rn, GPR:$Rm), GPR:$Ra))],
+ (MLA GPR:$Rd, GPR:$Rn, GPR:$Rm, GPR:$Ra, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+
def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
[(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
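
As the FIXME above notes, MULv5/MLAv5 exist only to carry "@earlyclobber $Rd": pre-v6 ARM multiplies forbid the destination from overlapping (at least) one of the sources, and early-clobbering $Rd conservatively keeps it apart from every source register. The v6 MUL/MLA have no such restriction, so the real definitions drop the constraint. Minimal sketch of attaching the constraint (invented class; the string syntax is the one used in this file):

    class MulPseudo { string Constraints = ""; }
    let Constraints = "@earlyclobber $Rd" in
    def MULv5_sketch : MulPseudo;  // allocator keeps $Rd off all source registers
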
@@ -2700,49 +2775,34 @@ def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
}
// Extra precision multiplies with low / high results
-
let neverHasSideEffects = 1 in {
let isCommutable = 1 in {
-let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
-def SMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- Size4Bytes, IIC_iMUL64, []>,
- Requires<[IsARM, NoV6]>;
-
-def UMULLv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- Size4Bytes, IIC_iMUL64, []>,
- Requires<[IsARM, NoV6]>;
-}
-
def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
"smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV6]>;
def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
+ (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
"umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
Requires<[IsARM, HasV6]>;
-}
-// Multiply + accumulate
let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
-def SMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
- (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- Size4Bytes, IIC_iMAC64, []>,
- Requires<[IsARM, NoV6]>;
-def UMLALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- Size4Bytes, IIC_iMAC64, []>,
+ 4, IIC_iMUL64, [],
+ (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
-def UMAALv5 : ARMPseudoInst<(outs GPR:$RdLo, GPR:$RdHi),
+
+def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
- Size4Bytes, IIC_iMAC64, []>,
+ 4, IIC_iMUL64, [],
+ (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
Requires<[IsARM, NoV6]>;
-
+}
}
+// Multiply + accumulate
def SMLAL : AsMul1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
(ins GPR:$Rn, GPR:$Rm), IIC_iMAC64,
"smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
@@ -2765,6 +2825,25 @@ def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi),
let Inst{11-8} = Rm;
let Inst{3-0} = Rn;
}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
+def SMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
+ 4, IIC_iMAC64, [],
+ (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
+ Requires<[IsARM, NoV6]>;
+def UMAALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
+ (ins GPR:$Rn, GPR:$Rm, pred:$p),
+ 4, IIC_iMAC64, [],
+ (UMAAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p)>,
+ Requires<[IsARM, NoV6]>;
+}
+
} // neverHasSideEffects
// Most significant word multiply
@@ -3005,31 +3084,22 @@ def REV : AMiscA1I<0b01101011, 0b0011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev", "\t$Rd, $Rm",
[(set GPR:$Rd, (bswap GPR:$Rm))]>, Requires<[IsARM, HasV6]>;
+let AddedComplexity = 5 in
def REV16 : AMiscA1I<0b01101011, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "rev16", "\t$Rd, $Rm",
- [(set GPR:$Rd,
- (or (and (srl GPR:$Rm, (i32 8)), 0xFF),
- (or (and (shl GPR:$Rm, (i32 8)), 0xFF00),
- (or (and (srl GPR:$Rm, (i32 8)), 0xFF0000),
- (and (shl GPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ [(set GPR:$Rd, (rotr (bswap GPR:$Rm), (i32 16)))]>,
Requires<[IsARM, HasV6]>;
+let AddedComplexity = 5 in
def REVSH : AMiscA1I<0b01101111, 0b1011, (outs GPR:$Rd), (ins GPR:$Rm),
IIC_iUNAr, "revsh", "\t$Rd, $Rm",
- [(set GPR:$Rd,
- (sext_inreg
- (or (srl GPR:$Rm, (i32 8)),
- (shl GPR:$Rm, (i32 8))), i16))]>,
+ [(set GPR:$Rd, (sra (bswap GPR:$Rm), (i32 16)))]>,
Requires<[IsARM, HasV6]>;
-def : ARMV6Pat<(sext_inreg (or (srl (and GPR:$Rm, 0xFF00), (i32 8)),
- (shl GPR:$Rm, (i32 8))), i16),
+def : ARMV6Pat<(or (sra (shl GPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl GPR:$Rm, (i32 8)), 0xFF)),
(REVSH GPR:$Rm)>;
-// Need the AddedComplexity or else MOVs + REV would be chosen.
-let AddedComplexity = 5 in
-def : ARMV6Pat<(sra (bswap GPR:$Rm), (i32 16)), (REVSH GPR:$Rm)>;
-
def lsl_shift_imm : SDNodeXForm<imm, [{
unsigned Sh = ARM_AM::getSORegOpc(ARM_AM::lsl, N->getZExtValue());
return CurDAG->getTargetConstant(Sh, MVT::i32);
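
The rewritten REV16/REVSH patterns above match the compact bswap-based forms, which the DAG combiner presumably produces from the hand-spelled byte-mask trees the old patterns listed. With bytes written most-significant first, the identities are:

    x                   = [ b3 b2 b1 b0 ]
    bswap x             = [ b0 b1 b2 b3 ]
    rotr (bswap x), 16  = [ b2 b3 b0 b1 ]  = rev16 x  (bytes swapped in each halfword)
    sra  (bswap x), 16  = [ ss ss b0 b1 ]  = revsh x  (ss = sign extension of b0)

The AddedComplexity = 5 keeps these single-instruction patterns preferred over cheaper-looking alternatives, as the deleted comment about MOVs + REV noted.
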
@@ -3177,26 +3247,26 @@ def BCCZi64 : PseudoInst<(outs),
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p),
- Size4Bytes, IIC_iCMOVr,
+ 4, IIC_iCMOVr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
def MOVCCs : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_reg:$shift, pred:$p),
- Size4Bytes, IIC_iCMOVsr,
+ 4, IIC_iCMOVsr,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg:$shift, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
let isMoveImm = 1 in
def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, i32imm_hilo16:$imm, pred:$p),
- Size4Bytes, IIC_iMOVi,
+ 4, IIC_iMOVi,
[]>,
RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>;
let isMoveImm = 1 in
def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_imm:$imm, pred:$p),
- Size4Bytes, IIC_iCMOVi,
+ 4, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
@@ -3204,12 +3274,12 @@ def MOVCCi : ARMPseudoInst<(outs GPR:$Rd),
let isMoveImm = 1 in
def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, i32imm:$src, pred:$p),
- Size8Bytes, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
+ 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">;
let isMoveImm = 1 in
def MVNCCi : ARMPseudoInst<(outs GPR:$Rd),
(ins GPR:$false, so_imm:$imm, pred:$p),
- Size4Bytes, IIC_iCMOVi,
+ 4, IIC_iCMOVi,
[/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $Rd">;
} // neverHasSideEffects
@@ -3235,19 +3305,20 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
}
def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
- "dsb", "\t$opt",
- [/* For disassembly only; pattern left blank */]>,
+ "dsb", "\t$opt", []>,
Requires<[IsARM, HasDB]> {
bits<4> opt;
let Inst{31-4} = 0xf57ff04;
let Inst{3-0} = opt;
}
-// ISB has only full system option -- for disassembly only
-def ISB : AInoP<(outs), (ins), MiscFrm, NoItinerary, "isb", "", []>,
+// ISB has only full system option
+def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary,
+ "isb", "\t$opt", []>,
Requires<[IsARM, HasDB]> {
+ bits<4> opt;
let Inst{31-4} = 0xf57ff06;
- let Inst{3-0} = 0b1111;
+ let Inst{3-0} = opt;
}
let usesCustomInserter = 1 in {
@@ -3410,8 +3481,8 @@ def SWPB : AIswp<1, (outs GPR:$Rt), (ins GPR:$Rt2, GPR:$Rn), "swpb",
// Coprocessor Instructions.
//
-def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
- c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]> {
@@ -3431,8 +3502,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
let Inst{23-20} = opc1;
}
-def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
- c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]> {
@@ -3455,7 +3526,7 @@ def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, i32imm:$opc1,
class ACI<dag oops, dag iops, string opc, string asm,
IndexMode im = IndexModeNone>
- : InoP<oops, iops, AddrModeNone, Size4Bytes, im, BrFrm, NoItinerary,
+ : InoP<oops, iops, AddrModeNone, 4, im, BrFrm, NoItinerary,
opc, asm, "", [/* For disassembly only; pattern left blank */]> {
let Inst{27-25} = 0b110;
}
@@ -3583,8 +3654,8 @@ class MovRCopro<string opc, bit direction, dag oops, dag iops,
def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
(outs),
- (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
- c_imm:$CRm, i32imm:$opc2),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
@@ -3620,8 +3691,8 @@ class MovRCopro2<string opc, bit direction, dag oops, dag iops,
def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */,
(outs),
- (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
- c_imm:$CRm, i32imm:$opc2),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
[(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
imm:$CRm, imm:$opc2)]>;
def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */,
@@ -3635,7 +3706,7 @@ def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
class MovRRCopro<string opc, bit direction,
list<dag> pattern = [/* For disassembly only */]>
- : ABI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ : ABI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
NoItinerary, opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm", pattern> {
let Inst{23-21} = 0b010;
@@ -3661,7 +3732,7 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
class MovRRCopro2<string opc, bit direction,
list<dag> pattern = [/* For disassembly only */]>
- : ABXI<0b1100, (outs), (ins p_imm:$cop, i32imm:$opc1,
+ : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1,
GPR:$Rt, GPR:$Rt2, c_imm:$CRm), NoItinerary,
!strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
let Inst{31-28} = 0b1111;
@@ -3812,6 +3883,13 @@ def Int_eh_sjlj_dispatchsetup :
// Non-Instruction Patterns
//
+// ARMv4 indirect branch using (MOVr PC, dst)
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
+ def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst),
+ 4, IIC_Br, [(brind GPR:$dst)],
+ (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>,
+ Requires<[IsARM, NoV4T]>;
+
// Large immediate handling.
// 32-bit immediate using two piece so_imms or movw + movt.
@@ -3977,3 +4055,22 @@ include "ARMInstrVFP.td"
include "ARMInstrNEON.td"
+//===----------------------------------------------------------------------===//
+// Assembler aliases
+//
+
+// Memory barriers
+def : InstAlias<"dmb", (DMB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"dsb", (DSB 0xf)>, Requires<[IsARM, HasDB]>;
+def : InstAlias<"isb", (ISB 0xf)>, Requires<[IsARM, HasDB]>;
+
+// System instructions
+def : MnemonicAlias<"swi", "svc">;
+
+// Load / Store Multiple
+def : MnemonicAlias<"ldmfd", "ldm">;
+def : MnemonicAlias<"ldmia", "ldm">;
+def : MnemonicAlias<"stmfd", "stmdb">;
+def : MnemonicAlias<"stmia", "stm">;
+def : MnemonicAlias<"stmea", "stm">;
+
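The new alias section collects the two flavors: MnemonicAlias rewrites only the mnemonic token before matching (so swi takes svc's operand forms unchanged), while InstAlias matches a complete assembly pattern and maps it to a concrete instruction with fixed operands (dmb becomes DMB with opt = 0xf, the full-system option). Hypothetical additions in the same style, purely to show the two shapes (these spellings are not part of this patch and may overlap with existing parsing):

    def : MnemonicAlias<"ldmea", "ldmdb">;  // token rename only
    def : InstAlias<"dsb sy", (DSB 0xf)>;   // full pattern, fixed operand
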
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 79d95d9..0df62f4 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -175,7 +175,7 @@ class VLDQQWBPseudo<InstrItinClass itin>
(ins addrmode6:$addr, am6offset:$offset), itin,
"$addr.addr = $wb">;
class VLDQQQQPseudo<InstrItinClass itin>
- : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src), itin,"">;
+ : PseudoNLdSt<(outs QQQQPR:$dst), (ins addrmode6:$addr, QQQQPR:$src),itin,"">;
class VLDQQQQWBPseudo<InstrItinClass itin>
: PseudoNLdSt<(outs QQQQPR:$dst, GPR:$wb),
(ins addrmode6:$addr, am6offset:$offset, QQQQPR:$src), itin,
@@ -1387,7 +1387,7 @@ class VST1LN32<bits<4> op11_8, bits<4> op7_4, string Dt, ValueType Ty,
: NLdStLn<1, 0b00, op11_8, op7_4, (outs),
(ins addrmode6oneL32:$Rn, DPR:$Vd, nohash_imm:$lane),
IIC_VST1ln, "vst1", Dt, "\\{$Vd[$lane]\\}, $Rn", "",
- [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]> {
+ [(StoreOp (ExtractOp (Ty DPR:$Vd), imm:$lane), addrmode6oneL32:$Rn)]>{
let Rm = 0b1111;
}
class VST1QLNPseudo<ValueType Ty, PatFrag StoreOp, SDNode ExtractOp>
@@ -3793,7 +3793,8 @@ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
(ins DPR:$src1, DPR:$Vn, DPR:$Vm),
N3RegFrm, IIC_VCNTiD,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [(set DPR:$Vd, (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+ [(set DPR:$Vd,
+ (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
(and DPR:$Vm, (vnotd DPR:$Vd)))),
@@ -3803,7 +3804,8 @@ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
N3RegFrm, IIC_VCNTiQ,
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
- [(set QPR:$Vd, (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+ [(set QPR:$Vd,
+ (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
(and QPR:$Vm, (vnotq QPR:$Vd)))),
@@ -4212,17 +4214,12 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
// Vector Move Operations.
// VMOV : Vector Move (Register)
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>;
+def : InstAlias<"vmov${p} $Vd, $Vm",
+ (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>;
let neverHasSideEffects = 1 in {
-def VMOVDneon: N3VX<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$Vm),
- N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
- let Vn{4-0} = Vm{4-0};
-}
-def VMOVQ : N3VX<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$Vm),
- N3RegFrm, IIC_VMOV, "vmov", "$Vd, $Vm", "", []> {
- let Vn{4-0} = Vm{4-0};
-}
-
// Pseudo vector move instructions for QQ and QQQQ registers. This should
// be expanded after register allocation is completed.
def VMOVQQ : PseudoInst<(outs QQPR:$dst), (ins QQPR:$src),
@@ -4702,11 +4699,10 @@ def VEXTd32 : VEXTd<"vext", "32", v2i32> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
-def VEXTdf : VEXTd<"vext", "32", v2f32> {
- let Inst{11-10} = index{1-0};
- let Inst{9-8} = 0b00;
-
-}
+def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn),
+ (v2f32 DPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTd32 DPR:$Vn, DPR:$Vm, imm:$index)>;
def VEXTq8 : VEXTq<"vext", "8", v16i8> {
let Inst{11-8} = index{3-0};
@@ -4719,10 +4715,10 @@ def VEXTq32 : VEXTq<"vext", "32", v4i32> {
let Inst{11-10} = index{1-0};
let Inst{9-8} = 0b00;
}
-def VEXTqf : VEXTq<"vext", "32", v4f32> {
- let Inst{11-10} = index{1-0};
- let Inst{9-8} = 0b00;
-}
+def : Pat<(v4f32 (NEONvext (v4f32 QPR:$Vn),
+ (v4f32 QPR:$Vm),
+ (i32 imm:$index))),
+ (VEXTq32 QPR:$Vn, QPR:$Vm, imm:$index)>;
// VTRN : Vector Transpose
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 4777189..bfe83ec 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -26,17 +26,14 @@ def imm_comp_XFORM : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(~((uint32_t)N->getZExtValue()), MVT::i32);
}]>;
-/// imm0_7 predicate - True if the 32-bit immediate is in the range [0,7].
-def imm0_7 : ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 8;
-}]>;
def imm0_7_neg : PatLeaf<(i32 imm), [{
return (uint32_t)-N->getZExtValue() < 8;
}], imm_neg_XFORM>;
-def imm0_255 : ImmLeaf<i32, [{
- return Imm >= 0 && Imm < 256;
-}]>;
+def imm0_255_asmoperand : AsmOperandClass { let Name = "Imm0_255"; }
+def imm0_255 : Operand<i32>, ImmLeaf<i32, [{ return Imm >= 0 && Imm < 256; }]> {
+ let ParserMatchClass = imm0_255_asmoperand;
+}
def imm0_255_comp : PatLeaf<(i32 imm), [{
return ~((uint32_t)N->getZExtValue()) < 256;
}]>;
@@ -74,10 +71,12 @@ def t_adrlabel : Operand<i32> {
// Scaled 4 immediate.
def t_imm_s4 : Operand<i32> {
let PrintMethod = "printThumbS4ImmOperand";
+ let OperandType = "OPERAND_IMMEDIATE";
}
// Define Thumb specific addressing modes.
+let OperandType = "OPERAND_PCREL" in {
def t_brtarget : Operand<OtherVT> {
let EncoderMethod = "getThumbBRTargetOpValue";
}
@@ -97,6 +96,7 @@ def t_bltarget : Operand<i32> {
def t_blxtarget : Operand<i32> {
let EncoderMethod = "getThumbBLXTargetOpValue";
}
+}
def MemModeRegThumbAsmOperand : AsmOperandClass {
let Name = "MemModeRegThumb";
@@ -360,27 +360,6 @@ def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
// Control Flow Instructions.
//
-let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def tBX_RET : TI<(outs), (ins), IIC_Br, "bx\tlr",
- [(ARMretflag)]>,
- T1Special<{1,1,0,?}> {
- // A6.2.3 & A8.6.25
- let Inst{6-3} = 0b1110; // Rm = lr
- let Inst{2-0} = 0b000;
- }
-
- // Alternative return instruction used by vararg functions.
- def tBX_RET_vararg : TI<(outs), (ins tGPR:$Rm),
- IIC_Br, "bx\t$Rm",
- []>,
- T1Special<{1,1,0,?}> {
- // A6.2.3 & A8.6.25
- bits<4> Rm;
- let Inst{6-3} = Rm;
- let Inst{2-0} = 0b000;
- }
-}
-
// Indirect branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
def tBX : TI<(outs), (ins GPR:$Rm, pred:$p), IIC_Br, "bx${p}\t$Rm", []>,
@@ -390,31 +369,16 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let Inst{6-3} = Rm;
let Inst{2-0} = 0b000;
}
-
- def tBRIND : TI<(outs), (ins GPR:$Rm),
- IIC_Br,
- "mov\tpc, $Rm",
- [(brind GPR:$Rm)]>,
- T1Special<{1,0,?,?}> {
- // A8.6.97
- bits<4> Rm;
- let Inst{7} = 1; // <Rd> = Inst{7:2-0} = pc
- let Inst{6-3} = Rm;
- let Inst{2-0} = 0b111;
- }
}
-// FIXME: remove when we have a way to marking a MI with these properties.
-let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
- hasExtraDefRegAllocReq = 1 in
-def tPOP_RET : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops),
- IIC_iPop_Br,
- "pop${p}\t$regs", []>,
- T1Misc<{1,1,0,?,?,?,?}> {
- // A8.6.121
- bits<16> regs;
- let Inst{8} = regs{15}; // registers = P:'0000000':register_list
- let Inst{7-0} = regs{7-0};
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
+ def tBX_RET : tPseudoExpand<(outs), (ins pred:$p), 2, IIC_Br,
+ [(ARMretflag)], (tBX LR, pred:$p)>;
+
+ // Alternative return instruction used by vararg functions.
+ def tBX_RET_vararg : tPseudoExpand<(outs), (ins tGPR:$Rm, pred:$p),
+ 2, IIC_Br, [],
+ (tBX GPR:$Rm, pred:$p)>;
}
// All calls clobber the non-callee saved registers. SP is marked as a use to
@@ -464,7 +428,7 @@ let isCall = 1,
// ARMv4T
def tBX_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- Size4Bytes, IIC_Br,
+ 4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb, IsThumb1Only, IsNotDarwin]>;
}
@@ -516,7 +480,7 @@ let isCall = 1,
// ARMv4T
def tBXr9_CALL : tPseudoInst<(outs), (ins tGPR:$func, variable_ops),
- Size4Bytes, IIC_Br,
+ 4, IIC_Br,
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb, IsThumb1Only, IsDarwin]>;
}
@@ -534,12 +498,12 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
// Just a pseudo for a tBL instruction. Needed to let regalloc know about
// the clobber of LR.
let Defs = [LR] in
- def tBfar : tPseudoInst<(outs), (ins t_bltarget:$target),
- Size4Bytes, IIC_Br, []>;
+ def tBfar : tPseudoExpand<(outs), (ins t_bltarget:$target),
+ 4, IIC_Br, [], (tBL t_bltarget:$target)>;
def tBR_JTr : tPseudoInst<(outs),
(ins tGPR:$target, i32imm:$jt, i32imm:$id),
- SizeSpecial, IIC_Br,
+ 0, IIC_Br,
[(ARMbrjt tGPR:$target, tjumptable:$jt, imm:$id)]> {
list<Predicate> Predicates = [IsThumb, IsThumb1Only];
}
@@ -583,6 +547,33 @@ let isBranch = 1, isTerminator = 1 in {
}
}
+// Tail calls
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin versions.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ // tTAILJMPd: Darwin version uses a Thumb2 branch (no Thumb1 tail calls
+ // on Darwin), so it's in ARMInstrThumb2.td.
+ def tTAILJMPr : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb, IsDarwin]>;
+ }
+ // Non-Darwin versions (the difference is R9).
+ let Defs = [R0, R1, R2, R3, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in {
+ def tTAILJMPdND : tPseudoExpand<(outs), (ins t_brtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tB t_brtarget:$dst)>,
+ Requires<[IsThumb, IsNotDarwin]>;
+ def tTAILJMPrND : tPseudoExpand<(outs), (ins tcGPR:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (tBX GPR:$dst, (ops 14, zero_reg))>,
+ Requires<[IsThumb, IsNotDarwin]>;
+ }
+}
+
+
// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
// A8.6.16 B: Encoding T1
// If Inst{11-8} == 0b1111 then SEE SVC
@@ -685,19 +676,6 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
let Inst{7-0} = addr;
}
-// Special instruction for restore. It cannot clobber condition register
-// when it's expanded by eliminateCallFramePseudoInstr().
-let canFoldAsLoad = 1, mayLoad = 1, neverHasSideEffects = 1 in
-// FIXME: Pseudo for tLDRspi
-def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoad_i,
- "ldr", "\t$dst, $addr", []>,
- T1LdStSP<{1,?,?}> {
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -739,9 +717,9 @@ defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rrs1,
// A8.6.207 & A8.6.205
defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rrs2,
- t_addrmode_is2, AddrModeT1_2,
- IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
- BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+ t_addrmode_is2, AddrModeT1_2,
+ IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
+ BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
@@ -754,19 +732,6 @@ def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i,
let Inst{7-0} = addr;
}
-let mayStore = 1, neverHasSideEffects = 1 in
-// Special instruction for spill. It cannot clobber condition register when it's
-// expanded by eliminateCallFramePseudoInstr().
-// FIXME: Pseudo for tSTRspi
-def tSpill : T1pIs<(outs), (ins tGPR:$src, t_addrmode_sp:$addr), IIC_iStore_i,
- "str", "\t$src, $addr", []>,
- T1LdStSP<{0,?,?}> {
- bits<3> Rt;
- bits<8> addr;
- let Inst{10-8} = Rt;
- let Inst{7-0} = addr;
-}
-
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -911,7 +876,8 @@ def tADC : // A8.6.2
// Add immediate
def tADDi3 : // A8.6.4 T1
- T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3), IIC_iALUi,
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, i32imm:$imm3),
+ IIC_iALUi,
"add", "\t$Rd, $Rm, $imm3",
[(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]> {
bits<3> imm3;
@@ -1071,7 +1037,7 @@ def tLSRrr : // A8.6.91
// Move register
let isMoveImm = 1 in
-def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi,
+def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins imm0_255:$imm8), IIC_iMOVi,
"mov", "\t$Rd, $imm8",
[(set tGPR:$Rd, imm0_255:$imm8)]>,
T1General<{1,0,0,?,?}> {
@@ -1082,18 +1048,18 @@ def tMOVi8 : T1sI<(outs tGPR:$Rd), (ins i32imm:$imm8), IIC_iMOVi,
let Inst{7-0} = imm8;
}
-// TODO: A7-73: MOV(2) - mov setting flag.
+// A7-73: MOV(2) - mov setting flag.
let neverHasSideEffects = 1 in {
-// FIXME: Make this predicable.
-def tMOVr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
- "mov\t$Rd, $Rm", []>,
- T1Special<0b1000> {
+def tMOVr : Thumb1pI<(outs GPR:$Rd), (ins GPR:$Rm), AddrModeNone,
+ 2, IIC_iMOVr,
+ "mov", "\t$Rd, $Rm", "", []>,
+ T1Special<{1,0,?,?}> {
// A8.6.97
bits<4> Rd;
bits<4> Rm;
- // Bits {7-6} are encoded by the T1Special value.
- let Inst{5-3} = Rm{2-0};
+ let Inst{7} = Rd{3};
+ let Inst{6-3} = Rm;
let Inst{2-0} = Rd{2-0};
}
let Defs = [CPSR] in
@@ -1106,39 +1072,6 @@ def tMOVSr : T1I<(outs tGPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
let Inst{5-3} = Rm;
let Inst{2-0} = Rd;
}
-
-// FIXME: Make these predicable.
-def tMOVgpr2tgpr : T1I<(outs tGPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
- "mov\t$Rd, $Rm", []>,
- T1Special<{1,0,0,?}> {
- // A8.6.97
- bits<4> Rd;
- bits<4> Rm;
- // Bit {7} is encoded by the T1Special value.
- let Inst{6-3} = Rm;
- let Inst{2-0} = Rd{2-0};
-}
-def tMOVtgpr2gpr : T1I<(outs GPR:$Rd), (ins tGPR:$Rm), IIC_iMOVr,
- "mov\t$Rd, $Rm", []>,
- T1Special<{1,0,?,0}> {
- // A8.6.97
- bits<4> Rd;
- bits<4> Rm;
- // Bit {6} is encoded by the T1Special value.
- let Inst{7} = Rd{3};
- let Inst{5-3} = Rm{2-0};
- let Inst{2-0} = Rd{2-0};
-}
-def tMOVgpr2gpr : T1I<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVr,
- "mov\t$Rd, $Rm", []>,
- T1Special<{1,0,?,?}> {
- // A8.6.97
- bits<4> Rd;
- bits<4> Rm;
- let Inst{7} = Rd{3};
- let Inst{6-3} = Rm;
- let Inst{2-0} = Rd{2-0};
-}
} // neverHasSideEffects
// Multiply register
@@ -1175,31 +1108,16 @@ def tREV16 : // A8.6.135
T1pIMiscEncode<{1,0,1,0,0,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
IIC_iUNAr,
"rev16", "\t$Rd, $Rm",
- [(set tGPR:$Rd,
- (or (and (srl tGPR:$Rm, (i32 8)), 0xFF),
- (or (and (shl tGPR:$Rm, (i32 8)), 0xFF00),
- (or (and (srl tGPR:$Rm, (i32 8)), 0xFF0000),
- (and (shl tGPR:$Rm, (i32 8)), 0xFF000000)))))]>,
+ [(set tGPR:$Rd, (rotr (bswap tGPR:$Rm), (i32 16)))]>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
def tREVSH : // A8.6.136
T1pIMiscEncode<{1,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
IIC_iUNAr,
"revsh", "\t$Rd, $Rm",
- [(set tGPR:$Rd,
- (sext_inreg
- (or (srl tGPR:$Rm, (i32 8)),
- (shl tGPR:$Rm, (i32 8))), i16))]>,
+ [(set tGPR:$Rd, (sra (bswap tGPR:$Rm), (i32 16)))]>,
Requires<[IsThumb, IsThumb1Only, HasV6]>;
-def : T1Pat<(sext_inreg (or (srl (and tGPR:$Rm, 0xFF00), (i32 8)),
- (shl tGPR:$Rm, (i32 8))), i16),
- (tREVSH tGPR:$Rm)>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
-
-def : T1Pat<(sra (bswap tGPR:$Rm), (i32 16)), (tREVSH tGPR:$Rm)>,
- Requires<[IsThumb, IsThumb1Only, HasV6]>;
-
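The collapsed patterns rely on two identities: rev16(x) = rotr(bswap(x), 16) and
revsh(x) = sra(bswap(x), 16). A small standalone check of both, assuming nothing
beyond hosted C++ (byteswap32 and rotr32 are local helpers, not LLVM APIs):

  #include <cassert>
  #include <cstdint>

  static uint32_t byteswap32(uint32_t x) {
    return (x >> 24) | ((x >> 8) & 0xFF00u) | ((x << 8) & 0xFF0000u) | (x << 24);
  }
  static uint32_t rotr32(uint32_t x, unsigned n) {
    return (x >> n) | (x << (32 - n));
  }

  int main() {
    uint32_t x = 0x11223344u;
    // rev16: swap the bytes within each halfword.
    assert(rotr32(byteswap32(x), 16) == 0x22114433u);
    // revsh: byte-reverse the low halfword and sign-extend the result.
    assert((uint32_t)((int32_t)byteswap32(x) >> 16) == 0x00004433u);
    assert((uint32_t)((int32_t)byteswap32(0x000080FFu) >> 16) == 0xFFFFFF80u);
    return 0;
  }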
// Rotate right register
def tROR : // A8.6.139
T1sItDPEncode<0b0111, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm),
@@ -1294,31 +1212,6 @@ let usesCustomInserter = 1 in // Expanded after instruction selection.
NoItinerary,
[/*(set tGPR:$dst, (ARMcmov tGPR:$false, tGPR:$true, imm:$cc))*/]>;
-
-// 16-bit movcc in IT blocks for Thumb2.
-let neverHasSideEffects = 1 in {
-def tMOVCCr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iCMOVr,
- "mov", "\t$Rdn, $Rm", []>,
- T1Special<{1,0,?,?}> {
- bits<4> Rdn;
- bits<4> Rm;
- let Inst{7} = Rdn{3};
- let Inst{6-3} = Rm;
- let Inst{2-0} = Rdn{2-0};
-}
-
-let isMoveImm = 1 in
-def tMOVCCi : T1pIt<(outs tGPR:$Rdn), (ins tGPR:$Rn, i32imm:$Rm), IIC_iCMOVi,
- "mov", "\t$Rdn, $Rm", []>,
- T1General<{1,0,0,?,?}> {
- bits<3> Rdn;
- bits<8> Rm;
- let Inst{10-8} = Rdn;
- let Inst{7-0} = Rm;
-}
-
-} // neverHasSideEffects
-
// tLEApcrel - Load a pc-relative address into a register without offending the
// assembler.
@@ -1333,118 +1226,22 @@ def tADR : T1I<(outs tGPR:$Rd), (ins t_adrlabel:$addr, pred:$p),
let neverHasSideEffects = 1, isReMaterializable = 1 in
def tLEApcrel : tPseudoInst<(outs tGPR:$Rd), (ins i32imm:$label, pred:$p),
- Size2Bytes, IIC_iALUi, []>;
+ 2, IIC_iALUi, []>;
def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- Size2Bytes, IIC_iALUi, []>;
-
-//===----------------------------------------------------------------------===//
-// Move between coprocessor and ARM core register -- for disassembly only
-//
-
-class tMovRCopro<string opc, bit direction, dag oops, dag iops,
- list<dag> pattern>
- : T1Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
- pattern> {
- let Inst{27-24} = 0b1110;
- let Inst{20} = direction;
- let Inst{4} = 1;
-
- bits<4> Rt;
- bits<4> cop;
- bits<3> opc1;
- bits<3> opc2;
- bits<4> CRm;
- bits<4> CRn;
-
- let Inst{15-12} = Rt;
- let Inst{11-8} = cop;
- let Inst{23-21} = opc1;
- let Inst{7-5} = opc2;
- let Inst{3-0} = CRm;
- let Inst{19-16} = CRn;
-}
-
-def tMCR : tMovRCopro<"mcr", 0 /* from ARM core register to coprocessor */,
- (outs),
- (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
- c_imm:$CRm, i32imm:$opc2),
- [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
-def tMRC : tMovRCopro<"mrc", 1 /* from coprocessor to ARM core register */,
- (outs GPR:$Rt),
- (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
- []>;
-
-def : Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
- (tMRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>,
- Requires<[IsThumb, HasV6T2]>;
-
-class tMovRRCopro<string opc, bit direction,
- list<dag> pattern = [/* For disassembly only */]>
- : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
- !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
- let Inst{27-24} = 0b1100;
- let Inst{23-21} = 0b010;
- let Inst{20} = direction;
-
- bits<4> Rt;
- bits<4> Rt2;
- bits<4> cop;
- bits<4> opc1;
- bits<4> CRm;
-
- let Inst{15-12} = Rt;
- let Inst{19-16} = Rt2;
- let Inst{11-8} = cop;
- let Inst{7-4} = opc1;
- let Inst{3-0} = CRm;
-}
-
-def tMCRR : tMovRRCopro<"mcrr", 0 /* from ARM core register to coprocessor */,
- [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
- imm:$CRm)]>;
-def tMRRC : tMovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>;
-
-//===----------------------------------------------------------------------===//
-// Other Coprocessor Instructions. For disassembly only.
-//
-def tCDP : T1Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
- c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
- "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
- [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
- imm:$CRm, imm:$opc2)]> {
- let Inst{27-24} = 0b1110;
-
- bits<4> opc1;
- bits<4> CRn;
- bits<4> CRd;
- bits<4> cop;
- bits<3> opc2;
- bits<4> CRm;
-
- let Inst{3-0} = CRm;
- let Inst{4} = 0;
- let Inst{7-5} = opc2;
- let Inst{11-8} = cop;
- let Inst{15-12} = CRd;
- let Inst{19-16} = CRn;
- let Inst{23-20} = opc1;
-}
+ 2, IIC_iALUi, []>;
//===----------------------------------------------------------------------===//
// TLS Instructions
//
// __aeabi_read_tp preserves the registers r1-r3.
-let isCall = 1, Defs = [R0, LR], Uses = [SP] in
-def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
- "bl\t__aeabi_read_tp",
- [(set R0, ARMthread_pointer)]> {
- // Encoding is 0xf7fffffe.
- let Inst = 0xf7fffffe;
-}
+// This is a pseudo inst so that we can get the encoding right,
+// complete with fixup for the __aeabi_read_tp function.
+let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in
+def tTPsoft : tPseudoInst<(outs), (ins), 4, IIC_Br,
+ [(set R0, ARMthread_pointer)]>;
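For context, a minimal source-level illustration of what this pseudo supports
(`__thread` is the GNU extension; the variable name and the offset in the
comment are illustrative only, not taken from this patch):

  // ARMthread_pointer lowers to a call to the EABI helper __aeabi_read_tp,
  // which returns the thread pointer in r0; the variable's offset is then
  // applied to it.
  __thread int tls_counter;               // GNU extension, accepted by GCC/Clang
  int read_tls() { return tls_counter; }  // => bl  __aeabi_read_tp
                                          //    ldr r0, [r0, #<offset of tls_counter>]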
//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
@@ -1463,14 +1260,14 @@ def tTPsoft : TIx2<0b11110, 0b11, 1, (outs), (ins), IIC_Br,
let Defs = [ R0, R1, R2, R3, R4, R5, R6, R7, R12, CPSR ],
hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in
def tInt_eh_sjlj_setjmp : ThumbXI<(outs),(ins tGPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary, "","",
+ AddrModeNone, 0, NoItinerary, "","",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>;
// FIXME: Non-Darwin version(s)
let isBarrier = 1, hasSideEffects = 1, isTerminator = 1, isCodeGenOnly = 1,
Defs = [ R7, LR, SP ] in
def tInt_eh_sjlj_longjmp : XI<(outs), (ins GPR:$src, GPR:$scratch),
- AddrModeNone, SizeSpecial, IndexModeNone,
+ AddrModeNone, 0, IndexModeNone,
Pseudo, NoItinerary, "", "",
[(ARMeh_sjlj_longjmp GPR:$src, GPR:$scratch)]>,
Requires<[IsThumb, IsDarwin]>;
@@ -1583,3 +1380,18 @@ def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb, IsThumb1Only]>;
+
+// Pseudo-instruction for merged POP and return.
+// FIXME: remove when we have a way of marking a MI with these properties.
+let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
+ hasExtraDefRegAllocReq = 1 in
+def tPOP_RET : tPseudoExpand<(outs), (ins pred:$p, reglist:$regs, variable_ops),
+ 2, IIC_iPop_Br, [],
+ (tPOP pred:$p, reglist:$regs)>;
+
+// Indirect branch using "mov pc, $Rm"
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
+ def tBRIND : tPseudoExpand<(outs), (ins GPR:$Rm, pred:$p),
+ 2, IIC_Br, [(brind GPR:$Rm)],
+ (tMOVr PC, GPR:$Rm, pred:$p)>;
+}
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 598660c..c2c6cbc 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -44,9 +44,11 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// t2_so_imm - Match a 32-bit immediate operand, which is an
// 8-bit immediate rotated by an arbitrary number of bits, or an 8-bit
// immediate splatted into multiple bytes of the word.
+def t2_so_imm_asmoperand : AsmOperandClass { let Name = "T2SOImm"; }
def t2_so_imm : Operand<i32>, ImmLeaf<i32, [{
return ARM_AM::getT2SOImmVal(Imm) != -1;
}]> {
+ let ParserMatchClass = t2_so_imm_asmoperand;
let EncoderMethod = "getT2SOImmOpValue";
}
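As a rough sketch of the predicate the ImmLeaf checks through
ARM_AM::getT2SOImmVal (assumption: this mirrors only the accept/reject
decision; the real helper also computes the 12-bit encoding):

  #include <cstdint>

  static uint32_t rotl32(uint32_t x, unsigned n) {
    n &= 31;
    return n == 0 ? x : (x << n) | (x >> (32 - n));
  }

  // True if v is representable as a Thumb2 modified immediate.
  static bool isT2SOImm(uint32_t v) {
    uint32_t lo = v & 0xFFu, hi = (v >> 8) & 0xFFu;
    if (v <= 0xFFu) return true;                     // 0x000000XY
    if (v == (lo | (lo << 16))) return true;         // 0x00XY00XY
    if (v == ((hi << 8) | (hi << 24))) return true;  // 0xXY00XY00
    if (v == lo * 0x01010101u) return true;          // 0xXYXYXYXY
    // Otherwise: an 8-bit value with bit 7 set, rotated right by 8..31 bits;
    // rotating v left by n undoes that rotation.
    for (unsigned n = 8; n != 32; ++n) {
      uint32_t u = rotl32(v, n);
      if (u >= 0x80u && u <= 0xFFu) return true;
    }
    return false;
  }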
@@ -463,7 +465,8 @@ multiclass T2I_un_irs<bits<4> opcod, string opc,
/// changed to modify CPSR.
multiclass T2I_bin_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0, string wide = ""> {
+ PatFrag opnode, string baseOpc, bit Commutable = 0,
+ string wide = ""> {
// shifted imm
def ri : T2sTwoRegImm<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm), iii,
@@ -495,14 +498,31 @@ multiclass T2I_bin_irs<bits<4> opcod, string opc,
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
}
+ // Assembly aliases for optional destination operand when it's the same
+ // as the source operand.
+ def : InstAlias<!strconcat(opc, "${s}${p} $Rdn, $imm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "ri")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_imm:$imm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
+ def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $Rm"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rr")) rGPR:$Rdn, rGPR:$Rdn,
+ rGPR:$Rm, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
+ def : InstAlias<!strconcat(opc, "${s}${p}", wide, " $Rdn, $shift"),
+ (!cast<Instruction>(!strconcat(baseOpc, "rs")) rGPR:$Rdn, rGPR:$Rdn,
+ t2_so_reg:$shift, pred:$p,
+ cc_out:$s)>,
+ Requires<[IsThumb2]>;
}
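With these aliases, the two-operand source forms are accepted directly; e.g.
`ands r1, #7` and `eor.w r2, r3` match as `ands r1, r1, #7` and
`eor.w r2, r2, r3`.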
/// T2I_bin_w_irs - Same as T2I_bin_irs except these operations need
-// the ".w" prefix to indicate that they are wide.
+// the ".w" suffix to indicate that they are wide.
multiclass T2I_bin_w_irs<bits<4> opcod, string opc,
InstrItinClass iii, InstrItinClass iir, InstrItinClass iis,
- PatFrag opnode, bit Commutable = 0> :
- T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, Commutable, ".w">;
+ PatFrag opnode, string baseOpc, bit Commutable = 0> :
+ T2I_bin_irs<opcod, opc, iii, iir, iis, opnode, baseOpc, Commutable, ".w">;
/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands are
/// reversed. The 'rr' form is only defined for the disassembler; for codegen
@@ -696,18 +716,18 @@ let usesCustomInserter = 1 in {
multiclass T2I_adde_sube_s_irs<PatFrag opnode, bit Commutable = 0> {
// shifted imm
def ri : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_imm:$imm),
- Size4Bytes, IIC_iALUi,
+ 4, IIC_iALUi,
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_imm:$imm))]>;
// register
def rr : t2PseudoInst<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- Size4Bytes, IIC_iALUr,
+ 4, IIC_iALUr,
[(set rGPR:$Rd, (opnode rGPR:$Rn, rGPR:$Rm))]> {
let isCommutable = Commutable;
}
// shifted register
def rs : t2PseudoInst<
(outs rGPR:$Rd), (ins rGPR:$Rn, t2_so_reg:$ShiftedRm),
- Size4Bytes, IIC_iALUsi,
+ 4, IIC_iALUsi,
[(set rGPR:$Rd, (opnode rGPR:$Rn, t2_so_reg:$ShiftedRm))]>;
}
}
@@ -1018,7 +1038,8 @@ multiclass T2I_ext_rrot_uxtb16<bits<3> opcod, string opc, PatFrag opnode> {
// supported yet.
multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
def r : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iEXTr,
- opc, "\t$Rd, $Rm", []> {
+ opc, "\t$Rd, $Rm", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -1028,7 +1049,8 @@ multiclass T2I_ext_rrot_sxtb16<bits<3> opcod, string opc> {
let Inst{5-4} = 0b00; // rotate
}
def r_rot : T2TwoReg<(outs rGPR:$Rd), (ins rGPR:$Rm, i32imm:$rot), IIC_iEXTr,
- opc, "\t$Rd, $Rm, ror $rot", []> {
+ opc, "\t$Rd, $Rm, ror $rot", []>,
+ Requires<[IsThumb2, HasT2ExtractPack]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
let Inst{22-20} = opcod;
@@ -1084,7 +1106,7 @@ multiclass T2I_exta_rrot_DO<bits<3> opcod, string opc> {
let Inst{7} = 1;
let Inst{5-4} = 0b00; // rotate
}
- def rr_rot : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
+ def rr_rot :T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, i32imm:$rot),
IIC_iEXTAsr, opc, "\t$Rd, $Rn, $Rm, ror $rot", []> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0100;
@@ -1142,93 +1164,13 @@ def t2ADR : T2PCOneRegImm<(outs rGPR:$Rd),
let neverHasSideEffects = 1, isReMaterializable = 1 in
def t2LEApcrel : t2PseudoInst<(outs rGPR:$Rd), (ins i32imm:$label, pred:$p),
- Size4Bytes, IIC_iALUi, []>;
+ 4, IIC_iALUi, []>;
def t2LEApcrelJT : t2PseudoInst<(outs rGPR:$Rd),
(ins i32imm:$label, nohash_imm:$id, pred:$p),
- Size4Bytes, IIC_iALUi,
+ 4, IIC_iALUi,
[]>;
-// FIXME: None of these add/sub SP special instructions should be necessary
-// at all for thumb2 since they use the same encodings as the generic
-// add/sub instructions. In thumb1 we need them since they have dedicated
-// encodings. At the least, they should be pseudo instructions.
-// ADD r, sp, {so_imm|i12}
-let isCodeGenOnly = 1 in {
-def t2ADDrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
- IIC_iALUi, "add", ".w\t$Rd, $Rn, $imm", []> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = 0b1000;
- let Inst{15} = 0;
-}
-def t2ADDrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
- IIC_iALUi, "addw", "\t$Rd, $Rn, $imm", []> {
- let Inst{31-27} = 0b11110;
- let Inst{25-20} = 0b100000;
- let Inst{15} = 0;
-}
-
-// ADD r, sp, so_reg
-def t2ADDrSPs : T2sTwoRegShiftedReg<
- (outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$ShiftedRm),
- IIC_iALUsi, "add", ".w\t$Rd, $Rn, $ShiftedRm", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = 0b1000;
- let Inst{15} = 0;
-}
-
-// SUB r, sp, {so_imm|i12}
-def t2SUBrSPi : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_imm:$imm),
- IIC_iALUi, "sub", ".w\t$Rd, $Rn, $imm", []> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = 0b1101;
- let Inst{15} = 0;
-}
-def t2SUBrSPi12 : T2TwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, imm0_4095:$imm),
- IIC_iALUi, "subw", "\t$Rd, $Rn, $imm", []> {
- let Inst{31-27} = 0b11110;
- let Inst{25-20} = 0b101010;
- let Inst{15} = 0;
-}
-
-// SUB r, sp, so_reg
-def t2SUBrSPs : T2sTwoRegImm<(outs GPR:$Rd), (ins GPR:$Rn, t2_so_reg:$imm),
- IIC_iALUsi,
- "sub", "\t$Rd, $Rn, $imm", []> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = 0b1101;
- let Inst{19-16} = 0b1101; // Rn = sp
- let Inst{15} = 0;
-}
-} // end isCodeGenOnly = 1
-
-// Signed and unsigned division on v7-M
-def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
- "sdiv", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-21} = 0b011100;
- let Inst{20} = 0b1;
- let Inst{15-12} = 0b1111;
- let Inst{7-4} = 0b1111;
-}
-
-def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
- "udiv", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[HasDivide, IsThumb2]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-21} = 0b011101;
- let Inst{20} = 0b1;
- let Inst{15-12} = 0b1111;
- let Inst{7-4} = 0b1111;
-}
-
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
@@ -1668,6 +1610,10 @@ def t2MOVi : T2sOneRegImm<(outs rGPR:$Rd), (ins t2_so_imm:$imm), IIC_iMOVi,
let Inst{15} = 0;
}
+def : InstAlias<"mov${s}${p} $Rd, $imm", (t2MOVi rGPR:$Rd, t2_so_imm:$imm,
+ pred:$p, cc_out:$s)>,
+ Requires<[IsThumb2]>;
+
let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in
def t2MOVi16 : T2I<(outs rGPR:$Rd), (ins i32imm_hilo16:$imm), IIC_iMOVi,
"movw", "\t$Rd, $imm",
@@ -1788,8 +1734,10 @@ defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
-defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
-defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<BinOpFrag<(adde_live_carry node:$LHS,
+ node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<BinOpFrag<(sube_live_carry node:$LHS,
+ node:$RHS)>>;
// RSB
defm t2RSB : T2I_rbin_irs <0b1110, "rsb",
@@ -1833,7 +1781,8 @@ def : T2Pat<(adde_live_carry rGPR:$src, t2_so_imm_not:$imm),
// Select Bytes -- for disassembly only
def t2SEL : T2ThreeReg<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
- NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []> {
+ NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-24} = 0b010;
let Inst{23} = 0b1;
@@ -1849,7 +1798,8 @@ class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc,
list<dag> pat = [/* For disassembly only; pattern left blank */],
dag iops = (ins rGPR:$Rn, rGPR:$Rm),
string asm = "\t$Rd, $Rn, $Rm">
- : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat> {
+ : T2I<(outs rGPR:$Rd), iops, NoItinerary, opc, asm, pat>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0101;
let Inst{22-20} = op22_20;
@@ -1947,12 +1897,14 @@ class T2FourReg_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops,
def t2USAD8 : T2ThreeReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm),
- NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []> {
+ NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{15-12} = 0b1111;
}
def t2USADA8 : T2FourReg_mac<0, 0b111, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), NoItinerary,
- "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>;
+ "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
// Signed/Unsigned saturate -- for disassembly only
@@ -1985,7 +1937,8 @@ def t2SSAT: T2SatI<
def t2SSAT16: T2SatI<
(outs rGPR:$Rd), (ins ssat_imm:$sat_imm, rGPR:$Rn), NoItinerary,
"ssat16", "\t$Rd, $sat_imm, $Rn",
- [/* For disassembly only; pattern left blank */]> {
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1100;
let Inst{20} = 0;
@@ -2005,10 +1958,11 @@ def t2USAT: T2SatI<
let Inst{15} = 0;
}
-def t2USAT16: T2SatI<
- (outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn), NoItinerary,
- "usat16", "\t$dst, $sat_imm, $Rn",
- [/* For disassembly only; pattern left blank */]> {
+def t2USAT16: T2SatI<(outs rGPR:$dst), (ins i32imm:$sat_imm, rGPR:$Rn),
+ NoItinerary,
+ "usat16", "\t$dst, $sat_imm, $Rn",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11110;
let Inst{25-22} = 0b1110;
let Inst{20} = 0;
@@ -2084,17 +2038,18 @@ def t2MOVsra_flag : T2TwoRegShiftImm<
defm t2AND : T2I_bin_w_irs<0b0000, "and",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(and node:$LHS, node:$RHS)>, "t2AND", 1>;
defm t2ORR : T2I_bin_w_irs<0b0010, "orr",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(or node:$LHS, node:$RHS)>, "t2ORR", 1>;
defm t2EOR : T2I_bin_w_irs<0b0100, "eor",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(xor node:$LHS, node:$RHS)>, "t2EOR", 1>;
defm t2BIC : T2I_bin_w_irs<0b0001, "bic",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+ BinOpFrag<(and node:$LHS, (not node:$RHS))>,
+ "t2BIC">;
class T2BitFI<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -2194,7 +2149,8 @@ let Constraints = "$src = $Rd" in {
defm t2ORN : T2I_bin_irs<0b0011, "orn",
IIC_iBITi, IIC_iBITr, IIC_iBITsi,
- BinOpFrag<(or node:$LHS, (not node:$RHS))>, 0, "">;
+ BinOpFrag<(or node:$LHS, (not node:$RHS))>,
+ "t2ORN", 0, "">;
// Prefer this over t2EORri ra, rb, -1 because mvn has a 16-bit version
let AddedComplexity = 1 in
@@ -2277,7 +2233,8 @@ def t2UMLAL : T2MulLong<0b110, 0b0000,
def t2UMAAL : T2MulLong<0b110, 0b0110,
(outs rGPR:$RdLo, rGPR:$RdHi),
(ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64,
- "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+ "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
} // neverHasSideEffects
// Rounding variants of the below included for disassembly only
@@ -2285,7 +2242,8 @@ def t2UMAAL : T2MulLong<0b110, 0b0110,
// Most significant word multiply
def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
"smmul", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]> {
+ [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2294,7 +2252,8 @@ def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
}
def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
- "smmulr", "\t$Rd, $Rn, $Rm", []> {
+ "smmulr", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2305,7 +2264,8 @@ def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
def t2SMMLA : T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]> {
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2314,7 +2274,8 @@ def t2SMMLA : T2FourReg<
def t2SMMLAR: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []> {
+ "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
@@ -2324,7 +2285,8 @@ def t2SMMLAR: T2FourReg<
def t2SMMLS: T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
"smmls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]> {
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2333,7 +2295,8 @@ def t2SMMLS: T2FourReg<
def t2SMMLSR:T2FourReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []> {
+ "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b110;
@@ -2344,7 +2307,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16)))]> {
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2356,7 +2320,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16))))]> {
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2368,7 +2333,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16)))]> {
+ (sext_inreg rGPR:$Rm, i16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2380,7 +2346,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16))))]> {
+ (sra rGPR:$Rm, (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2392,7 +2359,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
- (sext_inreg rGPR:$Rm, i16)), (i32 16)))]> {
+ (sext_inreg rGPR:$Rm, i16)), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2404,7 +2372,8 @@ multiclass T2I_smul<string opc, PatFrag opnode> {
def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
[(set rGPR:$Rd, (sra (opnode rGPR:$Rn,
- (sra rGPR:$Rm, (i32 16))), (i32 16)))]> {
+ (sra rGPR:$Rm, (i32 16))), (i32 16)))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2421,7 +2390,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
!strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra,
(opnode (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16))))]> {
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2433,7 +2403,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16)))))]> {
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2445,7 +2416,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16))))]> {
+ (sext_inreg rGPR:$Rm, i16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2457,7 +2429,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16)))))]> {
+ (sra rGPR:$Rm, (i32 16)))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -2469,7 +2442,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
- (sext_inreg rGPR:$Rm, i16)), (i32 16))))]> {
+ (sext_inreg rGPR:$Rm, i16)), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2481,7 +2455,8 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
[(set rGPR:$Rd, (add rGPR:$Ra, (sra (opnode rGPR:$Rn,
- (sra rGPR:$Rm, (i32 16))), (i32 16))))]> {
+ (sra rGPR:$Rm, (i32 16))), (i32 16))))]>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -2496,66 +2471,108 @@ defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
// Halfword multiple accumulate long: SMLAL<x><y> -- for disassembly only
def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>;
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
// These are for disassembly only.
def t2SMUAD: T2ThreeReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []> {
+ IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUADX:T2ThreeReg_mac<
0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []> {
+ IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUSD: T2ThreeReg_mac<
0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []> {
+ IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMUSDX:T2ThreeReg_mac<
0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []> {
+ IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasThumb2DSP]> {
let Inst{15-12} = 0b1111;
}
def t2SMLAD : T2ThreeReg_mac<
0, 0b010, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
- "\t$Rd, $Rn, $Rm, $Ra", []>;
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLADX : T2FourReg_mac<
0, 0b010, 0b0001, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
- "\t$Rd, $Rn, $Rm, $Ra", []>;
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
- "\t$Rd, $Rn, $Rm, $Ra", []>;
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
(ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
- "\t$Rd, $Rn, $Rm, $Ra", []>;
+ "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm, rGPR:$Rn), IIC_iMAC64, "smlald",
- "\t$Ra, $Rd, $Rm, $Rn", []>;
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlaldx",
- "\t$Ra, $Rd, $Rm, $Rn", []>;
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsld",
- "\t$Ra, $Rd, $Rm, $Rn", []>;
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
(ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
- "\t$Ra, $Rd, $Rm, $Rn", []>;
+ "\t$Ra, $Rd, $Rm, $Rn", []>,
+ Requires<[IsThumb2, HasThumb2DSP]>;
+
+//===----------------------------------------------------------------------===//
+// Division Instructions.
+// Signed and unsigned division on v7-M
+//
+def t2SDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "sdiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (sdiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iALUi,
+ "udiv", "\t$Rd, $Rn, $Rm",
+ [(set rGPR:$Rd, (udiv rGPR:$Rn, rGPR:$Rm))]>,
+ Requires<[HasDivide, IsThumb2]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
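These defs are only available on subtargets with the HasDivide feature
(hardware integer divide, e.g. v7-M); without it, sdiv/udiv are instead
lowered to calls to the AEABI runtime helpers __aeabi_idiv and __aeabi_uidiv.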
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
@@ -2585,25 +2602,16 @@ def t2REV : T2I_misc<0b01, 0b00, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
def t2REV16 : T2I_misc<0b01, 0b01, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"rev16", ".w\t$Rd, $Rm",
- [(set rGPR:$Rd,
- (or (and (srl rGPR:$Rm, (i32 8)), 0xFF),
- (or (and (shl rGPR:$Rm, (i32 8)), 0xFF00),
- (or (and (srl rGPR:$Rm, (i32 8)), 0xFF0000),
- (and (shl rGPR:$Rm, (i32 8)), 0xFF000000)))))]>;
+ [(set rGPR:$Rd, (rotr (bswap rGPR:$Rm), (i32 16)))]>;
def t2REVSH : T2I_misc<0b01, 0b11, (outs rGPR:$Rd), (ins rGPR:$Rm), IIC_iUNAr,
"revsh", ".w\t$Rd, $Rm",
- [(set rGPR:$Rd,
- (sext_inreg
- (or (srl rGPR:$Rm, (i32 8)),
- (shl rGPR:$Rm, (i32 8))), i16))]>;
+ [(set rGPR:$Rd, (sra (bswap rGPR:$Rm), (i32 16)))]>;
-def : T2Pat<(sext_inreg (or (srl (and rGPR:$Rm, 0xFF00), (i32 8)),
- (shl rGPR:$Rm, (i32 8))), i16),
+def : T2Pat<(or (sra (shl rGPR:$Rm, (i32 24)), (i32 16)),
+ (and (srl rGPR:$Rm, (i32 8)), 0xFF)),
(t2REVSH rGPR:$Rm)>;
-def : T2Pat<(sra (bswap rGPR:$Rm), (i32 16)), (t2REVSH rGPR:$Rm)>;
-
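A quick standalone check (same caveats as the Thumb1 example earlier) that the
new DAG form really is a sign-extended byte swap of the low halfword:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint32_t x = 0x11223344u, y = 0x000080FFu;
    // (or (sra (shl x, 24), 16), (and (srl x, 8), 0xFF))
    uint32_t px = (uint32_t)((int32_t)(x << 24) >> 16) | ((x >> 8) & 0xFFu);
    uint32_t py = (uint32_t)((int32_t)(y << 24) >> 16) | ((y >> 8) & 0xFFu);
    assert(px == 0x00004433u);  // revsh: low halfword byte-swapped
    assert(py == 0xFFFFFF80u);  // sign bit of the swapped halfword extended
    return 0;
  }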
def t2PKHBT : T2ThreeReg<
(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, shift_imm:$sh),
IIC_iBITsi, "pkhbt", "\t$Rd, $Rn, $Rm$sh",
@@ -2699,33 +2707,21 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// FIXME: should be able to write a pattern for ARMcmov, but can't use
// a two-value operand where a dag node expects two operands. :(
let neverHasSideEffects = 1 in {
-def t2MOVCCr : T2TwoReg<
- (outs rGPR:$Rd), (ins rGPR:$false, rGPR:$Rm), IIC_iCMOVr,
- "mov", ".w\t$Rd, $Rm",
+def t2MOVCCr : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, rGPR:$Rm, pred:$p),
+ 4, IIC_iCMOVr,
[/*(set rGPR:$Rd, (ARMcmov rGPR:$false, rGPR:$Rm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd"> {
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b01;
- let Inst{24-21} = 0b0010;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1111; // Rn
- let Inst{14-12} = 0b000;
- let Inst{7-4} = 0b0000;
-}
+ RegConstraint<"$false = $Rd">;
let isMoveImm = 1 in
-def t2MOVCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm),
- IIC_iCMOVi, "mov", ".w\t$Rd, $imm",
+def t2MOVCCi : t2PseudoInst<(outs rGPR:$Rd),
+ (ins rGPR:$false, t2_so_imm:$imm, pred:$p),
+ 4, IIC_iCMOVi,
[/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm:$imm, imm:$cc, CCR:$ccr))*/]>,
- RegConstraint<"$false = $Rd"> {
- let Inst{31-27} = 0b11110;
- let Inst{25} = 0;
- let Inst{24-21} = 0b0010;
- let Inst{20} = 0; // The S bit.
- let Inst{19-16} = 0b1111; // Rn
- let Inst{15} = 0;
-}
+ RegConstraint<"$false = $Rd">;
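A hedged sketch of how a backend would create this pseudo from C++ once ISel
has produced an ARMcmov; MBB, MI, DL, TII and the register arguments are
placeholder names, and the ARM-internal header is private to lib/Target/ARM:

  // Sketch only: operand order follows the new (ins $false, $Rm, pred:$p) list.
  #include "ARMBaseInfo.h"                       // ARMCC::CondCodes (target-private)
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  using namespace llvm;

  static void emitCMov(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                       DebugLoc DL, const TargetInstrInfo *TII, unsigned DestReg,
                       unsigned FalseReg, unsigned TrueReg) {
    BuildMI(MBB, MI, DL, TII->get(ARM::t2MOVCCr), DestReg)
        .addReg(FalseReg)    // $false, tied to $Rd by the RegConstraint
        .addReg(TrueReg)     // $Rm, taken when the condition holds
        .addImm(ARMCC::EQ)   // pred:$p condition code
        .addReg(ARM::CPSR);  // pred:$p flags register
  }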
+// FIXME: Pseudo-ize these. For now, just mark codegen only.
+let isCodeGenOnly = 1 in {
let isMoveImm = 1 in
def t2MOVCCi16 : T2I<(outs rGPR:$Rd), (ins rGPR:$false, i32imm_hilo16:$imm),
IIC_iCMOVi,
@@ -2792,6 +2788,7 @@ def t2MOVCCror : T2I_movcc_sh<0b11, (outs rGPR:$Rd),
(ins rGPR:$false, rGPR:$Rm, i32imm:$imm),
IIC_iCMOVsi, "ror", ".w\t$Rd, $Rm, $imm", []>,
RegConstraint<"$false = $Rd">;
+} // isCodeGenOnly = 1
} // neverHasSideEffects
//===----------------------------------------------------------------------===//
@@ -2826,7 +2823,7 @@ def t2ISB : AInoP<(outs), (ins), ThumbFrm, NoItinerary, "isb", "",
let Inst{3-0} = 0b1111;
}
-class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
: Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
@@ -2842,7 +2839,7 @@ class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Inst{19-16} = addr;
let Inst{15-12} = Rt;
}
-class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, int sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
: Thumb2I<oops, iops, am, sz, itin, opc, asm, cstr, pattern> {
@@ -2861,16 +2858,15 @@ class T2I_strex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
}
let mayLoad = 1 in {
-def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
- Size4Bytes, NoItinerary, "ldrexb", "\t$Rt, $addr",
- "", []>;
-def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
- Size4Bytes, NoItinerary, "ldrexh", "\t$Rt, $addr",
- "", []>;
-def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone,
- Size4Bytes, NoItinerary,
- "ldrex", "\t$Rt, $addr", "",
- []> {
+def t2LDREXB : T2I_ldrex<0b00, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexb", "\t$Rt, $addr", "", []>;
+def t2LDREXH : T2I_ldrex<0b01, (outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrexh", "\t$Rt, $addr", "", []>;
+def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "ldrex", "\t$Rt, $addr", "", []> {
let Inst{31-27} = 0b11101;
let Inst{26-20} = 0b0000101;
let Inst{11-8} = 0b1111;
@@ -2884,7 +2880,7 @@ def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_reg:$addr), AddrModeNone
let hasExtraDefRegAllocReq = 1 in
def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
(ins t2addrmode_reg:$addr),
- AddrModeNone, Size4Bytes, NoItinerary,
+ AddrModeNone, 4, NoItinerary,
"ldrexd", "\t$Rt, $Rt2, $addr", "",
[], {?, ?, ?, ?}> {
bits<4> Rt2;
@@ -2893,14 +2889,16 @@ def t2LDREXD : T2I_ldrex<0b11, (outs rGPR:$Rt, rGPR:$Rt2),
}
let mayStore = 1, Constraints = "@earlyclobber $Rd" in {
-def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
- AddrModeNone, Size4Bytes, NoItinerary,
- "strexb", "\t$Rd, $Rt, $addr", "", []>;
-def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
- AddrModeNone, Size4Bytes, NoItinerary,
- "strexh", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXB : T2I_strex<0b00, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexb", "\t$Rd, $Rt, $addr", "", []>;
+def t2STREXH : T2I_strex<0b01, (outs rGPR:$Rd),
+ (ins rGPR:$Rt, t2addrmode_reg:$addr),
+ AddrModeNone, 4, NoItinerary,
+ "strexh", "\t$Rd, $Rt, $addr", "", []>;
def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
- AddrModeNone, Size4Bytes, NoItinerary,
+ AddrModeNone, 4, NoItinerary,
"strex", "\t$Rd, $Rt, $addr", "",
[]> {
let Inst{31-27} = 0b11101;
@@ -2919,7 +2917,7 @@ def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_reg:$addr),
let hasExtraSrcRegAllocReq = 1, Constraints = "@earlyclobber $Rd" in
def t2STREXD : T2I_strex<0b11, (outs rGPR:$Rd),
(ins rGPR:$Rt, rGPR:$Rt2, t2addrmode_reg:$addr),
- AddrModeNone, Size4Bytes, NoItinerary,
+ AddrModeNone, 4, NoItinerary,
"strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [],
{?, ?, ?, ?}> {
bits<4> Rt2;
@@ -2940,22 +2938,6 @@ def t2CLREX : T2XI<(outs), (ins), NoItinerary, "clrex",
}
//===----------------------------------------------------------------------===//
-// TLS Instructions
-//
-
-// __aeabi_read_tp preserves the registers r1-r3.
-let isCall = 1,
- Defs = [R0, R12, LR, CPSR], Uses = [SP] in {
- def t2TPsoft : T2XI<(outs), (ins), IIC_Br,
- "bl\t__aeabi_read_tp",
- [(set R0, ARMthread_pointer)]> {
- let Inst{31-27} = 0b11110;
- let Inst{15-14} = 0b11;
- let Inst{12} = 1;
- }
-}
-
-//===----------------------------------------------------------------------===//
// SJLJ Exception handling intrinsics
// eh_sjlj_setjmp() is an instruction sequence to store the return
// address and save #0 in R0 for the non-longjmp case.
@@ -2973,7 +2955,7 @@ let Defs =
QQQQ0, QQQQ1, QQQQ2, QQQQ3 ],
hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
def t2Int_eh_sjlj_setjmp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary, "", "",
+ AddrModeNone, 0, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, HasVFP2]>;
}
@@ -2982,7 +2964,7 @@ let Defs =
[ R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, LR, CPSR ],
hasSideEffects = 1, isBarrier = 1, isCodeGenOnly = 1 in {
def t2Int_eh_sjlj_setjmp_nofp : Thumb2XI<(outs), (ins tGPR:$src, tGPR:$val),
- AddrModeNone, SizeSpecial, NoItinerary, "", "",
+ AddrModeNone, 0, NoItinerary, "", "",
[(set R0, (ARMeh_sjlj_setjmp tGPR:$src, tGPR:$val))]>,
Requires<[IsThumb2, NoVFP]>;
}
@@ -2993,28 +2975,14 @@ let Defs =
//
// FIXME: remove when we have a way of marking a MI with these properties.
-// FIXME: $dst1 should be a def. But the extra ops must be in the end of the
-// operand list.
// FIXME: Should pc be an implicit operand like PICADD, etc?
let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1, isCodeGenOnly = 1 in
-def t2LDMIA_RET: T2XIt<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
- reglist:$regs, variable_ops),
- IIC_iLoad_mBr,
- "ldmia${p}.w\t$Rn!, $regs",
- "$Rn = $wb", []> {
- bits<4> Rn;
- bits<16> regs;
-
- let Inst{31-27} = 0b11101;
- let Inst{26-25} = 0b00;
- let Inst{24-23} = 0b01; // Increment After
- let Inst{22} = 0;
- let Inst{21} = 1; // Writeback
- let Inst{20} = 1;
- let Inst{19-16} = Rn;
- let Inst{15-0} = regs;
-}
+def t2LDMIA_RET: t2PseudoExpand<(outs GPR:$wb), (ins GPR:$Rn, pred:$p,
+ reglist:$regs, variable_ops),
+ 4, IIC_iLoad_mBr, [],
+ (t2LDMIA_UPD GPR:$wb, GPR:$Rn, pred:$p, reglist:$regs)>,
+ RegConstraint<"$Rn = $wb">;
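Note the change of approach here: as a t2PseudoExpand, t2LDMIA_RET carries the
return/terminator/barrier flags while MC lowering expands it to the plain
t2LDMIA_UPD named in the result pattern, so the hand-maintained encoding bits
removed above are no longer needed.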
let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isPredicable = 1 in
@@ -3036,17 +3004,17 @@ def t2B : T2XI<(outs), (ins uncondbrtarget:$target), IIC_Br,
let isNotDuplicable = 1, isIndirectBranch = 1 in {
def t2BR_JT : t2PseudoInst<(outs),
(ins GPR:$target, GPR:$index, i32imm:$jt, i32imm:$id),
- SizeSpecial, IIC_Br,
+ 0, IIC_Br,
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt, imm:$id)]>;
// FIXME: Add a non-pc based case that can be predicated.
def t2TBB_JT : t2PseudoInst<(outs),
(ins GPR:$index, i32imm:$jt, i32imm:$id),
- SizeSpecial, IIC_Br, []>;
+ 0, IIC_Br, []>;
def t2TBH_JT : t2PseudoInst<(outs),
(ins GPR:$index, i32imm:$jt, i32imm:$id),
- SizeSpecial, IIC_Br, []>;
+ 0, IIC_Br, []>;
def t2TBB : T2I<(outs), (ins GPR:$Rn, GPR:$Rm), IIC_Br,
"tbb", "\t[$Rn, $Rm]", []> {
@@ -3094,11 +3062,22 @@ def t2Bcc : T2I<(outs), (ins brtarget:$target), IIC_Br,
let Inst{10-0} = target{11-1};
}
+// Tail calls. The Darwin version of thumb tail calls uses a t2 branch, so
+// it goes here.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
+ // Darwin version.
+ let Defs = [R0, R1, R2, R3, R9, R12, QQQQ0, QQQQ2, QQQQ3, PC],
+ Uses = [SP] in
+ def tTAILJMPd: tPseudoExpand<(outs), (ins uncondbrtarget:$dst, variable_ops),
+ 4, IIC_Br, [],
+ (t2B uncondbrtarget:$dst)>,
+ Requires<[IsThumb2, IsDarwin]>;
+}
// IT block
let Defs = [ITSTATE] in
def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
- AddrModeNone, Size2Bytes, IIC_iALUx,
+ AddrModeNone, 2, IIC_iALUx,
"it$mask\t$cc", "", []> {
// 16-bit instruction.
let Inst{31-16} = 0x0000;
@@ -3178,8 +3157,7 @@ def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
-def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt",
- [/* For disassembly only; pattern left blank */]> {
+def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt", []> {
let Inst{31-20} = 0xf3a;
let Inst{15-14} = 0b10;
let Inst{12} = 0;
@@ -3347,12 +3325,13 @@ def t2MSR : T2SpecialReg<0b111100111000 /* op31-20 */, 0b10 /* op15-14 */,
}
//===----------------------------------------------------------------------===//
-// Move between coprocessor and ARM core register -- for disassembly only
+// Move between coprocessor and ARM core register
//
-class t2MovRCopro<string opc, bit direction, dag oops, dag iops,
+class t2MovRCopro<bits<4> Op, string opc, bit direction, dag oops, dag iops,
list<dag> pattern>
- : T2Cop<oops, iops, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
+ : T2Cop<Op, oops, iops,
+ !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"),
pattern> {
let Inst{27-24} = 0b1110;
let Inst{20} = direction;
@@ -3373,22 +3352,10 @@ class t2MovRCopro<string opc, bit direction, dag oops, dag iops,
let Inst{19-16} = CRn;
}
-def t2MCR2 : t2MovRCopro<"mcr2", 0 /* from ARM core register to coprocessor */,
- (outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, c_imm:$CRn,
- c_imm:$CRm, i32imm:$opc2),
- [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
- imm:$CRm, imm:$opc2)]>;
-def t2MRC2 : t2MovRCopro<"mrc2", 1 /* from coprocessor to ARM core register */,
- (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
- c_imm:$CRm, i32imm:$opc2), []>;
-
-def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn,
- imm:$CRm, imm:$opc2),
- (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
-
-class t2MovRRCopro<string opc, bit direction,
- list<dag> pattern = [/* For disassembly only */]>
- : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
+class t2MovRRCopro<bits<4> Op, string opc, bit direction,
+ list<dag> pattern = []>
+ : T2Cop<Op, (outs),
+ (ins p_imm:$cop, imm0_15:$opc1, GPR:$Rt, GPR:$Rt2, c_imm:$CRm),
!strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> {
let Inst{27-24} = 0b1100;
let Inst{23-21} = 0b010;
@@ -3407,19 +3374,77 @@ class t2MovRRCopro<string opc, bit direction,
let Inst{3-0} = CRm;
}
-def t2MCRR2 : t2MovRRCopro<"mcrr2",
- 0 /* from ARM core register to coprocessor */,
+/* from ARM core register to coprocessor */
+def t2MCR : t2MovRCopro<0b1110, "mcr", 0,
+ (outs),
+ (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+def t2MCR2 : t2MovRCopro<0b1111, "mcr2", 0,
+ (outs), (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn,
+ c_imm:$CRm, imm0_7:$opc2),
+ [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn,
+ imm:$CRm, imm:$opc2)]>;
+
+/* from coprocessor to ARM core register */
+def t2MRC : t2MovRCopro<0b1110, "mrc", 1,
+ (outs GPR:$Rt),
+ (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+ []>;
+
+def t2MRC2 : t2MovRCopro<0b1111, "mrc2", 1,
+ (outs GPR:$Rt), (ins p_imm:$cop, i32imm:$opc1, c_imm:$CRn,
+ c_imm:$CRm, i32imm:$opc2), []>;
+
+def : T2v6Pat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+def : T2v6Pat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2),
+ (t2MRC2 imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2)>;
+
+
+/* from ARM core register to coprocessor */
+def t2MCRR : t2MovRRCopro<0b1110, "mcrr", 0,
+ [(int_arm_mcrr imm:$cop, imm:$opc1, GPR:$Rt, GPR:$Rt2,
+ imm:$CRm)]>;
+def t2MCRR2 : t2MovRRCopro<0b1111, "mcrr2", 0,
[(int_arm_mcrr2 imm:$cop, imm:$opc1, GPR:$Rt,
GPR:$Rt2, imm:$CRm)]>;
-def t2MRRC2 : t2MovRRCopro<"mrrc2",
- 1 /* from coprocessor to ARM core register */>;
+/* from coprocessor to ARM core register */
+def t2MRRC : t2MovRRCopro<0b1110, "mrrc", 1>;
+
+def t2MRRC2 : t2MovRRCopro<0b1111, "mrrc2", 1>;
//===----------------------------------------------------------------------===//
-// Other Coprocessor Instructions. For disassembly only.
+// Other Coprocessor Instructions.
//
-def t2CDP2 : T2Cop<(outs), (ins p_imm:$cop, i32imm:$opc1,
- c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, i32imm:$opc2),
+def tCDP : T2Cop<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
+ "cdp\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
+ [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
+ imm:$CRm, imm:$opc2)]> {
+ let Inst{27-24} = 0b1110;
+
+ bits<4> opc1;
+ bits<4> CRn;
+ bits<4> CRd;
+ bits<4> cop;
+ bits<3> opc2;
+ bits<4> CRm;
+
+ let Inst{3-0} = CRm;
+ let Inst{4} = 0;
+ let Inst{7-5} = opc2;
+ let Inst{11-8} = cop;
+ let Inst{15-12} = CRd;
+ let Inst{19-16} = CRn;
+ let Inst{23-20} = opc1;
+}
+
+def t2CDP2 : T2Cop<0b1111, (outs), (ins p_imm:$cop, imm0_15:$opc1,
+ c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2),
"cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2",
[(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn,
imm:$CRm, imm:$opc2)]> {
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 376bd96..f1f3cb9 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -94,7 +94,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
let Inst{20} = L_bit;
}
def DIA_UPD :
- AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
IndexModeUpd, itin_upd,
!strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
@@ -102,7 +103,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
let Inst{20} = L_bit;
}
def DDB_UPD :
- AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs, variable_ops),
+ AXDI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, dpr_reglist:$regs,
+ variable_ops),
IndexModeUpd, itin_upd,
!strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
@@ -124,7 +126,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
let D = VFPNeonDomain;
}
def SIA_UPD :
- AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
IndexModeUpd, itin_upd,
!strconcat(asm, "ia${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
let Inst{24-23} = 0b01; // Increment After
@@ -136,7 +139,8 @@ multiclass vfp_ldst_mult<string asm, bit L_bit,
let D = VFPNeonDomain;
}
def SDB_UPD :
- AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs, variable_ops),
+ AXSI4<(outs GPR:$wb), (ins GPR:$Rn, pred:$p, spr_reglist:$regs,
+ variable_ops),
IndexModeUpd, itin_upd,
!strconcat(asm, "db${p}\t$Rn!, $regs"), "$Rn = $wb", []> {
let Inst{24-23} = 0b10; // Decrement Before
@@ -162,6 +166,15 @@ defm VSTM : vfp_ldst_mult<"vstm", 0, IIC_fpLoad_m, IIC_fpLoad_mu>;
def : MnemonicAlias<"vldm", "vldmia">;
def : MnemonicAlias<"vstm", "vstmia">;
+def : InstAlias<"vpush${p} $r", (VSTMDDB_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpush${p} $r", (VSTMSDB_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMDIA_UPD SP, pred:$p, dpr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+def : InstAlias<"vpop${p} $r", (VLDMSIA_UPD SP, pred:$p, spr_reglist:$r)>,
+ Requires<[HasVFP2]>;
+
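These make the standard `vpush`/`vpop` spellings assemble directly; e.g.
`vpush {d8-d11}` matches VSTMDDB_UPD with SP writeback, the same encoding as
`vstmdb sp!, {d8-d11}`.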
// FLDMX, FSTMX - mixing S/D registers for pre-armv6 cores
//===----------------------------------------------------------------------===//
@@ -860,7 +873,7 @@ def VULTOD : AVConv1XI<0b11101, 0b11, 0b1011, 0b1011, 1,
} // End of 'let Constraints = "$a = $dst", isCodeGenOnly = 1 in'
//===----------------------------------------------------------------------===//
-// FP FMA Operations.
+// FP Multiply-Accumulate Operations.
//
def VMLAD : ADbI<0b11100, 0b00, 0, 0,
@@ -977,12 +990,12 @@ def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin),
let neverHasSideEffects = 1 in {
def VMOVDcc : ARMPseudoInst<(outs DPR:$Dd), (ins DPR:$Dn, DPR:$Dm, pred:$p),
- Size4Bytes, IIC_fpUNA64,
+ 4, IIC_fpUNA64,
[/*(set DPR:$Dd, (ARMcmov DPR:$Dn, DPR:$Dm, imm:$cc))*/]>,
RegConstraint<"$Dn = $Dd">;
def VMOVScc : ARMPseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, pred:$p),
- Size4Bytes, IIC_fpUNA32,
+ 4, IIC_fpUNA32,
[/*(set SPR:$Sd, (ARMcmov SPR:$Sn, SPR:$Sm, imm:$cc))*/]>,
RegConstraint<"$Sn = $Sd">;
} // neverHasSideEffects
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index f4645f1..c6efea1 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -329,13 +329,9 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
if (NewBase == 0)
return false;
}
- int BaseOpc = !isThumb2
- ? ARM::ADDri
- : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri);
+ int BaseOpc = !isThumb2 ? ARM::ADDri : ARM::t2ADDri;
if (Offset < 0) {
- BaseOpc = !isThumb2
- ? ARM::SUBri
- : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri);
+ BaseOpc = !isThumb2 ? ARM::SUBri : ARM::t2SUBri;
Offset = - Offset;
}
int ImmedOffset = isThumb2
@@ -516,8 +512,6 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base,
if (!MI)
return false;
if (MI->getOpcode() != ARM::t2SUBri &&
- MI->getOpcode() != ARM::t2SUBrSPi &&
- MI->getOpcode() != ARM::t2SUBrSPi12 &&
MI->getOpcode() != ARM::tSUBspi &&
MI->getOpcode() != ARM::SUBri)
return false;
@@ -541,8 +535,6 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base,
if (!MI)
return false;
if (MI->getOpcode() != ARM::t2ADDri &&
- MI->getOpcode() != ARM::t2ADDrSPi &&
- MI->getOpcode() != ARM::t2ADDrSPi12 &&
MI->getOpcode() != ARM::tADDspi &&
MI->getOpcode() != ARM::ADDri)
return false;
@@ -1461,19 +1453,19 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
while (++I != E) {
if (I->isDebugValue() || MemOps.count(&*I))
continue;
- const TargetInstrDesc &TID = I->getDesc();
- if (TID.isCall() || TID.isTerminator() || I->hasUnmodeledSideEffects())
+ const MCInstrDesc &MCID = I->getDesc();
+ if (MCID.isCall() || MCID.isTerminator() || I->hasUnmodeledSideEffects())
return false;
- if (isLd && TID.mayStore())
+ if (isLd && MCID.mayStore())
return false;
if (!isLd) {
- if (TID.mayLoad())
+ if (MCID.mayLoad())
return false;
// It's not safe to move the first 'str' down.
// str r1, [r0]
// strh r5, [r0]
// str r4, [r0, #+4]
- if (TID.mayStore())
+ if (MCID.mayStore())
return false;
}
for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
@@ -1672,14 +1664,14 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
Ops.pop_back();
Ops.pop_back();
- const TargetInstrDesc &TID = TII->get(NewOpc);
- const TargetRegisterClass *TRC = TID.OpInfo[0].getRegClass(TRI);
+ const MCInstrDesc &MCID = TII->get(NewOpc);
+ const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI);
MRI->constrainRegClass(EvenReg, TRC);
MRI->constrainRegClass(OddReg, TRC);
// Form the pair instruction.
if (isLd) {
- MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, TID)
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
.addReg(EvenReg, RegState::Define)
.addReg(OddReg, RegState::Define)
.addReg(BaseReg);
@@ -1691,7 +1683,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
++NumLDRDFormed;
} else {
- MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, TID)
+ MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
.addReg(EvenReg)
.addReg(OddReg)
.addReg(BaseReg);
@@ -1742,8 +1734,8 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
while (MBBI != E) {
for (; MBBI != E; ++MBBI) {
MachineInstr *MI = MBBI;
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.isCall() || TID.isTerminator()) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.isCall() || MCID.isTerminator()) {
// Stop at barriers.
++MBBI;
break;
diff --git a/lib/Target/ARM/ARMMCCodeEmitter.cpp b/lib/Target/ARM/ARMMCCodeEmitter.cpp
index c5f727d..39be3f0 100644
--- a/lib/Target/ARM/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMMCCodeEmitter.cpp
@@ -21,8 +21,11 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
@@ -32,19 +35,30 @@ namespace {
class ARMMCCodeEmitter : public MCCodeEmitter {
ARMMCCodeEmitter(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const ARMMCCodeEmitter &); // DO NOT IMPLEMENT
- const TargetMachine &TM;
- const TargetInstrInfo &TII;
- const ARMSubtarget *Subtarget;
- MCContext &Ctx;
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
public:
- ARMMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
- : TM(tm), TII(*TM.getInstrInfo()),
- Subtarget(&TM.getSubtarget<ARMSubtarget>()), Ctx(ctx) {
+ ARMMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti) {
}
~ARMMCCodeEmitter() {}
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumb2() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) != 0;
+ }
+ bool isTargetDarwin() const {
+ Triple TT(STI.getTargetTriple());
+ Triple::OSType OS = TT.getOS();
+ return OS == Triple::Darwin || OS == Triple::MacOSX || OS == Triple::IOS;
+ }
+
unsigned getMachineSoImmOpValue(unsigned SoImm) const;
// getBinaryCodeForInstr - TableGen'erated function for getting the
@@ -320,9 +334,10 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM,
+MCCodeEmitter *llvm::createARMMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new ARMMCCodeEmitter(TM, Ctx);
+ return new ARMMCCodeEmitter(MCII, STI, Ctx);
}
/// NEONThumb2DataIPostEncoder - Post-process encoded NEON data-processing
@@ -330,7 +345,7 @@ MCCodeEmitter *llvm::createARMMCCodeEmitter(const Target &, TargetMachine &TM,
/// Thumb2 mode.
unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
unsigned EncodedValue) const {
- if (Subtarget->isThumb2()) {
+ if (isThumb2()) {
// NEON Thumb2 data-processing encodings are very simple: bit 24 is moved
// to bit 12 of the high half-word (i.e. bit 28), and bits 27-24 are
// set to 1111.
@@ -349,7 +364,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2DataIPostEncoder(const MCInst &MI,
/// Thumb2 mode.
unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
unsigned EncodedValue) const {
- if (Subtarget->isThumb2()) {
+ if (isThumb2()) {
EncodedValue &= 0xF0FFFFFF;
EncodedValue |= 0x09000000;
}
@@ -362,7 +377,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2LoadStorePostEncoder(const MCInst &MI,
/// Thumb2 mode.
unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
unsigned EncodedValue) const {
- if (Subtarget->isThumb2()) {
+ if (isThumb2()) {
EncodedValue &= 0x00FFFFFF;
EncodedValue |= 0xEE000000;
}
@@ -374,7 +389,7 @@ unsigned ARMMCCodeEmitter::NEONThumb2DupPostEncoder(const MCInst &MI,
/// them to their Thumb2 form if we are currently in Thumb2 mode.
unsigned ARMMCCodeEmitter::
VFPThumb2PostEncoder(const MCInst &MI, unsigned EncodedValue) const {
- if (Subtarget->isThumb2()) {
+ if (isThumb2()) {
EncodedValue &= 0x0FFFFFFF;
EncodedValue |= 0xE0000000;
}
@@ -515,7 +530,7 @@ getBranchTargetOpValue(const MCInst &MI, unsigned OpIdx,
SmallVectorImpl<MCFixup> &Fixups) const {
// FIXME: This really, really shouldn't use TargetMachine. We don't want
// coupling between MC and TM anywhere we can help it.
- if (Subtarget->isThumb2())
+ if (isThumb2())
return
::getBranchTargetOpValue(MI, OpIdx, ARM::fixup_t2_condbranch, Fixups);
return getARMBranchTargetOpValue(MI, OpIdx, Fixups);
@@ -624,7 +639,7 @@ getAddrModeImm12OpValue(const MCInst &MI, unsigned OpIdx,
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind;
- if (Subtarget->isThumb2())
+ if (isThumb2())
Kind = MCFixupKind(ARM::fixup_t2_ldst_pcrel_12);
else
Kind = MCFixupKind(ARM::fixup_arm_ldst_pcrel_12);
@@ -709,22 +724,22 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
switch (ARM16Expr->getKind()) {
default: assert(0 && "Unsupported ARMFixup");
case ARMMCExpr::VK_ARM_HI16:
- if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
- Kind = MCFixupKind(Subtarget->isThumb2()
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
? ARM::fixup_t2_movt_hi16_pcrel
: ARM::fixup_arm_movt_hi16_pcrel);
else
- Kind = MCFixupKind(Subtarget->isThumb2()
+ Kind = MCFixupKind(isThumb2()
? ARM::fixup_t2_movt_hi16
: ARM::fixup_arm_movt_hi16);
break;
case ARMMCExpr::VK_ARM_LO16:
- if (!Subtarget->isTargetDarwin() && EvaluateAsPCRel(E))
- Kind = MCFixupKind(Subtarget->isThumb2()
+ if (!isTargetDarwin() && EvaluateAsPCRel(E))
+ Kind = MCFixupKind(isThumb2()
? ARM::fixup_t2_movw_lo16_pcrel
: ARM::fixup_arm_movw_lo16_pcrel);
else
- Kind = MCFixupKind(Subtarget->isThumb2()
+ Kind = MCFixupKind(isThumb2()
? ARM::fixup_t2_movw_lo16
: ARM::fixup_arm_movw_lo16);
break;
@@ -898,7 +913,7 @@ getAddrMode5OpValue(const MCInst &MI, unsigned OpIdx,
assert(MO.isExpr() && "Unexpected machine operand type!");
const MCExpr *Expr = MO.getExpr();
MCFixupKind Kind;
- if (Subtarget->isThumb2())
+ if (isThumb2())
Kind = MCFixupKind(ARM::fixup_t2_pcrel_10);
else
Kind = MCFixupKind(ARM::fixup_arm_pcrel_10);
@@ -1274,21 +1289,21 @@ void ARMMCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
// Pseudo instructions don't get encoded.
- const TargetInstrDesc &Desc = TII.get(MI.getOpcode());
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
uint64_t TSFlags = Desc.TSFlags;
if ((TSFlags & ARMII::FormMask) == ARMII::Pseudo)
return;
+
int Size;
- // Basic size info comes from the TSFlags field.
- switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
- default: llvm_unreachable("Unexpected instruction size!");
- case ARMII::Size2Bytes: Size = 2; break;
- case ARMII::Size4Bytes: Size = 4; break;
- }
+ if (Desc.getSize() == 2 || Desc.getSize() == 4)
+ Size = Desc.getSize();
+ else
+ llvm_unreachable("Unexpected instruction size!");
+
uint32_t Binary = getBinaryCodeForInstr(MI, Fixups);
// Thumb 32-bit wide instructions need to emit the high order halfword
// first.
- if (Subtarget->isThumb() && Size == 4) {
+ if (isThumb() && Size == 4) {
EmitConstant(Binary >> 16, 2, OS);
EmitConstant(Binary & 0xffff, 2, OS);
} else
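// Editor's sketch (not part of the patch) of the halfword ordering rule at
// the end of EncodeInstruction above: a 32-bit Thumb2 encoding is emitted
// high halfword first, each halfword little-endian, while a 32-bit ARM
// encoding goes out as a single little-endian word.
#include <cstdint>

static void emitThumb2Word(uint32_t Binary, unsigned char Out[4]) {
  uint16_t Hi = Binary >> 16, Lo = Binary & 0xffff;
  Out[0] = Hi & 0xff; Out[1] = Hi >> 8;   // high halfword first in memory
  Out[2] = Lo & 0xff; Out[3] = Lo >> 8;   // then the low halfword
}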
diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp
index 59d6050..7411b59 100644
--- a/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/ARMMCInstLower.cpp
@@ -23,43 +23,94 @@
using namespace llvm;
-static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
- ARMAsmPrinter &Printer) {
- MCContext &Ctx = Printer.OutContext;
+MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO,
+ const MCSymbol *Symbol) {
const MCExpr *Expr;
switch (MO.getTargetFlags()) {
default: {
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
switch (MO.getTargetFlags()) {
default:
assert(0 && "Unknown target flag on symbol operand");
case 0:
break;
case ARMII::MO_LO16:
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
- Expr = ARMMCExpr::CreateLower16(Expr, Ctx);
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateLower16(Expr, OutContext);
break;
case ARMII::MO_HI16:
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, Ctx);
- Expr = ARMMCExpr::CreateUpper16(Expr, Ctx);
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None,
+ OutContext);
+ Expr = ARMMCExpr::CreateUpper16(Expr, OutContext);
break;
}
break;
}
case ARMII::MO_PLT:
- Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT, Ctx);
+ Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_ARM_PLT,
+ OutContext);
break;
}
if (!MO.isJTI() && MO.getOffset())
Expr = MCBinaryExpr::CreateAdd(Expr,
- MCConstantExpr::Create(MO.getOffset(), Ctx),
- Ctx);
+ MCConstantExpr::Create(MO.getOffset(),
+ OutContext),
+ OutContext);
return MCOperand::CreateExpr(Expr);
}
+bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) {
+ switch (MO.getType()) {
+ default:
+ assert(0 && "unknown operand type");
+ return false;
+ case MachineOperand::MO_Register:
+ // Ignore all non-CPSR implicit register operands.
+ if (MO.isImplicit() && MO.getReg() != ARM::CPSR)
+ return false;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
+ MO.getMBB()->getSymbol(), OutContext));
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = GetSymbolRef(MO,
+ GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = GetSymbolRef(MO, GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_FPImmediate: {
+ APFloat Val = MO.getFPImm()->getValueAPF();
+ bool ignored;
+ Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
+ break;
+ }
+ }
+ return true;
+}
+
void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP) {
OutMI.setOpcode(MI->getOpcode());
@@ -68,48 +119,7 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
const MachineOperand &MO = MI->getOperand(i);
MCOperand MCOp;
- switch (MO.getType()) {
- default:
- MI->dump();
- assert(0 && "unknown operand type");
- case MachineOperand::MO_Register:
- // Ignore all non-CPSR implicit register operands.
- if (MO.isImplicit() && MO.getReg() != ARM::CPSR) continue;
- assert(!MO.getSubReg() && "Subregs should be eliminated!");
- MCOp = MCOperand::CreateReg(MO.getReg());
- break;
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::CreateImm(MO.getImm());
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create(
- MO.getMBB()->getSymbol(), AP.OutContext));
- break;
- case MachineOperand::MO_GlobalAddress:
- MCOp = GetSymbolRef(MO, AP.Mang->getSymbol(MO.getGlobal()), AP);
- break;
- case MachineOperand::MO_ExternalSymbol:
- MCOp = GetSymbolRef(MO,
- AP.GetExternalSymbolSymbol(MO.getSymbolName()), AP);
- break;
- case MachineOperand::MO_JumpTableIndex:
- MCOp = GetSymbolRef(MO, AP.GetJTISymbol(MO.getIndex()), AP);
- break;
- case MachineOperand::MO_ConstantPoolIndex:
- MCOp = GetSymbolRef(MO, AP.GetCPISymbol(MO.getIndex()), AP);
- break;
- case MachineOperand::MO_BlockAddress:
- MCOp = GetSymbolRef(MO,AP.GetBlockAddressSymbol(MO.getBlockAddress()),AP);
- break;
- case MachineOperand::MO_FPImmediate: {
- APFloat Val = MO.getFPImm()->getValueAPF();
- bool ignored;
- Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
- MCOp = MCOperand::CreateFPImm(Val.convertToDouble());
- break;
- }
- }
-
- OutMI.addOperand(MCOp);
+ if (AP.lowerOperand(MO, MCOp))
+ OutMI.addOperand(MCOp);
}
}
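// Editor's sketch (not part of the patch) of the contract introduced above:
// lowerOperand() returns false when an operand produces no MC operand (e.g.
// a non-CPSR implicit register), so callers simply skip it. The types here
// are stand-ins, not the LLVM ones.
#include <vector>

struct OpSketch   { bool Implicit; };
struct MCOpSketch { int Value; };

typedef bool (*LowerFnSketch)(const OpSketch &, MCOpSketch &);

static void lowerAll(const std::vector<OpSketch> &Ops,
                     LowerFnSketch lowerOperand,
                     std::vector<MCOpSketch> &Out) {
  for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
    MCOpSketch MCOp;
    if (lowerOperand(Ops[i], MCOp)) // false => operand intentionally dropped
      Out.push_back(MCOp);
  }
}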
diff --git a/lib/Target/ARM/ARMMachObjectWriter.cpp b/lib/Target/ARM/ARMMachObjectWriter.cpp
new file mode 100644
index 0000000..a36e47d
--- /dev/null
+++ b/lib/Target/ARM/ARMMachObjectWriter.cpp
@@ -0,0 +1,389 @@
+//===-- ARMMachObjectWriter.cpp - ARM Mach Object Writer ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMFixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Object/MachOFormat.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+using namespace llvm::object;
+
+namespace {
+class ARMMachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup, MCValue Target,
+ uint64_t &FixedValue);
+
+public:
+ ARMMachObjectWriter(bool Is64Bit, uint32_t CPUType,
+ uint32_t CPUSubtype)
+ : MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
+ /*UseAggressiveSymbolFolding=*/true) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue);
+};
+}
+
+static bool getARMFixupKindMachOInfo(unsigned Kind, unsigned &RelocType,
+ unsigned &Log2Size) {
+ RelocType = unsigned(macho::RIT_Vanilla);
+ Log2Size = ~0U;
+
+ switch (Kind) {
+ default:
+ return false;
+
+ case FK_Data_1:
+ Log2Size = llvm::Log2_32(1);
+ return true;
+ case FK_Data_2:
+ Log2Size = llvm::Log2_32(2);
+ return true;
+ case FK_Data_4:
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ case FK_Data_8:
+ Log2Size = llvm::Log2_32(8);
+ return true;
+
+ // Handle 24-bit branch kinds.
+ case ARM::fixup_arm_ldst_pcrel_12:
+ case ARM::fixup_arm_pcrel_10:
+ case ARM::fixup_arm_adr_pcrel_12:
+ case ARM::fixup_arm_condbranch:
+ case ARM::fixup_arm_uncondbranch:
+ RelocType = unsigned(macho::RIT_ARM_Branch24Bit);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ // Handle Thumb branches.
+ case ARM::fixup_arm_thumb_br:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(2);
+ return true;
+
+ case ARM::fixup_t2_uncondbranch:
+ case ARM::fixup_arm_thumb_bl:
+ case ARM::fixup_arm_thumb_blx:
+ RelocType = unsigned(macho::RIT_ARM_ThumbBranch22Bit);
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_HalfDifference);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+
+ case ARM::fixup_arm_movw_lo16:
+ case ARM::fixup_arm_movw_lo16_pcrel:
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ RelocType = unsigned(macho::RIT_ARM_Half);
+ // Report as 'long', even though that is not quite accurate.
+ Log2Size = llvm::Log2_32(4);
+ return true;
+ }
+}
+
+void ARMMachObjectWriter::
+RecordARMMovwMovtRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_ARM_Half;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint32_t Value2 = 0;
+ uint64_t SecAddr =
+ Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_ARM_HalfDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ // ARM_RELOC_HALF and ARM_RELOC_HALF_SECTDIFF abuse the r_length field:
+ //
+  // These two r_type relocations always have a pair following them, and
+  // their r_length bits are used differently. The r_length encoding is as
+  // follows:
+ // low bit of r_length:
+ // 0 - :lower16: for movw instructions
+ // 1 - :upper16: for movt instructions
+ // high bit of r_length:
+ // 0 - arm instructions
+ // 1 - thumb instructions
+ // the other half of the relocated expression is in the following pair
+  // relocation entry in the low 16 bits of the r_address field.
+ unsigned ThumbBit = 0;
+ unsigned MovtBit = 0;
+ switch ((unsigned)Fixup.getKind()) {
+ default: break;
+ case ARM::fixup_arm_movt_hi16:
+ case ARM::fixup_arm_movt_hi16_pcrel:
+ MovtBit = 1;
+ break;
+ case ARM::fixup_t2_movt_hi16:
+ case ARM::fixup_t2_movt_hi16_pcrel:
+ MovtBit = 1;
+ // Fallthrough
+ case ARM::fixup_t2_movw_lo16:
+ case ARM::fixup_t2_movw_lo16_pcrel:
+ ThumbBit = 1;
+ break;
+ }
+
+ if (Type == macho::RIT_ARM_HalfDifference) {
+ uint32_t OtherHalf = MovtBit
+ ? (FixedValue & 0xffff) : ((FixedValue & 0xffff0000) >> 16);
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((OtherHalf << 0) |
+ (macho::RIT_Pair << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (MovtBit << 28) |
+ (ThumbBit << 29) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ Type = macho::RIT_Difference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size;
+ unsigned RelocType = macho::RIT_Vanilla;
+ if (!getARMFixupKindMachOInfo(Fixup.getKind(), RelocType, Log2Size)) {
+ report_fatal_error("unknown ARM fixup kind!");
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB()) {
+ if (RelocType == macho::RIT_ARM_Half ||
+ RelocType == macho::RIT_ARM_HalfDifference)
+ return RecordARMMovwMovtRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, FixedValue);
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+ }
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // FIXME: For other platforms, we need to use scattered relocations for
+ // internal relocations with offsets. If this is an internal relocation with
+ // an offset, it also needs a scattered relocation entry.
+ //
+ // Is this right for ARM?
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel && RelocType == macho::RIT_Vanilla)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // FIXME!
+ report_fatal_error("FIXME: relocations to absolute targets "
+ "not yet implemented");
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ // The type is determined by the fixup kind.
+ Type = RelocType;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+MCObjectWriter *llvm::createARMMachObjectWriter(raw_ostream &OS,
+ bool Is64Bit,
+ uint32_t CPUType,
+ uint32_t CPUSubtype) {
+ return createMachObjectWriter(new ARMMachObjectWriter(Is64Bit,
+ CPUType,
+ CPUSubtype),
+ OS, /*IsLittleEndian=*/true);
+}
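// Editor's sketch (not part of the patch) of the bit packing used by the new
// writer above. For ARM_RELOC_HALF entries, Word0 reuses the r_length bits:
// bit 28 selects :lower16:/:upper16: and bit 29 selects ARM/Thumb. For plain
// relocation_info entries, Word1 packs the index, r_pcrel, r_length,
// r_extern, and r_type fields.
#include <cstdint>

static void decodeHalfWord0(uint32_t Word0, bool &IsMovt, bool &IsThumb) {
  IsMovt  = (Word0 >> 28) & 1; // 0 = movw (:lower16:), 1 = movt (:upper16:)
  IsThumb = (Word0 >> 29) & 1; // 0 = ARM encoding, 1 = Thumb encoding
}

static void decodeRelocWord1(uint32_t Word1, unsigned &Index, bool &IsPCRel,
                             unsigned &Log2Size, bool &IsExtern,
                             unsigned &Type) {
  Index    = Word1 & 0xffffff;  // symbol index or 1-based section ordinal
  IsPCRel  = (Word1 >> 24) & 1;
  Log2Size = (Word1 >> 25) & 3;
  IsExtern = (Word1 >> 27) & 1;
  Type     = (Word1 >> 28) & 0xf;
}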
diff --git a/lib/Target/ARM/ARMRegisterInfo.td b/lib/Target/ARM/ARMRegisterInfo.td
index 99418733..76eb496 100644
--- a/lib/Target/ARM/ARMRegisterInfo.td
+++ b/lib/Target/ARM/ARMRegisterInfo.td
@@ -200,45 +200,16 @@ def FPEXC : ARMReg<8, "fpexc">;
// r11 == Frame Pointer (arm-style backtraces)
// r10 == Stack Limit
//
-def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
- R7, R8, R9, R10, R11, R12,
- SP, LR, PC]> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned ARM_GPR_AO[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10, ARM::R11 };
-
- // For Thumb1 mode, we don't want to allocate hi regs at all, as we
- // don't know how to spill them. If we make our prologue/epilogue code
- // smarter at some point, we can go back to using the above allocation
- // orders for the Thumb1 instructions that know how to use hi regs.
- static const unsigned THUMB_GPR_AO[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
-
- GPRClass::iterator
- GPRClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.isThumb1Only())
- return THUMB_GPR_AO;
- return ARM_GPR_AO;
- }
-
- GPRClass::iterator
- GPRClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.isThumb1Only())
- return THUMB_GPR_AO + (sizeof(THUMB_GPR_AO)/sizeof(unsigned));
- return ARM_GPR_AO + (sizeof(ARM_GPR_AO)/sizeof(unsigned));
- }
+def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
+ SP, LR, PC)> {
+ // Allocate LR as the first CSR since it is always saved anyway.
+ // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
+ // know how to spill them. If we make our prologue/epilogue code smarter at
+ // some point, we can go back to using the above allocation orders for the
+ // Thumb1 instructions that know how to use hi regs.
+ let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
@@ -246,263 +217,98 @@ def GPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
// register range for operands, but have undefined behaviour when SP
// or PC (R13 or R15) are used. The ARM ISA refers to these operands
// via the BadReg() pseudo-code description.
-def rGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6,
- R7, R8, R9, R10, R11, R12, LR]> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned ARM_rGPR_AO[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12,ARM::LR,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7,
- ARM::R8, ARM::R9, ARM::R10,
- ARM::R11 };
-
- // For Thumb1 mode, we don't want to allocate hi regs at all, as we
- // don't know how to spill them. If we make our prologue/epilogue code
- // smarter at some point, we can go back to using the above allocation
- // orders for the Thumb1 instructions that know how to use hi regs.
- static const unsigned THUMB_rGPR_AO[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R4, ARM::R5, ARM::R6, ARM::R7 };
-
- rGPRClass::iterator
- rGPRClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.isThumb1Only())
- return THUMB_rGPR_AO;
- return ARM_rGPR_AO;
- }
-
- rGPRClass::iterator
- rGPRClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-
- if (Subtarget.isThumb1Only())
- return THUMB_rGPR_AO + (sizeof(THUMB_rGPR_AO)/sizeof(unsigned));
- return ARM_rGPR_AO + (sizeof(ARM_rGPR_AO)/sizeof(unsigned));
- }
+def rGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, SP, PC)> {
+ let AltOrders = [(add LR, rGPR), (trunc rGPR, 8)];
+ let AltOrderSelect = [{
+ return 1 + MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
// Thumb registers are R0-R7 normally. Some instructions can still use
// the general GPR register class above (e.g. MOV).
-def tGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {}
+def tGPR : RegisterClass<"ARM", [i32], 32, (trunc GPR, 8)>;
+
+// The high registers in thumb mode, R8-R15.
+def hGPR : RegisterClass<"ARM", [i32], 32, (sub GPR, tGPR)>;
// For tail calls, we can't use callee-saved registers, as they are restored
// to the saved value before the tail call, which would clobber a call address.
// Note, getMinimalPhysRegClass(R0) returns tGPR because of the names of
// this class and the preceding one(!) This is what we want.
-def tcGPR : RegisterClass<"ARM", [i32], 32, [R0, R1, R2, R3, R9, R12]> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // R9 is available.
- static const unsigned ARM_GPR_R9_TC[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R9, ARM::R12 };
- // R9 is not available.
- static const unsigned ARM_GPR_NOR9_TC[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3,
- ARM::R12 };
-
- // For Thumb1 mode, we don't want to allocate hi regs at all, as we
- // don't know how to spill them. If we make our prologue/epilogue code
- // smarter at some point, we can go back to using the above allocation
- // orders for the Thumb1 instructions that know how to use hi regs.
- static const unsigned THUMB_GPR_AO_TC[] = {
- ARM::R0, ARM::R1, ARM::R2, ARM::R3 };
-
- tcGPRClass::iterator
- tcGPRClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.isThumb1Only())
- return THUMB_GPR_AO_TC;
- return Subtarget.isTargetDarwin() ? ARM_GPR_R9_TC : ARM_GPR_NOR9_TC;
- }
-
- tcGPRClass::iterator
- tcGPRClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
-
- if (Subtarget.isThumb1Only())
- return THUMB_GPR_AO_TC + (sizeof(THUMB_GPR_AO_TC)/sizeof(unsigned));
-
- return Subtarget.isTargetDarwin() ?
- ARM_GPR_R9_TC + (sizeof(ARM_GPR_R9_TC)/sizeof(unsigned)) :
- ARM_GPR_NOR9_TC + (sizeof(ARM_GPR_NOR9_TC)/sizeof(unsigned));
- }
+def tcGPR : RegisterClass<"ARM", [i32], 32, (add R0, R1, R2, R3, R9, R12)> {
+ let AltOrders = [(and tcGPR, tGPR)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<ARMSubtarget>().isThumb1Only();
}];
}
-
// Scalar single precision floating point register class.
-def SPR : RegisterClass<"ARM", [f32], 32, [S0, S1, S2, S3, S4, S5, S6, S7, S8,
- S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, S20, S21, S22,
- S23, S24, S25, S26, S27, S28, S29, S30, S31]>;
+def SPR : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 31)>;
// Subset of SPR which can be used as a source of NEON scalars for 16-bit
// operations
-def SPR_8 : RegisterClass<"ARM", [f32], 32,
- [S0, S1, S2, S3, S4, S5, S6, S7,
- S8, S9, S10, S11, S12, S13, S14, S15]>;
+def SPR_8 : RegisterClass<"ARM", [f32], 32, (trunc SPR, 16)>;
// Scalar double precision floating point / generic 64-bit vector register
// class.
// ARM requires only word alignment for double. It's more performant if it
// is double-word aligned, though.
def DPR : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
- [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15,
- D16, D17, D18, D19, D20, D21, D22, D23,
- D24, D25, D26, D27, D28, D29, D30, D31]> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // VFP2 / VFPv3-D16
- static const unsigned ARM_DPR_VFP2[] = {
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10, ARM::D11,
- ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
- // VFP3: D8-D15 are callee saved and should be allocated last.
- // Save other low registers for use as DPR_VFP2 and DPR_8 classes.
- static const unsigned ARM_DPR_VFP3[] = {
- ARM::D16, ARM::D17, ARM::D18, ARM::D19,
- ARM::D20, ARM::D21, ARM::D22, ARM::D23,
- ARM::D24, ARM::D25, ARM::D26, ARM::D27,
- ARM::D28, ARM::D29, ARM::D30, ARM::D31,
- ARM::D0, ARM::D1, ARM::D2, ARM::D3,
- ARM::D4, ARM::D5, ARM::D6, ARM::D7,
- ARM::D8, ARM::D9, ARM::D10, ARM::D11,
- ARM::D12, ARM::D13, ARM::D14, ARM::D15 };
-
- DPRClass::iterator
- DPRClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.hasVFP3() && !Subtarget.hasD16())
- return ARM_DPR_VFP3;
- return ARM_DPR_VFP2;
- }
-
- DPRClass::iterator
- DPRClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const ARMSubtarget &Subtarget = TM.getSubtarget<ARMSubtarget>();
- if (Subtarget.hasVFP3() && !Subtarget.hasD16())
- return ARM_DPR_VFP3 + (sizeof(ARM_DPR_VFP3)/sizeof(unsigned));
- else
- return ARM_DPR_VFP2 + (sizeof(ARM_DPR_VFP2)/sizeof(unsigned));
- }
- }];
+ (sequence "D%u", 0, 31)> {
+ // Allocate non-VFP2 registers D16-D31 first.
+ let AltOrders = [(rotl DPR, 16)];
+ let AltOrderSelect = [{ return 1; }];
}
// Subset of DPR that are accessible with VFP2 (and so that also have
// 32-bit SPR subregs).
def DPR_VFP2 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
- [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15]> {
+ (trunc DPR, 16)> {
let SubRegClasses = [(SPR ssub_0, ssub_1)];
}
// Subset of DPR which can be used as a source of NEON scalars for 16-bit
// operations
def DPR_8 : RegisterClass<"ARM", [f64, v8i8, v4i16, v2i32, v1i64, v2f32], 64,
- [D0, D1, D2, D3, D4, D5, D6, D7]> {
+ (trunc DPR, 8)> {
let SubRegClasses = [(SPR_8 ssub_0, ssub_1)];
}
// Generic 128-bit vector register class.
def QPR : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
- Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15]> {
+ (sequence "Q%u", 0, 15)> {
let SubRegClasses = [(DPR dsub_0, dsub_1)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // Q4-Q7 are callee saved and should be allocated last.
- // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
- static const unsigned ARM_QPR[] = {
- ARM::Q8, ARM::Q9, ARM::Q10, ARM::Q11,
- ARM::Q12, ARM::Q13, ARM::Q14, ARM::Q15,
- ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
- ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7 };
-
- QPRClass::iterator
- QPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return ARM_QPR;
- }
-
- QPRClass::iterator
- QPRClass::allocation_order_end(const MachineFunction &MF) const {
- return ARM_QPR + (sizeof(ARM_QPR)/sizeof(unsigned));
- }
- }];
+ // Allocate non-VFP2 aliases Q8-Q15 first.
+ let AltOrders = [(rotl QPR, 8)];
+ let AltOrderSelect = [{ return 1; }];
}
// Subset of QPR that have 32-bit SPR subregs.
def QPR_VFP2 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128,
- [Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]> {
+ 128, (trunc QPR, 8)> {
let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
(DPR_VFP2 dsub_0, dsub_1)];
}
// Subset of QPR that have DPR_8 and SPR_8 subregs.
def QPR_8 : RegisterClass<"ARM", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- 128,
- [Q0, Q1, Q2, Q3]> {
+ 128, (trunc QPR, 4)> {
let SubRegClasses = [(SPR_8 ssub_0, ssub_1, ssub_2, ssub_3),
(DPR_8 dsub_0, dsub_1)];
}
// Pseudo 256-bit vector register class to model pairs of Q registers
// (4 consecutive D registers).
-def QQPR : RegisterClass<"ARM", [v4i64],
- 256,
- [QQ0, QQ1, QQ2, QQ3, QQ4, QQ5, QQ6, QQ7]> {
+def QQPR : RegisterClass<"ARM", [v4i64], 256, (sequence "QQ%u", 0, 7)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3),
(QPR qsub_0, qsub_1)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // QQ2-QQ3 are callee saved and should be allocated last.
- // Save other low registers for use as QPR_VFP2 and QPR_8 classes.
- static const unsigned ARM_QQPR[] = {
- ARM::QQ4, ARM::QQ5, ARM::QQ6, ARM::QQ7,
- ARM::QQ0, ARM::QQ1, ARM::QQ2, ARM::QQ3 };
-
- QQPRClass::iterator
- QQPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return ARM_QQPR;
- }
-
- QQPRClass::iterator
- QQPRClass::allocation_order_end(const MachineFunction &MF) const {
- return ARM_QQPR + (sizeof(ARM_QQPR)/sizeof(unsigned));
- }
- }];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQPR, 4)];
+ let AltOrderSelect = [{ return 1; }];
}
// Subset of QQPR that have 32-bit SPR subregs.
-def QQPR_VFP2 : RegisterClass<"ARM", [v4i64],
- 256,
- [QQ0, QQ1, QQ2, QQ3]> {
+def QQPR_VFP2 : RegisterClass<"ARM", [v4i64], 256, (trunc QQPR, 4)> {
let SubRegClasses = [(SPR ssub_0, ssub_1, ssub_2, ssub_3),
(DPR_VFP2 dsub_0, dsub_1, dsub_2, dsub_3),
(QPR_VFP2 qsub_0, qsub_1)];
@@ -511,35 +317,16 @@ def QQPR_VFP2 : RegisterClass<"ARM", [v4i64],
// Pseudo 512-bit vector register class to model 4 consecutive Q registers
// (8 consecutive D registers).
-def QQQQPR : RegisterClass<"ARM", [v8i64],
- 256,
- [QQQQ0, QQQQ1, QQQQ2, QQQQ3]> {
+def QQQQPR : RegisterClass<"ARM", [v8i64], 256, (sequence "QQQQ%u", 0, 3)> {
let SubRegClasses = [(DPR dsub_0, dsub_1, dsub_2, dsub_3,
dsub_4, dsub_5, dsub_6, dsub_7),
(QPR qsub_0, qsub_1, qsub_2, qsub_3)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // QQQQ1 is callee saved and should be allocated last.
- // Save QQQQ0 for use as QPR_VFP2 and QPR_8 classes.
- static const unsigned ARM_QQQQPR[] = {
- ARM::QQQQ2, ARM::QQQQ3, ARM::QQQQ0, ARM::QQQQ1 };
-
- QQQQPRClass::iterator
- QQQQPRClass::allocation_order_begin(const MachineFunction &MF) const {
- return ARM_QQQQPR;
- }
-
- QQQQPRClass::iterator
- QQQQPRClass::allocation_order_end(const MachineFunction &MF) const {
- return ARM_QQQQPR + (sizeof(ARM_QQQQPR)/sizeof(unsigned));
- }
- }];
+ // Allocate non-VFP2 aliases first.
+ let AltOrders = [(rotl QQQQPR, 2)];
+ let AltOrderSelect = [{ return 1; }];
}
// Condition code registers.
-def CCR : RegisterClass<"ARM", [i32], 32, [CPSR]> {
+def CCR : RegisterClass<"ARM", [i32], 32, (add CPSR)> {
let isAllocatable = 0;
}
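// Editor's sketch (not part of the patch) of what the set-theoretic
// (rotl DPR, 16) alternative order above expands to: the same register
// sequence rotated left by 16, so D16-D31 are tried before the VFP2-visible
// D0-D15 when the subtarget makes them available.
#include <algorithm>
#include <vector>

static std::vector<unsigned> rotatedOrder(std::vector<unsigned> Regs,
                                          unsigned N) {
  if (Regs.empty())
    return Regs;
  std::rotate(Regs.begin(), Regs.begin() + (N % Regs.size()), Regs.end());
  return Regs; // {D0..D31} with N == 16 becomes {D16..D31, D0..D15}
}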
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index c6f266b..1cab9e4 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -7,17 +7,21 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the ARM specific subclass of TargetSubtarget.
+// This file implements the ARM specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "ARMSubtarget.h"
-#include "ARMGenSubtarget.inc"
#include "ARMBaseRegisterInfo.h"
#include "llvm/GlobalValue.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "ARMGenSubtargetInfo.inc"
+
using namespace llvm;
static cl::opt<bool>
@@ -31,17 +35,25 @@ static cl::opt<bool>
StrictAlign("arm-strict-align", cl::Hidden,
cl::desc("Disallow all unaligned memory accesses"));
-ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
- bool isT)
- : ARMArchVersion(V4)
+ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : ARMGenSubtargetInfo(TT, CPU, FS)
, ARMProcFamily(Others)
- , ARMFPUType(None)
+ , HasV4TOps(false)
+ , HasV5TOps(false)
+ , HasV5TEOps(false)
+ , HasV6Ops(false)
+ , HasV6T2Ops(false)
+ , HasV7Ops(false)
+ , HasVFPv2(false)
+ , HasVFPv3(false)
+ , HasNEON(false)
, UseNEONForSinglePrecisionFP(false)
, SlowFPVMLx(false)
, HasVMLxForwarding(false)
, SlowFPBrcc(false)
- , IsThumb(isT)
- , ThumbMode(Thumb1)
+ , InThumbMode(false)
+ , HasThumb2(false)
, NoARM(false)
, PostRAScheduler(false)
, IsR9Reserved(ReserveR9)
@@ -56,94 +68,40 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
, HasMPExtension(false)
, FPOnlySP(false)
, AllowsUnalignedMem(false)
+ , Thumb2DSP(false)
, stackAlignment(4)
- , CPUString("generic")
+ , CPUString(CPU)
, TargetTriple(TT)
, TargetABI(ARM_ABI_APCS) {
- // Default to soft float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Soft;
-
// Determine default and user specified characteristics
-
- // When no arch is specified either by CPU or by attributes, make the default
- // ARMv4T.
- const char *ARMArchFeature = "";
- if (CPUString == "generic" && (FS.empty() || FS == "generic")) {
- ARMArchVersion = V4T;
- ARMArchFeature = ",+v4t";
+ if (CPUString.empty())
+ CPUString = "generic";
+
+ // Insert the architecture feature derived from the target triple into the
+ // feature string. This is important for setting features that are implied
+ // based on the architecture version.
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS;
+ else
+ ArchFS = FS;
}
+ ParseSubtargetFeatures(CPUString, ArchFS);
- // Set the boolean corresponding to the current target triple, or the default
- // if one cannot be determined, to true.
- unsigned Len = TT.length();
- unsigned Idx = 0;
+  // Thumb2 implies at least V6T2. FIXME: Fix tests to explicitly specify an
+  // ARM version or CPU and then remove this.
+ if (!HasV6T2Ops && hasThumb2())
+ HasV4TOps = HasV5TOps = HasV5TEOps = HasV6Ops = HasV6T2Ops = true;
- if (Len >= 5 && TT.substr(0, 4) == "armv")
- Idx = 4;
- else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
- IsThumb = true;
- if (Len >= 7 && TT[5] == 'v')
- Idx = 6;
- }
- if (Idx) {
- unsigned SubVer = TT[Idx];
- if (SubVer >= '7' && SubVer <= '9') {
- ARMArchVersion = V7A;
- ARMArchFeature = ",+v7a";
- if (Len >= Idx+2 && TT[Idx+1] == 'm') {
- ARMArchVersion = V7M;
- ARMArchFeature = ",+v7m";
- }
- } else if (SubVer == '6') {
- ARMArchVersion = V6;
- ARMArchFeature = ",+v6";
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2') {
- ARMArchVersion = V6T2;
- ARMArchFeature = ",+v6t2";
- }
- } else if (SubVer == '5') {
- ARMArchVersion = V5T;
- ARMArchFeature = ",+v5t";
- if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e') {
- ARMArchVersion = V5TE;
- ARMArchFeature = ",+v5te";
- }
- } else if (SubVer == '4') {
- if (Len >= Idx+2 && TT[Idx+1] == 't') {
- ARMArchVersion = V4T;
- ARMArchFeature = ",+v4t";
- } else {
- ARMArchVersion = V4;
- ARMArchFeature = "";
- }
- }
- }
-
- if (TT.find("eabi") != std::string::npos)
- TargetABI = ARM_ABI_AAPCS;
-
- // Parse features string. If the first entry in FS (the CPU) is missing,
- // insert the architecture feature derived from the target triple. This is
- // important for setting features that are implied based on the architecture
- // version.
- std::string FSWithArch;
- if (FS.empty())
- FSWithArch = std::string(ARMArchFeature);
- else if (FS.find(',') == 0)
- FSWithArch = std::string(ARMArchFeature) + FS;
- else
- FSWithArch = FS;
- CPUString = ParseSubtargetFeatures(FSWithArch, CPUString);
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUString);
// After parsing Itineraries, set ItinData.IssueWidth.
computeIssueWidth();
- // Thumb2 implies at least V6T2.
- if (ARMArchVersion >= V6T2)
- ThumbMode = Thumb2;
- else if (ThumbMode >= Thumb2)
- ARMArchVersion = V6T2;
+ if (TT.find("eabi") != std::string::npos)
+ TargetABI = ARM_ABI_AAPCS;
if (isAAPCS_ABI())
stackAlignment = 8;
@@ -151,7 +109,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
if (!isTargetDarwin())
UseMovt = hasV6T2Ops();
else {
- IsR9Reserved = ReserveR9 | (ARMArchVersion < V6);
+ IsR9Reserved = ReserveR9 | !HasV6Ops;
UseMovt = DarwinUseMOVT && hasV6T2Ops();
}
@@ -247,9 +205,9 @@ void ARMSubtarget::computeIssueWidth() {
bool ARMSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& Mode,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
CriticalPathRCs.push_back(&ARM::GPRRegClass);
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
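// Editor's sketch (not part of the patch) of the feature-string composition
// in the constructor above: features implied by the triple come first, and
// any explicit user features are appended after them.
#include <string>

static std::string composeFeatures(const std::string &ArchFS,
                                   const std::string &FS) {
  if (FS.empty())
    return ArchFS;
  if (ArchFS.empty())
    return FS;
  return ArchFS + "," + FS; // e.g. "+v7a" plus user "+neon" -> "+v7a,+neon"
}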
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 0271c87..c650872 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -7,50 +7,49 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the ARM specific subclass of TargetSubtarget.
+// This file declares the ARM specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef ARMSUBTARGET_H
#define ARMSUBTARGET_H
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "MCTargetDesc/ARMMCTargetDesc.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "ARMGenSubtargetInfo.inc"
+
namespace llvm {
class GlobalValue;
+class StringRef;
-class ARMSubtarget : public TargetSubtarget {
+class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
- enum ARMArchEnum {
- V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
- };
-
enum ARMProcFamilyEnum {
Others, CortexA8, CortexA9
};
- enum ARMFPEnum {
- None, VFPv2, VFPv3, NEON
- };
-
- enum ThumbTypeEnum {
- Thumb1,
- Thumb2
- };
-
- /// ARMArchVersion - ARM architecture version: V4, V4T (base), V5T, V5TE,
- /// V6, V6T2, V7A, V7M.
- ARMArchEnum ARMArchVersion;
-
/// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
ARMProcFamilyEnum ARMProcFamily;
- /// ARMFPUType - Floating Point Unit type.
- ARMFPEnum ARMFPUType;
+  /// HasV4TOps, HasV5TOps, HasV5TEOps, HasV6Ops, HasV6T2Ops, HasV7Ops -
+  /// Specify whether the target supports specific ARM ISA variants.
+ bool HasV4TOps;
+ bool HasV5TOps;
+ bool HasV5TEOps;
+ bool HasV6Ops;
+ bool HasV6T2Ops;
+ bool HasV7Ops;
+
+ /// HasVFPv2, HasVFPv3, HasNEON - Specify what floating point ISAs are
+ /// supported.
+ bool HasVFPv2;
+ bool HasVFPv3;
+ bool HasNEON;
/// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been
/// specified. Use the method useNEONForSinglePrecisionFP() to
@@ -68,11 +67,11 @@ protected:
/// SlowFPBrcc - True if floating point compare + branch is slow.
bool SlowFPBrcc;
- /// IsThumb - True if we are in thumb mode, false if in ARM mode.
- bool IsThumb;
+ /// InThumbMode - True if compiling for Thumb, false for ARM.
+ bool InThumbMode;
- /// ThumbMode - Indicates supported Thumb version.
- ThumbTypeEnum ThumbMode;
+ /// HasThumb2 - True if Thumb2 instructions are supported.
+ bool HasThumb2;
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
@@ -128,6 +127,10 @@ protected:
/// ARMTargetLowering::allowsUnalignedMemoryAccesses().
bool AllowsUnalignedMem;
+ /// Thumb2DSP - If true, the subtarget supports the v7 DSP (saturating arith
+ /// and such) instructions in Thumb2 code.
+ bool Thumb2DSP;
+
/// stackAlignment - The minimum alignment known to hold of the stack frame on
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
@@ -154,7 +157,8 @@ protected:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- ARMSubtarget(const std::string &TT, const std::string &FS, bool isThumb);
+ ARMSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
@@ -165,28 +169,28 @@ protected:
}
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
void computeIssueWidth();
- bool hasV4TOps() const { return ARMArchVersion >= V4T; }
- bool hasV5TOps() const { return ARMArchVersion >= V5T; }
- bool hasV5TEOps() const { return ARMArchVersion >= V5TE; }
- bool hasV6Ops() const { return ARMArchVersion >= V6; }
- bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
- bool hasV7Ops() const { return ARMArchVersion >= V7A; }
+ bool hasV4TOps() const { return HasV4TOps; }
+ bool hasV5TOps() const { return HasV5TOps; }
+ bool hasV5TEOps() const { return HasV5TEOps; }
+ bool hasV6Ops() const { return HasV6Ops; }
+ bool hasV6T2Ops() const { return HasV6T2Ops; }
+ bool hasV7Ops() const { return HasV7Ops; }
bool isCortexA8() const { return ARMProcFamily == CortexA8; }
bool isCortexA9() const { return ARMProcFamily == CortexA9; }
bool hasARMOps() const { return !NoARM; }
- bool hasVFP2() const { return ARMFPUType >= VFPv2; }
- bool hasVFP3() const { return ARMFPUType >= VFPv3; }
- bool hasNEON() const { return ARMFPUType >= NEON; }
+ bool hasVFP2() const { return HasVFPv2; }
+ bool hasVFP3() const { return HasVFPv3; }
+ bool hasNEON() const { return HasNEON; }
bool useNEONForSinglePrecisionFP() const {
return hasNEON() && UseNEONForSinglePrecisionFP; }
+
bool hasDivide() const { return HasHardwareDivide; }
bool hasT2ExtractPack() const { return HasT2ExtractPack; }
bool hasDataBarrier() const { return HasDataBarrier; }
@@ -197,6 +201,7 @@ protected:
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
bool hasMPExtension() const { return HasMPExtension; }
+ bool hasThumb2DSP() const { return Thumb2DSP; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
@@ -209,10 +214,10 @@ protected:
bool isAPCS_ABI() const { return TargetABI == ARM_ABI_APCS; }
bool isAAPCS_ABI() const { return TargetABI == ARM_ABI_AAPCS; }
- bool isThumb() const { return IsThumb; }
- bool isThumb1Only() const { return IsThumb && (ThumbMode == Thumb1); }
- bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
- bool hasThumb2() const { return ThumbMode >= Thumb2; }
+ bool isThumb() const { return InThumbMode; }
+ bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
+ bool isThumb2() const { return InThumbMode && HasThumb2; }
+ bool hasThumb2() const { return HasThumb2; }
bool isR9Reserved() const { return IsR9Reserved; }
@@ -226,7 +231,7 @@ protected:
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& Mode,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
/// getInstrItins - Return the instruction itineraries based on subtarget
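// Editor's sketch (not part of the patch) of the mode predicates above,
// which are now derived from two independent booleans instead of a
// ThumbTypeEnum:
struct ModeBitsSketch { bool InThumbMode, HasThumb2; };

static bool isThumb1OnlySketch(ModeBitsSketch M) {
  return M.InThumbMode && !M.HasThumb2;
}
static bool isThumb2Sketch(ModeBitsSketch M) {
  return M.InThumbMode && M.HasThumb2;
}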
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 29aa4f7..f0b176a 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "ARMTargetMachine.h"
-#include "ARMMCAsmInfo.h"
#include "ARMFrameLowering.h"
#include "ARM.h"
#include "llvm/PassManager.h"
@@ -22,15 +21,6 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin())
- return new ARMMCAsmInfoDarwin();
-
- return new ARMELFMCAsmInfo();
-}
-
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
@@ -56,10 +46,6 @@ extern "C" void LLVMInitializeARMTarget() {
RegisterTargetMachine<ARMTargetMachine> X(TheARMTarget);
RegisterTargetMachine<ThumbTargetMachine> Y(TheThumbTarget);
- // Register the target asm info.
- RegisterAsmInfoFn A(TheARMTarget, createMCAsmInfo);
- RegisterAsmInfoFn B(TheThumbTarget, createMCAsmInfo);
-
// Register the MC Code Emitter
TargetRegistry::RegisterCodeEmitter(TheARMTarget, createARMMCCodeEmitter);
TargetRegistry::RegisterCodeEmitter(TheThumbTarget, createARMMCCodeEmitter);
@@ -78,18 +64,23 @@ extern "C" void LLVMInitializeARMTarget() {
///
ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T,
const std::string &TT,
- const std::string &FS,
- bool isThumb)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS, isThumb),
+ const std::string &CPU,
+ const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
JITInfo(),
InstrItins(Subtarget.getInstrItineraryData()) {
DefRelocModel = getRelocationModel();
+
+ // Default to soft float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Soft;
}
ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
+ : ARMBaseTargetMachine(T, TT, CPU, FS), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:64-i64:32:64-"
"v128:32:128-v64:32:64-n32") :
@@ -105,8 +96,9 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
}
ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : ARMBaseTargetMachine(T, TT, FS, true),
+ : ARMBaseTargetMachine(T, TT, CPU, FS),
InstrInfo(Subtarget.hasThumb2()
? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget))
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
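// Editor's sketch (not part of the patch) of the float-ABI defaulting that
// moved into the ARMBaseTargetMachine constructor above: an unspecified ABI
// resolves to soft float on ARM.
enum FloatABISketch { FA_Default, FA_Soft, FA_Hard };

static FloatABISketch resolveFloatABI(FloatABISketch Requested) {
  return Requested == FA_Default ? FA_Soft : Requested;
}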
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index e0aa149..bc3d46a 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -41,7 +41,7 @@ private:
public:
ARMBaseTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS, bool isThumb);
+ const std::string &CPU, const std::string &FS);
virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
@@ -70,7 +70,7 @@ class ARMTargetMachine : public ARMBaseTargetMachine {
ARMFrameLowering FrameLowering;
public:
ARMTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const ARMRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo();
@@ -109,7 +109,7 @@ class ThumbTargetMachine : public ARMBaseTargetMachine {
OwningPtr<ARMFrameLowering> FrameLowering;
public:
ThumbTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
/// returns either Thumb1RegisterInfo or Thumb2RegisterInfo
virtual const ARMBaseRegisterInfo *getRegisterInfo() const {
diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
index 2428ce1..d9a5fa2 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp
@@ -87,8 +87,9 @@ public:
: ARMBaseAsmLexer(T, MAI) {
std::string tripleString("arm-unknown-unknown");
std::string featureString;
+ std::string CPU;
OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, featureString));
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
InitRegisterMap(targetMachine->getRegisterInfo());
}
};
@@ -99,8 +100,9 @@ public:
: ARMBaseAsmLexer(T, MAI) {
std::string tripleString("thumb-unknown-unknown");
std::string featureString;
+ std::string CPU;
OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, featureString));
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
InitRegisterMap(targetMachine->getRegisterInfo());
}
};
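// Editor's sketch (not part of the patch) of the widened factory signature
// the two lexer constructors above now call: createTargetMachine takes an
// explicit CPU string between the triple and the feature string, and an
// empty CPU is legitimate (the subtarget falls back to "generic").
#include <string>

struct TargetSketch { // stand-in for llvm::Target
  int createTargetMachine(const std::string &Triple, const std::string &CPU,
                          const std::string &Features) { return 0; }
};

static void makeLexerTM(TargetSketch &T) {
  std::string TripleStr("thumb-unknown-unknown"), CPU, Features;
  T.createTargetMachine(TripleStr, CPU, Features); // CPU may be empty
}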
diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index 4bc12c9..a474127 100644
--- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -20,14 +20,17 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+
using namespace llvm;
namespace {
@@ -35,8 +38,8 @@ namespace {
class ARMOperand;
class ARMAsmParser : public TargetAsmParser {
+ MCSubtargetInfo &STI;
MCAsmParser &Parser;
- TargetMachine &TM;
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -47,7 +50,7 @@ class ARMAsmParser : public TargetAsmParser {
int TryParseRegister();
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
bool TryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &);
- bool TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
+ int TryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &);
bool ParseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &);
bool ParseMemory(SmallVectorImpl<MCParsedAsmOperand*> &,
ARMII::AddrMode AddrMode);
@@ -79,6 +82,18 @@ class ARMAsmParser : public TargetAsmParser {
void GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode);
+ bool isThumb() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & ARM::ModeThumb) != 0;
+ }
+ bool isThumbOne() const {
+ return isThumb() && (STI.getFeatureBits() & ARM::FeatureThumb2) == 0;
+ }
+ void SwitchMode() {
+ unsigned FB = ComputeAvailableFeatures(STI.ToggleFeature(ARM::ModeThumb));
+ setAvailableFeatures(FB);
+ }
+
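The parser now answers mode queries from subtarget feature bits rather than reaching back into a TargetMachine. A minimal sketch of the bit-mask mechanics this relies on; the constants stand in for the tablegen'd ARM::ModeThumb and ARM::FeatureThumb2 flags, and the real MCSubtargetInfo::ToggleFeature additionally recomputes implied features:

    #include <cstdint>

    const uint64_t ModeThumb     = 1ULL << 0;  // stand-in for ARM::ModeThumb
    const uint64_t FeatureThumb2 = 1ULL << 1;  // stand-in for ARM::FeatureThumb2

    struct SubtargetBits {
      uint64_t Bits;
      // Simplified: flip one feature bit and return the new mask.
      uint64_t ToggleFeature(uint64_t FB) { return Bits ^= FB; }
    };

    bool isThumb(const SubtargetBits &S) { return (S.Bits & ModeThumb) != 0; }
    bool isThumbOne(const SubtargetBits &S) {
      return isThumb(S) && (S.Bits & FeatureThumb2) == 0;
    }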
/// @name Auto-generated Match Functions
/// {
@@ -113,13 +128,13 @@ class ARMAsmParser : public TargetAsmParser {
const SmallVectorImpl<MCParsedAsmOperand*> &);
public:
- ARMAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
- : TargetAsmParser(T), Parser(_Parser), TM(_TM) {
- MCAsmParserExtension::Initialize(_Parser);
- // Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(
- &TM.getSubtarget<ARMSubtarget>()));
- }
+ ARMAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : TargetAsmParser(), STI(_STI), Parser(_Parser) {
+ MCAsmParserExtension::Initialize(_Parser);
+
+ // Initialize the set of available features.
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -146,6 +161,7 @@ class ARMOperand : public MCParsedAsmOperand {
RegisterList,
DPRRegisterList,
SPRRegisterList,
+ ShiftedRegister,
Shifter,
Token
} Kind;
@@ -207,8 +223,14 @@ class ARMOperand : public MCParsedAsmOperand {
struct {
ARM_AM::ShiftOpc ShiftTy;
- unsigned RegNum;
+ unsigned Imm;
} Shift;
+ struct {
+ ARM_AM::ShiftOpc ShiftTy;
+ unsigned SrcReg;
+ unsigned ShiftReg;
+ unsigned ShiftImm;
+ } ShiftedReg;
};
ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {}
@@ -255,6 +277,9 @@ public:
case Shifter:
Shift = o.Shift;
break;
+ case ShiftedRegister:
+ ShiftedReg = o.ShiftedReg;
+ break;
}
}
@@ -350,6 +375,46 @@ public:
bool isCondCode() const { return Kind == CondCode; }
bool isCCOut() const { return Kind == CCOut; }
bool isImm() const { return Kind == Immediate; }
+ bool isImm0_255() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 256;
+ }
+ bool isImm0_7() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 8;
+ }
+ bool isImm0_15() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 16;
+ }
+ bool isImm0_65535() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return Value >= 0 && Value < 65536;
+ }
+ bool isT2SOImm() const {
+ if (Kind != Immediate)
+ return false;
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
+ if (!CE) return false;
+ int64_t Value = CE->getValue();
+ return ARM_AM::getT2SOImmVal(Value) != -1;
+ }
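The five immediate predicates above repeat one unwrap-and-range-check pattern. A possible consolidation (not part of this patch, shown only to make the pattern explicit; it uses only names already in the file):

    bool isImmInRange(int64_t Lo, int64_t Hi) const {
      if (Kind != Immediate)
        return false;
      // Only constant expressions can be range-checked at parse time.
      const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
      if (!CE) return false;
      int64_t Value = CE->getValue();
      return Value >= Lo && Value <= Hi;
    }
    // isImm0_255() == isImmInRange(0, 255), etc.; isT2SOImm stays special,
    // since Thumb2 modified immediates are not a contiguous range.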
bool isReg() const { return Kind == Register; }
bool isRegList() const { return Kind == RegisterList; }
bool isDPRRegList() const { return Kind == DPRRegisterList; }
@@ -358,6 +423,7 @@ public:
bool isMemBarrierOpt() const { return Kind == MemBarrierOpt; }
bool isMemory() const { return Kind == Memory; }
bool isShifter() const { return Kind == Shifter; }
+ bool isShiftedReg() const { return Kind == ShiftedRegister; }
bool isMemMode2() const {
if (getMemAddrMode() != ARMII::AddrMode2)
return false;
@@ -488,6 +554,18 @@ public:
Inst.addOperand(MCOperand::CreateReg(getReg()));
}
+ void addShiftedRegOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands!");
+ assert(isShiftedReg() && "addShiftedRegOperands() on non-ShiftedReg!");
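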
+ assert((ShiftedReg.ShiftReg == 0 ||
+ ARM_AM::getSORegOffset(ShiftedReg.ShiftImm) == 0) &&
+ "Invalid shifted register operand!");
+ Inst.addOperand(MCOperand::CreateReg(ShiftedReg.SrcReg));
+ Inst.addOperand(MCOperand::CreateReg(ShiftedReg.ShiftReg));
+ Inst.addOperand(MCOperand::CreateImm(
+ ARM_AM::getSORegOpc(ShiftedReg.ShiftTy, ShiftedReg.ShiftImm)));
+ }
+
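addShiftedRegOperands() flattens the four ShiftedReg fields into two register operands plus one packed immediate. A hedged sketch of the packing it assumes, mirroring the ARM_AM helpers (shift opcode in the low three bits, amount above them; verify against ARMAddressingModes.h before relying on the exact layout):

    enum ShiftOpc { no_shift = 0, asr, lsl, lsr, ror, rrx };  // ARM_AM order

    // Assumed so_reg immediate layout, per ARM_AM's accessors.
    unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm) { return ShOp | (Imm << 3); }
    ShiftOpc getSORegShOp(unsigned Opc) { return ShiftOpc(Opc & 7); }
    unsigned getSORegOffset(unsigned Opc) { return Opc >> 3; }

Under that layout, "r3, lsl #5" yields operands (SrcReg, ShiftReg=0, getSORegOpc(lsl, 5)), which squares with the assert above: a nonzero shift register and a nonzero immediate shift amount are mutually exclusive.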
void addShifterOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(
@@ -515,6 +593,31 @@ public:
addExpr(Inst, getImm());
}
+ void addImm0_255Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_7Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_15Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addImm0_65535Operands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
+ void addT2SOImmOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ addExpr(Inst, getImm());
+ }
+
void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt())));
@@ -648,7 +751,7 @@ public:
Inst.addOperand(MCOperand::CreateImm(unsigned(getProcIFlags())));
}
- virtual void dump(raw_ostream &OS) const;
+ virtual void print(raw_ostream &OS) const;
static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) {
ARMOperand *Op = new ARMOperand(CondCode);
@@ -699,6 +802,21 @@ public:
return Op;
}
+ static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy,
+ unsigned SrcReg,
+ unsigned ShiftReg,
+ unsigned ShiftImm,
+ SMLoc S, SMLoc E) {
+ ARMOperand *Op = new ARMOperand(ShiftedRegister);
+ Op->ShiftedReg.ShiftTy = ShTy;
+ Op->ShiftedReg.SrcReg = SrcReg;
+ Op->ShiftedReg.ShiftReg = ShiftReg;
+ Op->ShiftedReg.ShiftImm = ShiftImm;
+ Op->StartLoc = S;
+ Op->EndLoc = E;
+ return Op;
+ }
+
static ARMOperand *CreateShifter(ARM_AM::ShiftOpc ShTy,
SMLoc S, SMLoc E) {
ARMOperand *Op = new ARMOperand(Shifter);
@@ -802,7 +920,7 @@ public:
} // end anonymous namespace.
-void ARMOperand::dump(raw_ostream &OS) const {
+void ARMOperand::print(raw_ostream &OS) const {
switch (Kind) {
case CondCode:
OS << "<ARMCC::" << ARMCondCodeToString(getCondCode()) << ">";
@@ -863,7 +981,15 @@ void ARMOperand::dump(raw_ostream &OS) const {
OS << "<register " << getReg() << ">";
break;
case Shifter:
- OS << "<shifter " << getShiftOpcStr(Shift.ShiftTy) << ">";
+ OS << "<shifter " << ARM_AM::getShiftOpcStr(Shift.ShiftTy) << ">";
+ break;
+ case ShiftedRegister:
+ OS << "<so_reg"
+ << ShiftedReg.SrcReg
+ << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(ShiftedReg.ShiftImm))
+ << ", " << ShiftedReg.ShiftReg << ", "
+ << ARM_AM::getSORegOffset(ShiftedReg.ShiftImm)
+ << ">";
break;
case RegisterList:
case DPRRegisterList:
@@ -927,11 +1053,12 @@ int ARMAsmParser::TryParseRegister() {
return RegNum;
}
-/// Try to parse a register name. The token must be an Identifier when called,
-/// and if it is a register name the token is eaten and the register number is
-/// returned. Otherwise return -1.
-///
-bool ARMAsmParser::TryParseShiftRegister(
+// Try to parse a shifter (e.g., "lsl <amt>"). On success, return 0.
+// If a recoverable error occurs, return 1. If an irrecoverable error
+// occurs, return -1. An irrecoverable error is one where tokens have been
+// consumed in the process of trying to parse the shifter (i.e., when it is
+// indeed a shifter operand, but malformed).
+int ARMAsmParser::TryParseShiftRegister(
SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
SMLoc S = Parser.getTok().getLoc();
const AsmToken &Tok = Parser.getTok();
@@ -948,18 +1075,69 @@ bool ARMAsmParser::TryParseShiftRegister(
.Default(ARM_AM::no_shift);
if (ShiftTy == ARM_AM::no_shift)
- return true;
-
- Parser.Lex(); // Eat shift-type operand;
- int RegNum = TryParseRegister();
- if (RegNum == -1)
- return Error(Parser.getTok().getLoc(), "register expected");
+ return 1;
+
+ Parser.Lex(); // Eat the operator.
+
+ // The source register for the shift has already been added to the
+ // operand list, so we need to pop it off and combine it into the shifted
+ // register operand instead.
+ OwningPtr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val());
+ if (!PrevOp->isReg())
+ return Error(PrevOp->getStartLoc(), "shift must be of a register");
+ int SrcReg = PrevOp->getReg();
+ int64_t Imm = 0;
+ int ShiftReg = 0;
+ if (ShiftTy == ARM_AM::rrx) {
+ // RRX doesn't have an explicit shift amount. The encoder expects
+ // the shift register to be the same as the source register. Seems odd,
+ // but OK.
+ ShiftReg = SrcReg;
+ } else {
+ // Figure out if this is shifted by a constant or a register (for non-RRX).
+ if (Parser.getTok().is(AsmToken::Hash)) {
+ Parser.Lex(); // Eat hash.
+ SMLoc ImmLoc = Parser.getTok().getLoc();
+ const MCExpr *ShiftExpr = 0;
+ if (getParser().ParseExpression(ShiftExpr)) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // The expression must be evaluatable as an immediate.
+ const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ShiftExpr);
+ if (!CE) {
+ Error(ImmLoc, "invalid immediate shift value");
+ return -1;
+ }
+ // Range check the immediate.
+ // lsl, ror: 0 <= imm <= 31
+ // lsr, asr: 0 <= imm <= 32
+ Imm = CE->getValue();
+ if (Imm < 0 ||
+ ((ShiftTy == ARM_AM::lsl || ShiftTy == ARM_AM::ror) && Imm > 31) ||
+ ((ShiftTy == ARM_AM::lsr || ShiftTy == ARM_AM::asr) && Imm > 32)) {
+ Error(ImmLoc, "immediate shift value out of range");
+ return -1;
+ }
+ } else if (Parser.getTok().is(AsmToken::Identifier)) {
+ ShiftReg = TryParseRegister();
+ SMLoc L = Parser.getTok().getLoc();
+ if (ShiftReg == -1) {
+ Error(L, "expected immediate or register in shift operand");
+ return -1;
+ }
+ } else {
+ Error(Parser.getTok().getLoc(),
+ "expected immediate or register in shift operand");
+ return -1;
+ }
+ }
- Operands.push_back(ARMOperand::CreateReg(RegNum,S, Parser.getTok().getLoc()));
- Operands.push_back(ARMOperand::CreateShifter(ShiftTy,
+ Operands.push_back(ARMOperand::CreateShiftedRegister(ShiftTy, SrcReg,
+ ShiftReg, Imm,
S, Parser.getTok().getLoc()));
- return false;
+ return 0;
}
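Illustrative operand texts and the tri-state result they would produce, derived from the checks above (a reading aid, not code from the patch):

    "r1, lsl #3"   -> 0   folded into one ShiftedRegister operand
    "r1, lsl r2"   -> 0   register-shifted register (ShiftReg = r2)
    "r1, rrx"      -> 0   ShiftReg forced equal to SrcReg, per the comment above
    "r1, foo"      -> 1   "foo" is no shift opcode; caller may try other forms
    "r1, lsl #40"  -> -1  genuinely a shifter but out of range; tokens consumed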
@@ -1162,10 +1340,14 @@ tryParseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) {
unsigned Opt = StringSwitch<unsigned>(OptStr.slice(0, OptStr.size()))
.Case("sy", ARM_MB::SY)
.Case("st", ARM_MB::ST)
+ .Case("sh", ARM_MB::ISH)
.Case("ish", ARM_MB::ISH)
+ .Case("shst", ARM_MB::ISHST)
.Case("ishst", ARM_MB::ISHST)
.Case("nsh", ARM_MB::NSH)
+ .Case("un", ARM_MB::NSH)
.Case("nshst", ARM_MB::NSHST)
+ .Case("unst", ARM_MB::NSHST)
.Case("osh", ARM_MB::OSH)
.Case("oshst", ARM_MB::OSHST)
.Default(~0U);
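The four new cases accept the older pre-ARMv7 barrier-option spellings as aliases for the architected names, so e.g. "dmb sh" and "dmb ish" encode identically. An equivalent, slightly more compact spelling using StringSwitch's multi-string form (a sketch, assuming the Cases() overload available in this tree):

    unsigned Opt = StringSwitch<unsigned>(OptStr)
      .Case("sy", ARM_MB::SY)
      .Case("st", ARM_MB::ST)
      .Cases("sh",   "ish",   ARM_MB::ISH)    // inner shareable
      .Cases("shst", "ishst", ARM_MB::ISHST)
      .Cases("un",   "nsh",   ARM_MB::NSH)    // non-shareable ("unordered")
      .Cases("unst", "nshst", ARM_MB::NSHST)
      .Case("osh",   ARM_MB::OSH)
      .Case("oshst", ARM_MB::OSHST)
      .Default(~0U);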
@@ -1604,15 +1786,18 @@ bool ARMAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands,
default:
Error(Parser.getTok().getLoc(), "unexpected token in operand");
return true;
- case AsmToken::Identifier:
+ case AsmToken::Identifier: {
if (!TryParseRegisterWithWriteBack(Operands))
return false;
- if (!TryParseShiftRegister(Operands))
+ int Res = TryParseShiftRegister(Operands);
+ if (Res == 0) // success
return false;
-
+ else if (Res == -1) // irrecoverable error
+ return true;
// Fall though for the Identifier case that is not a register or a
// special name.
+ }
case AsmToken::Integer: // things like 1f and 2b as branch targets
case AsmToken::Dot: { // . as a branch target
// This was not a register so parse other operands that start with an
@@ -1761,30 +1946,35 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
Mnemonic == "vcle" ||
(Mnemonic == "smlal" || Mnemonic == "umaal" || Mnemonic == "umlal" ||
Mnemonic == "vabal" || Mnemonic == "vmlal" || Mnemonic == "vpadal" ||
- Mnemonic == "vqdmlal"))
+ Mnemonic == "vqdmlal" || Mnemonic == "bics"))
return Mnemonic;
- // First, split out any predication code.
- unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
- .Case("eq", ARMCC::EQ)
- .Case("ne", ARMCC::NE)
- .Case("hs", ARMCC::HS)
- .Case("lo", ARMCC::LO)
- .Case("mi", ARMCC::MI)
- .Case("pl", ARMCC::PL)
- .Case("vs", ARMCC::VS)
- .Case("vc", ARMCC::VC)
- .Case("hi", ARMCC::HI)
- .Case("ls", ARMCC::LS)
- .Case("ge", ARMCC::GE)
- .Case("lt", ARMCC::LT)
- .Case("gt", ARMCC::GT)
- .Case("le", ARMCC::LE)
- .Case("al", ARMCC::AL)
- .Default(~0U);
- if (CC != ~0U) {
- Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
- PredicationCode = CC;
+ // First, split out any predication code. Ignore mnemonics we know aren't
+ // predicated but do have a carry-setting suffix and so weren't caught above.
+ if (Mnemonic != "adcs") {
+ unsigned CC = StringSwitch<unsigned>(Mnemonic.substr(Mnemonic.size()-2))
+ .Case("eq", ARMCC::EQ)
+ .Case("ne", ARMCC::NE)
+ .Case("hs", ARMCC::HS)
+ .Case("cs", ARMCC::HS)
+ .Case("lo", ARMCC::LO)
+ .Case("cc", ARMCC::LO)
+ .Case("mi", ARMCC::MI)
+ .Case("pl", ARMCC::PL)
+ .Case("vs", ARMCC::VS)
+ .Case("vc", ARMCC::VC)
+ .Case("hi", ARMCC::HI)
+ .Case("ls", ARMCC::LS)
+ .Case("ge", ARMCC::GE)
+ .Case("lt", ARMCC::LT)
+ .Case("gt", ARMCC::GT)
+ .Case("le", ARMCC::LE)
+ .Case("al", ARMCC::AL)
+ .Default(~0U);
+ if (CC != ~0U) {
+ Mnemonic = Mnemonic.slice(0, Mnemonic.size() - 2);
+ PredicationCode = CC;
+ }
}
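Worked examples of the revised split, following the tables above (the "cs"/"cc" rows are new aliases for HS/LO):

    // "moveq" -> Mnemonic "mov", PredicationCode ARMCC::EQ
    // "addcs" -> Mnemonic "add", PredicationCode ARMCC::HS (via "cs" alias)
    // "adcs"  -> exempted from the predication split above; it is really
    //            "adc" plus the carry-setting suffix, handled next
    // "bics"  -> whitelisted earlier and returned whole, so it is never
    //            misread as "bi" + "cs" predication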
// Next, determine if we have a carry setting bit. We explicitly ignore all
@@ -1824,8 +2014,6 @@ static StringRef SplitMnemonic(StringRef Mnemonic,
void ARMAsmParser::
GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
bool &CanAcceptPredicationCode) {
- bool isThumb = TM.getSubtarget<ARMSubtarget>().isThumb();
-
if (Mnemonic == "and" || Mnemonic == "lsl" || Mnemonic == "lsr" ||
Mnemonic == "rrx" || Mnemonic == "ror" || Mnemonic == "sub" ||
Mnemonic == "smull" || Mnemonic == "add" || Mnemonic == "adc" ||
@@ -1834,7 +2022,7 @@ GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
Mnemonic == "rsb" || Mnemonic == "rsc" || Mnemonic == "orn" ||
Mnemonic == "sbc" || Mnemonic == "mla" || Mnemonic == "umull" ||
Mnemonic == "eor" || Mnemonic == "smlal" ||
- (Mnemonic == "mov" && !isThumb)) {
+ (Mnemonic == "mov" && !isThumbOne())) {
CanAcceptCarrySet = true;
} else {
CanAcceptCarrySet = false;
@@ -1851,10 +2039,9 @@ GetMnemonicAcceptInfo(StringRef Mnemonic, bool &CanAcceptCarrySet,
CanAcceptPredicationCode = true;
}
- if (isThumb)
+ if (isThumb())
if (Mnemonic == "bkpt" || Mnemonic == "mcr" || Mnemonic == "mcrr" ||
- Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp" ||
- Mnemonic == "mov")
+ Mnemonic == "mrc" || Mnemonic == "mrrc" || Mnemonic == "cdp")
CanAcceptPredicationCode = false;
}
@@ -1884,20 +2071,22 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc,
bool CanAcceptCarrySet, CanAcceptPredicationCode;
GetMnemonicAcceptInfo(Head, CanAcceptCarrySet, CanAcceptPredicationCode);
+ // If we had a carry-setting suffix on an instruction that can't set flags,
+ // issue an error.
+ if (!CanAcceptCarrySet && CarrySetting) {
+ Parser.EatToEndOfStatement();
+ return Error(NameLoc, "instruction '" + Head +
+ "' can not set flags, but 's' suffix specified");
+ }
+
// Add the carry setting operand, if necessary.
//
// FIXME: It would be awesome if we could somehow invent a location such that
// match errors on this operand would print a nice diagnostic about how the
// 's' character in the mnemonic resulted in a CCOut operand.
- if (CanAcceptCarrySet) {
+ if (CanAcceptCarrySet)
Operands.push_back(ARMOperand::CreateCCOut(CarrySetting ? ARM::CPSR : 0,
NameLoc));
- } else {
- // This mnemonic can't ever accept a carry set, but the user wrote one (or
- // misspelled another mnemonic).
-
- // FIXME: Issue a nice error.
- }
// Add the predication code operand, if necessary.
if (CanAcceptPredicationCode) {
@@ -1988,7 +2177,7 @@ MatchAndEmitInstruction(SMLoc IDLoc,
// that updates the condition codes if it ends in 's'. So see if the
// mnemonic ends in 's' and if so try removing the 's' and adding a CCOut
// operand with a value of CPSR.
- else if(MatchResult == Match_MnemonicFail) {
+ else if (MatchResult == Match_MnemonicFail) {
// Get the instruction mnemonic, which is the first token.
StringRef Mnemonic = ((ARMOperand*)Operands[0])->getToken();
if (Mnemonic.substr(Mnemonic.size()-1) == "s") {
@@ -2174,20 +2363,15 @@ bool ARMAsmParser::ParseDirectiveCode(SMLoc L) {
return Error(Parser.getTok().getLoc(), "unexpected token in directive");
Parser.Lex();
- // FIXME: We need to be able switch subtargets at this point so that
- // MatchInstructionImpl() will work when it gets the AvailableFeatures which
- // includes Feature_IsThumb or not to match the right instructions. This is
- // blocked on the FIXME in llvm-mc.cpp when creating the TargetMachine.
- if (Val == 16){
- assert(TM.getSubtarget<ARMSubtarget>().isThumb() &&
- "switching between arm/thumb not yet suppported via .code 16)");
+ if (Val == 16) {
+ if (!isThumb())
+ SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
- }
- else{
- assert(!TM.getSubtarget<ARMSubtarget>().isThumb() &&
- "switching between thumb/arm not yet suppported via .code 32)");
+ } else {
+ if (isThumb())
+ SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
- }
+ }
return false;
}
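With SwitchMode() wired in, the .code directive now genuinely retargets the matcher instead of asserting. Example assembler input this change enables (illustrative):

    .code 32
    add  r0, r1, r2      @ matched against the ARM instruction set
    .code 16
    adds r0, r1, r2      @ matched against Thumb after the ModeThumb toggle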
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index d3b8b54..21608d0 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -1,18 +1,16 @@
set(LLVM_TARGET_DEFINITIONS ARM.td)
-tablegen(ARMGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(ARMGenRegisterNames.inc -gen-register-enums)
-tablegen(ARMGenRegisterInfo.inc -gen-register-desc)
-tablegen(ARMGenInstrNames.inc -gen-instr-enums)
-tablegen(ARMGenInstrInfo.inc -gen-instr-desc)
+tablegen(ARMGenRegisterInfo.inc -gen-register-info)
+tablegen(ARMGenInstrInfo.inc -gen-instr-info)
tablegen(ARMGenCodeEmitter.inc -gen-emitter)
tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
+tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering)
tablegen(ARMGenAsmWriter.inc -gen-asm-writer)
tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher)
tablegen(ARMGenDAGISel.inc -gen-dag-isel)
tablegen(ARMGenFastISel.inc -gen-fast-isel)
tablegen(ARMGenCallingConv.inc -gen-callingconv)
-tablegen(ARMGenSubtarget.inc -gen-subtarget)
+tablegen(ARMGenSubtargetInfo.inc -gen-subtarget)
tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info)
tablegen(ARMGenDecoderTables.inc -gen-arm-decoder)
@@ -34,10 +32,10 @@ add_llvm_target(ARMCodeGen
ARMISelLowering.cpp
ARMInstrInfo.cpp
ARMJITInfo.cpp
+ ARMMachObjectWriter.cpp
ARMMCCodeEmitter.cpp
ARMMCExpr.cpp
ARMLoadStoreOptimizer.cpp
- ARMMCAsmInfo.cpp
ARMMCInstLower.cpp
ARMRegisterInfo.cpp
ARMSelectionDAGInfo.cpp
@@ -67,3 +65,4 @@ add_subdirectory(TargetInfo)
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index 271ca8c..d89c80a 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -24,8 +24,8 @@
//#define DEBUG(X) do { X; } while (0)
/// ARMGenInstrInfo.inc - ARMGenInstrInfo.inc contains the static const
-/// TargetInstrDesc ARMInsts[] definition and the TargetOperandInfo[]'s
-/// describing the operand info for each ARMInsts[i].
+/// MCInstrDesc ARMInsts[] definition and the MCOperandInfo[]'s describing the
+/// operand info for each ARMInsts[i].
///
/// Together with an instruction's encoding format, we can take advantage of the
/// NumOperands and the OpInfo fields of the target instruction description in
@@ -46,10 +46,10 @@
/// dag DefaultOps = (ops (i32 14), (i32 zero_reg));
/// }
///
-/// which is manifested by the TargetOperandInfo[] of:
+/// which is manifested by the MCOperandInfo[] of:
///
-/// { 0, 0|(1<<TOI::Predicate), 0 },
-/// { ARM::CCRRegClassID, 0|(1<<TOI::Predicate), 0 }
+/// { 0, 0|(1<<MCOI::Predicate), 0 },
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::Predicate), 0 }
///
/// So the first predicate MCOperand corresponds to the immediate part of the
/// ARM condition field (Inst{31-28}), and the second predicate MCOperand
@@ -66,12 +66,14 @@
/// dag DefaultOps = (ops (i32 zero_reg));
/// }
///
-/// which is manifested by the one TargetOperandInfo of:
+/// which is manifested by the one MCOperandInfo of:
///
-/// { ARM::CCRRegClassID, 0|(1<<TOI::OptionalDef), 0 }
+/// { ARM::CCRRegClassID, 0|(1<<MCOI::OptionalDef), 0 }
///
-/// And this maps to one MCOperand with the regsiter kind of ARM::CPSR.
-#include "ARMGenInstrInfo.inc"
+
+namespace llvm {
+extern MCInstrDesc ARMInsts[];
+}
using namespace llvm;
@@ -588,9 +590,9 @@ static bool BadRegsMulFrm(unsigned Opcode, uint32_t insn) {
static bool DisassembleMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- unsigned short NumDefs = TID.getNumDefs();
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -739,9 +741,9 @@ static bool DisassembleCoprocessor(MCInst &MI, unsigned Opcode, uint32_t insn,
if (PW) {
MI.addOperand(MCOperand::CreateReg(0));
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
- const TargetInstrDesc &TID = ARMInsts[Opcode];
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
unsigned IndexMode =
- (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
unsigned Offset = ARM_AM::getAM2Opc(AddrOpcode, slice(insn, 7, 0) << 2,
ARM_AM::no_shift, IndexMode);
MI.addOperand(MCOperand::CreateImm(Offset));
@@ -802,7 +804,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (CoprocessorOpcode(Opcode))
return DisassembleCoprocessor(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
// MRS and MRSsys take one GPR reg Rd.
@@ -901,7 +903,7 @@ static bool DisassembleBrFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleBrMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
unsigned &OpIdx = NumOpsAdded;
@@ -976,10 +978,10 @@ static bool BadRegsDPFrm(unsigned Opcode, uint32_t insn) {
static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- unsigned short NumDefs = TID.getNumDefs();
- bool isUnary = isUnaryDP(TID.TSFlags);
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ bool isUnary = isUnaryDP(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1041,7 +1043,7 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// If this is a two-address operand, skip it, e.g., MOVCCr operand 1.
- if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) {
+ if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
MI.addOperand(MCOperand::CreateReg(0));
++OpIdx;
}
@@ -1089,10 +1091,10 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- unsigned short NumDefs = TID.getNumDefs();
- bool isUnary = isUnaryDP(TID.TSFlags);
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ unsigned short NumDefs = MCID.getNumDefs();
+ bool isUnary = isUnaryDP(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1118,7 +1120,7 @@ static bool DisassembleDPSoRegFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
}
// If this is a two-address operand, skip it, e.g., MOVCCs operand 1.
- if (isUnary && (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1)) {
+ if (isUnary && (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)) {
MI.addOperand(MCOperand::CreateReg(0));
++OpIdx;
}
@@ -1244,17 +1246,17 @@ static bool BadRegsLdStFrm(unsigned Opcode, uint32_t insn, bool Store, bool WBac
static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- bool isPrePost = isPrePostLdSt(TID.TSFlags);
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ bool isPrePost = isPrePostLdSt(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
if (!OpInfo) return false;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
- assert(((!isStore && TID.getNumDefs() > 0) ||
- (isStore && (TID.getNumDefs() == 0 || isPrePost)))
+ assert(((!isStore && MCID.getNumDefs() > 0) ||
+ (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
&& "Invalid arguments");
// Operand 0 of a pre- and post-indexed store is the address base writeback.
@@ -1291,7 +1293,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
+ assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
&& "Index mode or tied_to operand expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
@@ -1308,7 +1310,7 @@ static bool DisassembleLdStFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
unsigned IndexMode =
- (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
if (getIBit(insn) == 0) {
// For pre- and post-indexed case, add a reg0 operand (Addressing Mode #2).
// Otherwise, skip the reg operand since for addrmode_imm12, Rn has already
@@ -1379,17 +1381,17 @@ static bool HasDualReg(unsigned Opcode) {
static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, bool isStore, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- bool isPrePost = isPrePostLdSt(TID.TSFlags);
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ bool isPrePost = isPrePostLdSt(MCID.TSFlags);
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
if (!OpInfo) return false;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
- assert(((!isStore && TID.getNumDefs() > 0) ||
- (isStore && (TID.getNumDefs() == 0 || isPrePost)))
+ assert(((!isStore && MCID.getNumDefs() > 0) ||
+ (isStore && (MCID.getNumDefs() == 0 || isPrePost)))
&& "Invalid arguments");
// Operand 0 of a pre- and post-indexed store is the address base writeback.
@@ -1433,7 +1435,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpInfo[OpIdx].RegClass == ARM::GPRRegClassID &&
"Reg operand expected");
- assert((!isPrePost || (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1))
+ assert((!isPrePost || (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1))
&& "Offset mode or tied_to operand expected");
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRn(insn))));
@@ -1451,7 +1453,7 @@ static bool DisassembleLdStMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
ARM_AM::AddrOpc AddrOpcode = getUBit(insn) ? ARM_AM::add : ARM_AM::sub;
unsigned IndexMode =
- (TID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
+ (MCID.TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
if (getAM3IBit(insn) == 1) {
MI.addOperand(MCOperand::CreateReg(0));
@@ -1539,7 +1541,7 @@ static bool DisassembleLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
unsigned &OpIdx = NumOpsAdded;
@@ -1591,7 +1593,7 @@ static bool DisassembleLdStExFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleArithMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1653,8 +1655,8 @@ static bool DisassembleSatFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
return false;
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
// Disassemble register def.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -1696,7 +1698,7 @@ static bool DisassembleExtFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
if (decodeRd(insn) == 15 || decodeRm(insn) == 15)
return false;
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1802,7 +1804,7 @@ static bool DisassembleVFPUnaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 1 && "VFPUnaryFrm expects NumOps >= 1");
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1842,8 +1844,8 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 && "VFPBinaryFrm expects NumOps >= 3");
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1858,7 +1860,7 @@ static bool DisassembleVFPBinaryFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
// Skip tied_to operand constraint.
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
assert(NumOps >= 4 && "Expect >=4 operands");
MI.addOperand(MCOperand::CreateReg(0));
++OpIdx;
@@ -1886,8 +1888,8 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 2 && "VFPConv1Frm expects NumOps >= 2");
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
if (!OpInfo) return false;
bool SP = slice(insn, 8, 8) == 0; // A8.6.295 & A8.6.297
@@ -1903,7 +1905,7 @@ static bool DisassembleVFPConv1Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
getRegisterEnum(B, RegClassID,
decodeVFPRd(insn, SP))));
- assert(TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
+ assert(MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
"Tied to operand expected");
MI.addOperand(MI.getOperand(0));
@@ -1961,7 +1963,7 @@ static bool DisassembleVFPConv3Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 && "VFPConv3Frm expects NumOps >= 3");
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
@@ -2011,7 +2013,7 @@ static bool DisassembleVFPConv5Frm(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(NumOps >= 3 && "VFPConv5Frm expects NumOps >= 3");
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -2136,7 +2138,7 @@ static bool DisassembleVFPLdStMulFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleVFPMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -2402,8 +2404,8 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, bool Store, bool DblSpaced,
unsigned alignment, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
// At least one DPR register plus addressing mode #6.
assert(NumOps >= 3 && "Expect >= 3 operands");
@@ -2507,7 +2509,7 @@ static bool DisassembleNLdSt0(MCInst &MI, unsigned Opcode, uint32_t insn,
}
while (OpIdx < NumOps && (unsigned)OpInfo[OpIdx].RegClass == RegClass) {
- assert(TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1 &&
+ assert(MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1 &&
"Tied to operand expected");
MI.addOperand(MCOperand::CreateReg(0));
++OpIdx;
@@ -2757,8 +2759,8 @@ static bool DisassembleNLdSt(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleN1RegModImmFrm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
assert(NumOps >= 2 &&
(OpInfo[0].RegClass == ARM::DPRRegClassID ||
@@ -2848,8 +2850,8 @@ enum N2VFlag {
static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, N2VFlag Flag, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opc];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opc];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
assert(NumOps >= 2 &&
(OpInfo[0].RegClass == ARM::DPRRegClassID ||
@@ -2878,7 +2880,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
++OpIdx;
// VPADAL...
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
// TIED_TO operand.
MI.addOperand(MCOperand::CreateReg(0));
++OpIdx;
@@ -2892,7 +2894,7 @@ static bool DisassembleNVdVmOptImm(MCInst &MI, unsigned Opc, uint32_t insn,
// VZIP and others have two TIED_TO reg operands.
int Idx;
while (OpIdx < NumOps &&
- (Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ (Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
// Add TIED_TO operand.
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
@@ -2945,8 +2947,8 @@ static bool DisassembleNVecDupLnFrm(MCInst &MI, unsigned Opc, uint32_t insn,
static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, bool LeftShift, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
assert(NumOps >= 3 &&
(OpInfo[0].RegClass == ARM::DPRRegClassID ||
@@ -2964,7 +2966,7 @@ static bool DisassembleNVectorShift(MCInst &MI, unsigned Opcode, uint32_t insn,
decodeNEONRd(insn))));
++OpIdx;
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
// TIED_TO operand.
MI.addOperand(MCOperand::CreateReg(0));
++OpIdx;
@@ -3044,8 +3046,8 @@ enum N3VFlag {
static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, N3VFlag Flag, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
// No checking for OpInfo[2] because of MOVDneon/MOVQ with only two regs.
assert(NumOps >= 3 &&
@@ -3076,7 +3078,7 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
++OpIdx;
// VABA, VABAL, VBSLd, VBSLq, ...
- if (TID.getOperandConstraint(OpIdx, TOI::TIED_TO) != -1) {
+ if (MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1) {
// TIED_TO operand.
MI.addOperand(MCOperand::CreateReg(0));
++OpIdx;
@@ -3091,11 +3093,6 @@ static bool DisassembleNVdVnVmOptImm(MCInst &MI, unsigned Opcode, uint32_t insn,
: decodeNEONRm(insn))));
++OpIdx;
- // Special case handling for VMOVDneon and VMOVQ because they are marked as
- // N3RegFrm.
- if (Opcode == ARM::VMOVDneon || Opcode == ARM::VMOVQ)
- return true;
-
// Dm = Inst{5:3-0} => NEON Rm
// or
// Dm is restricted to D0-D7 if size is 16, D0-D15 otherwise
@@ -3163,8 +3160,8 @@ static bool DisassembleNVecMulScalarFrm(MCInst &MI, unsigned Opcode,
static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
if (!OpInfo) return false;
assert(NumOps >= 3 &&
@@ -3192,7 +3189,7 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Process tied_to operand constraint.
int Idx;
- if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
}
@@ -3221,11 +3218,11 @@ static bool DisassembleNVTBLFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
if (!OpInfo) return false;
- assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
+ assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
OpInfo[0].RegClass == ARM::GPRRegClassID &&
OpInfo[1].RegClass == ARM::DPRRegClassID &&
OpInfo[2].RegClass < 0 &&
@@ -3255,14 +3252,14 @@ static bool DisassembleNGetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
if (!OpInfo) return false;
- assert(TID.getNumDefs() == 1 && NumOps >= 3 &&
+ assert(MCID.getNumDefs() == 1 && NumOps >= 3 &&
OpInfo[0].RegClass == ARM::DPRRegClassID &&
OpInfo[1].RegClass == ARM::DPRRegClassID &&
- TID.getOperandConstraint(1, TOI::TIED_TO) != -1 &&
+ MCID.getOperandConstraint(1, MCOI::TIED_TO) != -1 &&
OpInfo[2].RegClass == ARM::GPRRegClassID &&
OpInfo[3].RegClass < 0 &&
"Expect >= 3 operands with one dst operand");
@@ -3294,7 +3291,7 @@ static bool DisassembleNSetLnFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleNDupFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 2 &&
(OpInfo[0].RegClass == ARM::DPRRegClassID ||
@@ -3379,7 +3376,7 @@ static bool DisassemblePreLoadFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleMiscFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- if (Opcode == ARM::DMB || Opcode == ARM::DSB) {
+ if (Opcode == ARM::DMB || Opcode == ARM::DSB || Opcode == ARM::ISB) {
// Inst{3-0} encodes the memory barrier option for the variants.
unsigned opt = slice(insn, 3, 0);
switch (opt) {
@@ -3604,11 +3601,11 @@ bool ARMBasicMCBuilder::DoPredicateOperands(MCInst& MI, unsigned Opcode,
assert(NumOpsRemaining > 0 && "Invalid argument");
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned Idx = MI.getNumOperands();
// First, we check whether this instr specifies the PredicateOperand through
- // a pair of TargetOperandInfos with isPredicate() property.
+ // a pair of MCOperandInfos with isPredicate() property.
if (NumOpsRemaining >= 2 &&
OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
OpInfo[Idx].RegClass < 0 &&
@@ -3636,13 +3633,13 @@ bool ARMBasicMCBuilder::TryPredicateAndSBitModifier(MCInst& MI, unsigned Opcode,
assert(NumOpsRemaining > 0 && "Invalid argument");
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
const std::string &Name = ARMInsts[Opcode].Name;
unsigned Idx = MI.getNumOperands();
uint64_t TSFlags = ARMInsts[Opcode].TSFlags;
// First, we check whether this instr specifies the PredicateOperand through
- // a pair of TargetOperandInfos with isPredicate() property.
+ // a pair of MCOperandInfos with isPredicate() property.
if (NumOpsRemaining >= 2 &&
OpInfo[Idx].isPredicate() && OpInfo[Idx+1].isPredicate() &&
OpInfo[Idx].RegClass < 0 &&
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 9639c8a..834c6f6 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -350,7 +350,7 @@ static inline unsigned decodeRotate(uint32_t insn) {
static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -425,8 +425,8 @@ static bool DisassembleThumb1General(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -454,7 +454,7 @@ static bool DisassembleThumb1DP(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpIdx < NumOps && OpInfo[OpIdx].RegClass == ARM::tGPRRegClassID
&& "Thumb reg operand expected");
int Idx;
- if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
// The reg operand is tied to the first reg operand.
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
@@ -511,8 +511,8 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
return true;
}
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -530,7 +530,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(OpIdx < NumOps && "More operands expected");
int Idx;
- if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
// The reg operand is tied to the first reg operand.
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
@@ -554,7 +554,7 @@ static bool DisassembleThumb1Special(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
@@ -602,7 +602,7 @@ static bool DisassembleThumb1LdPC(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
assert(NumOps >= 2 &&
@@ -630,8 +630,8 @@ static bool DisassembleThumb2Ldpci(MCInst &MI, unsigned Opcode,
static bool DisassembleThumb1LdSt(unsigned opA, MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
assert(NumOps >= 2
@@ -680,7 +680,7 @@ static bool DisassembleThumb1LdStSP(MCInst &MI, unsigned Opcode, uint32_t insn,
assert((Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
&& "Unexpected opcode");
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
assert(NumOps >= 3 &&
@@ -708,7 +708,7 @@ static bool DisassembleThumb1AddPCi(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(Opcode == ARM::tADDrPCi && "Unexpected opcode");
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
assert(NumOps >= 2 && OpInfo[0].RegClass == ARM::tGPRRegClassID &&
@@ -733,7 +733,7 @@ static bool DisassembleThumb1AddSPi(MCInst &MI, unsigned Opcode, uint32_t insn,
assert(Opcode == ARM::tADDrSPi && "Unexpected opcode");
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
assert(NumOps >= 3 &&
@@ -810,7 +810,7 @@ static bool DisassembleThumb1Misc(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Opcode == ARM::tPUSH || Opcode == ARM::tPOP)
return DisassembleThumb1PushPop(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
// Predicate operands are handled elsewhere.
if (NumOps == 2 &&
@@ -958,7 +958,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
if (Opcode == ARM::tTRAP)
return true;
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
assert(NumOps == 3 && OpInfo[0].RegClass < 0 &&
@@ -989,7 +989,7 @@ static bool DisassembleThumb1CondBr(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb1Br(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
assert(NumOps == 1 && OpInfo[0].RegClass < 0 && "1 imm operand expected");
@@ -1226,7 +1226,7 @@ static bool DisassembleThumb2LdStMul(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
unsigned &OpIdx = NumOpsAdded;
@@ -1316,7 +1316,7 @@ static bool DisassembleThumb2LdStEx(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb2LdStDual(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
if (!OpInfo) return false;
assert(NumOps >= 4
@@ -1423,8 +1423,8 @@ static inline bool Thumb2ShiftOpcode(unsigned Opcode) {
static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
// Special case handling.
@@ -1467,7 +1467,7 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
if (ThreeReg) {
int Idx;
- if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
// Process tied_to operand constraint.
MI.addOperand(MI.getOperand(Idx));
++OpIdx;
@@ -1521,8 +1521,8 @@ static bool DisassembleThumb2DPSoReg(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1550,7 +1550,7 @@ static bool DisassembleThumb2DPModImm(MCInst &MI, unsigned Opcode,
return false;
}
int Idx;
- if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
// The reg operand is tied to the first reg operand.
MI.addOperand(MI.getOperand(Idx));
} else {
@@ -1590,8 +1590,8 @@ static inline bool Thumb2SaturateOpcode(unsigned Opcode) {
/// o t2SSAT16, t2USAT16: Rs sat_pos Rn
static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- NumOpsAdded = TID.getNumOperands() - 2; // ignore predicate operands
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ NumOpsAdded = MCID.getNumOperands() - 2; // ignore predicate operands
// Disassemble the register def.
MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::rGPRRegClassID,
@@ -1635,8 +1635,8 @@ static bool DisassembleThumb2Sat(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
uint32_t insn, unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -1659,7 +1659,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
if (TwoReg) {
assert(NumOps >= 3 && "Expect >= 3 operands");
int Idx;
- if ((Idx = TID.getOperandConstraint(OpIdx, TOI::TIED_TO)) != -1) {
+ if ((Idx = MCID.getOperandConstraint(OpIdx, MCOI::TIED_TO)) != -1) {
// Process tied_to operand constraint.
MI.addOperand(MI.getOperand(Idx));
} else {
@@ -1907,8 +1907,8 @@ static bool DisassembleThumb2PreLoad(MCInst &MI, unsigned Opcode, uint32_t insn,
// t2PLDs: Rn Rm imm2=Inst{5-4}
// Same pattern applies for t2PLDW* and t2PLI*.
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -2073,8 +2073,8 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
// See, for example, A6.3.7 Load word: Table A6-18 Load word.
if (Load && Rn == 15)
return DisassembleThumb2Ldpci(MI, Opcode, insn, NumOps, NumOpsAdded, B);
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -2085,7 +2085,7 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
"Expect >= 3 operands and first two as reg operands");
bool ThreeReg = (OpInfo[2].RegClass > 0);
- bool TIED_TO = ThreeReg && TID.getOperandConstraint(2, TOI::TIED_TO) != -1;
+ bool TIED_TO = ThreeReg && MCID.getOperandConstraint(2, MCOI::TIED_TO) != -1;
bool Imm12 = !ThreeReg && slice(insn, 23, 23) == 1; // ARMInstrThumb2.td
// Build the register operands, followed by the immediate.
@@ -2160,8 +2160,8 @@ static bool DisassembleThumb2LdSt(bool Load, MCInst &MI, unsigned Opcode,
static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetInstrDesc &TID = ARMInsts[Opcode];
- const TargetOperandInfo *OpInfo = TID.OpInfo;
+ const MCInstrDesc &MCID = ARMInsts[Opcode];
+ const MCOperandInfo *OpInfo = MCID.OpInfo;
unsigned &OpIdx = NumOpsAdded;
OpIdx = 0;
@@ -2214,7 +2214,7 @@ static bool DisassembleThumb2DPReg(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::rGPRRegClassID &&
@@ -2259,7 +2259,7 @@ static bool DisassembleThumb2Mul(MCInst &MI, unsigned Opcode, uint32_t insn,
static bool DisassembleThumb2LongMul(MCInst &MI, unsigned Opcode, uint32_t insn,
unsigned short NumOps, unsigned &NumOpsAdded, BO B) {
- const TargetOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
+ const MCOperandInfo *OpInfo = ARMInsts[Opcode].OpInfo;
assert(NumOps >= 3 &&
OpInfo[0].RegClass == ARM::rGPRRegClassID &&
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index 8ae87f8..78d3e47 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -126,38 +126,6 @@ void ARMInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
}
-static void printSOImm(raw_ostream &O, int64_t V, raw_ostream *CommentStream,
- const MCAsmInfo *MAI) {
- // Break it up into two parts that make up a shifter immediate.
- V = ARM_AM::getSOImmVal(V);
- assert(V != -1 && "Not a valid so_imm value!");
-
- unsigned Imm = ARM_AM::getSOImmValImm(V);
- unsigned Rot = ARM_AM::getSOImmValRot(V);
-
- // Print low-level immediate formation info, per
- // A5.2.3: Data-processing (immediate), and
- // A5.2.4: Modified immediate constants in ARM instructions
- if (Rot) {
- O << "#" << Imm << ", #" << Rot;
- // Pretty printed version.
- if (CommentStream)
- *CommentStream << (int)ARM_AM::rotr32(Imm, Rot) << "\n";
- } else {
- O << "#" << Imm;
- }
-}
-
-
-/// printSOImmOperand - SOImm is 4-bit rotate amount in bits 8-11 with 8-bit
-/// immediate in bits 0-7.
-void ARMInstPrinter::printSOImmOperand(const MCInst *MI, unsigned OpNum,
- raw_ostream &O) {
- const MCOperand &MO = MI->getOperand(OpNum);
- assert(MO.isImm() && "Not a valid so_imm value!");
- printSOImm(O, MO.getImm(), CommentStream, &MAI);
-}
-
// so_reg is a 4-operand unit corresponding to register forms of the A5.1
// "Addressing Mode 1 - Data-processing operands" forms. This includes:
// REG 0 0 - e.g. R5
@@ -174,6 +142,8 @@ void ARMInstPrinter::printSORegOperand(const MCInst *MI, unsigned OpNum,
// Print the shift opc.
ARM_AM::ShiftOpc ShOpc = ARM_AM::getSORegShOp(MO3.getImm());
O << ", " << ARM_AM::getShiftOpcStr(ShOpc);
+ if (ShOpc == ARM_AM::rrx)
+ return;
if (MO2.getReg()) {
O << ' ' << getRegisterName(MO2.getReg());
assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0);
diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
index bde0eb9..d5f238b 100644
--- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h
@@ -19,11 +19,10 @@
namespace llvm {
class MCOperand;
-class TargetMachine;
class ARMInstPrinter : public MCInstPrinter {
public:
- ARMInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ ARMInstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O);
@@ -39,8 +38,6 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSOImmOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
-
void printSORegOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
void printAddrMode2Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O);
diff --git a/lib/Target/ARM/ARMMCAsmInfo.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 53b4c95..53b4c95 100644
--- a/lib/Target/ARM/ARMMCAsmInfo.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
diff --git a/lib/Target/ARM/ARMMCAsmInfo.h b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
index 90f7822..90f7822 100644
--- a/lib/Target/ARM/ARMMCAsmInfo.h
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
new file mode 100644
index 0000000..f8fcf2b
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -0,0 +1,144 @@
+//===-- ARMMCTargetDesc.cpp - ARM Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMMCTargetDesc.h"
+#include "ARMMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_REGINFO_MC_DESC
+#include "ARMGenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "ARMGenSubtargetInfo.inc"
+
+using namespace llvm;
+
+std::string ARM_MC::ParseARMTriple(StringRef TT) {
+ // Determine the ARM architecture version from the triple and map it to a
+ // default subtarget feature string.
+ unsigned Len = TT.size();
+ unsigned Idx = 0;
+
+ // FIXME: Enhance Triple helper class to extract ARM version.
+ bool isThumb = false;
+ if (Len >= 5 && TT.substr(0, 4) == "armv")
+ Idx = 4;
+ else if (Len >= 6 && TT.substr(0, 5) == "thumb") {
+ isThumb = true;
+ if (Len >= 7 && TT[5] == 'v')
+ Idx = 6;
+ }
+
+ std::string ARMArchFeature;
+ if (Idx) {
+ unsigned SubVer = TT[Idx];
+ if (SubVer >= '7' && SubVer <= '9') {
+ if (Len >= Idx+2 && TT[Idx+1] == 'm') {
+ // v7m: FeatureNoARM, FeatureDB, FeatureHWDiv
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv";
+ } else if (Len >= Idx+3 && TT[Idx+1] == 'e' && TT[Idx+2] == 'm') {
+ // v7em: FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2,
+ // FeatureT2XtPk
+ ARMArchFeature = "+v7,+noarm,+db,+hwdiv,+t2dsp,t2xtpk";
+ } else
+ // v7a: FeatureNEON, FeatureDB, FeatureDSPThumb2
+ ARMArchFeature = "+v7,+neon,+db,+t2dsp";
+ } else if (SubVer == '6') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == '2')
+ ARMArchFeature = "+v6t2";
+ else
+ ARMArchFeature = "+v6";
+ } else if (SubVer == '5') {
+ if (Len >= Idx+3 && TT[Idx+1] == 't' && TT[Idx+2] == 'e')
+ ARMArchFeature = "+v5te";
+ else
+ ARMArchFeature = "+v5t";
+ } else if (SubVer == '4' && Len >= Idx+2 && TT[Idx+1] == 't')
+ ARMArchFeature = "+v4t";
+ }
+
+ if (isThumb) {
+ if (ARMArchFeature.empty())
+ ARMArchFeature = "+thumb-mode";
+ else
+ ARMArchFeature += ",+thumb-mode";
+ }
+
+ return ARMArchFeature;
+}
+
+MCSubtargetInfo *ARM_MC::createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = ARM_MC::ParseARMTriple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitARMMCSubtargetInfo(X, TT, CPU, ArchFS);
+ return X;
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeARMMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheARMTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheThumbTarget,
+ ARM_MC::createARMMCSubtargetInfo);
+}
+
+static MCInstrInfo *createARMMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitARMMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeARMMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheARMTarget, createARMMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheThumbTarget, createARMMCInstrInfo);
+}
+
+static MCRegisterInfo *createARMMCRegisterInfo() {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitARMMCRegisterInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeARMMCRegInfo() {
+ TargetRegistry::RegisterMCRegInfo(TheARMTarget, createARMMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheThumbTarget, createARMMCRegisterInfo);
+}
+
+static MCAsmInfo *createARMMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin())
+ return new ARMMCAsmInfoDarwin();
+
+ return new ARMELFMCAsmInfo();
+}
+
+extern "C" void LLVMInitializeARMMCAsmInfo() {
+ // Register the target asm info.
+ RegisterMCAsmInfoFn A(TheARMTarget, createARMMCAsmInfo);
+ RegisterMCAsmInfoFn B(TheThumbTarget, createARMMCAsmInfo);
+}
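
For a concrete sense of what ParseARMTriple computes, here is a minimal sketch
of a caller; it assumes only the ARM_MC interface declared in
ARMMCTargetDesc.h above.

    #include "MCTargetDesc/ARMMCTargetDesc.h"

    // "armv7-apple-darwin" carries architecture version 7 with no 'm'/'em'
    // suffix, so the default feature string is "+v7,+neon,+db,+t2dsp"; a
    // "thumbv6..." triple would additionally append "+thumb-mode".
    std::string FS = llvm::ARM_MC::ParseARMTriple("armv7-apple-darwin");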
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
new file mode 100644
index 0000000..74701e3
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -0,0 +1,52 @@
+//===-- ARMMCTargetDesc.h - ARM Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides ARM specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ARMMCTARGETDESC_H
+#define ARMMCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheARMTarget, TheThumbTarget;
+
+namespace ARM_MC {
+ std::string ParseARMTriple(StringRef TT);
+
+ /// createARMMCSubtargetInfo - Create a ARM MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createARMMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+} // End llvm namespace
+
+// Defines symbolic names for ARM registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "ARMGenRegisterInfo.inc"
+
+// Defines symbolic names for the ARM instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "ARMGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "ARMGenSubtargetInfo.inc"
+
+#endif
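
The GET_*_ENUM includes are what give clients the symbolic names without
pulling in any codegen headers; a sketch of typical use, assuming the
generated .inc files are on the include path:

    #include "MCTargetDesc/ARMMCTargetDesc.h"

    // Opcode enums come from ARMGenInstrInfo.inc, register enums from
    // ARMGenRegisterInfo.inc; both are plain C++ enums.
    unsigned Opc = llvm::ARM::tMOVr;
    unsigned Reg = llvm::ARM::SP;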
diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..68daf42
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMARMDesc
+ ARMMCTargetDesc.cpp
+ ARMMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/ARM/MCTargetDesc/Makefile b/lib/Target/ARM/MCTargetDesc/Makefile
new file mode 100644
index 0000000..448ed9d
--- /dev/null
+++ b/lib/Target/ARM/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/ARM/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMARMDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/MLxExpansionPass.cpp b/lib/Target/ARM/MLxExpansionPass.cpp
index f6d0242..2df0053 100644
--- a/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/lib/Target/ARM/MLxExpansionPass.cpp
@@ -137,11 +137,11 @@ unsigned MLxExpansion::getDefReg(MachineInstr *MI) const {
bool MLxExpansion::hasRAWHazard(unsigned Reg, MachineInstr *MI) const {
// FIXME: Detect integer instructions properly.
- const TargetInstrDesc &TID = MI->getDesc();
- unsigned Domain = TID.TSFlags & ARMII::DomainMask;
- if (TID.mayStore())
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
+ if (MCID.mayStore())
return false;
- unsigned Opcode = TID.getOpcode();
+ unsigned Opcode = MCID.getOpcode();
if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
return false;
if ((Domain & ARMII::DomainVFP) || (Domain & ARMII::DomainNEON))
@@ -218,18 +218,18 @@ MLxExpansion::ExpandFPMLxInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI->getOperand(NextOp).getImm();
unsigned PredReg = MI->getOperand(++NextOp).getReg();
- const TargetInstrDesc &TID1 = TII->get(MulOpc);
- const TargetInstrDesc &TID2 = TII->get(AddSubOpc);
- unsigned TmpReg = MRI->createVirtualRegister(TID1.getRegClass(0, TRI));
+ const MCInstrDesc &MCID1 = TII->get(MulOpc);
+ const MCInstrDesc &MCID2 = TII->get(AddSubOpc);
+ unsigned TmpReg = MRI->createVirtualRegister(TII->getRegClass(MCID1, 0, TRI));
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID1, TmpReg)
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID1, TmpReg)
.addReg(Src1Reg, getKillRegState(Src1Kill))
.addReg(Src2Reg, getKillRegState(Src2Kill));
if (HasLane)
MIB.addImm(LaneImm);
MIB.addImm(Pred).addReg(PredReg);
- MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), TID2)
+ MIB = BuildMI(MBB, *MI, MI->getDebugLoc(), MCID2)
.addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead));
if (NegAcc) {
@@ -273,15 +273,15 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
continue;
}
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.isBarrier()) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.isBarrier()) {
clearStack();
Skip = 0;
++MII;
continue;
}
- unsigned Domain = TID.TSFlags & ARMII::DomainMask;
+ unsigned Domain = MCID.TSFlags & ARMII::DomainMask;
if (Domain == ARMII::DomainGeneral) {
if (++Skip == 2)
// Assume dual issues of non-VFP / NEON instructions.
@@ -291,7 +291,7 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
unsigned MulOpc, AddSubOpc;
bool NegAcc, HasLane;
- if (!TII->isFpMLxInstruction(TID.getOpcode(),
+ if (!TII->isFpMLxInstruction(MCID.getOpcode(),
MulOpc, AddSubOpc, NegAcc, HasLane) ||
!FindMLxHazard(MI))
pushStack(MI);
diff --git a/lib/Target/ARM/Makefile b/lib/Target/ARM/Makefile
index 65a6494..eb8c603 100644
--- a/lib/Target/ARM/Makefile
+++ b/lib/Target/ARM/Makefile
@@ -12,14 +12,14 @@ LIBRARYNAME = LLVMARMCodeGen
TARGET = ARM
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = ARMGenRegisterInfo.h.inc ARMGenRegisterNames.inc \
- ARMGenRegisterInfo.inc ARMGenInstrNames.inc \
- ARMGenInstrInfo.inc ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
- ARMGenDAGISel.inc ARMGenSubtarget.inc \
+BUILT_SOURCES = ARMGenRegisterInfo.inc ARMGenInstrInfo.inc \
+ ARMGenAsmWriter.inc ARMGenAsmMatcher.inc \
+ ARMGenDAGISel.inc ARMGenSubtargetInfo.inc \
ARMGenCodeEmitter.inc ARMGenCallingConv.inc \
ARMGenDecoderTables.inc ARMGenEDInfo.inc \
- ARMGenFastISel.inc ARMGenMCCodeEmitter.inc
+ ARMGenFastISel.inc ARMGenMCCodeEmitter.inc \
+ ARMGenMCPseudoLowering.inc
-DIRS = InstPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 965665c..c85d1e9 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -77,7 +77,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
}
if (inNEONDomain(Domain, isA8)) {
- // Convert VMOVD to VMOVDneon
+ // Convert VMOVD to VORRd
unsigned DestReg = MI->getOperand(0).getReg();
DEBUG({errs() << "vmov convert: "; MI->dump();});
@@ -88,7 +88,8 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
// - The imp-defs / imp-uses are superregs only, we don't care about
// them.
AddDefaultPred(BuildMI(MBB, *MI, MI->getDebugLoc(),
- TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg));
+ TII->get(ARM::VORRd), DestReg)
+ .addReg(SrcReg).addReg(SrcReg));
MBB.erase(MI);
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
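
Why VORRd with a repeated source operand is a correct substitute for VMOVD:
OR-ing a value with itself is the identity, so the instruction is a plain
register move that happens to execute in the NEON domain. The same identity in
scalar form:

    #include <cassert>
    #include <cstdint>

    int main() {
      // vorr d0, d1, d1 computes d1 | d1 == d1, i.e. a move of d1 into d0.
      uint64_t d1 = 0x3ff0000000000000ULL; // arbitrary lane contents
      assert((d1 | d1) == d1);
    }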
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 8ba9a27..2f6842e 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -681,3 +681,21 @@ is compiled and optimized to:
str r1, [r0]
//===---------------------------------------------------------------------===//
+
+Improve codegen for selects:
+if (x != 0) x = 1
+if (x == 1) x = 1
+
+ARM codegen used to look like this:
+ mov r1, r0
+ cmp r1, #1
+ mov r0, #0
+ moveq r0, #1
+
+The naive lowering selects between two different values. It should recognize
+that the test is an equality test, so this is more a conditional move than a
+select:
+ cmp r0, #1
+ movne r0, #0
+
+Currently this is an ARM-specific DAG combine. We should probably make it
+target-neutral.
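
In C source form, the pattern under discussion is roughly (an illustrative
example matching the pseudocode above):

    int canonicalize(int x) {
      if (x != 0) x = 1;   // equivalently: x = (x != 0);
      return x;
    }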
diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp
index dee3d27..c258870 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -136,8 +136,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addFrameIndex(FramePtrSpillFI).addImm(0)
.setMIFlags(MachineInstr::FrameSetup);
- if (NumBytes > 7)
- // If offset is > 7 then sp cannot be adjusted in a single instruction,
+ if (NumBytes > 508)
+ // If offset is > 508 then sp cannot be adjusted in a single instruction,
// try restoring from fp instead.
AFI->setShouldRestoreSPFromFP(true);
}
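
The 508 bound reflects the Thumb1 SP-adjustment encoding: tADDspi/tSUBspi take
a 7-bit immediate implicitly scaled by 4, so a single instruction can move SP
by at most 127 * 4 = 508 bytes. As a sketch (hypothetical helper, not part of
the patch):

    // True if one tADDspi/tSUBspi can adjust SP by Bytes.
    static inline bool fitsInSingleSPAdjust(unsigned Bytes) {
      return (Bytes & 3) == 0 && Bytes <= 127 * 4; // 508
    }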
@@ -160,7 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const {
// will be allocated after this, so we can still use the base pointer
// to reference locals.
if (RegInfo->hasBasePointer(MF))
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), BasePtr).addReg(ARM::SP);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr)
+ .addReg(ARM::SP));
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
@@ -177,7 +178,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
}
static bool isCSRestore(MachineInstr *MI, const unsigned *CSRegs) {
- if (MI->getOpcode() == ARM::tRestore &&
+ if (MI->getOpcode() == ARM::tLDRspi &&
MI->getOperand(1).isFI() &&
isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs))
return true;
@@ -239,11 +240,13 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo);
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
- .addReg(ARM::R4);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(ARM::R4));
} else
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
- .addReg(FramePtr);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),
+ ARM::SP)
+ .addReg(FramePtr));
} else {
if (MBBI->getOpcode() == ARM::tBX_RET &&
&MBB.front() != MBBI &&
@@ -270,8 +273,8 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, VARegSaveSize);
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
- .addReg(ARM::R3, RegState::Kill);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
+ .addReg(ARM::R3, RegState::Kill));
// erase the old tBX_RET instruction
MBB.erase(MBBI);
}
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 3fbb433..218311d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -13,7 +13,6 @@
#include "Thumb1InstrInfo.h"
#include "ARM.h"
-#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -37,18 +36,8 @@ void Thumb1InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
bool KillSrc) const {
- bool tDest = ARM::tGPRRegClass.contains(DestReg);
- bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
- unsigned Opc = ARM::tMOVgpr2gpr;
- if (tDest && tSrc)
- Opc = ARM::tMOVr;
- else if (tSrc)
- Opc = ARM::tMOVtgpr2gpr;
- else if (tDest)
- Opc = ARM::tMOVgpr2tgpr;
-
- BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
assert(ARM::GPRRegClass.contains(DestReg, SrcReg) &&
"Thumb1 can only copy GPR registers");
}
@@ -76,7 +65,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand::MOStore,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSpill))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tSTRspi))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
@@ -105,7 +94,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MachineMemOperand::MOLoad,
MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
}
}
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 6bf5650..4eb0b6c 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -239,13 +239,13 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
unsigned Chunk = (1 << 3) - 1;
unsigned ThisVal = (Bytes > Chunk) ? Chunk : Bytes;
Bytes -= ThisVal;
- const TargetInstrDesc &TID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
+ const MCInstrDesc &MCID = TII.get(isSub ? ARM::tSUBi3 : ARM::tADDi3);
const MachineInstrBuilder MIB =
- AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg).setMIFlags(MIFlags));
+ AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg).setMIFlags(MIFlags));
AddDefaultPred(MIB.addReg(BaseReg, RegState::Kill).addImm(ThisVal));
} else {
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
- .addReg(BaseReg, RegState::Kill)
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), DestReg)
+ .addReg(BaseReg, RegState::Kill))
.setMIFlags(MIFlags);
}
BaseReg = DestReg;
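
To see the chunking above at work: tADDi3/tSUBi3 carry a 3-bit immediate, so
Chunk is 7 and a larger offset is split across repeated adds. A worked sketch:

    // Adding 19 emits three instructions: tADDi3 #7, tADDi3 #7, tADDi3 #5.
    unsigned Bytes = 19;
    const unsigned Chunk = (1 << 3) - 1; // == 7
    while (Bytes) {
      unsigned ThisVal = Bytes > Chunk ? Chunk : Bytes;
      Bytes -= ThisVal; // one tADDi3/tSUBi3 per iteration
    }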
@@ -291,8 +291,8 @@ void llvm::emitThumbRegPlusImmediate(MachineBasicBlock &MBB,
}
if (ExtraOpc) {
- const TargetInstrDesc &TID = TII.get(ExtraOpc);
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
+ const MCInstrDesc &MCID = TII.get(ExtraOpc);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
.addReg(DestReg, RegState::Kill)
.addImm(((unsigned)NumBytes) & 3)
.setMIFlags(MIFlags));
@@ -360,8 +360,8 @@ static void emitThumbConstant(MachineBasicBlock &MBB,
if (Imm > 0)
emitThumbRegPlusImmediate(MBB, MBBI, dl, DestReg, DestReg, Imm, TII, MRI);
if (isSub) {
- const TargetInstrDesc &TID = TII.get(ARM::tRSB);
- AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TID, DestReg))
+ const MCInstrDesc &MCID = TII.get(ARM::tRSB);
+ AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, MCID, DestReg))
.addReg(DestReg, RegState::Kill));
}
}
@@ -377,11 +377,9 @@ static void removeOperands(MachineInstr &MI, unsigned i) {
static unsigned convertToNonSPOpcode(unsigned Opcode) {
switch (Opcode) {
case ARM::tLDRspi:
- case ARM::tRestore: // FIXME: Should this opcode be here?
return ARM::tLDRi;
case ARM::tSTRspi:
- case ARM::tSpill: // FIXME: Should this opcode be here?
return ARM::tSTRi;
}
@@ -396,7 +394,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
MachineBasicBlock &MBB = *MI.getParent();
DebugLoc dl = MI.getDebugLoc();
unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = MI.getDesc();
+ const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
if (Opcode == ARM::tADDrSPi) {
@@ -419,13 +417,12 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
unsigned PredReg;
if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
// Turn it into a move.
- MI.setDesc(TII.get(ARM::tMOVgpr2tgpr));
+ MI.setDesc(TII.get(ARM::tMOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
- // Remove offset and remaining explicit predicate operands.
- do MI.RemoveOperand(FrameRegIdx+1);
- while (MI.getNumOperands() > FrameRegIdx+1 &&
- (!MI.getOperand(FrameRegIdx+1).isReg() ||
- !MI.getOperand(FrameRegIdx+1).isImm()));
+ // Remove offset and add predicate operands.
+ MI.RemoveOperand(FrameRegIdx+1);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
return true;
}
@@ -524,7 +521,7 @@ rewriteFrameIndex(MachineBasicBlock::iterator II, unsigned FrameRegIdx,
// If this is a thumb spill / restore, we will be using a constpool load to
// materialize the offset.
- if (Opcode == ARM::tRestore || Opcode == ARM::tSpill) {
+ if (Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
ImmOp.ChangeToImmediate(0);
} else {
// Otherwise, it didn't fit. Pull in what we can to simplify the immed.
@@ -567,8 +564,9 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
// the function, the offset will be negative. Use R12 instead since that's
// a call clobbered register that we know won't be used in Thumb1 mode.
DebugLoc DL;
- BuildMI(MBB, I, DL, TII.get(ARM::tMOVtgpr2gpr)).
- addReg(ARM::R12, RegState::Define).addReg(Reg, RegState::Kill);
+ AddDefaultPred(BuildMI(MBB, I, DL, TII.get(ARM::tMOVr))
+ .addReg(ARM::R12, RegState::Define)
+ .addReg(Reg, RegState::Kill));
// The UseMI is where we would like to restore the register. If there's
// interference with R12 before then, however, we'll need to restore it
@@ -591,8 +589,8 @@ Thumb1RegisterInfo::saveScavengerRegister(MachineBasicBlock &MBB,
}
}
// Restore the register from R12
- BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVgpr2tgpr)).
- addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill);
+ AddDefaultPred(BuildMI(MBB, UseMI, DL, TII.get(ARM::tMOVr)).
+ addReg(Reg, RegState::Define).addReg(ARM::R12, RegState::Kill));
return true;
}
@@ -653,7 +651,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(Offset && "This code isn't needed if offset already handled!");
unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = MI.getDesc();
+ const MCInstrDesc &Desc = MI.getDesc();
// Remove predicate first.
int PIdx = MI.findFirstPredOperandIdx();
@@ -664,7 +662,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Use the destination register to materialize sp + offset.
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
- if (Opcode == ARM::tRestore) {
+ if (Opcode == ARM::tLDRspi) {
if (FrameReg == ARM::SP)
emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
Offset, false, TII, *this);
@@ -687,7 +685,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
VReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
bool UseRR = false;
- if (Opcode == ARM::tSpill) {
+ if (Opcode == ARM::tSTRspi) {
if (FrameReg == ARM::SP)
emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
Offset, false, TII, *this);
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 45e6937..360ec00 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -98,9 +98,6 @@ static bool isCopy(MachineInstr *MI) {
case ARM::MOVr:
case ARM::MOVr_TC:
case ARM::tMOVr:
- case ARM::tMOVgpr2tgpr:
- case ARM::tMOVtgpr2gpr:
- case ARM::tMOVgpr2gpr:
case ARM::t2MOVr:
return true;
}
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index d169dbb..51b56aa 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -15,7 +15,6 @@
#include "ARM.h"
#include "ARMConstantPoolValue.h"
#include "ARMAddressingModes.h"
-#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -113,18 +112,8 @@ void Thumb2InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
if (!ARM::GPRRegClass.contains(DestReg, SrcReg))
return ARMBaseInstrInfo::copyPhysReg(MBB, I, DL, DestReg, SrcReg, KillSrc);
- bool tDest = ARM::tGPRRegClass.contains(DestReg);
- bool tSrc = ARM::tGPRRegClass.contains(SrcReg);
- unsigned Opc = ARM::tMOVgpr2gpr;
- if (tDest && tSrc)
- Opc = ARM::tMOVr;
- else if (tSrc)
- Opc = ARM::tMOVtgpr2gpr;
- else if (tDest)
- Opc = ARM::tMOVgpr2tgpr;
-
- BuildMI(MBB, I, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc)));
}
void Thumb2InstrInfo::
@@ -232,8 +221,8 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
unsigned Opc = 0;
if (DestReg == ARM::SP && BaseReg != ARM::SP) {
// mov sp, rn. Note t2MOVr cannot be used.
- BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg)
- .addReg(BaseReg).setMIFlags(MIFlags);
+ AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr),DestReg)
+ .addReg(BaseReg).setMIFlags(MIFlags));
BaseReg = ARM::SP;
continue;
}
@@ -252,7 +241,7 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
}
// sub rd, sp, so_imm
- Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi;
+ Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri;
if (ARM_AM::getT2SOImmVal(NumBytes) != -1) {
NumBytes = 0;
} else {
@@ -396,7 +385,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned FrameReg, int &Offset,
const ARMBaseInstrInfo &TII) {
unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = MI.getDesc();
+ const MCInstrDesc &Desc = MI.getDesc();
unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
bool isSub = false;
@@ -410,25 +399,24 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
unsigned PredReg;
if (Offset == 0 && getInstrPredicate(&MI, PredReg) == ARMCC::AL) {
// Turn it into a move.
- MI.setDesc(TII.get(ARM::tMOVgpr2gpr));
+ MI.setDesc(TII.get(ARM::tMOVr));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
// Remove offset and remaining explicit predicate operands.
do MI.RemoveOperand(FrameRegIdx+1);
- while (MI.getNumOperands() > FrameRegIdx+1 &&
- (!MI.getOperand(FrameRegIdx+1).isReg() ||
- !MI.getOperand(FrameRegIdx+1).isImm()));
+ while (MI.getNumOperands() > FrameRegIdx+1);
+ MachineInstrBuilder MIB(&MI);
+ AddDefaultPred(MIB);
return true;
}
- bool isSP = FrameReg == ARM::SP;
bool HasCCOut = Opcode != ARM::t2ADDri12;
if (Offset < 0) {
Offset = -Offset;
isSub = true;
- MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri));
+ MI.setDesc(TII.get(ARM::t2SUBri));
} else {
- MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri));
+ MI.setDesc(TII.get(ARM::t2ADDri));
}
// Common case: small offset, fits into instruction.
@@ -444,9 +432,7 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
// Another common case: imm12.
if (Offset < 4096 &&
(!HasCCOut || MI.getOperand(MI.getNumOperands()-1).getReg() == 0)) {
- unsigned NewOpc = isSP
- ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12)
- : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12);
+ unsigned NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12;
MI.setDesc(TII.get(NewOpc));
MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
@@ -579,8 +565,7 @@ void
Thumb2InstrInfo::scheduleTwoAddrSource(MachineInstr *SrcMI,
MachineInstr *UseMI,
const TargetRegisterInfo &TRI) const {
- if (SrcMI->getOpcode() != ARM::tMOVgpr2gpr ||
- SrcMI->getOperand(1).isKill())
+ if (SrcMI->getOpcode() != ARM::tMOVr || SrcMI->getOperand(1).isKill())
return;
unsigned PredReg = 0;
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index ce2e966..c741a6e 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -57,10 +57,8 @@ namespace {
static const ReduceEntry ReduceTable[] = {
// Wide, Narrow1, Narrow2, imm1,imm2, lo1, lo2, P/C, PF, S
{ ARM::t2ADCrr, 0, ARM::tADC, 0, 0, 0, 1, 0,0, 0,0 },
- { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,0 },
+ { ARM::t2ADDri, ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 0,0, 0,1 },
{ ARM::t2ADDrr, ARM::tADDrr, ARM::tADDhirr, 0, 0, 1, 0, 0,1, 0,0 },
- // Note: immediate scale is 4.
- { ARM::t2ADDrSPi,ARM::tADDrSPi,0, 8, 0, 1, 0, 1,0, 0,1 },
{ ARM::t2ADDSri,ARM::tADDi3, ARM::tADDi8, 3, 8, 1, 1, 2,2, 0,1 },
{ ARM::t2ADDSrr,ARM::tADDrr, 0, 0, 0, 1, 0, 2,0, 0,1 },
{ ARM::t2ANDrr, 0, ARM::tAND, 0, 0, 0, 1, 0,0, 1,0 },
@@ -84,9 +82,7 @@ namespace {
{ ARM::t2MOVi, ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,0 },
{ ARM::t2MOVi16,ARM::tMOVi8, 0, 8, 0, 1, 0, 0,0, 0,1 },
// FIXME: Do we need the 16-bit 'S' variant?
- { ARM::t2MOVr,ARM::tMOVgpr2gpr,0, 0, 0, 0, 0, 1,0, 0,0 },
- { ARM::t2MOVCCr,0, ARM::tMOVCCr, 0, 0, 0, 0, 0,1, 0,0 },
- { ARM::t2MOVCCi,0, ARM::tMOVCCi, 0, 8, 0, 1, 0,1, 0,0 },
+ { ARM::t2MOVr,ARM::tMOVr, 0, 0, 0, 0, 0, 1,0, 0,0 },
{ ARM::t2MUL, 0, ARM::tMUL, 0, 0, 0, 1, 0,0, 1,0 },
{ ARM::t2MVNr, ARM::tMVN, 0, 0, 0, 1, 0, 0,0, 0,0 },
{ ARM::t2ORRrr, 0, ARM::tORR, 0, 0, 0, 1, 0,0, 1,0 },
@@ -189,8 +185,8 @@ Thumb2SizeReduce::Thumb2SizeReduce() : MachineFunctionPass(ID) {
}
}
-static bool HasImplicitCPSRDef(const TargetInstrDesc &TID) {
- for (const unsigned *Regs = TID.ImplicitDefs; *Regs; ++Regs)
+static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) {
+ for (const unsigned *Regs = MCID.ImplicitDefs; *Regs; ++Regs)
if (*Regs == ARM::CPSR)
return true;
return false;
@@ -291,7 +287,7 @@ static bool VerifyLowRegs(MachineInstr *MI) {
Opc == ARM::t2LDMDB || Opc == ARM::t2LDMIA_UPD ||
Opc == ARM::t2LDMDB_UPD);
bool isLROk = (Opc == ARM::t2STMIA_UPD || Opc == ARM::t2STMDB_UPD);
- bool isSPOk = isPCOk || isLROk || (Opc == ARM::t2ADDrSPi);
+ bool isSPOk = isPCOk || isLROk;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isImplicit())
@@ -481,14 +477,54 @@ bool
Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
const ReduceEntry &Entry,
bool LiveCPSR, MachineInstr *CPSRDef) {
+ unsigned Opc = MI->getOpcode();
+ if (Opc == ARM::t2ADDri) {
+ // If the source register is SP, try to reduce to tADDrSPi, otherwise
+ // it's a normal reduce.
+ if (MI->getOperand(1).getReg() != ARM::SP) {
+ if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef))
+ return true;
+ return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
+ }
+ // Try to reduce to tADDrSPi.
+ unsigned Imm = MI->getOperand(2).getImm();
+ // The immediate must be in range, the destination register must be a low
+ // register, the predicate must be "always", and the instruction must not
+ // set the condition flags.
+ if (Imm & 3 || Imm > 1020)
+ return false;
+ if (!isARMLowRegister(MI->getOperand(0).getReg()))
+ return false;
+ if (MI->getOperand(3).getImm() != ARMCC::AL)
+ return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef() &&
+ MI->getOperand(MCID.getNumOperands()-1).getReg() == ARM::CPSR)
+ return false;
+
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, MI->getDebugLoc(),
+ TII->get(ARM::tADDrSPi))
+ .addOperand(MI->getOperand(0))
+ .addOperand(MI->getOperand(1))
+ .addImm(Imm / 4); // tADDrSPi has an implicit scale of four.
+
+ // Transfer MI flags.
+ MIB.setMIFlags(MI->getFlags());
+
+ DEBUG(errs() << "Converted 32-bit: " << *MI << " to 16-bit: " <<*MIB);
+
+ MBB.erase(MI);
+ ++NumNarrows;
+ return true;
+ }
+
if (Entry.LowRegs1 && !VerifyLowRegs(MI))
return false;
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.mayLoad() || TID.mayStore())
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.mayLoad() || MCID.mayStore())
return ReduceLoadStore(MBB, MI, Entry);
- unsigned Opc = MI->getOpcode();
switch (Opc) {
default: break;
case ARM::t2ADDSri:
@@ -531,13 +567,6 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
return true;
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
}
- case ARM::t2ADDrSPi: {
- static const ReduceEntry NarrowEntry =
- { ARM::t2ADDrSPi,ARM::tADDspi, 0, 7, 0, 1, 0, 1, 0, 0,1 };
- if (MI->getOperand(0).getReg() == ARM::SP)
- return ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef);
- return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef);
- }
}
return false;
}
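
The legality test for the new t2ADDri -> tADDrSPi path boils down to one
predicate over the byte offset (a sketch; the value actually encoded is
Imm / 4):

    // tADDrSPi encodes an 8-bit immediate implicitly scaled by 4, so the
    // byte offset must be a multiple of 4 no larger than 255 * 4 = 1020.
    static bool isLegalTADDrSPiOffset(unsigned Imm) {
      return (Imm & 3) == 0 && Imm <= 1020;
    }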
@@ -576,23 +605,23 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
}
// Check if it's possible / necessary to transfer the predicate.
- const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc2);
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc2);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
bool SkipPred = false;
if (Pred != ARMCC::AL) {
- if (!NewTID.isPredicable())
+ if (!NewMCID.isPredicable())
// Can't transfer predicate, fail.
return false;
} else {
- SkipPred = !NewTID.isPredicable();
+ SkipPred = !NewMCID.isPredicable();
}
bool HasCC = false;
bool CCDead = false;
- const TargetInstrDesc &TID = MI->getDesc();
- if (TID.hasOptionalDef()) {
- unsigned NumOps = TID.getNumOperands();
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
if (HasCC && MI->getOperand(NumOps-1).isDead())
CCDead = true;
@@ -602,15 +631,15 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which has the 's' bit set.
- if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
canAddPseudoFlagDep(CPSRDef, MI))
return false;
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
- if (NewTID.hasOptionalDef()) {
+ if (NewMCID.hasOptionalDef()) {
if (HasCC)
AddDefaultT1CC(MIB, CCDead);
else
@@ -618,11 +647,11 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI,
}
// Transfer the rest of operands.
- unsigned NumOps = TID.getNumOperands();
+ unsigned NumOps = MCID.getNumOperands();
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
- if (i < NumOps && TID.OpInfo[i].isOptionalDef())
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
continue;
- if (SkipPred && TID.OpInfo[i].isPredicate())
+ if (SkipPred && MCID.OpInfo[i].isPredicate())
continue;
MIB.addOperand(MI->getOperand(i));
}
@@ -645,47 +674,44 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
return false;
unsigned Limit = ~0U;
- unsigned Scale = (Entry.WideOpc == ARM::t2ADDrSPi) ? 4 : 1;
if (Entry.Imm1Limit)
- Limit = ((1 << Entry.Imm1Limit) - 1) * Scale;
+ Limit = (1 << Entry.Imm1Limit) - 1;
- const TargetInstrDesc &TID = MI->getDesc();
- for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i) {
- if (TID.OpInfo[i].isPredicate())
+ const MCInstrDesc &MCID = MI->getDesc();
+ for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) {
+ if (MCID.OpInfo[i].isPredicate())
continue;
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg()) {
unsigned Reg = MO.getReg();
if (!Reg || Reg == ARM::CPSR)
continue;
- if (Entry.WideOpc == ARM::t2ADDrSPi && Reg == ARM::SP)
- continue;
if (Entry.LowRegs1 && !isARMLowRegister(Reg))
return false;
} else if (MO.isImm() &&
- !TID.OpInfo[i].isPredicate()) {
- if (((unsigned)MO.getImm()) > Limit || (MO.getImm() & (Scale-1)) != 0)
+ !MCID.OpInfo[i].isPredicate()) {
+ if (((unsigned)MO.getImm()) > Limit)
return false;
}
}
// Check if it's possible / necessary to transfer the predicate.
- const TargetInstrDesc &NewTID = TII->get(Entry.NarrowOpc1);
+ const MCInstrDesc &NewMCID = TII->get(Entry.NarrowOpc1);
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
bool SkipPred = false;
if (Pred != ARMCC::AL) {
- if (!NewTID.isPredicable())
+ if (!NewMCID.isPredicable())
// Can't transfer predicate, fail.
return false;
} else {
- SkipPred = !NewTID.isPredicable();
+ SkipPred = !NewMCID.isPredicable();
}
bool HasCC = false;
bool CCDead = false;
- if (TID.hasOptionalDef()) {
- unsigned NumOps = TID.getNumOperands();
+ if (MCID.hasOptionalDef()) {
+ unsigned NumOps = MCID.getNumOperands();
HasCC = (MI->getOperand(NumOps-1).getReg() == ARM::CPSR);
if (HasCC && MI->getOperand(NumOps-1).isDead())
CCDead = true;
@@ -695,15 +721,15 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
// Avoid adding a false dependency on partial flag update by some 16-bit
// instructions which has the 's' bit set.
- if (Entry.PartFlag && NewTID.hasOptionalDef() && HasCC &&
+ if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC &&
canAddPseudoFlagDep(CPSRDef, MI))
return false;
// Add the 16-bit instruction.
DebugLoc dl = MI->getDebugLoc();
- MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewTID);
+ MachineInstrBuilder MIB = BuildMI(MBB, *MI, dl, NewMCID);
MIB.addOperand(MI->getOperand(0));
- if (NewTID.hasOptionalDef()) {
+ if (NewMCID.hasOptionalDef()) {
if (HasCC)
AddDefaultT1CC(MIB, CCDead);
else
@@ -711,29 +737,25 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI,
}
// Transfer the rest of operands.
- unsigned NumOps = TID.getNumOperands();
+ unsigned NumOps = MCID.getNumOperands();
for (unsigned i = 1, e = MI->getNumOperands(); i != e; ++i) {
- if (i < NumOps && TID.OpInfo[i].isOptionalDef())
+ if (i < NumOps && MCID.OpInfo[i].isOptionalDef())
continue;
- if ((TID.getOpcode() == ARM::t2RSBSri ||
- TID.getOpcode() == ARM::t2RSBri) && i == 2)
+ if ((MCID.getOpcode() == ARM::t2RSBSri ||
+ MCID.getOpcode() == ARM::t2RSBri) && i == 2)
// Skip the zero immediate operand, it's now implicit.
continue;
- bool isPred = (i < NumOps && TID.OpInfo[i].isPredicate());
+ bool isPred = (i < NumOps && MCID.OpInfo[i].isPredicate());
if (SkipPred && isPred)
continue;
const MachineOperand &MO = MI->getOperand(i);
- if (Scale > 1 && !isPred && MO.isImm())
- MIB.addImm(MO.getImm() / Scale);
- else {
- if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
- // Skip implicit def of CPSR. Either it's modeled as an optional
- // def now or it's already an implicit def on the new instruction.
- continue;
- MIB.addOperand(MO);
- }
+ if (MO.isReg() && MO.isImplicit() && MO.getReg() == ARM::CPSR)
+ // Skip implicit def of CPSR. Either it's modeled as an optional
+ // def now or it's already an implicit def on the new instruction.
+ continue;
+ MIB.addOperand(MO);
}
- if (!TID.isPredicable() && NewTID.isPredicable())
+ if (!MCID.isPredicable() && NewMCID.isPredicable())
AddDefaultPred(MIB);
// Transfer MI flags.
diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h
index 2c359da..6ffaf45 100644
--- a/lib/Target/Alpha/Alpha.h
+++ b/lib/Target/Alpha/Alpha.h
@@ -15,6 +15,7 @@
#ifndef TARGET_ALPHA_H
#define TARGET_ALPHA_H
+#include "MCTargetDesc/AlphaMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -37,17 +38,6 @@ namespace llvm {
FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm);
FunctionPass *createAlphaBranchSelectionPass();
- extern Target TheAlphaTarget;
-
} // end namespace llvm;
-// Defines symbolic names for Alpha registers. This defines a mapping from
-// register name to register number.
-//
-#include "AlphaGenRegisterNames.inc"
-
-// Defines symbolic names for the Alpha instructions.
-//
-#include "AlphaGenInstrNames.inc"
-
#endif
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index 0875cfd..de003fb 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -122,6 +122,9 @@ AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FPOW , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
setOperationAction(ISD::SETCC, MVT::f32, Promote);
setOperationAction(ISD::BITCAST, MVT::f32, Promote);
@@ -824,41 +827,24 @@ AlphaTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
-std::vector<unsigned> AlphaTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register number of 0 and the
+/// register class pointer.
+std::pair<unsigned, const TargetRegisterClass*> AlphaTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
+{
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- default: break; // Unknown constriant letter
- case 'f':
- return make_vector<unsigned>(Alpha::F0 , Alpha::F1 , Alpha::F2 ,
- Alpha::F3 , Alpha::F4 , Alpha::F5 ,
- Alpha::F6 , Alpha::F7 , Alpha::F8 ,
- Alpha::F9 , Alpha::F10, Alpha::F11,
- Alpha::F12, Alpha::F13, Alpha::F14,
- Alpha::F15, Alpha::F16, Alpha::F17,
- Alpha::F18, Alpha::F19, Alpha::F20,
- Alpha::F21, Alpha::F22, Alpha::F23,
- Alpha::F24, Alpha::F25, Alpha::F26,
- Alpha::F27, Alpha::F28, Alpha::F29,
- Alpha::F30, Alpha::F31, 0);
case 'r':
- return make_vector<unsigned>(Alpha::R0 , Alpha::R1 , Alpha::R2 ,
- Alpha::R3 , Alpha::R4 , Alpha::R5 ,
- Alpha::R6 , Alpha::R7 , Alpha::R8 ,
- Alpha::R9 , Alpha::R10, Alpha::R11,
- Alpha::R12, Alpha::R13, Alpha::R14,
- Alpha::R15, Alpha::R16, Alpha::R17,
- Alpha::R18, Alpha::R19, Alpha::R20,
- Alpha::R21, Alpha::R22, Alpha::R23,
- Alpha::R24, Alpha::R25, Alpha::R26,
- Alpha::R27, Alpha::R28, Alpha::R29,
- Alpha::R30, Alpha::R31, 0);
+ return std::make_pair(0U, Alpha::GPRCRegisterClass);
+ case 'f':
+ return VT == MVT::f64 ? std::make_pair(0U, Alpha::F8RCRegisterClass) :
+ std::make_pair(0U, Alpha::F4RCRegisterClass);
}
}
-
- return std::vector<unsigned>();
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
+
//===----------------------------------------------------------------------===//
// Other Lowering Code
//===----------------------------------------------------------------------===//
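
A sketch of how the new hook is consumed, assuming TLI is an
AlphaTargetLowering instance; a register number of 0 paired with a class means
"any register in that class":

    // 'f' with a 64-bit value resolves to the f64 register class.
    std::pair<unsigned, const llvm::TargetRegisterClass*> RC =
        TLI.getRegForInlineAsmConstraint("f", llvm::MVT::f64);
    // RC.first == 0, RC.second == Alpha::F8RCRegisterClass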
diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h
index d38c314..13383f4 100644
--- a/lib/Target/Alpha/AlphaISelLowering.h
+++ b/lib/Target/Alpha/AlphaISelLowering.h
@@ -94,9 +94,9 @@ namespace llvm {
ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const;
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr *MI,
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 5a2f561..4dcec8f 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -14,17 +14,21 @@
#include "Alpha.h"
#include "AlphaInstrInfo.h"
#include "AlphaMachineFunctionInfo.h"
-#include "AlphaGenInstrInfo.inc"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "AlphaGenInstrInfo.inc"
using namespace llvm;
AlphaInstrInfo::AlphaInstrInfo()
- : TargetInstrInfoImpl(AlphaInsts, array_lengthof(AlphaInsts)),
- RI(*this) { }
+ : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ RI(*this) {
+}
unsigned
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index ee6077a..337a85c 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -17,9 +17,12 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "AlphaRegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "AlphaGenInstrInfo.inc"
+
namespace llvm {
-class AlphaInstrInfo : public TargetInstrInfoImpl {
+class AlphaInstrInfo : public AlphaGenInstrInfo {
const AlphaRegisterInfo RI;
public:
AlphaInstrInfo();
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index d6c3809..df8f157 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -33,10 +33,14 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
+
+#define GET_REGINFO_TARGET_DESC
+#include "AlphaGenRegisterInfo.inc"
+
using namespace llvm;
AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
- : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
+ : AlphaGenRegisterInfo(),
TII(tii) {
}
@@ -204,10 +208,8 @@ int AlphaRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum, bool isEH) const {
return -1;
}
-#include "AlphaGenRegisterInfo.inc"
-
std::string AlphaRegisterInfo::getPrettyName(unsigned reg)
{
- std::string s(RegisterDescriptors[reg].Name);
+ std::string s(AlphaRegDesc[reg].Name);
return s;
}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index ffe6cf1..1072bf7 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -15,7 +15,9 @@
#define ALPHAREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
-#include "AlphaGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "AlphaGenRegisterInfo.inc"
namespace llvm {
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td
index d644f05..32120d7 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.td
+++ b/lib/Target/Alpha/AlphaRegisterInfo.td
@@ -110,10 +110,10 @@ def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>;
// $28 is undefined after any and all calls
/// Register classes
-def GPRC : RegisterClass<"Alpha", [i64], 64,
+def GPRC : RegisterClass<"Alpha", [i64], 64, (add
// Volatile
- [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
- R23, R24, R25, R28,
+ R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22,
+ R23, R24, R25, R28,
//Special meaning, but volatile
R27, //procedure address
R26, //return address
@@ -121,18 +121,13 @@ def GPRC : RegisterClass<"Alpha", [i64], 64,
// Non-volatile
R9, R10, R11, R12, R13, R14,
// Don't allocate 15, 30, 31
- R15, R30, R31 ]>; //zero
+ R15, R30, R31)>; //zero
-def F4RC : RegisterClass<"Alpha", [f32], 64, [F0, F1,
+def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1,
F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
// Saved:
F2, F3, F4, F5, F6, F7, F8, F9,
- F31 ]>; //zero
+ F31)>; //zero
-def F8RC : RegisterClass<"Alpha", [f64], 64, [F0, F1,
- F10, F11, F12, F13, F14, F15, F16, F17, F18, F19,
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
- // Saved:
- F2, F3, F4, F5, F6, F7, F8, F9,
- F31 ]>; //zero
+def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>;
diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp
index bda7104..624a5e2 100644
--- a/lib/Target/Alpha/AlphaSubtarget.cpp
+++ b/lib/Target/Alpha/AlphaSubtarget.cpp
@@ -7,19 +7,30 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the Alpha specific subclass of TargetSubtarget.
+// This file implements the Alpha specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "AlphaSubtarget.h"
#include "Alpha.h"
-#include "AlphaGenSubtarget.inc"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "AlphaGenSubtargetInfo.inc"
+
using namespace llvm;
-AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &FS)
- : HasCT(false) {
- std::string CPU = "generic";
+AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS)
+ : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
// Parse features string.
- ParseSubtargetFeatures(FS, CPU);
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
}
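
The CPU-defaulting convention adopted here is the common one across targets
after this change: an empty CPU string selects the "generic" model before the
feature string is parsed and the itinerary is looked up. A minimal sketch of
the idiom:

    #include <string>

    std::string pickCPU(const std::string &CPU) {
      return CPU.empty() ? std::string("generic") : CPU; // default CPU name
    }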
diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h
index f0eb93c..70b3116 100644
--- a/lib/Target/Alpha/AlphaSubtarget.h
+++ b/lib/Target/Alpha/AlphaSubtarget.h
@@ -7,21 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the Alpha specific subclass of TargetSubtarget.
+// This file declares the Alpha specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef ALPHASUBTARGET_H
#define ALPHASUBTARGET_H
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetSubtarget.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "AlphaGenSubtargetInfo.inc"
+
namespace llvm {
+class StringRef;
-class AlphaSubtarget : public TargetSubtarget {
+class AlphaSubtarget : public AlphaGenSubtargetInfo {
protected:
bool HasCT;
@@ -32,12 +35,12 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- AlphaSubtarget(const std::string &TT, const std::string &FS);
+ AlphaSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool hasCT() const { return HasCT; }
};
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index b53533b..3b65d41 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "Alpha.h"
-#include "AlphaMCAsmInfo.h"
#include "AlphaTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/Support/FormattedStream.h"
@@ -21,15 +20,15 @@ using namespace llvm;
extern "C" void LLVMInitializeAlphaTarget() {
// Register the target.
RegisterTargetMachine<AlphaTargetMachine> X(TheAlphaTarget);
- RegisterAsmInfo<AlphaMCAsmInfo> Y(TheAlphaTarget);
}
AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : LLVMTargetMachine(T, TT),
+ : LLVMTargetMachine(T, TT, CPU, FS),
DataLayout("e-f128:128:128-n64"),
FrameLowering(Subtarget),
- Subtarget(TT, FS),
+ Subtarget(TT, CPU, FS),
TLInfo(*this),
TSInfo(*this) {
setRelocationModel(Reloc::PIC_);
diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h
index 26238fb..cf00e58 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.h
+++ b/lib/Target/Alpha/AlphaTargetMachine.h
@@ -37,7 +37,7 @@ class AlphaTargetMachine : public LLVMTargetMachine {
public:
AlphaTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt
index 454262a..a6027bb 100644
--- a/lib/Target/Alpha/CMakeLists.txt
+++ b/lib/Target/Alpha/CMakeLists.txt
@@ -1,14 +1,11 @@
set(LLVM_TARGET_DEFINITIONS Alpha.td)
-tablegen(AlphaGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(AlphaGenRegisterNames.inc -gen-register-enums)
-tablegen(AlphaGenRegisterInfo.inc -gen-register-desc)
-tablegen(AlphaGenInstrNames.inc -gen-instr-enums)
-tablegen(AlphaGenInstrInfo.inc -gen-instr-desc)
+tablegen(AlphaGenRegisterInfo.inc -gen-register-info)
+tablegen(AlphaGenInstrInfo.inc -gen-instr-info)
tablegen(AlphaGenAsmWriter.inc -gen-asm-writer)
tablegen(AlphaGenDAGISel.inc -gen-dag-isel)
tablegen(AlphaGenCallingConv.inc -gen-callingconv)
-tablegen(AlphaGenSubtarget.inc -gen-subtarget)
+tablegen(AlphaGenSubtargetInfo.inc -gen-subtarget)
add_llvm_target(AlphaCodeGen
AlphaAsmPrinter.cpp
@@ -18,7 +15,6 @@ add_llvm_target(AlphaCodeGen
AlphaISelLowering.cpp
AlphaFrameLowering.cpp
AlphaLLRP.cpp
- AlphaMCAsmInfo.cpp
AlphaRegisterInfo.cpp
AlphaSubtarget.cpp
AlphaTargetMachine.cpp
@@ -26,3 +22,4 @@ add_llvm_target(AlphaCodeGen
)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
index a35e884..a35e884 100644
--- a/lib/Target/Alpha/AlphaMCAsmInfo.cpp
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp
diff --git a/lib/Target/Alpha/AlphaMCAsmInfo.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
index 837844b..837844b 100644
--- a/lib/Target/Alpha/AlphaMCAsmInfo.h
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
new file mode 100644
index 0000000..562052b
--- /dev/null
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp
@@ -0,0 +1,57 @@
+//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AlphaMCTargetDesc.h"
+#include "AlphaMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AlphaGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AlphaGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createAlphaMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAlphaMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeAlphaMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo);
+}
+
+static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitAlphaMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeAlphaMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget,
+ createAlphaMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeAlphaMCAsmInfo() {
+ RegisterMCAsmInfo<AlphaMCAsmInfo> X(TheAlphaTarget);
+}
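
The three extern "C" hooks above are the registration pattern this patch stamps out once per target: each factory news up an MC-layer object and hands it to the TargetRegistry. A rough sketch of the consumer side (hypothetical client code, not part of this patch; it assumes the Alpha TargetInfo initializer is linked in as well):

    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/Target/TargetRegistry.h"
    #include "llvm/ADT/OwningPtr.h"
    using namespace llvm;

    // Defined extern "C" in the target libraries; declared by hand here
    // for the sketch.
    extern "C" void LLVMInitializeAlphaTargetInfo();
    extern "C" void LLVMInitializeAlphaMCInstrInfo();

    static unsigned countAlphaOpcodes() {
      LLVMInitializeAlphaTargetInfo();   // registers TheAlphaTarget
      LLVMInitializeAlphaMCInstrInfo();  // registers the factory above

      std::string Err;
      const Target *T = TargetRegistry::lookupTarget("alpha", Err);
      if (!T) return 0;
      // Dispatches to createAlphaMCInstrInfo() through the registry.
      OwningPtr<const MCInstrInfo> MII(T->createMCInstrInfo());
      return MII.get() ? MII->getNumOpcodes() : 0;
    }
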
diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
new file mode 100644
index 0000000..b0619e6
--- /dev/null
+++ b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Alpha specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ALPHAMCTARGETDESC_H
+#define ALPHAMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheAlphaTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Alpha registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "AlphaGenRegisterInfo.inc"
+
+// Defines symbolic names for the Alpha instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "AlphaGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "AlphaGenSubtargetInfo.inc"
+
+#endif
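
With these re-exports, MC-layer code can name Alpha registers and opcodes without touching the CodeGen headers. A one-line illustration (hedged: it assumes the generated enum keeps the usual Alpha::R31 spelling; R31 is Alpha's hardwired zero register):

    #include "MCTargetDesc/AlphaMCTargetDesc.h"

    // Alpha::R31 comes from the GET_REGINFO_ENUM block included above.
    static bool isZeroReg(unsigned Reg) { return Reg == Alpha::R31; }
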
diff --git a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..ad0dd26
--- /dev/null
+++ b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMAlphaDesc
+ AlphaMCTargetDesc.cpp
+ AlphaMCAsmInfo.cpp
+ )
diff --git a/lib/Target/Alpha/MCTargetDesc/Makefile b/lib/Target/Alpha/MCTargetDesc/Makefile
new file mode 100644
index 0000000..d55175f
--- /dev/null
+++ b/lib/Target/Alpha/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Alpha/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMAlphaDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile
index 9564be6..f48847a 100644
--- a/lib/Target/Alpha/Makefile
+++ b/lib/Target/Alpha/Makefile
@@ -12,12 +12,10 @@ LIBRARYNAME = LLVMAlphaCodeGen
TARGET = Alpha
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = AlphaGenRegisterInfo.h.inc AlphaGenRegisterNames.inc \
- AlphaGenRegisterInfo.inc AlphaGenInstrNames.inc \
- AlphaGenInstrInfo.inc \
+BUILT_SOURCES = AlphaGenRegisterInfo.inc AlphaGenInstrInfo.inc \
AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \
- AlphaGenCallingConv.inc AlphaGenSubtarget.inc
+ AlphaGenCallingConv.inc AlphaGenSubtargetInfo.inc
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h
index ec1fa86..a00ff4c 100644
--- a/lib/Target/Blackfin/Blackfin.h
+++ b/lib/Target/Blackfin/Blackfin.h
@@ -15,6 +15,7 @@
#ifndef TARGET_BLACKFIN_H
#define TARGET_BLACKFIN_H
+#include "MCTargetDesc/BlackfinMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -24,15 +25,7 @@ namespace llvm {
FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM,
CodeGenOpt::Level OptLevel);
- extern Target TheBlackfinTarget;
} // end namespace llvm
-// Defines symbolic names for Blackfin registers. This defines a mapping from
-// register name to register number.
-#include "BlackfinGenRegisterNames.inc"
-
-// Defines symbolic names for the Blackfin instructions.
-#include "BlackfinGenInstrNames.inc"
-
#endif
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index 42659ae..215ca43 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -146,21 +146,21 @@ void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) {
NI != DAG.allnodes_end(); ++NI) {
if (NI->use_empty() || !NI->isMachineOpcode())
continue;
- const TargetInstrDesc &DefTID = TII.get(NI->getMachineOpcode());
+ const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode());
for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) {
if (!UI->isMachineOpcode())
continue;
- if (UI.getUse().getResNo() >= DefTID.getNumDefs())
+ if (UI.getUse().getResNo() >= DefMCID.getNumDefs())
continue;
const TargetRegisterClass *DefRC =
- DefTID.OpInfo[UI.getUse().getResNo()].getRegClass(TRI);
+ TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI);
- const TargetInstrDesc &UseTID = TII.get(UI->getMachineOpcode());
- if (UseTID.getNumDefs()+UI.getOperandNo() >= UseTID.getNumOperands())
+ const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode());
+ if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands())
continue;
const TargetRegisterClass *UseRC =
- UseTID.OpInfo[UseTID.getNumDefs()+UI.getOperandNo()].getRegClass(TRI);
+ TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI);
if (!DefRC || !UseRC)
continue;
// We cannot copy CC <-> !(CC/D)
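
This hunk is one instance of the patch-wide TargetInstrDesc-to-MCInstrDesc migration: operand register classes are no longer read straight out of OpInfo but resolved through TargetInstrInfo, which can consult the register info. A minimal sketch of the before/after pattern (Opc and OpNo are placeholders):

    #include "llvm/MC/MCInstrDesc.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    static const TargetRegisterClass *
    operandRegClass(const TargetInstrInfo &TII, const TargetRegisterInfo *TRI,
                    unsigned Opc, unsigned OpNo) {
      // Old: TII.get(Opc).OpInfo[OpNo].getRegClass(TRI)
      // New: MCInstrDesc carries only MC-level data, so the target-aware
      // lookup now lives on TargetInstrInfo.
      const MCInstrDesc &MCID = TII.get(Opc);
      return TII.getRegClass(MCID, OpNo, TRI);
    }
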
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 588d9bd..d572832 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -621,39 +621,21 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
case 'w': return Pair(0U, ALLRegisterClass);
case 'Z': return Pair(P3, PRegisterClass);
case 'Y': return Pair(P1, PRegisterClass);
+ case 'z': return Pair(0U, zConsRegisterClass);
+ case 'D': return Pair(0U, DConsRegisterClass);
+ case 'W': return Pair(0U, WConsRegisterClass);
+ case 'c': return Pair(0U, cConsRegisterClass);
+ case 't': return Pair(0U, tConsRegisterClass);
+ case 'u': return Pair(0U, uConsRegisterClass);
+ case 'k': return Pair(0U, kConsRegisterClass);
+ case 'y': return Pair(0U, yConsRegisterClass);
}
// Not implemented: q0-q7, qA. Use {R2} etc instead.
- // Constraints z, D, W, c, t, u, k, and y use non-existing classes, defer to
- // getRegClassForInlineAsmConstraint()
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-std::vector<unsigned> BlackfinTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
- using namespace BF;
-
- if (Constraint.size() != 1)
- return std::vector<unsigned>();
-
- switch (Constraint[0]) {
- case 'z': return make_vector<unsigned>(P0, P1, P2, 0);
- case 'D': return make_vector<unsigned>(R0, R2, R4, R6, 0);
- case 'W': return make_vector<unsigned>(R1, R3, R5, R7, 0);
- case 'c': return make_vector<unsigned>(I0, I1, I2, I3,
- B0, B1, B2, B3,
- L0, L1, L2, L3, 0);
- case 't': return make_vector<unsigned>(LT0, LT1, 0);
- case 'u': return make_vector<unsigned>(LB0, LB1, 0);
- case 'k': return make_vector<unsigned>(LC0, LC1, 0);
- case 'y': return make_vector<unsigned>(RETS, RETN, RETI, RETX, RETE,
- ASTAT, SEQSTAT, USP, 0);
- }
-
- return std::vector<unsigned>();
-}
-
bool BlackfinTargetLowering::
isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Blackfin target isn't yet aware of offsets.
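
The deleted hook used to return bare register lists; constraint letters now resolve through the single pair-returning hook against the *Cons classes added to BlackfinRegisterInfo.td later in this patch. A hedged usage sketch (hypothetical helper, not in this patch):

    #include "BlackfinISelLowering.h"
    using namespace llvm;

    // The 't' constraint now yields the real LT0/LT1 class (the generated
    // BF::tConsRegisterClass instance) instead of deferring to the removed
    // getRegClassForInlineAsmConstraint().
    static const TargetRegisterClass *
    loopTopClass(const BlackfinTargetLowering &TLI) {
      return TLI.getRegForInlineAsmConstraint("t", MVT::i32).second;
    }
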
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h
index 9a54557..b65775b 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.h
+++ b/lib/Target/Blackfin/BlackfinISelLowering.h
@@ -48,9 +48,6 @@ namespace llvm {
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
const char *getTargetNodeName(unsigned Opcode) const;
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
index 598cf2a..d190ae7 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.cpp
@@ -14,17 +14,20 @@
#include "BlackfinInstrInfo.h"
#include "BlackfinSubtarget.h"
#include "Blackfin.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
#include "BlackfinGenInstrInfo.inc"
using namespace llvm;
BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST)
- : TargetInstrInfoImpl(BlackfinInsts, array_lengthof(BlackfinInsts)),
+ : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
RI(ST, *this),
Subtarget(ST) {}
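
For readers new to the tblgen changes: GET_INSTRINFO_CTOR pulls a generated base class out of BlackfinGenInstrInfo.inc, roughly shaped like the sketch below (a paraphrase, not the literal generated code). That is why the initializer switches from a descriptor-table pair to the call-frame pseudo opcodes:

    #include "llvm/Target/TargetInstrInfo.h"

    // Paraphrased shape of the GET_INSTRINFO_HEADER/CTOR expansion; the
    // real .inc also defines the ctor body that installs tblgen's
    // MCInstrDesc table.
    namespace llvm {
    struct BlackfinGenInstrInfo : public TargetInstrInfoImpl {
      explicit BlackfinGenInstrInfo(int CFSetupOpcode = -1,
                                    int CFDestroyOpcode = -1);
    };
    }
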
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h
index fdc1029..d22ddf0 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.h
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.h
@@ -17,9 +17,12 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "BlackfinRegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "BlackfinGenInstrInfo.inc"
+
namespace llvm {
- class BlackfinInstrInfo : public TargetInstrInfoImpl {
+ class BlackfinInstrInfo : public BlackfinGenInstrInfo {
const BlackfinRegisterInfo RI;
const BlackfinSubtarget& Subtarget;
public:
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
index 34a8d38..ae8ee9e 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -83,7 +83,7 @@ bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
static const FunctionType *getType(LLVMContext &Context, unsigned id) {
const Type *ResultTy = NULL;
- std::vector<const Type*> ArgTys;
+ std::vector<Type*> ArgTys;
bool IsVarArg = false;
#define GET_INTRINSIC_GENERATOR
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 6ca460e..3a7c104 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -29,13 +29,15 @@
#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
using namespace llvm;
BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st,
const TargetInstrInfo &tii)
- : BlackfinGenRegisterInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP),
- Subtarget(st),
- TII(tii) {}
+ : BlackfinGenRegisterInfo(), Subtarget(st), TII(tii) {}
const unsigned*
BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
@@ -356,6 +358,3 @@ int BlackfinRegisterInfo::getLLVMRegNum(unsigned DwarfRegNum,
llvm_unreachable("What is the dwarf register number");
return -1;
}
-
-#include "BlackfinGenRegisterInfo.inc"
-
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 375d277..86f45c1 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -16,7 +16,9 @@
#define BLACKFINREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
-#include "BlackfinGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "BlackfinGenRegisterInfo.inc"
namespace llvm {
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td
index d8fd302..1c42205 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.td
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.td
@@ -195,108 +195,83 @@ def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>;
def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>;
// Register classes.
-def D16 : RegisterClass<"BF", [i16], 16,
- [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
- R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L]>;
+def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>;
-def D16L : RegisterClass<"BF", [i16], 16,
- [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L]>;
+def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>;
-def D16H : RegisterClass<"BF", [i16], 16,
- [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H]>;
-
-def P16 : RegisterClass<"BF", [i16], 16,
- [P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
- P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>;
+def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>;
def P16L : RegisterClass<"BF", [i16], 16,
- [P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>;
+ (add (sequence "P%uL", 0, 5), SPL, FPL)>;
def P16H : RegisterClass<"BF", [i16], 16,
- [P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>;
+ (add (sequence "P%uH", 0, 5), SPH, FPH)>;
+
+def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>;
-def DP16 : RegisterClass<"BF", [i16], 16,
- [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
- R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L,
- P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
- P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL]>;
+def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>;
-def DP16L : RegisterClass<"BF", [i16], 16,
- [R0L, R1L, R2L, R3L, R4L, R5L, R6L, R7L,
- P0L, P1L, P2L, P3L, P4L, P5L, SPL, FPL]>;
+def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>;
-def DP16H : RegisterClass<"BF", [i16], 16,
- [R0H, R1H, R2H, R3H, R4H, R5H, R6H, R7H,
- P0H, P1H, P2H, P3H, P4H, P5H, SPH, FPH]>;
+def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>;
def GR16 : RegisterClass<"BF", [i16], 16,
- [R0H, R0L, R1H, R1L, R2H, R2L, R3H, R3L,
- R4H, R4L, R5H, R5L, R6H, R6L, R7H, R7L,
- P0H, P0L, P1H, P1L, P2H, P2L, P3H, P3L,
- P4H, P4L, P5H, P5L, SPH, SPL, FPH, FPL,
+ (add DP16,
I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L,
M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L,
B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L,
- L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L]>;
+ L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>;
-def D : RegisterClass<"BF", [i32], 32, [R0, R1, R2, R3, R4, R5, R6, R7]> {
+def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> {
let SubRegClasses = [(D16L lo16), (D16H hi16)];
}
-def P : RegisterClass<"BF", [i32], 32, [P0, P1, P2, P3, P4, P5, FP, SP]> {
+def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> {
let SubRegClasses = [(P16L lo16), (P16H hi16)];
}
-def I : RegisterClass<"BF", [i32], 32, [I0, I1, I2, I3]>;
-def M : RegisterClass<"BF", [i32], 32, [M0, M1, M2, M3]>;
-def B : RegisterClass<"BF", [i32], 32, [B0, B1, B2, B3]>;
-def L : RegisterClass<"BF", [i32], 32, [L0, L1, L2, L3]>;
-
-def DP : RegisterClass<"BF", [i32], 32,
- [R0, R1, R2, R3, R4, R5, R6, R7,
- P0, P1, P2, P3, P4, P5, FP, SP]> {
+def DP : RegisterClass<"BF", [i32], 32, (add D, P)> {
let SubRegClasses = [(DP16L lo16), (DP16H hi16)];
}
-def GR : RegisterClass<"BF", [i32], 32,
- [R0, R1, R2, R3, R4, R5, R6, R7,
- P0, P1, P2, P3, P4, P5,
- I0, I1, I2, I3, M0, M1, M2, M3,
- B0, B1, B2, B3, L0, L1, L2, L3,
- FP, SP]>;
+def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>;
+def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>;
+def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>;
+def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>;
+
+def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>;
def ALL : RegisterClass<"BF", [i32], 32,
- [R0, R1, R2, R3, R4, R5, R6, R7,
- P0, P1, P2, P3, P4, P5,
- I0, I1, I2, I3, M0, M1, M2, M3,
- B0, B1, B2, B3, L0, L1, L2, L3,
- FP, SP,
+ (add GR,
A0X, A0W, A1X, A1W, ASTAT, RETS,
LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2,
- USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT]>;
+ USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>;
-def PI : RegisterClass<"BF", [i32], 32,
- [P0, P1, P2, P3, P4, P5, I0, I1, I2, I3, FP, SP]>;
+def PI : RegisterClass<"BF", [i32], 32, (add P, I)>;
// We are going to pretend that CC and !CC are 32-bit registers, even though
// they only can hold 1 bit.
let CopyCost = -1, Size = 8 in {
-def JustCC : RegisterClass<"BF", [i32], 8, [CC]>;
-def NotCC : RegisterClass<"BF", [i32], 8, [NCC]>;
-def AnyCC : RegisterClass<"BF", [i32], 8, [CC, NCC]> {
- let MethodProtos = [{
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- AnyCCClass::iterator
- AnyCCClass::allocation_order_end(const MachineFunction &MF) const {
- return allocation_order_begin(MF)+1;
- }
- }];
-}
+def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>;
+def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>;
+def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>;
def StatBit : RegisterClass<"BF", [i1], 8,
- [AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]>;
+ (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>;
}
// Should be i40, but that isn't defined. It is not a legal type yet anyway.
-def Accu : RegisterClass<"BF", [i64], 64, [A0, A1]>;
+def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>;
+
+// Register classes to match inline asm constraints.
+def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>;
+def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>;
+def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>;
+def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3,
+ B0, B1, B2, B3,
+ L0, L1, L2, L3)>;
+def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>;
+def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>;
+def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>;
+def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX,
+ RETE, ASTAT, SEQSTAT,
+ USP)>;
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp
index e104c52..ec919cd 100644
--- a/lib/Target/Blackfin/BlackfinSubtarget.cpp
+++ b/lib/Target/Blackfin/BlackfinSubtarget.cpp
@@ -7,18 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the blackfin specific subclass of TargetSubtarget.
+// This file implements the Blackfin-specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "BlackfinSubtarget.h"
-#include "BlackfinGenSubtarget.inc"
+#include "Blackfin.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "BlackfinGenSubtargetInfo.inc"
using namespace llvm;
BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : sdram(false),
+ : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false),
icplb(false),
wa_mi_shift(false),
wa_csync(false),
@@ -30,7 +36,9 @@ BlackfinSubtarget::BlackfinSubtarget(const std::string &TT,
wa_killed_mmr(false),
wa_rets(false)
{
- std::string CPU = "generic";
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
// Parse features string.
- ParseSubtargetFeatures(FS, CPU);
+ ParseSubtargetFeatures(CPUName, FS);
}
diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h
index d667fe2..1a01a81 100644
--- a/lib/Target/Blackfin/BlackfinSubtarget.h
+++ b/lib/Target/Blackfin/BlackfinSubtarget.h
@@ -7,19 +7,23 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the BLACKFIN specific subclass of TargetSubtarget.
+// This file declares the Blackfin-specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef BLACKFIN_SUBTARGET_H
#define BLACKFIN_SUBTARGET_H
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "BlackfinGenSubtargetInfo.inc"
+
namespace llvm {
+class StringRef;
- class BlackfinSubtarget : public TargetSubtarget {
+ class BlackfinSubtarget : public BlackfinGenSubtargetInfo {
bool sdram;
bool icplb;
bool wa_mi_shift;
@@ -32,12 +36,12 @@ namespace llvm {
bool wa_killed_mmr;
bool wa_rets;
public:
- BlackfinSubtarget(const std::string &TT, const std::string &FS);
+ BlackfinSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
};
} // end namespace llvm
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index e11920f..a1c9f1c 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -12,7 +12,6 @@
#include "BlackfinTargetMachine.h"
#include "Blackfin.h"
-#include "BlackfinMCAsmInfo.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetRegistry.h"
@@ -20,16 +19,15 @@ using namespace llvm;
extern "C" void LLVMInitializeBlackfinTarget() {
RegisterTargetMachine<BlackfinTargetMachine> X(TheBlackfinTarget);
- RegisterAsmInfo<BlackfinMCAsmInfo> Y(TheBlackfinTarget);
-
}
BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : LLVMTargetMachine(T, TT),
+ : LLVMTargetMachine(T, TT, CPU, FS),
DataLayout("e-p:32:32-i64:32-f64:32-n32"),
- Subtarget(TT, FS),
+ Subtarget(TT, CPU, FS),
TLInfo(*this),
TSInfo(*this),
InstrInfo(Subtarget),
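
The new CPU string threads from the registry through LLVMTargetMachine down into the subtarget. A hedged driver-side sketch (API spelling as of this patch series; error handling elided):

    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetRegistry.h"
    using namespace llvm;

    static TargetMachine *makeBlackfinTM() {
      std::string Err;
      const Target *T = TargetRegistry::lookupTarget("bfin", Err);
      if (!T) return 0;
      // CPU is now a first-class argument; an empty string falls back to
      // "generic" inside BlackfinSubtarget's constructor.
      return T->createTargetMachine("bfin", /*CPU=*/"", /*Features=*/"");
    }
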
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h
index 29b2b17..bd7dc84 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.h
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.h
@@ -36,7 +36,7 @@ namespace llvm {
BlackfinIntrinsicInfo IntrinsicInfo;
public:
BlackfinTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt
index a47299f..d3f33a9 100644
--- a/lib/Target/Blackfin/CMakeLists.txt
+++ b/lib/Target/Blackfin/CMakeLists.txt
@@ -1,13 +1,10 @@
set(LLVM_TARGET_DEFINITIONS Blackfin.td)
-tablegen(BlackfinGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(BlackfinGenRegisterNames.inc -gen-register-enums)
-tablegen(BlackfinGenRegisterInfo.inc -gen-register-desc)
-tablegen(BlackfinGenInstrNames.inc -gen-instr-enums)
-tablegen(BlackfinGenInstrInfo.inc -gen-instr-desc)
+tablegen(BlackfinGenRegisterInfo.inc -gen-register-info)
+tablegen(BlackfinGenInstrInfo.inc -gen-instr-info)
tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer)
tablegen(BlackfinGenDAGISel.inc -gen-dag-isel)
-tablegen(BlackfinGenSubtarget.inc -gen-subtarget)
+tablegen(BlackfinGenSubtargetInfo.inc -gen-subtarget)
tablegen(BlackfinGenCallingConv.inc -gen-callingconv)
tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic)
@@ -18,7 +15,6 @@ add_llvm_target(BlackfinCodeGen
BlackfinISelDAGToDAG.cpp
BlackfinISelLowering.cpp
BlackfinFrameLowering.cpp
- BlackfinMCAsmInfo.cpp
BlackfinRegisterInfo.cpp
BlackfinSubtarget.cpp
BlackfinTargetMachine.cpp
@@ -26,3 +22,4 @@ add_llvm_target(BlackfinCodeGen
)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
index 5b9d4a2..5b9d4a2 100644
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.cpp
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp
diff --git a/lib/Target/Blackfin/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
index c372aa2..c372aa2 100644
--- a/lib/Target/Blackfin/BlackfinMCAsmInfo.h
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
new file mode 100644
index 0000000..0fa1471
--- /dev/null
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp
@@ -0,0 +1,60 @@
+//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BlackfinMCTargetDesc.h"
+#include "BlackfinMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "BlackfinGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "BlackfinGenRegisterInfo.inc"
+
+using namespace llvm;
+
+
+static MCInstrInfo *createBlackfinMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitBlackfinMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeBlackfinMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget,
+ createBlackfinMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitBlackfinMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeBlackfinMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget,
+ createBlackfinMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeBlackfinMCAsmInfo() {
+ RegisterMCAsmInfo<BlackfinMCAsmInfo> X(TheBlackfinTarget);
+}
diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
new file mode 100644
index 0000000..5bffe94
--- /dev/null
+++ b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Blackfin specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef BLACKFINMCTARGETDESC_H
+#define BLACKFINMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheBlackfinTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Blackfin registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "BlackfinGenRegisterInfo.inc"
+
+// Defines symbolic names for the Blackfin instructions.
+#define GET_INSTRINFO_ENUM
+#include "BlackfinGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "BlackfinGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..8cd924f
--- /dev/null
+++ b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMBlackfinDesc
+ BlackfinMCTargetDesc.cpp
+ BlackfinMCAsmInfo.cpp
+ )
diff --git a/lib/Target/Blackfin/MCTargetDesc/Makefile b/lib/Target/Blackfin/MCTargetDesc/Makefile
new file mode 100644
index 0000000..6b26101
--- /dev/null
+++ b/lib/Target/Blackfin/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Blackfin/MCTargetDesc/Makefile -----------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMBlackfinDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile
index 5eb8e9a..756ac6b 100644
--- a/lib/Target/Blackfin/Makefile
+++ b/lib/Target/Blackfin/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMBlackfinCodeGen
TARGET = Blackfin
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = BlackfinGenRegisterInfo.h.inc BlackfinGenRegisterNames.inc \
- BlackfinGenRegisterInfo.inc BlackfinGenInstrNames.inc \
- BlackfinGenInstrInfo.inc BlackfinGenAsmWriter.inc \
- BlackfinGenDAGISel.inc BlackfinGenSubtarget.inc \
+BUILT_SOURCES = BlackfinGenRegisterInfo.inc BlackfinGenInstrInfo.inc \
+ BlackfinGenAsmWriter.inc \
+ BlackfinGenDAGISel.inc BlackfinGenSubtargetInfo.inc \
BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index fde2e29..415beb1 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -20,7 +20,6 @@
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
-#include "llvm/TypeSymbolTable.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/InlineAsm.h"
@@ -37,6 +36,8 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetRegistry.h"
@@ -61,6 +62,12 @@ extern "C" void LLVMInitializeCBackendTarget() {
RegisterTargetMachine<CTargetMachine> X(TheCBackendTarget);
}
+extern "C" void LLVMInitializeCBackendMCAsmInfo() {}
+
+extern "C" void LLVMInitializeCBackendMCInstrInfo() {}
+
+extern "C" void LLVMInitializeCBackendMCSubtargetInfo() {}
+
namespace {
class CBEMCAsmInfo : public MCAsmInfo {
public:
@@ -69,29 +76,6 @@ namespace {
PrivateGlobalPrefix = "";
}
};
- /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
- /// any unnamed structure types that are used by the program, and merges
- /// external functions with the same name.
- ///
- class CBackendNameAllUsedStructsAndMergeFunctions : public ModulePass {
- public:
- static char ID;
- CBackendNameAllUsedStructsAndMergeFunctions()
- : ModulePass(ID) {
- initializeFindUsedTypesPass(*PassRegistry::getPassRegistry());
- }
- void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<FindUsedTypes>();
- }
-
- virtual const char *getPassName() const {
- return "C backend type canonicalizer";
- }
-
- virtual bool runOnModule(Module &M);
- };
-
- char CBackendNameAllUsedStructsAndMergeFunctions::ID = 0;
/// CWriter - This class is the main chunk of code that converts an LLVM
/// module to a C translation unit.
@@ -104,7 +88,7 @@ namespace {
const MCAsmInfo* TAsm;
MCContext *TCtx;
const TargetData* TD;
- std::map<const Type *, std::string> TypeNames;
+
std::map<const ConstantFP *, unsigned> FPConstantMap;
std::set<Function*> intrinsicPrototypesAlreadyGenerated;
std::set<const Argument*> ByValParams;
@@ -113,6 +97,10 @@ namespace {
DenseMap<const Value*, unsigned> AnonValueNumbers;
unsigned NextAnonValueNumber;
+ /// UnnamedStructIDs - This contains a unique ID for each struct that is
+ /// either anonymous or has no name.
+ DenseMap<const StructType*, unsigned> UnnamedStructIDs;
+
public:
static char ID;
explicit CWriter(formatted_raw_ostream &o)
@@ -158,9 +146,9 @@ namespace {
delete TCtx;
delete TAsm;
FPConstantMap.clear();
- TypeNames.clear();
ByValParams.clear();
intrinsicPrototypesAlreadyGenerated.clear();
+ UnnamedStructIDs.clear();
return false;
}
@@ -177,6 +165,8 @@ namespace {
const AttrListPtr &PAL,
const PointerType *Ty);
+ std::string getStructName(const StructType *ST);
+
/// writeOperandDeref - Print the result of dereferencing the specified
/// operand with '*'. This is equivalent to printing '*' then using
/// writeOperand, but avoids excess syntax in some cases.
@@ -205,9 +195,12 @@ namespace {
std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
void lowerIntrinsics(Function &F);
+    /// Prints the definition of the intrinsic function F. Supports only
+    /// those intrinsics that need to be explicitly defined in the CBackend.
+ void printIntrinsicDefinition(const Function &F, raw_ostream &Out);
- void printModuleTypes(const TypeSymbolTable &ST);
- void printContainedStructs(const Type *Ty, std::set<const Type *> &);
+ void printModuleTypes();
+ void printContainedStructs(const Type *Ty, SmallPtrSet<const Type *, 16> &);
void printFloatingPointConstants(Function &F);
void printFloatingPointConstants(const Constant *C);
void printFunctionSignature(const Function *F, bool Prototype);
@@ -278,7 +271,7 @@ namespace {
return AI;
}
- // isInlineAsm - Check if the instruction is a call to an inline asm chunk
+ // isInlineAsm - Check if the instruction is a call to an inline asm chunk.
static bool isInlineAsm(const Instruction& I) {
if (const CallInst *CI = dyn_cast<CallInst>(&I))
return isa<InlineAsm>(CI->getCalledValue());
@@ -351,6 +344,7 @@ namespace {
char CWriter::ID = 0;
+
static std::string CBEMangle(const std::string &S) {
std::string Result;
@@ -366,90 +360,14 @@ static std::string CBEMangle(const std::string &S) {
return Result;
}
-
-/// This method inserts names for any unnamed structure types that are used by
-/// the program, and removes names from structure types that are not used by the
-/// program.
-///
-bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
- // Get a set of types that are used by the program...
- SetVector<const Type *> UT = getAnalysis<FindUsedTypes>().getTypes();
-
- // Loop over the module symbol table, removing types from UT that are
- // already named, and removing names for types that are not used.
- //
- TypeSymbolTable &TST = M.getTypeSymbolTable();
- for (TypeSymbolTable::iterator TI = TST.begin(), TE = TST.end();
- TI != TE; ) {
- TypeSymbolTable::iterator I = TI++;
-
- // If this isn't a struct or array type, remove it from our set of types
- // to name. This simplifies emission later.
- if (!I->second->isStructTy() && !I->second->isOpaqueTy() &&
- !I->second->isArrayTy()) {
- TST.remove(I);
- } else {
- // If this is not used, remove it from the symbol table.
- if (!UT.count(I->second))
- TST.remove(I);
- else
- UT.remove(I->second); // Only keep one name for this type.
- }
- }
-
- // UT now contains types that are not named. Loop over it, naming
- // structure types.
- //
- bool Changed = false;
- unsigned RenameCounter = 0;
- for (SetVector<const Type *>::const_iterator I = UT.begin(), E = UT.end();
- I != E; ++I)
- if ((*I)->isStructTy() || (*I)->isArrayTy()) {
- while (M.addTypeName("unnamed"+utostr(RenameCounter), *I))
- ++RenameCounter;
- Changed = true;
- }
-
-
- // Loop over all external functions and globals. If we have two with
- // identical names, merge them.
- // FIXME: This code should disappear when we don't allow values with the same
- // names when they have different types!
- std::map<std::string, GlobalValue*> ExtSymbols;
- for (Module::iterator I = M.begin(), E = M.end(); I != E;) {
- Function *GV = I++;
- if (GV->isDeclaration() && GV->hasName()) {
- std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
- = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
- if (!X.second) {
- // Found a conflict, replace this global with the previous one.
- GlobalValue *OldGV = X.first->second;
- GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
- GV->eraseFromParent();
- Changed = true;
- }
- }
- }
- // Do the same for globals.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E;) {
- GlobalVariable *GV = I++;
- if (GV->isDeclaration() && GV->hasName()) {
- std::pair<std::map<std::string, GlobalValue*>::iterator, bool> X
- = ExtSymbols.insert(std::make_pair(GV->getName(), GV));
- if (!X.second) {
- // Found a conflict, replace this global with the previous one.
- GlobalValue *OldGV = X.first->second;
- GV->replaceAllUsesWith(ConstantExpr::getBitCast(OldGV, GV->getType()));
- GV->eraseFromParent();
- Changed = true;
- }
- }
- }
-
- return Changed;
+std::string CWriter::getStructName(const StructType *ST) {
+ if (!ST->isAnonymous() && !ST->getName().empty())
+ return CBEMangle("l_"+ST->getName().str());
+
+ return "l_unnamed_" + utostr(UnnamedStructIDs[ST]);
}
+
/// printStructReturnPointerFunctionType - This is like printType for a struct
/// return type, except, instead of printing the type as void (*)(Struct*, ...)
/// print it as "Struct (*)(...)", for struct return functions.
@@ -463,7 +381,7 @@ void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
bool PrintedType = false;
FunctionType::param_iterator I = FTy->param_begin(), E = FTy->param_end();
- const Type *RetTy = cast<PointerType>(I->get())->getElementType();
+ const Type *RetTy = cast<PointerType>(*I)->getElementType();
unsigned Idx = 1;
for (++I, ++Idx; I != E; ++I, ++Idx) {
if (PrintedType)
@@ -551,12 +469,6 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
return Out;
}
- // Check to see if the type is named.
- if (!IgnoreName || Ty->isOpaqueTy()) {
- std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
- if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
- }
-
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
const FunctionType *FTy = cast<FunctionType>(Ty);
@@ -591,6 +503,11 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
}
case Type::StructTyID: {
const StructType *STy = cast<StructType>(Ty);
+
+ // Check to see if the type is named.
+ if (!IgnoreName)
+ return Out << getStructName(STy) << ' ' << NameSoFar;
+
Out << NameSoFar + " {\n";
unsigned Idx = 0;
for (StructType::element_iterator I = STy->element_begin(),
@@ -631,12 +548,6 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
return Out << "; }";
}
- case Type::OpaqueTyID: {
- std::string TyName = "struct opaque_" + itostr(OpaqueCounter++);
- assert(TypeNames.find(Ty) == TypeNames.end());
- TypeNames[Ty] = TyName;
- return Out << TyName << ' ' << NameSoFar;
- }
default:
llvm_unreachable("Unhandled case in getTypeProps!");
}
@@ -660,7 +571,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
if (isString) {
Out << '\"';
- // Keep track of whether the last number was a hexadecimal escape
+ // Keep track of whether the last number was a hexadecimal escape.
bool LastWasHex = false;
// Do not include the last character, which we know is null
@@ -1751,7 +1662,7 @@ bool CWriter::doInitialization(Module &M) {
std::string E;
if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
- TAsm = Match->createAsmInfo(Triple);
+ TAsm = Match->createMCAsmInfo(Triple);
#endif
TAsm = new CBEMCAsmInfo();
TCtx = new MCContext(*TAsm, NULL);
@@ -1777,6 +1688,7 @@ bool CWriter::doInitialization(Module &M) {
Out << "/* Provide Declarations */\n";
Out << "#include <stdarg.h>\n"; // Varargs support
Out << "#include <setjmp.h>\n"; // Unwind support
+ Out << "#include <limits.h>\n"; // With overflow intrinsics support.
generateCompilerSpecificCode(Out, TD);
// Provide a definition for `bool' if not compiling with a C++ compiler.
@@ -1820,8 +1732,8 @@ bool CWriter::doInitialization(Module &M) {
<< "/* End Module asm statements */\n";
}
- // Loop over the symbol table, emitting all named constants...
- printModuleTypes(M.getTypeSymbolTable());
+  // Emit all of the struct types used in the module.
+ printModuleTypes();
// Global variable declarations...
if (!M.global_empty()) {
@@ -1855,29 +1767,46 @@ bool CWriter::doInitialization(Module &M) {
Out << "float fmodf(float, float);\n";
Out << "long double fmodl(long double, long double);\n";
+ // Store the intrinsics which will be declared/defined below.
+ SmallVector<const Function*, 8> intrinsicsToDefine;
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
// Don't print declarations for intrinsic functions.
- if (!I->isIntrinsic() && I->getName() != "setjmp" &&
- I->getName() != "longjmp" && I->getName() != "_setjmp") {
- if (I->hasExternalWeakLinkage())
- Out << "extern ";
- printFunctionSignature(I, true);
- if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
- Out << " __ATTRIBUTE_WEAK__";
- if (I->hasExternalWeakLinkage())
- Out << " __EXTERNAL_WEAK__";
- if (StaticCtors.count(I))
- Out << " __ATTRIBUTE_CTOR__";
- if (StaticDtors.count(I))
- Out << " __ATTRIBUTE_DTOR__";
- if (I->hasHiddenVisibility())
- Out << " __HIDDEN__";
-
- if (I->hasName() && I->getName()[0] == 1)
- Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
+ // Store the used intrinsics, which need to be explicitly defined.
+ if (I->isIntrinsic()) {
+ switch (I->getIntrinsicID()) {
+ default:
+ break;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ intrinsicsToDefine.push_back(I);
+ break;
+ }
+ continue;
+ }
+
+ if (I->getName() == "setjmp" ||
+ I->getName() == "longjmp" || I->getName() == "_setjmp")
+ continue;
+
+ if (I->hasExternalWeakLinkage())
+ Out << "extern ";
+ printFunctionSignature(I, true);
+ if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
+ Out << " __ATTRIBUTE_WEAK__";
+ if (I->hasExternalWeakLinkage())
+ Out << " __EXTERNAL_WEAK__";
+ if (StaticCtors.count(I))
+ Out << " __ATTRIBUTE_CTOR__";
+ if (StaticDtors.count(I))
+ Out << " __ATTRIBUTE_DTOR__";
+ if (I->hasHiddenVisibility())
+ Out << " __HIDDEN__";
+
+ if (I->hasName() && I->getName()[0] == 1)
+ Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
- Out << ";\n";
- }
+ Out << ";\n";
}
// Output the global variable declarations
@@ -2012,6 +1941,14 @@ bool CWriter::doInitialization(Module &M) {
Out << "return X <= Y ; }\n";
Out << "static inline int llvm_fcmp_oge(double X, double Y) { ";
Out << "return X >= Y ; }\n";
+
+ // Emit definitions of the intrinsics.
+ for (SmallVector<const Function*, 8>::const_iterator
+ I = intrinsicsToDefine.begin(),
+ E = intrinsicsToDefine.end(); I != E; ++I) {
+ printIntrinsicDefinition(**I, Out);
+ }
+
return false;
}
@@ -2085,11 +2022,10 @@ void CWriter::printFloatingPointConstants(const Constant *C) {
}
-
/// printSymbolTable - Run through symbol table looking for type names. If a
/// type name is found, emit its declaration...
///
-void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
+void CWriter::printModuleTypes() {
Out << "/* Helper union for bitcasts */\n";
Out << "typedef union {\n";
Out << " unsigned int Int32;\n";
@@ -2098,46 +2034,42 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
Out << " double Double;\n";
Out << "} llvmBitCastUnion;\n";
- // We are only interested in the type plane of the symbol table.
- TypeSymbolTable::const_iterator I = TST.begin();
- TypeSymbolTable::const_iterator End = TST.end();
+ // Get all of the struct types used in the module.
+ std::vector<StructType*> StructTypes;
+ TheModule->findUsedStructTypes(StructTypes);
- // If there are no type names, exit early.
- if (I == End) return;
+ if (StructTypes.empty()) return;
- // Print out forward declarations for structure types before anything else!
Out << "/* Structure forward decls */\n";
- for (; I != End; ++I) {
- std::string Name = "struct " + CBEMangle("l_"+I->first);
- Out << Name << ";\n";
- TypeNames.insert(std::make_pair(I->second, Name));
- }
- Out << '\n';
+ unsigned NextTypeID = 0;
+
+ // If any of them are missing names, add a unique ID to UnnamedStructIDs.
+ // Print out forward declarations for structure types.
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *ST = StructTypes[i];
- // Now we can print out typedefs. Above, we guaranteed that this can only be
- // for struct or opaque types.
- Out << "/* Typedefs */\n";
- for (I = TST.begin(); I != End; ++I) {
- std::string Name = CBEMangle("l_"+I->first);
- Out << "typedef ";
- printType(Out, I->second, false, Name);
- Out << ";\n";
+ if (ST->isAnonymous() || ST->getName().empty())
+ UnnamedStructIDs[ST] = NextTypeID++;
+
+ std::string Name = getStructName(ST);
+
+ Out << "typedef struct " << Name << ' ' << Name << ";\n";
}
Out << '\n';
- // Keep track of which structures have been printed so far...
- std::set<const Type *> StructPrinted;
+ // Keep track of which structures have been printed so far.
+ SmallPtrSet<const Type *, 16> StructPrinted;
// Loop over all structures then push them into the stack so they are
// printed in the correct order.
//
Out << "/* Structure contents */\n";
- for (I = TST.begin(); I != End; ++I)
- if (I->second->isStructTy() || I->second->isArrayTy())
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i)
+ if (StructTypes[i]->isStructTy())
// Only print out used types!
- printContainedStructs(I->second, StructPrinted);
+ printContainedStructs(StructTypes[i], StructPrinted);
}
// Push the struct onto the stack and recursively push all structs
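
Concretely, for a module with a named %struct.pair and one anonymous struct, the forward-decl loop in printModuleTypes above would emit something like the following (hedged: the _OC_ escape for '.' follows CBEMangle's nibble-to-letter encoding, and the unnamed ID depends on visit order):

    /* Structure forward decls */
    typedef struct l_struct_OC_pair l_struct_OC_pair; /* named %struct.pair */
    typedef struct l_unnamed_0 l_unnamed_0;           /* anonymous struct   */
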
@@ -2146,7 +2078,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
// TODO: Make this work properly with vector types
//
void CWriter::printContainedStructs(const Type *Ty,
- std::set<const Type*> &StructPrinted) {
+ SmallPtrSet<const Type *, 16> &StructPrinted) {
// Don't walk through pointers.
if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
return;
@@ -2156,14 +2088,13 @@ void CWriter::printContainedStructs(const Type *Ty,
E = Ty->subtype_end(); I != E; ++I)
printContainedStructs(*I, StructPrinted);
- if (Ty->isStructTy() || Ty->isArrayTy()) {
+ if (const StructType *ST = dyn_cast<StructType>(Ty)) {
// Check to see if we have already printed this struct.
- if (StructPrinted.insert(Ty).second) {
- // Print structure type out.
- std::string Name = TypeNames[Ty];
- printType(Out, Ty, false, Name, true);
- Out << ";\n\n";
- }
+ if (!StructPrinted.insert(Ty)) return;
+
+ // Print structure type out.
+ printType(Out, ST, false, getStructName(ST), true);
+ Out << ";\n\n";
}
}
@@ -2786,6 +2717,103 @@ void CWriter::visitSelectInst(SelectInst &I) {
Out << "))";
}
+// Prints the limits.h macro name (or the literal value, for the unsigned
+// minimum) for the max or min of an integer type.
+static void printLimitValue(const IntegerType &Ty, bool isSigned, bool isMax,
+ raw_ostream &Out) {
+ const char* type;
+ const char* sprefix = "";
+
+ unsigned NumBits = Ty.getBitWidth();
+ if (NumBits <= 8) {
+ type = "CHAR";
+ sprefix = "S";
+ } else if (NumBits <= 16) {
+ type = "SHRT";
+ } else if (NumBits <= 32) {
+ type = "INT";
+ } else if (NumBits <= 64) {
+ type = "LLONG";
+ } else {
+ llvm_unreachable("Bit widths > 64 not implemented yet");
+ }
+
+ if (isSigned)
+ Out << sprefix << type << (isMax ? "_MAX" : "_MIN");
+  else if (isMax)
+    Out << "U" << type << "_MAX";
+  else
+    Out << "0"; // The minimum of an unsigned type is always 0.
+}
+
+#ifndef NDEBUG
+static bool isSupportedIntegerSize(const IntegerType &T) {
+ return T.getBitWidth() == 8 || T.getBitWidth() == 16 ||
+ T.getBitWidth() == 32 || T.getBitWidth() == 64;
+}
+#endif
+
+void CWriter::printIntrinsicDefinition(const Function &F, raw_ostream &Out) {
+ const FunctionType *funT = F.getFunctionType();
+ const Type *retT = F.getReturnType();
+ const IntegerType *elemT = cast<IntegerType>(funT->getParamType(1));
+
+ assert(isSupportedIntegerSize(*elemT) &&
+ "CBackend does not support arbitrary size integers.");
+ assert(cast<StructType>(retT)->getElementType(0) == elemT &&
+ elemT == funT->getParamType(0) && funT->getNumParams() == 2);
+
+ switch (F.getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unsupported Intrinsic.");
+ case Intrinsic::uadd_with_overflow:
+ // static inline Rty uadd_ixx(unsigned ixx a, unsigned ixx b) {
+ // Rty r;
+ // r.field0 = a + b;
+ // r.field1 = (r.field0 < a);
+ // return r;
+ // }
+ Out << "static inline ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, false);
+ Out << "a,";
+ printSimpleType(Out, elemT, false);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field0 = a + b;\n";
+ Out << " r.field1 = (r.field0 < a);\n";
+ Out << " return r;\n}\n";
+ break;
+
+ case Intrinsic::sadd_with_overflow:
+ // static inline Rty sadd_ixx(ixx a, ixx b) {
+ // Rty r;
+ // r.field1 = (b > 0 && a > XX_MAX - b) ||
+ // (b < 0 && a < XX_MIN - b);
+ // r.field0 = r.field1 ? 0 : a + b;
+ // return r;
+ // }
+ Out << "static ";
+ printType(Out, retT);
+ Out << GetValueName(&F);
+ Out << "(";
+ printSimpleType(Out, elemT, true);
+ Out << "a,";
+ printSimpleType(Out, elemT, true);
+ Out << "b) {\n ";
+ printType(Out, retT);
+ Out << "r;\n";
+ Out << " r.field1 = (b > 0 && a > ";
+ printLimitValue(*elemT, true, true, Out);
+ Out << " - b) || (b < 0 && a < ";
+ printLimitValue(*elemT, true, false, Out);
+ Out << " - b);\n";
+ Out << " r.field0 = r.field1 ? 0 : a + b;\n";
+ Out << " return r;\n}\n";
+ break;
+ }
+}
void CWriter::lowerIntrinsics(Function &F) {
// This is used to keep track of intrinsics that get generated to a lowered
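
For reference, printIntrinsicDefinition would emit roughly the following C for llvm.uadd.with.overflow.i32 (the real function name goes through GetValueName's mangling and the struct typedef through getStructName; both are simplified here, and the tight spacing matches the raw_ostream writes above):

    static inline l_unnamed_0 llvm_uadd_with_overflow_i32(unsigned int a,unsigned int b) {
      l_unnamed_0 r;
      r.field0 = a + b;
      r.field1 = (r.field0 < a);
      return r;
    }
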
@@ -2816,6 +2844,8 @@ void CWriter::lowerIntrinsics(Function &F) {
case Intrinsic::x86_sse2_cmp_sd:
case Intrinsic::x86_sse2_cmp_pd:
case Intrinsic::ppc_altivec_lvsl:
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
// We directly implement these intrinsics
break;
default:
@@ -3109,6 +3139,14 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
writeOperand(I.getArgOperand(0));
Out << ")";
return true;
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ Out << GetValueName(I.getCalledFunction()) << "(";
+ writeOperand(I.getArgOperand(0));
+ Out << ", ";
+ writeOperand(I.getArgOperand(1));
+ Out << ")";
+ return true;
}
}
@@ -3127,7 +3165,7 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
std::string E;
if (const Target *Match = TargetRegistry::lookupTarget(Triple, E))
- TargetAsm = Match->createAsmInfo(Triple);
+ TargetAsm = Match->createMCAsmInfo(Triple);
else
return c.Codes[0];
@@ -3520,7 +3558,8 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
for (const unsigned *b = IVI.idx_begin(), *i = b, *e = IVI.idx_end();
i != e; ++i) {
const Type *IndexedTy =
- ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1);
+ ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(),
+ ArrayRef<unsigned>(b, i+1));
if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
@@ -3541,7 +3580,8 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
for (const unsigned *b = EVI.idx_begin(), *i = b, *e = EVI.idx_end();
i != e; ++i) {
const Type *IndexedTy =
- ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1);
+ ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(),
+ ArrayRef<unsigned>(b, i+1));
if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
@@ -3565,7 +3605,6 @@ bool CTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
PM.add(createGCLoweringPass());
PM.add(createLowerInvokePass());
PM.add(createCFGSimplificationPass()); // clean up after lower invoke.
- PM.add(new CBackendNameAllUsedStructsAndMergeFunctions());
PM.add(new CWriter(o));
PM.add(createGCInfoDeleter());
return false;
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index 6fed195..e64216b 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -20,8 +20,9 @@
namespace llvm {
struct CTargetMachine : public TargetMachine {
- CTargetMachine(const Target &T, const std::string &TT, const std::string &FS)
- : TargetMachine(T) {}
+ CTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : TargetMachine(T, TT, CPU, FS) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt
index 09b48ce..f982316 100644
--- a/lib/Target/CMakeLists.txt
+++ b/lib/Target/CMakeLists.txt
@@ -1,6 +1,5 @@
add_llvm_library(LLVMTarget
Mangler.cpp
- SubtargetFeature.cpp
Target.cpp
TargetAsmInfo.cpp
TargetAsmLexer.cpp
@@ -13,7 +12,7 @@ add_llvm_library(LLVMTarget
TargetLoweringObjectFile.cpp
TargetMachine.cpp
TargetRegisterInfo.cpp
- TargetSubtarget.cpp
+ TargetSubtargetInfo.cpp
)
set(LLVM_ENUM_ASM_PRINTERS "")
diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt
index a2a2ef1..0b94e0c 100644
--- a/lib/Target/CellSPU/CMakeLists.txt
+++ b/lib/Target/CellSPU/CMakeLists.txt
@@ -1,14 +1,11 @@
set(LLVM_TARGET_DEFINITIONS SPU.td)
-tablegen(SPUGenInstrNames.inc -gen-instr-enums)
-tablegen(SPUGenRegisterNames.inc -gen-register-enums)
tablegen(SPUGenAsmWriter.inc -gen-asm-writer)
tablegen(SPUGenCodeEmitter.inc -gen-emitter)
-tablegen(SPUGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(SPUGenRegisterInfo.inc -gen-register-desc)
-tablegen(SPUGenInstrInfo.inc -gen-instr-desc)
+tablegen(SPUGenRegisterInfo.inc -gen-register-info)
+tablegen(SPUGenInstrInfo.inc -gen-instr-info)
tablegen(SPUGenDAGISel.inc -gen-dag-isel)
-tablegen(SPUGenSubtarget.inc -gen-subtarget)
+tablegen(SPUGenSubtargetInfo.inc -gen-subtarget)
tablegen(SPUGenCallingConv.inc -gen-callingconv)
add_llvm_target(CellSPUCodeGen
@@ -18,7 +15,6 @@ add_llvm_target(CellSPUCodeGen
SPUISelDAGToDAG.cpp
SPUISelLowering.cpp
SPUFrameLowering.cpp
- SPUMCAsmInfo.cpp
SPURegisterInfo.cpp
SPUSubtarget.cpp
SPUTargetMachine.cpp
@@ -27,3 +23,4 @@ add_llvm_target(CellSPUCodeGen
)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..85fb258
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMCellSPUDesc
+ SPUMCTargetDesc.cpp
+ SPUMCAsmInfo.cpp
+ )
diff --git a/lib/Target/CellSPU/MCTargetDesc/Makefile b/lib/Target/CellSPU/MCTargetDesc/Makefile
new file mode 100644
index 0000000..10d9a42
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/CellSPU/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMCellSPUDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
index 99aaeb0..8c1176a 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.cpp
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp
@@ -15,6 +15,8 @@
using namespace llvm;
SPULinuxMCAsmInfo::SPULinuxMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+
ZeroDirective = "\t.space\t";
Data64bitsDirective = "\t.quad\t";
AlignmentIsInBytes = false;
diff --git a/lib/Target/CellSPU/SPUMCAsmInfo.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
index 7f850d3..7f850d3 100644
--- a/lib/Target/CellSPU/SPUMCAsmInfo.h
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
new file mode 100644
index 0000000..26c5a4b
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- SPUMCTargetDesc.cpp - Cell SPU Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SPUMCTargetDesc.h"
+#include "SPUMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SPUGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SPUGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSPUMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSPUMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeCellSPUMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheCellSPUTarget, createSPUMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSPUMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSPUMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeCellSPUMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheCellSPUTarget,
+ createSPUMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeCellSPUMCAsmInfo() {
+ RegisterMCAsmInfo<SPULinuxMCAsmInfo> X(TheCellSPUTarget);
+}
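
For reference, a minimal sketch (not part of this patch) of how the hooks
registered above are consumed. The lookupTarget/createMC* calls are the
generic TargetRegistry entry points that pair with the Register* calls; the
"cellspu" triple string and the error handling are illustrative only:

#include "llvm/Target/TargetRegistry.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/ADT/OwningPtr.h"
#include <string>

using namespace llvm;

extern "C" void LLVMInitializeCellSPUTargetInfo();      // TargetInfo library
extern "C" void LLVMInitializeCellSPUMCInstrInfo();     // defined above
extern "C" void LLVMInitializeCellSPUMCSubtargetInfo(); // defined above

static bool haveSPUMCTables() {
  LLVMInitializeCellSPUTargetInfo();
  LLVMInitializeCellSPUMCInstrInfo();
  LLVMInitializeCellSPUMCSubtargetInfo();

  std::string Err;
  const Target *T = TargetRegistry::lookupTarget("cellspu", Err);
  if (!T)
    return false;

  // These dispatch to the createSPUMC* factories registered above.
  OwningPtr<MCInstrInfo> MCII(T->createMCInstrInfo());
  OwningPtr<MCSubtargetInfo> STI(
      T->createMCSubtargetInfo("cellspu", /*CPU=*/"", /*FS=*/""));
  return MCII.get() && STI.get() && MCII->getNumOpcodes() != 0;
}
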
diff --git a/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
new file mode 100644
index 0000000..c5c037d
--- /dev/null
+++ b/lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- SPUMCTargetDesc.h - Cell SPU Target Descriptions ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Cell SPU specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPUMCTARGETDESC_H
+#define SPUMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheCellSPUTarget;
+
+} // End llvm namespace
+
+// Define symbolic names for Cell registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
+
+// Defines symbolic names for the SPU instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SPUGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SPUGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/CellSPU/Makefile b/lib/Target/CellSPU/Makefile
index 77c66be..d7a8247 100644
--- a/lib/Target/CellSPU/Makefile
+++ b/lib/Target/CellSPU/Makefile
@@ -10,12 +10,11 @@
LEVEL = ../../..
LIBRARYNAME = LLVMCellSPUCodeGen
TARGET = SPU
-BUILT_SOURCES = SPUGenInstrNames.inc SPUGenRegisterNames.inc \
+BUILT_SOURCES = SPUGenInstrInfo.inc SPUGenRegisterInfo.inc \
SPUGenAsmWriter.inc SPUGenCodeEmitter.inc \
- SPUGenRegisterInfo.h.inc SPUGenRegisterInfo.inc \
- SPUGenInstrInfo.inc SPUGenDAGISel.inc \
- SPUGenSubtarget.inc SPUGenCallingConv.inc
+ SPUGenDAGISel.inc \
+ SPUGenSubtargetInfo.inc SPUGenCallingConv.inc
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/CellSPU/SPU.h b/lib/Target/CellSPU/SPU.h
index 72f8430..b51fbc7 100644
--- a/lib/Target/CellSPU/SPU.h
+++ b/lib/Target/CellSPU/SPU.h
@@ -15,6 +15,7 @@
#ifndef LLVM_TARGET_IBMCELLSPU_H
#define LLVM_TARGET_IBMCELLSPU_H
+#include "MCTargetDesc/SPUMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -25,11 +26,6 @@ namespace llvm {
FunctionPass *createSPUISelDag(SPUTargetMachine &TM);
FunctionPass *createSPUNopFillerPass(SPUTargetMachine &tm);
- extern Target TheCellSPUTarget;
}
-// Defines symbolic names for the SPU instructions.
-//
-#include "SPUGenInstrNames.inc"
-
#endif /* LLVM_TARGET_IBMCELLSPU_H */
diff --git a/lib/Target/CellSPU/SPUFrameLowering.cpp b/lib/Target/CellSPU/SPUFrameLowering.cpp
index 432f4a1..a3e7e73 100644
--- a/lib/Target/CellSPU/SPUFrameLowering.cpp
+++ b/lib/Target/CellSPU/SPUFrameLowering.cpp
@@ -13,7 +13,6 @@
#include "SPU.h"
#include "SPUFrameLowering.h"
-#include "SPURegisterNames.h"
#include "SPUInstrBuilder.h"
#include "SPUInstrInfo.h"
#include "llvm/Function.h"
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 9351ffd..a297d03 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -16,7 +16,6 @@
#include "SPUTargetMachine.h"
#include "SPUHazardRecognizers.h"
#include "SPUFrameLowering.h"
-#include "SPURegisterNames.h"
#include "SPUTargetMachine.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index f9b5041..f0ceee2 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameLowering.h"
@@ -221,6 +220,9 @@ SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::f64, Expand);
setOperationAction(ISD::FSQRT, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index 080434d..e67b10c 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -11,17 +11,19 @@
//
//===----------------------------------------------------------------------===//
-#include "SPURegisterNames.h"
#include "SPUInstrInfo.h"
#include "SPUInstrBuilder.h"
#include "SPUTargetMachine.h"
-#include "SPUGenInstrInfo.inc"
#include "SPUHazardRecognizers.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/MC/MCContext.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SPUGenInstrInfo.inc"
using namespace llvm;
@@ -51,7 +53,7 @@ namespace {
}
SPUInstrInfo::SPUInstrInfo(SPUTargetMachine &tm)
- : TargetInstrInfoImpl(SPUInsts, sizeof(SPUInsts)/sizeof(SPUInsts[0])),
+ : SPUGenInstrInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
TM(tm),
RI(*TM.getSubtargetImpl(), *this)
{ /* NOP */ }
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index e5e9148..bc1ba71 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -18,9 +18,12 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "SPURegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "SPUGenInstrInfo.inc"
+
namespace llvm {
//! Cell SPU instruction information class
- class SPUInstrInfo : public TargetInstrInfoImpl {
+ class SPUInstrInfo : public SPUGenInstrInfo {
SPUTargetMachine &TM;
const SPURegisterInfo RI;
public:
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 623ae76..19896c0 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -14,7 +14,6 @@
#define DEBUG_TYPE "reginfo"
#include "SPU.h"
#include "SPURegisterInfo.h"
-#include "SPURegisterNames.h"
#include "SPUInstrBuilder.h"
#include "SPUSubtarget.h"
#include "SPUMachineFunction.h"
@@ -43,6 +42,9 @@
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
+#define GET_REGINFO_TARGET_DESC
+#include "SPUGenRegisterInfo.inc"
+
using namespace llvm;
/// getRegisterNumbering - Given the enum value for some register, e.g.
@@ -185,9 +187,7 @@ unsigned SPURegisterInfo::getRegisterNumbering(unsigned RegEnum) {
SPURegisterInfo::SPURegisterInfo(const SPUSubtarget &subtarget,
const TargetInstrInfo &tii) :
- SPUGenRegisterInfo(SPU::ADJCALLSTACKDOWN, SPU::ADJCALLSTACKUP),
- Subtarget(subtarget),
- TII(tii)
+ SPUGenRegisterInfo(), Subtarget(subtarget), TII(tii)
{
}
@@ -371,5 +371,3 @@ SPURegisterInfo::findScratchRegister(MachineBasicBlock::iterator II,
assert( Reg && "Register scavenger failed");
return Reg;
}
-
-#include "SPUGenRegisterInfo.inc"
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 6ecf0f2..5e014f8 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -16,7 +16,9 @@
#define SPU_REGISTERINFO_H
#include "SPU.h"
-#include "SPUGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "SPUGenRegisterInfo.inc"
namespace llvm {
class SPUSubtarget;
diff --git a/lib/Target/CellSPU/SPURegisterInfo.td b/lib/Target/CellSPU/SPURegisterInfo.td
index cce0c82..e16f51f 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.td
+++ b/lib/Target/CellSPU/SPURegisterInfo.td
@@ -155,147 +155,29 @@ def R127 : SPUVecReg<127, "$127">, DwarfRegNum<[127]>;
// The SPU's registers as 128-bit wide entities, and can function as general
// purpose registers, where the operands are in the "preferred slot":
+// The non-volatile registers are allocated in reverse order, as PPC does.
def GPRC : RegisterClass<"SPU", [i128], 128,
- [
- /* volatile register */
- R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
- R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
- R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
- R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
- R77, R78, R79,
- /* non-volatile register: take hint from PPC and allocate in reverse order */
- R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
- R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
- R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
- R86, R85, R84, R83, R82, R81, R80,
- /* environment ptr, SP, LR */
- R2, R1, R0 ]>;
+ (add (sequence "R%u", 0, 79),
+ (sequence "R%u", 127, 80))>;
// The SPU's registers as 64-bit wide (double word integer) "preferred slot":
-def R64C : RegisterClass<"SPU", [i64], 128,
- [
- /* volatile register */
- R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
- R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
- R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
- R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
- R77, R78, R79,
- /* non-volatile register: take hint from PPC and allocate in reverse order */
- R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
- R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
- R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
- R86, R85, R84, R83, R82, R81, R80,
- /* environment ptr, SP, LR */
- R2, R1, R0 ]>;
+def R64C : RegisterClass<"SPU", [i64], 128, (add GPRC)>;
// The SPU's registers as 64-bit wide (double word) FP "preferred slot":
-def R64FP : RegisterClass<"SPU", [f64], 128,
- [
- /* volatile register */
- R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
- R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
- R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
- R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
- R77, R78, R79,
- /* non-volatile register: take hint from PPC and allocate in reverse order */
- R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
- R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
- R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
- R86, R85, R84, R83, R82, R81, R80,
- /* environment ptr, SP, LR */
- R2, R1, R0 ]>;
+def R64FP : RegisterClass<"SPU", [f64], 128, (add GPRC)>;
// The SPU's registers as 32-bit wide (word) "preferred slot":
-def R32C : RegisterClass<"SPU", [i32], 128,
- [
- /* volatile register */
- R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
- R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
- R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
- R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
- R77, R78, R79,
- /* non-volatile register: take hint from PPC and allocate in reverse order */
- R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
- R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
- R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
- R86, R85, R84, R83, R82, R81, R80,
- /* environment ptr, SP, LR */
- R2, R1, R0 ]>;
+def R32C : RegisterClass<"SPU", [i32], 128, (add GPRC)>;
// The SPU's registers as single precision floating point "preferred slot":
-def R32FP : RegisterClass<"SPU", [f32], 128,
- [
- /* volatile register */
- R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
- R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
- R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
- R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
- R77, R78, R79,
- /* non-volatile register: take hint from PPC and allocate in reverse order */
- R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
- R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
- R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
- R86, R85, R84, R83, R82, R81, R80,
- /* environment ptr, SP, LR */
- R2, R1, R0 ]>;
+def R32FP : RegisterClass<"SPU", [f32], 128, (add GPRC)>;
// The SPU's registers as 16-bit wide (halfword) "preferred slot":
-def R16C : RegisterClass<"SPU", [i16], 128,
- [
- /* volatile register */
- R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
- R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
- R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
- R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
- R77, R78, R79,
- /* non-volatile register: take hint from PPC and allocate in reverse order */
- R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
- R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
- R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
- R86, R85, R84, R83, R82, R81, R80,
- /* environment ptr, SP, LR */
- R2, R1, R0 ]>;
+def R16C : RegisterClass<"SPU", [i16], 128, (add GPRC)>;
// The SPU's registers as 8-bit wide (byte) "preferred slot":
-def R8C : RegisterClass<"SPU", [i8], 128,
- [
- /* volatile register */
- R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
- R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
- R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
- R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
- R77, R78, R79,
- /* non-volatile register: take hint from PPC and allocate in reverse order */
- R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
- R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
- R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
- R86, R85, R84, R83, R82, R81, R80,
- /* environment ptr, SP, LR */
- R2, R1, R0 ]>;
+def R8C : RegisterClass<"SPU", [i8], 128, (add GPRC)>;
// The SPU's registers as vector registers:
-def VECREG : RegisterClass<"SPU",
- [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64],
- 128,
- [
- /* volatile register */
- R3, R4, R5, R6, R7, R8, R9, R10, R11, R12, R13, R14, R15, R16,
- R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46,
- R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61,
- R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76,
- R77, R78, R79,
- /* non-volatile register: take hint from PPC and allocate in reverse order */
- R127, R126, R125, R124, R123, R122, R121, R120, R119, R118, R117, R116, R115,
- R114, R113, R112, R111, R110, R109, R108, R107, R106, R105, R104, R103, R102,
- R101, R100, R99, R98, R97, R96, R95, R94, R93, R92, R91, R90, R89, R88, R87,
- R86, R85, R84, R83, R82, R81, R80,
- /* environment ptr, SP, LR */
- R2, R1, R0 ]>;
+def VECREG : RegisterClass<"SPU", [v16i8,v8i16,v4i32,v4f32,v2i64,v2f64], 128,
+ (add GPRC)>;
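
Since each slot view is now defined as (add GPRC), all of these classes cover
the same 128 registers. A hypothetical sanity check in C++ against the
tablegen'd tables (the SPU::<Name>RegClass object naming is assumed from the
generated register-info code):

#include "SPURegisterInfo.h"

using namespace llvm;

// Illustrative only: R32C should have exactly the same members as GPRC.
static bool slotViewsMatchGPRC() {
  return SPU::R32CRegClass.getNumRegs() == SPU::GPRCRegClass.getNumRegs() &&
         SPU::R32CRegClass.contains(SPU::R3) &&
         SPU::R32CRegClass.contains(SPU::R127);
}
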
diff --git a/lib/Target/CellSPU/SPURegisterNames.h b/lib/Target/CellSPU/SPURegisterNames.h
index 6c3afdf..e557ed3 100644
--- a/lib/Target/CellSPU/SPURegisterNames.h
+++ b/lib/Target/CellSPU/SPURegisterNames.h
@@ -13,6 +13,7 @@
// Define symbolic names for Cell registers. This defines a mapping from
// register name to register number.
//
-#include "SPUGenRegisterNames.inc"
+#define GET_REGINFO_ENUM
+#include "SPUGenRegisterInfo.inc"
#endif
diff --git a/lib/Target/CellSPU/SPUSubtarget.cpp b/lib/Target/CellSPU/SPUSubtarget.cpp
index 07c8352..856dc82 100644
--- a/lib/Target/CellSPU/SPUSubtarget.cpp
+++ b/lib/Target/CellSPU/SPUSubtarget.cpp
@@ -7,19 +7,25 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the CellSPU-specific subclass of TargetSubtarget.
+// This file implements the CellSPU-specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "SPUSubtarget.h"
#include "SPU.h"
-#include "SPUGenSubtarget.inc"
-#include "llvm/ADT/SmallVector.h"
#include "SPURegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SPUGenSubtargetInfo.inc"
using namespace llvm;
-SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
+SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS) :
+ SPUGenSubtargetInfo(TT, CPU, FS),
StackAlignment(16),
ProcDirective(SPU::DEFAULT_PROC),
UseLargeMem(false)
@@ -29,7 +35,10 @@ SPUSubtarget::SPUSubtarget(const std::string &TT, const std::string &FS) :
std::string default_cpu("v0");
// Parse features string.
- ParseSubtargetFeatures(FS, default_cpu);
+ ParseSubtargetFeatures(default_cpu, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(default_cpu);
}
/// SetJITMode - This is called to inform the subtarget info that we are
@@ -40,9 +49,9 @@ void SPUSubtarget::SetJITMode() {
/// Enable PostRA scheduling for optimization levels -O2 and -O3.
bool SPUSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& Mode,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
// CriticalPathsRCs seems to be the set of
// RegisterClasses that antidep breakings are performed for.
// Do it for all register classes
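
A short sketch of the reworked constructor contract, for reference: the
subtarget now takes an explicit CPU string and resolves its own scheduling
itinerary. "v0" mirrors the default CPU wired in above, getTargetDataString
is the accessor the TargetMachine uses later in this patch, and the triple
value is a placeholder:

#include "SPUSubtarget.h"
#include <string>

using namespace llvm;

static std::string spuDataLayout(const std::string &Triple) {
  // Third argument is the features string, as before; the CPU is new.
  SPUSubtarget ST(Triple, /*CPU=*/"v0", /*FS=*/"");
  return ST.getTargetDataString();
}
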
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
index d7929302..7c4aa14 100644
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -7,20 +7,23 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the Cell SPU-specific subclass of TargetSubtarget.
+// This file declares the Cell SPU-specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef CELLSUBTARGET_H
#define CELLSUBTARGET_H
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetSubtarget.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "SPUGenSubtargetInfo.inc"
+
namespace llvm {
class GlobalValue;
+ class StringRef;
namespace SPU {
enum {
@@ -29,7 +32,7 @@ namespace llvm {
};
}
- class SPUSubtarget : public TargetSubtarget {
+ class SPUSubtarget : public SPUGenSubtargetInfo {
protected:
/// stackAlignment - The minimum alignment known to hold of the stack frame
/// on entry to the function and which must be maintained by every function.
@@ -50,12 +53,12 @@ namespace llvm {
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- SPUSubtarget(const std::string &TT, const std::string &FS);
+ SPUSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
@@ -86,7 +89,7 @@ namespace llvm {
}
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& Mode,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index 3ed7361..3542a2b 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -12,8 +12,6 @@
//===----------------------------------------------------------------------===//
#include "SPU.h"
-#include "SPURegisterNames.h"
-#include "SPUMCAsmInfo.h"
#include "SPUTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
@@ -25,7 +23,6 @@ using namespace llvm;
extern "C" void LLVMInitializeCellSPUTarget() {
// Register the target.
RegisterTargetMachine<SPUTargetMachine> X(TheCellSPUTarget);
- RegisterAsmInfo<SPULinuxMCAsmInfo> Y(TheCellSPUTarget);
}
const std::pair<unsigned, int> *
@@ -35,9 +32,9 @@ SPUFrameLowering::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
}
SPUTargetMachine::SPUTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS),
+ const std::string &CPU, const std::string &FS)
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
DataLayout(Subtarget.getTargetDataString()),
InstrInfo(*this),
FrameLowering(Subtarget),
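
Clients reach this constructor through the registry, so the CPU string now
threads from the top-level API down to the subtarget. A sketch with
placeholder triple/CPU values; the three-argument createTargetMachine matches
the MBlazeAsmLexer hunk further down:

#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
#include <string>

using namespace llvm;

static TargetMachine *createSPUMachine() {
  std::string Err;
  const Target *T = TargetRegistry::lookupTarget("cellspu", Err);
  // createTargetMachine(TT, CPU, FS): the middle argument is the new one.
  return T ? T->createTargetMachine("cellspu", /*CPU=*/"v0", /*FS=*/"") : 0;
}
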
diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h
index 75abd5e..d96f86d 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.h
+++ b/lib/Target/CellSPU/SPUTargetMachine.h
@@ -39,7 +39,7 @@ class SPUTargetMachine : public LLVMTargetMachine {
InstrItineraryData InstrItins;
public:
SPUTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
/// Return the subtarget implementation object
virtual const SPUSubtarget *getSubtargetImpl() const {
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 797cfd5..10d18f6 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -22,7 +22,9 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
-#include "llvm/TypeSymbolTable.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -32,7 +34,7 @@
#include "llvm/Config/config.h"
#include <algorithm>
#include <set>
-
+#include <map>
using namespace llvm;
static cl::opt<std::string>
@@ -75,6 +77,16 @@ extern "C" void LLVMInitializeCppBackendTarget() {
RegisterTargetMachine<CPPTargetMachine> X(TheCppBackendTarget);
}
+extern "C" void LLVMInitializeCppBackendMCAsmInfo() {}
+
+extern "C" void LLVMInitializeCppBackendMCInstrInfo() {
+ RegisterMCInstrInfo<MCInstrInfo> X(TheCppBackendTarget);
+}
+
+extern "C" void LLVMInitializeCppBackendMCSubtargetInfo() {
+ RegisterMCSubtargetInfo<MCSubtargetInfo> X(TheCppBackendTarget);
+}
+
namespace {
typedef std::vector<const Type*> TypeList;
typedef std::map<const Type*,std::string> TypeMap;
@@ -92,8 +104,6 @@ namespace {
uint64_t uniqueNum;
TypeMap TypeNames;
ValueMap ValueNames;
- TypeMap UnresolvedTypes;
- TypeList TypeStack;
NameSet UsedNames;
TypeSet DefinedTypes;
ValueSet DefinedValues;
@@ -140,8 +150,7 @@ namespace {
inline void printCppName(const Value* val);
void printAttributes(const AttrListPtr &PAL, const std::string &name);
- bool printTypeInternal(const Type* Ty);
- inline void printType(const Type* Ty);
+ void printType(const Type* Ty);
void printTypes(const Module* M);
void printConstant(const Constant *CPV);
@@ -188,26 +197,11 @@ static std::string getTypePrefix(const Type *Ty) {
case Type::ArrayTyID: return "array_";
case Type::PointerTyID: return "ptr_";
case Type::VectorTyID: return "packed_";
- case Type::OpaqueTyID: return "opaque_";
default: return "other_";
}
return "unknown_";
}
-// Looks up the type in the symbol table and returns a pointer to its name or
-// a null pointer if it wasn't found. Note that this isn't the same as the
-// Mode::getTypeName function which will return an empty string, not a null
-// pointer if the name is not found.
-static const std::string *
-findTypeName(const TypeSymbolTable& ST, const Type* Ty) {
- TypeSymbolTable::const_iterator TI = ST.begin();
- TypeSymbolTable::const_iterator TE = ST.end();
- for (;TI != TE; ++TI)
- if (TI->second == Ty)
- return &(TI->first);
- return 0;
-}
-
void CppWriter::error(const std::string& msg) {
report_fatal_error(msg);
}
@@ -379,18 +373,20 @@ std::string CppWriter::getCppName(const Type* Ty) {
case Type::StructTyID: prefix = "StructTy_"; break;
case Type::ArrayTyID: prefix = "ArrayTy_"; break;
case Type::PointerTyID: prefix = "PointerTy_"; break;
- case Type::OpaqueTyID: prefix = "OpaqueTy_"; break;
case Type::VectorTyID: prefix = "VectorTy_"; break;
default: prefix = "OtherTy_"; break; // prevent breakage
}
// See if the type has a name in the symboltable and build accordingly
- const std::string* tName = findTypeName(TheModule->getTypeSymbolTable(), Ty);
std::string name;
- if (tName)
- name = std::string(prefix) + *tName;
- else
- name = std::string(prefix) + utostr(uniqueNum++);
+ if (const StructType *STy = dyn_cast<StructType>(Ty))
+ if (STy->hasName())
+ name = STy->getName();
+
+ if (name.empty())
+ name = utostr(uniqueNum++);
+
+ name = std::string(prefix) + name;
sanitize(name);
// Save the name
@@ -503,65 +499,38 @@ void CppWriter::printAttributes(const AttrListPtr &PAL,
}
}
-bool CppWriter::printTypeInternal(const Type* Ty) {
+void CppWriter::printType(const Type* Ty) {
// We don't print definitions for primitive types
if (Ty->isPrimitiveType() || Ty->isIntegerTy())
- return false;
+ return;
// If we already defined this type, we don't need to define it again.
if (DefinedTypes.find(Ty) != DefinedTypes.end())
- return false;
+ return;
// Everything below needs the name for the type so get it now.
std::string typeName(getCppName(Ty));
- // Search the type stack for recursion. If we find it, then generate this
- // as an OpaqueType, but make sure not to do this multiple times because
- // the type could appear in multiple places on the stack. Once the opaque
- // definition is issued, it must not be re-issued. Consequently we have to
- // check the UnresolvedTypes list as well.
- TypeList::const_iterator TI = std::find(TypeStack.begin(), TypeStack.end(),
- Ty);
- if (TI != TypeStack.end()) {
- TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
- if (I == UnresolvedTypes.end()) {
- Out << "PATypeHolder " << typeName;
- Out << "_fwd = OpaqueType::get(mod->getContext());";
- nl(Out);
- UnresolvedTypes[Ty] = typeName;
- }
- return true;
- }
-
- // We're going to print a derived type which, by definition, contains other
- // types. So, push this one we're printing onto the type stack to assist with
- // recursive definitions.
- TypeStack.push_back(Ty);
-
// Print the type definition
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
const FunctionType* FT = cast<FunctionType>(Ty);
- Out << "std::vector<const Type*>" << typeName << "_args;";
+ Out << "std::vector<Type*>" << typeName << "_args;";
nl(Out);
FunctionType::param_iterator PI = FT->param_begin();
FunctionType::param_iterator PE = FT->param_end();
for (; PI != PE; ++PI) {
const Type* argTy = static_cast<const Type*>(*PI);
- bool isForward = printTypeInternal(argTy);
+ printType(argTy);
std::string argName(getCppName(argTy));
Out << typeName << "_args.push_back(" << argName;
- if (isForward)
- Out << "_fwd";
Out << ");";
nl(Out);
}
- bool isForward = printTypeInternal(FT->getReturnType());
+ printType(FT->getReturnType());
std::string retTypeName(getCppName(FT->getReturnType()));
Out << "FunctionType* " << typeName << " = FunctionType::get(";
in(); nl(Out) << "/*Result=*/" << retTypeName;
- if (isForward)
- Out << "_fwd";
Out << ",";
nl(Out) << "/*Params=*/" << typeName << "_args,";
nl(Out) << "/*isVarArg=*/" << (FT->isVarArg() ? "true" : "false") << ");";
@@ -571,23 +540,37 @@ bool CppWriter::printTypeInternal(const Type* Ty) {
}
case Type::StructTyID: {
const StructType* ST = cast<StructType>(Ty);
- Out << "std::vector<const Type*>" << typeName << "_fields;";
+ if (!ST->isAnonymous()) {
+ Out << "StructType *" << typeName << " = ";
+ Out << "StructType::createNamed(mod->getContext(), \"";
+ printEscapedString(ST->getName());
+ Out << "\");";
+ nl(Out);
+ // Indicate that this type is now defined.
+ DefinedTypes.insert(Ty);
+ }
+
+ Out << "std::vector<Type*>" << typeName << "_fields;";
nl(Out);
StructType::element_iterator EI = ST->element_begin();
StructType::element_iterator EE = ST->element_end();
for (; EI != EE; ++EI) {
const Type* fieldTy = static_cast<const Type*>(*EI);
- bool isForward = printTypeInternal(fieldTy);
+ printType(fieldTy);
std::string fieldName(getCppName(fieldTy));
Out << typeName << "_fields.push_back(" << fieldName;
- if (isForward)
- Out << "_fwd";
Out << ");";
nl(Out);
}
- Out << "StructType* " << typeName << " = StructType::get("
- << "mod->getContext(), "
- << typeName << "_fields, /*isPacked=*/"
+
+ if (ST->isAnonymous()) {
+ Out << "StructType *" << typeName << " = ";
+ Out << "StructType::get(" << "mod->getContext(), ";
+ } else {
+ Out << typeName << "->setBody(";
+ }
+
+ Out << typeName << "_fields, /*isPacked=*/"
<< (ST->isPacked() ? "true" : "false") << ");";
nl(Out);
break;
@@ -595,122 +578,55 @@ bool CppWriter::printTypeInternal(const Type* Ty) {
case Type::ArrayTyID: {
const ArrayType* AT = cast<ArrayType>(Ty);
const Type* ET = AT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "ArrayType* " << typeName << " = ArrayType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(AT->getNumElements()) << ");";
- nl(Out);
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "ArrayType* " << typeName << " = ArrayType::get("
+ << elemName
+ << ", " << utostr(AT->getNumElements()) << ");";
+ nl(Out);
+ }
break;
}
case Type::PointerTyID: {
const PointerType* PT = cast<PointerType>(Ty);
const Type* ET = PT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "PointerType* " << typeName << " = PointerType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(PT->getAddressSpace()) << ");";
- nl(Out);
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "PointerType* " << typeName << " = PointerType::get("
+ << elemName
+ << ", " << utostr(PT->getAddressSpace()) << ");";
+ nl(Out);
+ }
break;
}
case Type::VectorTyID: {
const VectorType* PT = cast<VectorType>(Ty);
const Type* ET = PT->getElementType();
- bool isForward = printTypeInternal(ET);
- std::string elemName(getCppName(ET));
- Out << "VectorType* " << typeName << " = VectorType::get("
- << elemName << (isForward ? "_fwd" : "")
- << ", " << utostr(PT->getNumElements()) << ");";
- nl(Out);
- break;
- }
- case Type::OpaqueTyID: {
- Out << "OpaqueType* " << typeName;
- Out << " = OpaqueType::get(mod->getContext());";
- nl(Out);
+ printType(ET);
+ if (DefinedTypes.find(Ty) == DefinedTypes.end()) {
+ std::string elemName(getCppName(ET));
+ Out << "VectorType* " << typeName << " = VectorType::get("
+ << elemName
+ << ", " << utostr(PT->getNumElements()) << ");";
+ nl(Out);
+ }
break;
}
default:
error("Invalid TypeID");
}
- // If the type had a name, make sure we recreate it.
- const std::string* progTypeName =
- findTypeName(TheModule->getTypeSymbolTable(),Ty);
- if (progTypeName) {
- Out << "mod->addTypeName(\"" << *progTypeName << "\", "
- << typeName << ");";
- nl(Out);
- }
-
- // Pop us off the type stack
- TypeStack.pop_back();
-
// Indicate that this type is now defined.
DefinedTypes.insert(Ty);
- // Early resolve as many unresolved types as possible. Search the unresolved
- // types map for the type we just printed. Now that its definition is complete
- // we can resolve any previous references to it. This prevents a cascade of
- // unresolved types.
- TypeMap::iterator I = UnresolvedTypes.find(Ty);
- if (I != UnresolvedTypes.end()) {
- Out << "cast<OpaqueType>(" << I->second
- << "_fwd.get())->refineAbstractTypeTo(" << I->second << ");";
- nl(Out);
- Out << I->second << " = cast<";
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: Out << "FunctionType"; break;
- case Type::ArrayTyID: Out << "ArrayType"; break;
- case Type::StructTyID: Out << "StructType"; break;
- case Type::VectorTyID: Out << "VectorType"; break;
- case Type::PointerTyID: Out << "PointerType"; break;
- case Type::OpaqueTyID: Out << "OpaqueType"; break;
- default: Out << "NoSuchDerivedType"; break;
- }
- Out << ">(" << I->second << "_fwd.get());";
- nl(Out); nl(Out);
- UnresolvedTypes.erase(I);
- }
-
// Finally, separate the type definition from other with a newline.
nl(Out);
-
- // We weren't a recursive type
- return false;
-}
-
-// Prints a type definition. Returns true if it could not resolve all the
-// types in the definition but had to use a forward reference.
-void CppWriter::printType(const Type* Ty) {
- assert(TypeStack.empty());
- TypeStack.clear();
- printTypeInternal(Ty);
- assert(TypeStack.empty());
}
void CppWriter::printTypes(const Module* M) {
- // Walk the symbol table and print out all its types
- const TypeSymbolTable& symtab = M->getTypeSymbolTable();
- for (TypeSymbolTable::const_iterator TI = symtab.begin(), TE = symtab.end();
- TI != TE; ++TI) {
-
- // For primitive types and types already defined, just add a name
- TypeMap::const_iterator TNI = TypeNames.find(TI->second);
- if (TI->second->isIntegerTy() || TI->second->isPrimitiveType() ||
- TNI != TypeNames.end()) {
- Out << "mod->addTypeName(\"";
- printEscapedString(TI->first);
- Out << "\", " << getCppName(TI->second) << ");";
- nl(Out);
- // For everything else, define the type
- } else {
- printType(TI->second);
- }
- }
-
- // Add all of the global variables to the value table...
+ // Add all of the global variables to the value table.
for (Module::const_global_iterator I = TheModule->global_begin(),
E = TheModule->global_end(); I != E; ++I) {
if (I->hasInitializer())
@@ -989,12 +905,12 @@ void CppWriter::printVariableUses(const GlobalVariable *GV) {
nl(Out);
printType(GV->getType());
if (GV->hasInitializer()) {
- Constant *Init = GV->getInitializer();
+ const Constant *Init = GV->getInitializer();
printType(Init->getType());
- if (Function *F = dyn_cast<Function>(Init)) {
+ if (const Function *F = dyn_cast<Function>(Init)) {
nl(Out) << "// Function Declarations"; nl(Out);
printFunctionHead(F);
- } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+ } else if (const GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
nl(Out) << "// Global Variable Declarations"; nl(Out);
printVariableHead(gv);
@@ -1353,9 +1269,10 @@ void CppWriter::printInstruction(const Instruction *I,
printEscapedString(phi->getName());
Out << "\", " << bbname << ");";
nl(Out);
- for (unsigned i = 0; i < phi->getNumOperands(); i+=2) {
+ for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
Out << iName << "->addIncoming("
- << opNames[i] << ", " << opNames[i+1] << ");";
+ << opNames[PHINode::getOperandNumForIncomingValue(i)] << ", "
+ << getOpName(phi->getIncomingBlock(i)) << ");";
nl(Out);
}
break;
@@ -1954,8 +1871,8 @@ void CppWriter::printVariable(const std::string& fname,
Out << "}\n";
}
-void CppWriter::printType(const std::string& fname,
- const std::string& typeName) {
+void CppWriter::printType(const std::string &fname,
+ const std::string &typeName) {
const Type* Ty = TheModule->getTypeByName(typeName);
if (!Ty) {
error(std::string("Type '") + typeName + "' not found in input module");
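
For reference, a minimal sketch of the create-then-setBody pattern the
rewritten printer emits. Creating the named struct first and filling in its
body afterwards is what replaces the OpaqueType/refineAbstractTypeTo dance
for recursive types; the struct itself is an invented example:

#include "llvm/LLVMContext.h"
#include "llvm/DerivedTypes.h"
#include <vector>

using namespace llvm;

static StructType *makeListNode(LLVMContext &C) {
  // struct node { i32 value; node *next; };
  StructType *Node = StructType::createNamed(C, "node");
  std::vector<Type*> Fields;
  Fields.push_back(Type::getInt32Ty(C));
  Fields.push_back(PointerType::getUnqual(Node)); // self-reference is fine
  Node->setBody(Fields, /*isPacked=*/false);
  return Node;
}
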
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index e42166e..7322e3e 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -23,8 +23,8 @@ class formatted_raw_ostream;
struct CPPTargetMachine : public TargetMachine {
CPPTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS)
- : TargetMachine(T) {}
+ const std::string &CPU, const std::string &FS)
+ : TargetMachine(T, TT, CPU, FS) {}
virtual bool addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
index 1903796..1596596 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp
@@ -86,8 +86,9 @@ namespace {
: MBlazeBaseAsmLexer(T, MAI) {
std::string tripleString("mblaze-unknown-unknown");
std::string featureString;
+ std::string CPU;
OwningPtr<const TargetMachine>
- targetMachine(T.createTargetMachine(tripleString, featureString));
+ targetMachine(T.createTargetMachine(tripleString, CPU, featureString));
InitRegisterMap(targetMachine->getRegisterInfo());
}
};
diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
index 524f33d..eebd9d8 100644
--- a/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
+++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
@@ -32,7 +32,6 @@ struct MBlazeOperand;
class MBlazeAsmParser : public TargetAsmParser {
MCAsmParser &Parser;
- TargetMachine &TM;
MCAsmParser &getParser() const { return Parser; }
MCAsmLexer &getLexer() const { return Parser.getLexer(); }
@@ -64,8 +63,8 @@ class MBlazeAsmParser : public TargetAsmParser {
public:
- MBlazeAsmParser(const Target &T, MCAsmParser &_Parser, TargetMachine &_TM)
- : TargetAsmParser(T), Parser(_Parser), TM(_TM) {}
+ MBlazeAsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser)
+ : TargetAsmParser(), Parser(_Parser) {}
virtual bool ParseInstruction(StringRef Name, SMLoc NameLoc,
SmallVectorImpl<MCParsedAsmOperand*> &Operands);
@@ -220,7 +219,7 @@ public:
return StringRef(Tok.Data, Tok.Length);
}
- virtual void dump(raw_ostream &OS) const;
+ virtual void print(raw_ostream &OS) const;
static MBlazeOperand *CreateToken(StringRef Str, SMLoc S) {
MBlazeOperand *Op = new MBlazeOperand(Token);
@@ -280,7 +279,7 @@ public:
} // end anonymous namespace.
-void MBlazeOperand::dump(raw_ostream &OS) const {
+void MBlazeOperand::print(raw_ostream &OS) const {
switch (Kind) {
case Immediate:
getImm()->print(OS);
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
index 004057a..0bc5b78 100644
--- a/lib/Target/MBlaze/CMakeLists.txt
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -1,16 +1,13 @@
set(LLVM_TARGET_DEFINITIONS MBlaze.td)
-tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(MBlazeGenRegisterNames.inc -gen-register-enums)
-tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc)
-tablegen(MBlazeGenInstrNames.inc -gen-instr-enums)
-tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc)
+tablegen(MBlazeGenRegisterInfo.inc -gen-register-info)
+tablegen(MBlazeGenInstrInfo.inc -gen-instr-info)
tablegen(MBlazeGenCodeEmitter.inc -gen-emitter)
tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher)
tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
-tablegen(MBlazeGenSubtarget.inc -gen-subtarget)
+tablegen(MBlazeGenSubtargetInfo.inc -gen-subtarget)
tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info)
@@ -20,7 +17,6 @@ add_llvm_target(MBlazeCodeGen
MBlazeISelDAGToDAG.cpp
MBlazeISelLowering.cpp
MBlazeFrameLowering.cpp
- MBlazeMCAsmInfo.cpp
MBlazeRegisterInfo.cpp
MBlazeSubtarget.cpp
MBlazeTargetMachine.cpp
@@ -38,3 +34,4 @@ add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
index 060a87b..88d80a1 100644
--- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
+++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
@@ -27,9 +27,12 @@
// #include "MBlazeGenDecoderTables.inc"
// #include "MBlazeGenRegisterNames.inc"
-#include "MBlazeGenInstrInfo.inc"
#include "MBlazeGenEDInfo.inc"
+namespace llvm {
+extern MCInstrDesc MBlazeInsts[];
+}
+
using namespace llvm;
const unsigned UNSUPPORTED = -1;
diff --git a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
index 13c4b49..eacca41 100644
--- a/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
+++ b/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
@@ -18,11 +18,10 @@
namespace llvm {
class MCOperand;
- class TargetMachine;
class MBlazeInstPrinter : public MCInstPrinter {
public:
- MBlazeInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ MBlazeInstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h
index 00c73f0..3390794 100644
--- a/lib/Target/MBlaze/MBlaze.h
+++ b/lib/Target/MBlaze/MBlaze.h
@@ -15,6 +15,7 @@
#ifndef TARGET_MBLAZE_H
#define TARGET_MBLAZE_H
+#include "MCTargetDesc/MBlazeMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -22,26 +23,20 @@ namespace llvm {
class FunctionPass;
class MachineCodeEmitter;
class MCCodeEmitter;
+ class MCInstrInfo;
+ class MCSubtargetInfo;
class TargetAsmBackend;
class formatted_raw_ostream;
- MCCodeEmitter *createMBlazeMCCodeEmitter(const Target &,
- TargetMachine &TM,
+ MCCodeEmitter *createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
MCContext &Ctx);
-
+
TargetAsmBackend *createMBlazeAsmBackend(const Target &, const std::string &);
FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
- extern Target TheMBlazeTarget;
} // end namespace llvm;
-// Defines symbolic names for MBlaze registers. This defines a mapping from
-// register name to register number.
-#include "MBlazeGenRegisterNames.inc"
-
-// Defines symbolic names for the MBlaze instructions.
-#include "MBlazeGenInstrNames.inc"
-
#endif
diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
index 0f0f60e..0016df5 100644
--- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
+++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
@@ -319,11 +319,10 @@ isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB) const {
}
static MCInstPrinter *createMBlazeMCInstPrinter(const Target &T,
- TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new MBlazeInstPrinter(TM, MAI);
+ return new MBlazeInstPrinter(MAI);
return 0;
}
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
index 973e968..c07570a 100644
--- a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -109,7 +109,7 @@ static bool delayHasHazard(MachineBasicBlock::iterator &candidate,
// Hazard check
MachineBasicBlock::iterator a = candidate;
MachineBasicBlock::iterator b = slot;
- TargetInstrDesc desc = candidate->getDesc();
+ MCInstrDesc desc = candidate->getDesc();
// MBB layout:-
// candidate := a0 = operation(a1, a2)
@@ -183,7 +183,7 @@ static bool isDelayFiller(MachineBasicBlock &MBB,
if (candidate == MBB.begin())
return false;
- TargetInstrDesc brdesc = (--candidate)->getDesc();
+ MCInstrDesc brdesc = (--candidate)->getDesc();
return (brdesc.hasDelaySlot());
}
@@ -211,7 +211,7 @@ findDelayInstr(MachineBasicBlock &MBB,MachineBasicBlock::iterator slot) {
break;
--I;
- TargetInstrDesc desc = I->getDesc();
+ MCInstrDesc desc = I->getDesc();
if (desc.hasDelaySlot() || desc.isBranch() || isDelayFiller(MBB,I) ||
desc.isCall() || desc.isReturn() || desc.isBarrier() ||
hasUnknownSideEffects(I))
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
index c5e0a89..62dfdcc 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.cpp
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -69,6 +69,7 @@ MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
// Floating point operations which are not supported
setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
@@ -1114,15 +1115,19 @@ MBlazeTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
-/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
-/// return a list of registers that can be used to satisfy the constraint.
-/// This should only be used for C_RegisterClass constraints.
+/// Given a register class constraint, like 'r', if this corresponds directly
+/// to an LLVM register class, return a register of 0 and the register class
+/// pointer.
std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r':
return std::make_pair(0U, MBlaze::GPRRegisterClass);
+ // TODO: These can't possibly be right, but match what was in
+ // getRegClassForInlineAsmConstraint.
+ case 'd':
+ case 'y':
case 'f':
if (VT == MVT::f32)
return std::make_pair(0U, MBlaze::GPRRegisterClass);
@@ -1131,32 +1136,6 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-/// Given a register class constraint, like 'r', if this corresponds directly
-/// to an LLVM register class, return a register of 0 and the register class
-/// pointer.
-std::vector<unsigned> MBlazeTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
- if (Constraint.size() != 1)
- return std::vector<unsigned>();
-
- switch (Constraint[0]) {
- default : break;
- case 'r':
- // GCC MBlaze Constraint Letters
- case 'd':
- case 'y':
- case 'f':
- return make_vector<unsigned>(
- MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6,
- MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11,
- MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21,
- MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
- MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
- MBlaze::R30, MBlaze::R31, 0);
- }
- return std::vector<unsigned>();
-}
-
bool MBlazeTargetLowering::
isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The MBlaze target isn't yet aware of offsets.
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
index 265c1a7..bb128da 100644
--- a/lib/Target/MBlaze/MBlazeISelLowering.h
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -173,10 +173,6 @@ namespace llvm {
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
/// isFPImmLegal - Returns true if the target can instruction select the
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
index 794ebed..188f10a 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -14,18 +14,21 @@
#include "MBlazeInstrInfo.h"
#include "MBlazeTargetMachine.h"
#include "MBlazeMachineFunction.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
#include "MBlazeGenInstrInfo.inc"
using namespace llvm;
MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm)
- : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)),
+ : MBlazeGenInstrInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
static bool isZeroImm(const MachineOperand &op) {
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
index b717da8..79f962b 100644
--- a/lib/Target/MBlaze/MBlazeInstrInfo.h
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -19,6 +19,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "MBlazeRegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "MBlazeGenInstrInfo.inc"
+
namespace llvm {
namespace MBlaze {
@@ -219,7 +222,7 @@ namespace MBlazeII {
};
}
-class MBlazeInstrInfo : public TargetInstrInfoImpl {
+class MBlazeInstrInfo : public MBlazeGenInstrInfo {
MBlazeTargetMachine &TM;
const MBlazeRegisterInfo RI;
public:
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
index 7e4a2f5..32d67b2 100644
--- a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -92,7 +92,7 @@ bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
static const FunctionType *getType(LLVMContext &Context, unsigned id) {
const Type *ResultTy = NULL;
- std::vector<const Type*> ArgTys;
+ std::vector<Type*> ArgTys;
bool IsVarArg = false;
#define GET_INTRINSIC_GENERATOR
diff --git a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
index 3ece1a8..ddc636d 100644
--- a/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
+++ b/lib/Target/MBlaze/MBlazeMCCodeEmitter.cpp
@@ -29,13 +29,12 @@ namespace {
class MBlazeMCCodeEmitter : public MCCodeEmitter {
MBlazeMCCodeEmitter(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const MBlazeMCCodeEmitter &); // DO NOT IMPLEMENT
- const TargetMachine &TM;
- const TargetInstrInfo &TII;
- MCContext &Ctx;
+ const MCInstrInfo &MCII;
public:
- MBlazeMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
- : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
+ MBlazeMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii) {
}
~MBlazeMCCodeEmitter() {}
@@ -96,10 +95,10 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const Target &,
- TargetMachine &TM,
+MCCodeEmitter *llvm::createMBlazeMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new MBlazeMCCodeEmitter(TM, Ctx);
+ return new MBlazeMCCodeEmitter(MCII, STI, Ctx);
}
/// getMachineOpValue - Return binary encoding of operand. If the machine
@@ -179,7 +178,7 @@ void MBlazeMCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = TII.get(Opcode);
+ const MCInstrDesc &Desc = MCII.get(Opcode);
uint64_t TSFlags = Desc.TSFlags;
// Keep track of the current byte being emitted.
unsigned CurByte = 0;
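
Sketch of the resulting wiring, for reference: the emitter factory is now
TargetMachine-free and takes the MC-level tables directly. MCII and STI are
assumed to come from the createMCInstrInfo/createMCSubtargetInfo hooks added
elsewhere in this series, and Ctx from the MC streamer setup:

#include "MBlaze.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"

using namespace llvm;

static MCCodeEmitter *makeMBlazeEmitter(const MCInstrInfo &MCII,
                                        const MCSubtargetInfo &STI,
                                        MCContext &Ctx) {
  return createMBlazeMCCodeEmitter(MCII, STI, Ctx);
}
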
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
index 517279f..f0b201a 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -37,12 +37,14 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#define GET_REGINFO_TARGET_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
using namespace llvm;
MBlazeRegisterInfo::
MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
- : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
- Subtarget(ST), TII(tii) {}
+ : MBlazeGenRegisterInfo(), Subtarget(ST), TII(tii) {}
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
@@ -359,6 +361,3 @@ int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
int MBlazeRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
return MBlazeGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
}
-
-#include "MBlazeGenRegisterInfo.inc"
-
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
index 3807839..7ebce21 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.h
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -17,7 +17,9 @@
#include "MBlaze.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "MBlazeGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "MBlazeGenRegisterInfo.inc"
namespace llvm {
class MBlazeSubtarget;
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
index bd396ed..13c46ba 100644
--- a/lib/Target/MBlaze/MBlazeRegisterInfo.td
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -109,32 +109,9 @@ let Namespace = "MBlaze" in {
// Register Classes
//===----------------------------------------------------------------------===//
-def GPR : RegisterClass<"MBlaze", [i32,f32], 32,
- [
- // Return Values and Arguments
- R3, R4, R5, R6, R7, R8, R9, R10,
+def GPR : RegisterClass<"MBlaze", [i32,f32], 32, (sequence "R%u", 0, 31)>;
- // Not preserved across procedure calls
- R11, R12,
-
- // Callee save
- R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
-
- // Reserved
- R0, // Always zero
- R1, // The stack pointer
- R2, // Read-only small data area anchor
- R13, // Read-write small data area anchor
- R14, // Return address for interrupts
- R15, // Return address for sub-routines
- R16, // Return address for trap
- R17, // Return address for exceptions
- R18, // Reserved for assembler
- R19 // The frame-pointer
- ]>;
-
-def SPR : RegisterClass<"MBlaze", [i32], 32,
- [
+def SPR : RegisterClass<"MBlaze", [i32], 32, (add
// Reserved
RPC,
RMSR,
@@ -160,12 +137,12 @@ def SPR : RegisterClass<"MBlaze", [i32], 32,
RPVR9,
RPVR10,
RPVR11
- ]>
+ )>
{
// None of the special purpose registers are allocatable.
let isAllocatable = 0;
}
-def CRC : RegisterClass<"MBlaze", [i32], 32, [CARRY]> {
+def CRC : RegisterClass<"MBlaze", [i32], 32, (add CARRY)> {
let CopyCost = -1;
}
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
index a80744a..eda141d 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.cpp
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -7,29 +7,42 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the MBlaze specific subclass of TargetSubtarget.
+// This file implements the MBlaze specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "MBlazeSubtarget.h"
#include "MBlaze.h"
#include "MBlazeRegisterInfo.h"
-#include "MBlazeGenSubtarget.inc"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MBlazeGenSubtargetInfo.inc"
+
using namespace llvm;
-MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS):
+MBlazeSubtarget::MBlazeSubtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS):
+ MBlazeGenSubtargetInfo(TT, CPU, FS),
HasBarrel(false), HasDiv(false), HasMul(false), HasPatCmp(false),
HasFPU(false), HasMul64(false), HasSqrt(false)
{
// Parse features string.
- std::string CPU = "mblaze";
- CPU = ParseSubtargetFeatures(FS, CPU);
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mblaze";
+ ParseSubtargetFeatures(CPUName, FS);
// Only use instruction scheduling if the selected CPU has an instruction
// itinerary (the default CPU is the only one that doesn't).
- HasItin = CPU != "mblaze";
- DEBUG(dbgs() << "CPU " << CPU << "(" << HasItin << ")\n");
+ HasItin = CPUName != "mblaze";
+ DEBUG(dbgs() << "CPU " << CPUName << "(" << HasItin << ")\n");
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
// Compute the issue width of the MBlaze itineraries
computeIssueWidth();
@@ -41,11 +54,10 @@ void MBlazeSubtarget::computeIssueWidth() {
bool MBlazeSubtarget::
enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& Mode,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
- Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
CriticalPathRCs.push_back(&MBlaze::GPRRegClass);
return HasItin && OptLevel >= CodeGenOpt::Default;
}
-
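Every subtarget constructor updated in this patch follows the same shape: forward the triple/CPU/feature strings to the tblgen'd base class, default the CPU name when the caller passes an empty string, then let the generated ParseSubtargetFeatures set the feature bits. A sketch for a hypothetical Foo target (illustrative names; the default CPU string and the InstrItins member are target-specific):

    FooSubtarget::FooSubtarget(const std::string &TT, const std::string &CPU,
                               const std::string &FS)
      : FooGenSubtargetInfo(TT, CPU, FS) {
      std::string CPUName = CPU;
      if (CPUName.empty())
        CPUName = "generic";               // assumed target default

      // Defined in FooGenSubtargetInfo.inc via GET_SUBTARGETINFO_CTOR.
      ParseSubtargetFeatures(CPUName, FS);

      // Pick up the scheduling itinerary for the selected CPU.
      InstrItins = getInstrItineraryForCPU(CPUName);
    }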
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
index 2255b28..43b0197 100644
--- a/lib/Target/MBlaze/MBlazeSubtarget.h
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -7,21 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the MBlaze specific subclass of TargetSubtarget.
+// This file declares the MBlaze specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef MBLAZESUBTARGET_H
#define MBLAZESUBTARGET_H
-#include "llvm/Target/TargetSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "MBlazeGenSubtargetInfo.inc"
+
namespace llvm {
+class StringRef;
-class MBlazeSubtarget : public TargetSubtarget {
+class MBlazeSubtarget : public MBlazeGenSubtargetInfo {
protected:
bool HasBarrel;
@@ -39,12 +42,12 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
- MBlazeSubtarget(const std::string &TT, const std::string &FS);
+ MBlazeSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// Compute the number of maximum number of issues per cycle for the
/// MBlaze scheduling itineraries.
@@ -52,7 +55,7 @@ public:
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& Mode,
+ TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
/// getInstrItins - Return the instruction itineraries based on subtarget.
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
index df34a83..7208874 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.cpp
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "MBlaze.h"
-#include "MBlazeMCAsmInfo.h"
#include "MBlazeTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
@@ -21,14 +20,6 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
- switch (TheTriple.getOS()) {
- default:
- return new MBlazeMCAsmInfo();
- }
-}
-
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
raw_ostream &_OS,
@@ -55,9 +46,6 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// Register the target.
RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
- // Register the target asm info.
- RegisterAsmInfoFn A(TheMBlazeTarget, createMCAsmInfo);
-
// Register the MC code emitter
TargetRegistry::RegisterCodeEmitter(TheMBlazeTarget,
llvm::createMBlazeMCCodeEmitter);
@@ -80,9 +68,9 @@ extern "C" void LLVMInitializeMBlazeTarget() {
// an easier handling.
MBlazeTargetMachine::
MBlazeTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS):
- LLVMTargetMachine(T, TT),
- Subtarget(TT, FS),
+ const std::string &CPU, const std::string &FS):
+ LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
DataLayout("E-p:32:32:32-i8:8:8-i16:16:16"),
InstrInfo(*this),
FrameLowering(Subtarget),
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
index 48ce37a..cd6caaf 100644
--- a/lib/Target/MBlaze/MBlazeTargetMachine.h
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -42,7 +42,7 @@ namespace llvm {
public:
MBlazeTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const MBlazeInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
diff --git a/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..3d15708
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMBlazeDesc
+ MBlazeMCTargetDesc.cpp
+ MBlazeMCAsmInfo.cpp
+ )
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
index 1467141..0d88466 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
@@ -15,6 +15,8 @@
using namespace llvm;
MBlazeMCAsmInfo::MBlazeMCAsmInfo() {
+ IsLittleEndian = false;
+ StackGrowsUp = false;
SupportsDebugInformation = true;
AlignmentIsInBytes = false;
PrivateGlobalPrefix = "$";
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
index e68dd58..e68dd58 100644
--- a/lib/Target/MBlaze/MBlazeMCAsmInfo.h
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
new file mode 100644
index 0000000..20d6c0b
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
@@ -0,0 +1,65 @@
+//===-- MBlazeMCTargetDesc.cpp - MBlaze Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCTargetDesc.h"
+#include "MBlazeMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MBlazeGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MBlazeGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMBlazeMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMBlazeMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMBlazeMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMBlazeTarget, createMBlazeMCInstrInfo);
+}
+
+static MCSubtargetInfo *createMBlazeMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMBlazeMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMBlazeMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMBlazeTarget,
+ createMBlazeMCSubtargetInfo);
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ switch (TheTriple.getOS()) {
+ default:
+ return new MBlazeMCAsmInfo();
+ }
+}
+
+extern "C" void LLVMInitializeMBlazeMCAsmInfo() {
+ RegisterMCAsmInfoFn X(TheMBlazeTarget, createMCAsmInfo);
+}
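These extern "C" hooks let MC-only tools build the instruction and subtarget tables without linking the CodeGen layer. A consumer-side sketch, assuming the Target accessors declared in llvm/Target/TargetRegistry.h at this point in the tree:

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Target/TargetRegistry.h"
    using namespace llvm;

    // Assumes LLVMInitializeMBlazeTargetInfo() and the MC initializers
    // above have already been called.
    static void buildMBlazeMCTables() {
      std::string Err;
      const Target *T = TargetRegistry::lookupTarget("mblaze", Err);
      if (!T) return;
      OwningPtr<MCInstrInfo> MCII(T->createMCInstrInfo());
      OwningPtr<MCSubtargetInfo> STI(
          T->createMCSubtargetInfo("mblaze", /*CPU=*/"", /*FS=*/""));
      // MCII and STI can now back MC-layer clients such as a
      // disassembler or asm parser.
    }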
diff --git a/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
new file mode 100644
index 0000000..b14772e
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MBlazeMCTargetDesc.h - MBlaze Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MBlaze specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEMCTARGETDESC_H
+#define MBLAZEMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMBlazeTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for MBlaze registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MBlazeGenRegisterInfo.inc"
+
+// Defines symbolic names for the MBlaze instructions.
+#define GET_INSTRINFO_ENUM
+#include "MBlazeGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MBlazeGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/MBlaze/MCTargetDesc/Makefile b/lib/Target/MBlaze/MCTargetDesc/Makefile
new file mode 100644
index 0000000..71075ff
--- /dev/null
+++ b/lib/Target/MBlaze/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MBlaze/MCTargetDesc/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile
index e01c60b..83c2a7d 100644
--- a/lib/Target/MBlaze/Makefile
+++ b/lib/Target/MBlaze/Makefile
@@ -11,15 +11,14 @@ LIBRARYNAME = LLVMMBlazeCodeGen
TARGET = MBlaze
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \
- MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
- MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
- MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
- MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
- MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc \
- MBlazeGenEDInfo.inc
+BUILT_SOURCES = MBlazeGenRegisterInfo.inc MBlazeGenInstrInfo.inc \
+ MBlazeGenAsmWriter.inc \
+ MBlazeGenDAGISel.inc MBlazeGenAsmMatcher.inc \
+ MBlazeGenCodeEmitter.inc MBlazeGenCallingConv.inc \
+ MBlazeGenSubtargetInfo.inc MBlazeGenIntrinsics.inc \
+ MBlazeGenEDInfo.inc
-DIRS = InstPrinter AsmParser Disassembler TargetInfo
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt
index 2c7cbb6..33f3d44 100644
--- a/lib/Target/MSP430/CMakeLists.txt
+++ b/lib/Target/MSP430/CMakeLists.txt
@@ -1,14 +1,11 @@
set(LLVM_TARGET_DEFINITIONS MSP430.td)
-tablegen(MSP430GenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(MSP430GenRegisterNames.inc -gen-register-enums)
-tablegen(MSP430GenRegisterInfo.inc -gen-register-desc)
-tablegen(MSP430GenInstrNames.inc -gen-instr-enums)
-tablegen(MSP430GenInstrInfo.inc -gen-instr-desc)
+tablegen(MSP430GenRegisterInfo.inc -gen-register-info)
+tablegen(MSP430GenInstrInfo.inc -gen-instr-info)
tablegen(MSP430GenAsmWriter.inc -gen-asm-writer)
tablegen(MSP430GenDAGISel.inc -gen-dag-isel)
tablegen(MSP430GenCallingConv.inc -gen-callingconv)
-tablegen(MSP430GenSubtarget.inc -gen-subtarget)
+tablegen(MSP430GenSubtargetInfo.inc -gen-subtarget)
add_llvm_target(MSP430CodeGen
MSP430BranchSelector.cpp
@@ -16,7 +13,6 @@ add_llvm_target(MSP430CodeGen
MSP430ISelLowering.cpp
MSP430InstrInfo.cpp
MSP430FrameLowering.cpp
- MSP430MCAsmInfo.cpp
MSP430RegisterInfo.cpp
MSP430Subtarget.cpp
MSP430TargetMachine.cpp
@@ -27,3 +23,4 @@ add_llvm_target(MSP430CodeGen
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
index 63860dc..50d98b7 100644
--- a/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
+++ b/lib/Target/MSP430/InstPrinter/MSP430InstPrinter.h
@@ -18,11 +18,10 @@
namespace llvm {
class MCOperand;
- class TargetMachine;
class MSP430InstPrinter : public MCInstPrinter {
public:
- MSP430InstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ MSP430InstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
virtual void printInst(const MCInst *MI, raw_ostream &O);
diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..0f3ebd3
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMSP430Desc
+ MSP430MCTargetDesc.cpp
+ MSP430MCAsmInfo.cpp
+ )
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
index 3f44944..ad7d380 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp
@@ -15,6 +15,8 @@
using namespace llvm;
MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, StringRef TT) {
+ PointerSize = 2;
+
PrivateGlobalPrefix = ".L";
WeakRefDirective ="\t.weak\t";
PCSymbol=".";
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
index f3138a2..f3138a2 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.h
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
new file mode 100644
index 0000000..43a704d
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- MSP430MCTargetDesc.cpp - MSP430 Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MSP430MCTargetDesc.h"
+#include "MSP430MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MSP430GenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MSP430GenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMSP430MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMSP430MCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMSP430MCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+}
+
+static MCSubtargetInfo *createMSP430MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMSP430MCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMSP430MCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+ createMSP430MCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeMSP430MCAsmInfo() {
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+}
diff --git a/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
new file mode 100644
index 0000000..0d8a6bd
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- MSP430MCTargetDesc.h - MSP430 Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides MSP430 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MSP430MCTARGETDESC_H
+#define MSP430MCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMSP430Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for MSP430 registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "MSP430GenRegisterInfo.inc"
+
+// Defines symbolic names for the MSP430 instructions.
+#define GET_INSTRINFO_ENUM
+#include "MSP430GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MSP430GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/MSP430/MCTargetDesc/Makefile b/lib/Target/MSP430/MCTargetDesc/Makefile
new file mode 100644
index 0000000..bb85799
--- /dev/null
+++ b/lib/Target/MSP430/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/MSP430/MCTargetDesc/Makefile -------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMSP430Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSP430/MSP430.h b/lib/Target/MSP430/MSP430.h
index e742118..4574ce5 100644
--- a/lib/Target/MSP430/MSP430.h
+++ b/lib/Target/MSP430/MSP430.h
@@ -15,6 +15,7 @@
#ifndef LLVM_TARGET_MSP430_H
#define LLVM_TARGET_MSP430_H
+#include "MCTargetDesc/MSP430MCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace MSP430CC {
@@ -41,15 +42,6 @@ namespace llvm {
FunctionPass *createMSP430BranchSelectionPass();
- extern Target TheMSP430Target;
-
} // end namespace llvm;
-// Defines symbolic names for MSP430 registers.
-// This defines a mapping from register name to register number.
-#include "MSP430GenRegisterNames.inc"
-
-// Defines symbolic names for the MSP430 instructions.
-#include "MSP430GenInstrNames.inc"
-
#endif
diff --git a/lib/Target/MSP430/MSP430AsmPrinter.cpp b/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 5264d68..2042056 100644
--- a/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -15,7 +15,6 @@
#define DEBUG_TYPE "asm-printer"
#include "MSP430.h"
#include "MSP430InstrInfo.h"
-#include "MSP430MCAsmInfo.h"
#include "MSP430MCInstLower.h"
#include "MSP430TargetMachine.h"
#include "InstPrinter/MSP430InstPrinter.h"
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
@@ -164,11 +164,10 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
- TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new MSP430InstPrinter(TM, MAI);
+ return new MSP430InstPrinter(MAI);
return 0;
}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index 424df13..846d093 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -15,18 +15,21 @@
#include "MSP430InstrInfo.h"
#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
-#include "MSP430GenInstrInfo.inc"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#define GET_INSTRINFO_CTOR
+#include "MSP430GenInstrInfo.inc"
+
using namespace llvm;
MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm)
- : TargetInstrInfoImpl(MSP430Insts, array_lengthof(MSP430Insts)),
+ : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
RI(tm, *this), TM(tm) {}
void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
@@ -158,13 +161,13 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
}
bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isTerminator()) return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
// Conditional branch is a special case.
- if (TID.isBranch() && !TID.isBarrier())
+ if (MCID.isBranch() && !MCID.isBarrier())
return true;
- if (!TID.isPredicable())
+ if (!MCID.isPredicable())
return true;
return !isPredicated(MI);
}
@@ -293,7 +296,7 @@ MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
/// instruction may be. This returns the maximum number of bytes.
///
unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- const TargetInstrDesc &Desc = MI->getDesc();
+ const MCInstrDesc &Desc = MI->getDesc();
switch (Desc.TSFlags & MSP430II::SizeMask) {
default:
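The descriptor type behind MI->getDesc() changes from TargetInstrDesc to the MC-layer MCInstrDesc (llvm/MC/MCInstrDesc.h); the predicate surface is unchanged, so call sites only rename the local. The common query pattern after this change, sketched:

    const MCInstrDesc &MCID = MI->getDesc();
    if (MCID.isBranch() && !MCID.isBarrier()) {
      // Conditional branch: fall-through is still possible.
    }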
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index e885cd3..90013f5 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -17,6 +17,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "MSP430RegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "MSP430GenInstrInfo.inc"
+
namespace llvm {
class MSP430TargetMachine;
@@ -37,7 +40,7 @@ namespace MSP430II {
};
}
-class MSP430InstrInfo : public TargetInstrInfoImpl {
+class MSP430InstrInfo : public MSP430GenInstrInfo {
const MSP430RegisterInfo RI;
MSP430TargetMachine &TM;
public:
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 53f4c2e..1cc60bb 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -26,13 +26,15 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/Support/ErrorHandling.h"
+#define GET_REGINFO_TARGET_DESC
+#include "MSP430GenRegisterInfo.inc"
+
using namespace llvm;
// FIXME: Provide proper call frame setup / destroy opcodes.
MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
const TargetInstrInfo &tii)
- : MSP430GenRegisterInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP),
- TM(tm), TII(tii) {
+ : MSP430GenRegisterInfo(), TM(tm), TII(tii) {
StackAlign = TM.getFrameLowering()->getStackAlignment();
}
@@ -117,12 +119,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;
MachineInstr *New = 0;
- if (Old->getOpcode() == getCallFrameSetupOpcode()) {
+ if (Old->getOpcode() == TII.getCallFrameSetupOpcode()) {
New = BuildMI(MF, Old->getDebugLoc(),
TII.get(MSP430::SUB16ri), MSP430::SPW)
.addReg(MSP430::SPW).addImm(Amount);
} else {
- assert(Old->getOpcode() == getCallFrameDestroyOpcode());
+ assert(Old->getOpcode() == TII.getCallFrameDestroyOpcode());
// factor out the amount the callee already popped.
uint64_t CalleeAmt = Old->getOperand(1).getImm();
Amount -= CalleeAmt;
@@ -140,7 +142,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.insert(I, New);
}
}
- } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
+ } else if (I->getOpcode() == TII.getCallFrameDestroyOpcode()) {
// If we are performing frame pointer elimination and if the callee pops
// something off the stack pointer, add it back.
if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
@@ -250,5 +252,3 @@ int MSP430RegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
llvm_unreachable("Not implemented yet!");
return 0;
}
-
-#include "MSP430GenRegisterInfo.inc"
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index e820558..fb70594 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -15,7 +15,9 @@
#define LLVM_TARGET_MSP430REGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
-#include "MSP430GenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "MSP430GenRegisterInfo.inc"
namespace llvm {
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.td b/lib/Target/MSP430/MSP430RegisterInfo.td
index 3ef6ab2..d1c2e3f 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.td
+++ b/lib/Target/MSP430/MSP430RegisterInfo.td
@@ -66,19 +66,19 @@ def R15W : MSP430RegWithSubregs<15, "r15", [R15B]>;
def GR8 : RegisterClass<"MSP430", [i8], 8,
// Volatile registers
- [R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
+ (add R12B, R13B, R14B, R15B, R11B, R10B, R9B, R8B, R7B, R6B, R5B,
// Frame pointer, sometimes allocable
FPB,
// Volatile, but not allocable
- PCB, SPB, SRB, CGB]>;
+ PCB, SPB, SRB, CGB)>;
def GR16 : RegisterClass<"MSP430", [i16], 16,
// Volatile registers
- [R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
+ (add R12W, R13W, R14W, R15W, R11W, R10W, R9W, R8W, R7W, R6W, R5W,
// Frame pointer, sometimes allocable
FPW,
// Volatile, but not allocable
- PCW, SPW, SRW, CGW]>
+ PCW, SPW, SRW, CGW)>
{
let SubRegClasses = [(GR8 subreg_8bit)];
}
diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp
index 1346cb9..b58c50a 100644
--- a/lib/Target/MSP430/MSP430Subtarget.cpp
+++ b/lib/Target/MSP430/MSP430Subtarget.cpp
@@ -7,19 +7,26 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the MSP430 specific subclass of TargetSubtarget.
+// This file implements the MSP430 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "MSP430Subtarget.h"
#include "MSP430.h"
-#include "MSP430GenSubtarget.inc"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MSP430GenSubtargetInfo.inc"
using namespace llvm;
-MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &FS) {
- std::string CPU = "generic";
+MSP430Subtarget::MSP430Subtarget(const std::string &TT,
+ const std::string &CPU,
+ const std::string &FS) :
+ MSP430GenSubtargetInfo(TT, CPU, FS) {
+ std::string CPUName = "generic";
// Parse features string.
- ParseSubtargetFeatures(FS, CPU);
+ ParseSubtargetFeatures(CPUName, FS);
}
diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h
index 1070544..1ce5f11 100644
--- a/lib/Target/MSP430/MSP430Subtarget.h
+++ b/lib/Target/MSP430/MSP430Subtarget.h
@@ -7,31 +7,35 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the MSP430 specific subclass of TargetSubtarget.
+// This file declares the MSP430 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_MSP430_SUBTARGET_H
#define LLVM_TARGET_MSP430_SUBTARGET_H
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "MSP430GenSubtargetInfo.inc"
#include <string>
namespace llvm {
+class StringRef;
-class MSP430Subtarget : public TargetSubtarget {
+class MSP430Subtarget : public MSP430GenSubtargetInfo {
bool ExtendedInsts;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- MSP430Subtarget(const std::string &TT, const std::string &FS);
+ MSP430Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index fba9536..971f512 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "MSP430.h"
-#include "MSP430MCAsmInfo.h"
#include "MSP430TargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/CodeGen/Passes.h"
@@ -23,14 +22,14 @@ using namespace llvm;
extern "C" void LLVMInitializeMSP430Target() {
// Register the target.
RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
- RegisterAsmInfo<MSP430MCAsmInfo> Z(TheMSP430Target);
}
MSP430TargetMachine::MSP430TargetMachine(const Target &T,
const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS),
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
// FIXME: Check TargetData string.
DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
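Asm-info registration also leaves LLVMInitializeMSP430Target: the MCTargetDesc library's LLVMInitializeMSP430MCAsmInfo (shown earlier) now owns it through the MC-side helper. The before/after, sketched with illustrative Foo names:

    // Old, registered from the CodeGen library:
    //   RegisterAsmInfo<FooMCAsmInfo> Z(TheFooTarget);

    // New, registered from the MC library:
    extern "C" void LLVMInitializeFooMCAsmInfo() {
      RegisterMCAsmInfo<FooMCAsmInfo> X(TheFooTarget);
    }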
diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h
index cee3b04..2a9eea0 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.h
+++ b/lib/Target/MSP430/MSP430TargetMachine.h
@@ -39,7 +39,7 @@ class MSP430TargetMachine : public LLVMTargetMachine {
public:
MSP430TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
diff --git a/lib/Target/MSP430/Makefile b/lib/Target/MSP430/Makefile
index fa4e80b..82216ed 100644
--- a/lib/Target/MSP430/Makefile
+++ b/lib/Target/MSP430/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMMSP430CodeGen
TARGET = MSP430
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = MSP430GenRegisterInfo.h.inc MSP430GenRegisterNames.inc \
- MSP430GenRegisterInfo.inc MSP430GenInstrNames.inc \
- MSP430GenInstrInfo.inc MSP430GenAsmWriter.inc \
+BUILT_SOURCES = MSP430GenRegisterInfo.inc MSP430GenInstrInfo.inc \
+ MSP430GenAsmWriter.inc \
MSP430GenDAGISel.inc MSP430GenCallingConv.inc \
- MSP430GenSubtarget.inc
+ MSP430GenSubtargetInfo.inc
-DIRS = InstPrinter TargetInfo
+DIRS = InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt
index fd16516..36ab1a9 100644
--- a/lib/Target/Mips/CMakeLists.txt
+++ b/lib/Target/Mips/CMakeLists.txt
@@ -1,14 +1,11 @@
set(LLVM_TARGET_DEFINITIONS Mips.td)
-tablegen(MipsGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(MipsGenRegisterNames.inc -gen-register-enums)
-tablegen(MipsGenRegisterInfo.inc -gen-register-desc)
-tablegen(MipsGenInstrNames.inc -gen-instr-enums)
-tablegen(MipsGenInstrInfo.inc -gen-instr-desc)
+tablegen(MipsGenRegisterInfo.inc -gen-register-info)
+tablegen(MipsGenInstrInfo.inc -gen-instr-info)
tablegen(MipsGenAsmWriter.inc -gen-asm-writer)
tablegen(MipsGenDAGISel.inc -gen-dag-isel)
tablegen(MipsGenCallingConv.inc -gen-callingconv)
-tablegen(MipsGenSubtarget.inc -gen-subtarget)
+tablegen(MipsGenSubtargetInfo.inc -gen-subtarget)
add_llvm_target(MipsCodeGen
MipsAsmPrinter.cpp
@@ -19,7 +16,8 @@ add_llvm_target(MipsCodeGen
MipsISelDAGToDAG.cpp
MipsISelLowering.cpp
MipsFrameLowering.cpp
- MipsMCAsmInfo.cpp
+ MipsMCInstLower.cpp
+ MipsMCSymbolRefExpr.cpp
MipsRegisterInfo.cpp
MipsSubtarget.cpp
MipsTargetMachine.cpp
@@ -27,4 +25,6 @@ add_llvm_target(MipsCodeGen
MipsSelectionDAGInfo.cpp
)
+add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Mips/InstPrinter/CMakeLists.txt b/lib/Target/Mips/InstPrinter/CMakeLists.txt
new file mode 100644
index 0000000..8852fd4
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/CMakeLists.txt
@@ -0,0 +1,6 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMipsAsmPrinter
+ MipsInstPrinter.cpp
+ )
+add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
diff --git a/lib/Target/Mips/InstPrinter/Makefile b/lib/Target/Mips/InstPrinter/Makefile
new file mode 100644
index 0000000..63e38ef
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Mips/InstPrinter/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsAsmPrinter
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
new file mode 100644
index 0000000..41c1dd3
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -0,0 +1,127 @@
+//===-- MipsInstPrinter.cpp - Convert Mips MCInst to assembly syntax ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "asm-printer"
+#include "MipsInstPrinter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/StringExtras.h"
+using namespace llvm;
+
+#define GET_INSTRUCTION_NAME
+#include "MipsGenAsmWriter.inc"
+
+const char* Mips::MipsFCCToString(Mips::CondCode CC) {
+ switch (CC) {
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_OEQ:
+ case FCOND_UNE: return "eq";
+ case FCOND_UEQ:
+ case FCOND_ONE: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "nge";
+ case FCOND_LE:
+ case FCOND_NLE: return "le";
+ case FCOND_NGT:
+ case FCOND_GT: return "ngt";
+ }
+ llvm_unreachable("Impossible condition code!");
+}
+
+StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const {
+ return getInstructionName(Opcode);
+}
+
+void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
+ OS << '$' << LowercaseString(getRegisterName(RegNo));
+}
+
+void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O) {
+ printInstruction(MI, O);
+}
+
+void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ if (Op.isReg()) {
+ printRegName(O, Op.getReg());
+ return;
+ }
+
+ if (Op.isImm()) {
+ O << Op.getImm();
+ return;
+ }
+
+ assert(Op.isExpr() && "unknown operand kind in printOperand");
+ O << *Op.getExpr();
+}
+
+void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum,
+ raw_ostream &O) {
+ const MCOperand &MO = MI->getOperand(opNum);
+ if (MO.isImm())
+ O << (unsigned short int)MO.getImm();
+ else
+ printOperand(MI, opNum, O);
+}
+
+void MipsInstPrinter::
+printMemOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+  // Load/store memory operands are printed as imm($reg). On PIC
+  // targets the callee address is loaded with a pattern such as
+  // lw $25, %call16($28).
+ printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
+ O << ")";
+}
+
+void MipsInstPrinter::
+printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O) {
+  // When stack locations are used by instructions other than loads and
+  // stores, print them the same way as normal three-operand instructions.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+}
+
+void MipsInstPrinter::
+printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O) {
+ const MCOperand& MO = MI->getOperand(opNum);
+ O << MipsFCCToString((Mips::CondCode)MO.getImm());
+}
diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.h b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
new file mode 100644
index 0000000..680208e
--- /dev/null
+++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.h
@@ -0,0 +1,100 @@
+//===-- MipsInstPrinter.h - Convert Mips MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints a Mips MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSINSTPRINTER_H
+#define MIPSINSTPRINTER_H
+#include "llvm/MC/MCInstPrinter.h"
+
+namespace llvm {
+// These enumeration declarations were originally in MipsInstrInfo.h but
+// had to be moved here to avoid circular dependencies between
+// LLVMMipsCodeGen and LLVMMipsAsmPrinter.
+namespace Mips {
+// Mips Branch Codes
+enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+};
+
+// Mips Condition Codes
+enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_OEQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+  // These conditions have the same mnemonics as the ones above, but are
+  // used with a branch False;
+ FCOND_T,
+ FCOND_OR,
+ FCOND_UNE,
+ FCOND_ONE,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT
+};
+
+const char *MipsFCCToString(Mips::CondCode CC);
+} // end namespace Mips
+
+class TargetMachine;
+
+class MipsInstPrinter : public MCInstPrinter {
+public:
+ MipsInstPrinter(const MCAsmInfo &MAI) : MCInstPrinter(MAI) {}
+
+ // Autogenerated by tblgen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ static const char *getInstructionName(unsigned Opcode);
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual StringRef getOpcodeName(unsigned Opcode) const;
+ virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
+ virtual void printInst(const MCInst *MI, raw_ostream &O);
+
+private:
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printUnsignedImm(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MCInst *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MCInst *MI, int opNum, raw_ostream &O);
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..97de75d
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMMipsDesc
+ MipsMCTargetDesc.cpp
+ MipsMCAsmInfo.cpp
+ )
diff --git a/lib/Target/Mips/MCTargetDesc/Makefile b/lib/Target/Mips/MCTargetDesc/Makefile
new file mode 100644
index 0000000..7fe2086
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Mips/MCTargetDesc/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMipsDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index c86bf405..5d92425 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -12,11 +12,17 @@
//===----------------------------------------------------------------------===//
#include "MipsMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
using namespace llvm;
MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::mips)
+ IsLittleEndian = false;
+
AlignmentIsInBytes = false;
- Data16bitsDirective = "\t.half\t";
+ Data16bitsDirective = "\t.2byte\t";
Data32bitsDirective = "\t.4byte\t";
Data64bitsDirective = 0;
PrivateGlobalPrefix = "$";
@@ -28,4 +34,5 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Target &T, StringRef TT) {
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
HasLEB128 = true;
+ DwarfRegNumForCFI = true;
}
diff --git a/lib/Target/Mips/MipsMCAsmInfo.h b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
index 41b7192..41b7192 100644
--- a/lib/Target/Mips/MipsMCAsmInfo.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
new file mode 100644
index 0000000..06f0d0b
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- MipsMCTargetDesc.cpp - Mips Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCTargetDesc.h"
+#include "MipsMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "MipsGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "MipsGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createMipsMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitMipsMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeMipsMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheMipsTarget, createMipsMCInstrInfo);
+}
+
+static MCSubtargetInfo *createMipsMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitMipsMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeMipsMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget,
+ createMipsMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeMipsMCAsmInfo() {
+ RegisterMCAsmInfo<MipsMCAsmInfo> X(TheMipsTarget);
+ RegisterMCAsmInfo<MipsMCAsmInfo> Y(TheMipselTarget);
+}
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
new file mode 100644
index 0000000..3d18f11
--- /dev/null
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -0,0 +1,39 @@
+//===-- MipsMCTargetDesc.h - Mips Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Mips specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCTARGETDESC_H
+#define MIPSMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheMipsTarget;
+extern Target TheMipselTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for Mips registers. This defines a mapping from
+// register name to register number.
+#define GET_REGINFO_ENUM
+#include "MipsGenRegisterInfo.inc"
+
+// Defines symbolic names for the Mips instructions.
+#define GET_INSTRINFO_ENUM
+#include "MipsGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "MipsGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile
index d16b066..cc4a8ae 100644
--- a/lib/Target/Mips/Makefile
+++ b/lib/Target/Mips/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMMipsCodeGen
TARGET = Mips
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = MipsGenRegisterInfo.h.inc MipsGenRegisterNames.inc \
- MipsGenRegisterInfo.inc MipsGenInstrNames.inc \
- MipsGenInstrInfo.inc MipsGenAsmWriter.inc \
+BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \
+ MipsGenAsmWriter.inc \
MipsGenDAGISel.inc MipsGenCallingConv.inc \
- MipsGenSubtarget.inc
+ MipsGenSubtargetInfo.inc
-DIRS = TargetInfo
+DIRS = InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Mips/Mips.h b/lib/Target/Mips/Mips.h
index 76a26a9..984b5ad 100644
--- a/lib/Target/Mips/Mips.h
+++ b/lib/Target/Mips/Mips.h
@@ -15,6 +15,7 @@
#ifndef TARGET_MIPS_H
#define TARGET_MIPS_H
+#include "MCTargetDesc/MipsMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -28,16 +29,6 @@ namespace llvm {
FunctionPass *createMipsExpandPseudoPass(MipsTargetMachine &TM);
FunctionPass *createMipsEmitGPRestorePass(MipsTargetMachine &TM);
- extern Target TheMipsTarget;
- extern Target TheMipselTarget;
-
} // end namespace llvm;
-// Defines symbolic names for Mips registers. This defines a mapping from
-// register name to register number.
-#include "MipsGenRegisterNames.inc"
-
-// Defines symbolic names for the Mips instructions.
-#include "MipsGenInstrNames.inc"
-
#endif
diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td
index b79016d..433cd57 100644
--- a/lib/Target/Mips/Mips.td
+++ b/lib/Target/Mips/Mips.td
@@ -88,6 +88,14 @@ def : Proc<"allegrex", [FeatureMips2, FeatureSingleFloat, FeatureEABI,
FeatureVFPU, FeatureSEInReg, FeatureCondMov, FeatureMulDivAdd,
FeatureMinMax, FeatureSwap, FeatureBitCount]>;
+def MipsAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
+
def Mips : Target {
let InstructionSet = MipsInstrInfo;
+
+ let AssemblyWriters = [MipsAsmWriter];
}
+
diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp
index 8caa7cd..69e03bd 100644
--- a/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -13,80 +13,49 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "mips-asm-printer"
+#include "MipsAsmPrinter.h"
#include "Mips.h"
-#include "MipsSubtarget.h"
#include "MipsInstrInfo.h"
-#include "MipsTargetMachine.h"
#include "MipsMachineFunction.h"
+#include "MipsMCInstLower.h"
+#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/BasicBlock.h"
#include "llvm/Instructions.h"
-#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-namespace {
- class MipsAsmPrinter : public AsmPrinter {
- const MipsSubtarget *Subtarget;
- public:
- explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
- : AsmPrinter(TM, Streamer) {
- Subtarget = &TM.getSubtarget<MipsSubtarget>();
- }
+#include "llvm/Analysis/DebugInfo.h"
- virtual const char *getPassName() const {
- return "Mips Assembly Printer";
- }
+using namespace llvm;
- bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O);
- void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
- void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
- void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier = 0);
- void printSavedRegsBitmask(raw_ostream &O);
- void printHex32(unsigned int Value, raw_ostream &O);
-
- const char *getCurrentABIString() const;
- void emitFrameDirective();
-
- void printInstruction(const MachineInstr *MI, raw_ostream &O); // autogen'd.
- void EmitInstruction(const MachineInstr *MI) {
- SmallString<128> Str;
- raw_svector_ostream OS(Str);
- printInstruction(MI, OS);
- OutStreamer.EmitRawText(OS.str());
- }
- virtual void EmitFunctionBodyStart();
- virtual void EmitFunctionBodyEnd();
- virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
- MBB) const;
- static const char *getRegisterName(unsigned RegNo);
+void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ SmallString<128> Str;
+ raw_svector_ostream OS(Str);
- virtual void EmitFunctionEntryLabel();
- void EmitStartOfAsmFile(Module &M);
- };
-} // end of anonymous namespace
+ if (MI->isDebugValue()) {
+ PrintDebugValueComment(MI, OS);
+ return;
+ }
-#include "MipsGenAsmWriter.inc"
+ MipsMCInstLower MCInstLowering(Mang, *MF, *this);
+ MCInst TmpInst0;
+ MCInstLowering.Lower(MI, TmpInst0);
+ OutStreamer.EmitInstruction(TmpInst0);
+}
//===----------------------------------------------------------------------===//
//
@@ -202,9 +171,9 @@ void MipsAsmPrinter::emitFrameDirective() {
unsigned stackSize = MF->getFrameInfo()->getStackSize();
OutStreamer.EmitRawText("\t.frame\t$" +
- Twine(LowercaseString(getRegisterName(stackReg))) +
- "," + Twine(stackSize) + ",$" +
- Twine(LowercaseString(getRegisterName(returnReg))));
+ Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) +
+ "," + Twine(stackSize) + ",$" +
+ Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg))));
}
/// Emit Set directives.
@@ -304,6 +273,19 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
return false;
}
+bool MipsAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ assert(MO.isReg() && "unexpected inline asm memory operand");
+ O << "0($" << MipsInstPrinter::getRegisterName(MO.getReg()) << ")";
+ return false;
+}
+
void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(opNum);
@@ -326,7 +308,8 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << '$' << LowercaseString(getRegisterName(MO.getReg()));
+ O << '$'
+ << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg()));
break;
case MachineOperand::MO_Immediate:
@@ -380,27 +363,27 @@ void MipsAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum,
}
void MipsAsmPrinter::
-printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
- const char *Modifier) {
- // when using stack locations for not load/store instructions
- // print the same way as all normal 3 operand instructions.
- if (Modifier && !strcmp(Modifier, "stackloc")) {
- printOperand(MI, opNum+1, O);
- O << ", ";
- printOperand(MI, opNum, O);
- return;
- }
-
+printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O) {
  // Load/store memory operands are printed as imm($reg). On PIC
  // targets the callee address is loaded with a pattern such as
  // lw $25, %call16($28).
- printOperand(MI, opNum, O);
- O << "(";
printOperand(MI, opNum+1, O);
+ O << "(";
+ printOperand(MI, opNum, O);
O << ")";
}
void MipsAsmPrinter::
+printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O) {
+  // When stack locations are used by instructions other than loads and
+  // stores, print them the same way as normal three-operand instructions.
+ printOperand(MI, opNum, O);
+ O << ", ";
+ printOperand(MI, opNum+1, O);
+ return;
+}
+
+void MipsAsmPrinter::
printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
const char *Modifier) {
const MachineOperand& MO = MI->getOperand(opNum);
@@ -425,8 +408,33 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) {
OutStreamer.EmitRawText(StringRef("\t.previous"));
}
+MachineLocation
+MipsAsmPrinter::getDebugValueLocation(const MachineInstr *MI) const {
+ // Handles frame addresses emitted in MipsInstrInfo::emitFrameIndexDebugValue.
+ assert(MI->getNumOperands() == 4 && "Invalid no. of machine operands!");
+ assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm() &&
+ "Unexpected MachineOperand types");
+ return MachineLocation(MI->getOperand(0).getReg(),
+ MI->getOperand(1).getImm());
+}
+
+void MipsAsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
+ raw_ostream &OS) {
+ // TODO: implement
+}
+
// Force static initialization.
+static MCInstPrinter *createMipsMCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI) {
+ return new MipsInstPrinter(MAI);
+}
+
extern "C" void LLVMInitializeMipsAsmPrinter() {
RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
+
+ TargetRegistry::RegisterMCInstPrinter(TheMipsTarget, createMipsMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(TheMipselTarget,
+ createMipsMCInstPrinter);
}
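With the printer registered, the Mips AsmPrinter stops emitting raw text and takes the MC path: lower each MachineInstr to an MCInst and hand it to the streamer, which routes assembly output through the registered MCInstPrinter. The emission step reduces to (a sketch mirroring the EmitInstruction body added above, with illustrative Foo names):

    void FooAsmPrinter::EmitInstruction(const MachineInstr *MI) {
      FooMCInstLower MCInstLowering(Mang, *MF, *this); // illustrative lowering
      MCInst TmpInst;
      MCInstLowering.Lower(MI, TmpInst);
      OutStreamer.EmitInstruction(TmpInst);
    }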
diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h
new file mode 100644
index 0000000..16461ff
--- /dev/null
+++ b/lib/Target/Mips/MipsAsmPrinter.h
@@ -0,0 +1,71 @@
+//===-- MipsAsmPrinter.h - Mips LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Mips Assembly printer class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSASMPRINTER_H
+#define MIPSASMPRINTER_H
+
+#include "MipsSubtarget.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class MCStreamer;
+class MachineInstr;
+class raw_ostream;
+class MachineBasicBlock;
+class Module;
+
+class LLVM_LIBRARY_VISIBILITY MipsAsmPrinter : public AsmPrinter {
+ const MipsSubtarget *Subtarget;
+
+public:
+ explicit MipsAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
+ : AsmPrinter(TM, Streamer) {
+ Subtarget = &TM.getSubtarget<MipsSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "Mips Assembly Printer";
+ }
+
+ void EmitInstruction(const MachineInstr *MI);
+ void printSavedRegsBitmask(raw_ostream &O);
+ void printHex32(unsigned int Value, raw_ostream &O);
+ void emitFrameDirective();
+ const char *getCurrentABIString() const;
+ virtual void EmitFunctionEntryLabel();
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock*
+ MBB) const;
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O);
+ void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printUnsignedImm(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printMemOperandEA(const MachineInstr *MI, int opNum, raw_ostream &O);
+ void printFCCOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
+ const char *Modifier = 0);
+ void EmitStartOfAsmFile(Module &M);
+ virtual MachineLocation getDebugValueLocation(const MachineInstr *MI) const;
+ void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS);
+};
+}
+
+#endif
+
diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td
index 57aeb1d..876f0fc 100644
--- a/lib/Target/Mips/MipsCallingConv.td
+++ b/lib/Target/Mips/MipsCallingConv.td
@@ -20,8 +20,8 @@ class CCIfSubtarget<string F, CCAction A>:
// Only the return rules are defined here for O32. The rules for argument
// passing are defined in MipsISelLowering.cpp.
def RetCC_MipsO32 : CallingConv<[
- // i32 are returned in registers V0, V1
- CCIfType<[i32], CCAssignToReg<[V0, V1]>>,
+ // i32 are returned in registers V0, V1, A0, A1
+ CCIfType<[i32], CCAssignToReg<[V0, V1, A0, A1]>>,
// f32 are returned in registers F0, F2
CCIfType<[f32], CCAssignToReg<[F0, F2]>>,
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp
index b44a0af..c3a6211 100644
--- a/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -59,10 +59,10 @@ runOnMachineBasicBlock(MachineBasicBlock &MBB)
{
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
- const TargetInstrDesc& Tid = I->getDesc();
- if (Tid.hasDelaySlot() &&
+ const MCInstrDesc& MCid = I->getDesc();
+ if (MCid.hasDelaySlot() &&
(TM.getSubtarget<MipsSubtarget>().isMips1() ||
- Tid.isCall() || Tid.isBranch() || Tid.isReturn())) {
+ MCid.isCall() || MCid.isBranch() || MCid.isReturn())) {
MachineBasicBlock::iterator J = I;
++J;
BuildMI(MBB, J, I->getDebugLoc(), TII->get(Mips::NOP));
diff --git a/lib/Target/Mips/MipsEmitGPRestore.cpp b/lib/Target/Mips/MipsEmitGPRestore.cpp
index f49d490..03d922f 100644
--- a/lib/Target/Mips/MipsEmitGPRestore.cpp
+++ b/lib/Target/Mips/MipsEmitGPRestore.cpp
@@ -64,8 +64,8 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) {
// Insert lw.
++I;
DebugLoc dl = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
- BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addImm(0)
- .addFrameIndex(FI);
+ BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+ .addImm(0);
Changed = true;
}
@@ -77,8 +77,8 @@ bool Inserter::runOnMachineFunction(MachineFunction &F) {
DebugLoc dl = I->getDebugLoc();
// emit lw $gp, ($gp save slot on stack) after jalr
- BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addImm(0)
- .addFrameIndex(FI);
+ BuildMI(MBB, ++I, dl, TII->get(Mips::LW), Mips::GP).addFrameIndex(FI)
+ .addImm(0);
Changed = true;
}
}
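
Note (sketch, not part of the patch): every BuildMI call that materializes a
load or store now adds the frame index before the immediate, matching the new
(base, offset) operand order; after frame-index elimination the index becomes
the base register. Schematically, for the $gp reload above:

  // Frame index first (it is rewritten to the base register later),
  // then the 16-bit signed offset.
  BuildMI(MBB, I, dl, TII->get(Mips::LW), Mips::GP)
      .addFrameIndex(FI)
      .addImm(0);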
diff --git a/lib/Target/Mips/MipsExpandPseudo.cpp b/lib/Target/Mips/MipsExpandPseudo.cpp
index 4423f51..a622258 100644
--- a/lib/Target/Mips/MipsExpandPseudo.cpp
+++ b/lib/Target/Mips/MipsExpandPseudo.cpp
@@ -61,9 +61,9 @@ bool MipsExpandPseudo::runOnMachineBasicBlock(MachineBasicBlock& MBB) {
bool Changed = false;
for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end();) {
- const TargetInstrDesc& Tid = I->getDesc();
+ const MCInstrDesc& MCid = I->getDesc();
- switch(Tid.getOpcode()) {
+ switch(MCid.getOpcode()) {
default:
++I;
continue;
@@ -87,7 +87,7 @@ void MipsExpandPseudo::ExpandBuildPairF64(MachineBasicBlock& MBB,
MachineBasicBlock::iterator I) {
unsigned DstReg = I->getOperand(0).getReg();
unsigned LoReg = I->getOperand(1).getReg(), HiReg = I->getOperand(2).getReg();
- const TargetInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
+ const MCInstrDesc& Mtc1Tdd = TII->get(Mips::MTC1);
DebugLoc dl = I->getDebugLoc();
const unsigned* SubReg =
TM.getRegisterInfo()->getSubRegisters(DstReg);
@@ -103,7 +103,7 @@ void MipsExpandPseudo::ExpandExtractElementF64(MachineBasicBlock& MBB,
unsigned DstReg = I->getOperand(0).getReg();
unsigned SrcReg = I->getOperand(1).getReg();
unsigned N = I->getOperand(2).getImm();
- const TargetInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
+ const MCInstrDesc& Mfc1Tdd = TII->get(Mips::MFC1);
DebugLoc dl = I->getDebugLoc();
const unsigned* SubReg = TM.getRegisterInfo()->getSubRegisters(SrcReg);
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index d8a84ce..90aaeb6 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -94,6 +94,10 @@ private:
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
+
+ virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
+ char ConstraintCode,
+ std::vector<SDValue> &OutOps);
};
}
@@ -109,7 +113,7 @@ SDNode *MipsDAGToDAGISel::getGlobalBaseReg() {
/// ComplexPattern used on MipsInstrInfo
/// Used on Mips Load/Store instructions
bool MipsDAGToDAGISel::
-SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
+SelectAddr(SDValue Addr, SDValue &Base, SDValue &Offset) {
// if Address is FI, get the TargetFrameIndex.
if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
@@ -166,7 +170,8 @@ SelectAddr(SDValue Addr, SDValue &Offset, SDValue &Base) {
Addr.getOperand(0).getOpcode() == ISD::LOAD) &&
Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
SDValue LoVal = Addr.getOperand(1);
- if (dyn_cast<ConstantPoolSDNode>(LoVal.getOperand(0))) {
+ if (isa<ConstantPoolSDNode>(LoVal.getOperand(0)) ||
+ isa<GlobalAddressSDNode>(LoVal.getOperand(0))) {
Base = Addr.getOperand(0);
Offset = LoVal.getOperand(0);
return true;
@@ -195,7 +200,7 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
SDValue N1 = N->getOperand(1);
SDValue Offset0, Offset1, Base;
- if (!SelectAddr(N1, Offset0, Base) ||
+ if (!SelectAddr(N1, Base, Offset0) ||
N1.getValueType() != MVT::i32)
return NULL;
@@ -225,14 +230,14 @@ SDNode *MipsDAGToDAGISel::SelectLoadFp64(SDNode *N) {
// lwc $f0, X($3)
// lwc $f1, X+4($3)
SDNode *LD0 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
- MVT::Other, Offset0, Base, Chain);
+ MVT::Other, Base, Offset0, Chain);
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
SDValue I0 = CurDAG->getTargetInsertSubreg(Mips::sub_fpeven, dl,
MVT::f64, Undef, SDValue(LD0, 0));
SDNode *LD1 = CurDAG->getMachineNode(Mips::LWC1, dl, MVT::f32,
- MVT::Other, Offset1, Base, SDValue(LD0, 1));
+ MVT::Other, Base, Offset1, SDValue(LD0, 1));
SDValue I1 = CurDAG->getTargetInsertSubreg(Mips::sub_fpodd, dl,
MVT::f64, I0, SDValue(LD1, 0));
@@ -259,7 +264,7 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDValue Offset0, Offset1, Base;
- if (!SelectAddr(N2, Offset0, Base) ||
+ if (!SelectAddr(N2, Base, Offset0) ||
N1.getValueType() != MVT::f64 ||
N2.getValueType() != MVT::i32)
return NULL;
@@ -289,12 +294,12 @@ SDNode *MipsDAGToDAGISel::SelectStoreFp64(SDNode *N) {
// Generate:
// swc $f0, X($3)
// swc $f1, X+4($3)
- SDValue Ops0[] = { FPEven, Offset0, Base, Chain };
+ SDValue Ops0[] = { FPEven, Base, Offset0, Chain };
Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
MVT::Other, Ops0, 4), 0);
cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
- SDValue Ops1[] = { FPOdd, Offset1, Base, Chain };
+ SDValue Ops1[] = { FPOdd, Base, Offset1, Chain };
Chain = SDValue(CurDAG->getMachineNode(Mips::SWC1, dl,
MVT::Other, Ops1, 4), 0);
cast<MachineSDNode>(Chain.getNode())->setMemRefs(MemRefs0, MemRefs0 + 1);
@@ -462,6 +467,14 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
return ResNode;
}
+bool MipsDAGToDAGISel::
+SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+ assert(ConstraintCode == 'm' && "unexpected asm memory constraint");
+ OutOps.push_back(Op);
+ return false;
+}
+
/// createMipsISelDag - This pass converts a legalized DAG into a
/// MIPS-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createMipsISelDag(MipsTargetMachine &TM) {
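
Note: with SelectInlineAsmMemoryOperand implemented, operands constrained with
'm' in GCC-style inline assembly can now be selected on Mips. A hypothetical
user-level example (names are illustrative only):

  // The "m" operand is printed by printMemOperand as "offset(base)".
  int load_via_asm(int *p) {
    int v;
    asm("lw %0, %1" : "=r"(v) : "m"(*p));
    return v;
  }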
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index fd90731..b4f4b1b 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
+#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -59,6 +60,7 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const {
case MipsISD::BuildPairF64: return "MipsISD::BuildPairF64";
case MipsISD::ExtractElementF64: return "MipsISD::ExtractElementF64";
case MipsISD::WrapperPIC: return "MipsISD::WrapperPIC";
+ case MipsISD::DynAlloc: return "MipsISD::DynAlloc";
default: return NULL;
}
}
@@ -144,6 +146,8 @@ MipsTargetLowering(MipsTargetMachine &TM)
setOperationAction(ISD::FLOG2, MVT::f32, Expand);
setOperationAction(ISD::FLOG10, MVT::f32, Expand);
setOperationAction(ISD::FEXP, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
@@ -773,7 +777,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
}
BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Incr).addImm(0).addFrameIndex(fi);
+ .addReg(Incr).addFrameIndex(fi).addImm(0);
}
BB->addSuccessor(loopMBB);
@@ -784,7 +788,7 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
// sc tmp1, 0(ptr)
// beq tmp1, $0, loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Ptr);
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::OR), Dest).addReg(Mips::ZERO).addReg(Oldval);
if (Nand) {
// and tmp2, oldval, incr
@@ -797,10 +801,10 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
} else {
// lw tmp2, fi(sp) // load incr from stack
// or tmp1, $zero, tmp2
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);;
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
}
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp1).addReg(Mips::ZERO).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
@@ -909,7 +913,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
}
BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Incr2).addImm(0).addFrameIndex(fi);
+ .addReg(Incr2).addFrameIndex(fi).addImm(0);
}
BB->addSuccessor(loopMBB);
@@ -922,7 +926,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
// sc tmp9,0(addr)
// beq tmp9,$0,loopMBB
BB = loopMBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addImm(0).addReg(Addr);
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval).addReg(Addr).addImm(0);
if (Nand) {
// and tmp6, oldval, incr2
// nor tmp7, $0, tmp6
@@ -937,13 +941,13 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI,
} else {
// lw tmp6, fi(sp) // load incr2 from stack
// or tmp7, $zero, tmp6
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addImm(0).addFrameIndex(fi);;
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp6).addFrameIndex(fi).addImm(0);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Mips::ZERO).addReg(Tmp6);
}
BuildMI(BB, dl, TII->get(Mips::AND), Newval).addReg(Tmp7).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::AND), Tmp8).addReg(Oldval).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp9).addReg(Tmp8).addReg(Newval);
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addImm(0).addReg(Addr);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp9).addReg(Tmp9).addReg(Addr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp9).addReg(Mips::ZERO).addMBB(loopMBB);
BB->addSuccessor(loopMBB);
@@ -1026,14 +1030,14 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// hoist "or" instruction out of the block loop2MBB.
BuildMI(BB, dl, TII->get(Mips::SW))
- .addReg(Newval).addImm(0).addFrameIndex(fi);
+ .addReg(Newval).addFrameIndex(fi).addImm(0);
BB->addSuccessor(loop1MBB);
// loop1MBB:
// ll dest, 0(ptr)
// bne dest, oldval, exitMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Dest).addImm(0).addReg(Ptr);
+ BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BNE))
.addReg(Dest).addReg(Oldval).addMBB(exitMBB);
BB->addSuccessor(exitMBB);
@@ -1045,9 +1049,9 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
// sc tmp1, 0(ptr)
// beq tmp1, $0, loop1MBB
BB = loop2MBB;
- BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addImm(0).addFrameIndex(fi);;
+ BuildMI(BB, dl, TII->get(Mips::LW), Tmp2).addFrameIndex(fi).addImm(0);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp1).addReg(Mips::ZERO).addReg(Tmp2);
- BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addImm(0).addReg(Ptr);
+ BuildMI(BB, dl, TII->get(Mips::SC), Tmp1).addReg(Tmp1).addReg(Ptr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp1).addReg(Mips::ZERO).addMBB(loop1MBB);
BB->addSuccessor(loop1MBB);
@@ -1142,7 +1146,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
// and oldval4,oldval3,mask
// bne oldval4,oldval2,exitMBB
BB = loop1MBB;
- BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addImm(0).addReg(Addr);
+ BuildMI(BB, dl, TII->get(Mips::LL), Oldval3).addReg(Addr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::AND), Oldval4).addReg(Oldval3).addReg(Mask);
BuildMI(BB, dl, TII->get(Mips::BNE))
.addReg(Oldval4).addReg(Oldval2).addMBB(exitMBB);
@@ -1158,7 +1162,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
BuildMI(BB, dl, TII->get(Mips::AND), Tmp6).addReg(Oldval3).addReg(Mask2);
BuildMI(BB, dl, TII->get(Mips::OR), Tmp7).addReg(Tmp6).addReg(Newval2);
BuildMI(BB, dl, TII->get(Mips::SC), Tmp7)
- .addReg(Tmp7).addImm(0).addReg(Addr);
+ .addReg(Tmp7).addReg(Addr).addImm(0);
BuildMI(BB, dl, TII->get(Mips::BEQ))
.addReg(Tmp7).addReg(Mips::ZERO).addMBB(loop1MBB);
BB->addSuccessor(loop1MBB);
@@ -1189,9 +1193,10 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI,
SDValue MipsTargetLowering::
LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
{
- unsigned StackAlignment =
- getTargetMachine().getFrameLowering()->getStackAlignment();
- assert(StackAlignment >=
+ MachineFunction &MF = DAG.getMachineFunction();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+
+ assert(getTargetMachine().getFrameLowering()->getStackAlignment() >=
cast<ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue() &&
"Cannot lower if the alignment of the allocated space is larger than \
that of the stack.");
@@ -1211,24 +1216,14 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
// must be placed in the stack pointer register.
Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub,
SDValue());
- // Retrieve updated $sp. There is a glue input to prevent instructions that
- // clobber $sp from being inserted between copytoreg and copyfromreg.
- SDValue NewSP = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32,
- Chain.getValue(1));
-
- // The stack space reserved by alloca is located right above the argument
- // area. It is aligned on a boundary that is a multiple of StackAlignment.
- MachineFunction &MF = DAG.getMachineFunction();
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
- unsigned SPOffset = (MipsFI->getMaxCallFrameSize() + StackAlignment - 1) /
- StackAlignment * StackAlignment;
- SDValue AllocPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, NewSP,
- DAG.getConstant(SPOffset, MVT::i32));
// This node always has two return values: a new stack pointer
// value and a chain
- SDValue Ops[2] = { AllocPtr, NewSP.getValue(1) };
- return DAG.getMergeValues(Ops, 2, dl);
+ SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other);
+ SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());
+ SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) };
+
+ return DAG.getNode(MipsISD::DynAlloc, dl, VTLs, Ops, 3);
}
SDValue MipsTargetLowering::
@@ -1358,7 +1353,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
// General Dynamic TLS Model
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32,
- 0, MipsII::MO_TLSGD);
+ 0, MipsII::MO_TLSGD);
SDValue Tlsgd = DAG.getNode(MipsISD::TlsGd, dl, MVT::i32, TGA);
SDValue GP = DAG.getRegister(Mips::GP, MVT::i32);
SDValue Argument = DAG.getNode(ISD::ADD, dl, MVT::i32, GP, Tlsgd);
@@ -1370,36 +1365,36 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
Args.push_back(Entry);
std::pair<SDValue, SDValue> CallResult =
LowerCallTo(DAG.getEntryNode(),
- (const Type *) Type::getInt32Ty(*DAG.getContext()),
- false, false, false, false,
- 0, CallingConv::C, false, true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
+ (const Type *) Type::getInt32Ty(*DAG.getContext()),
+ false, false, false, false, 0, CallingConv::C, false, true,
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG,
+ dl);
return CallResult.first;
- } else {
- SDValue Offset;
- if (GV->isDeclaration()) {
- // Initial Exec TLS Model
- SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_GOTTPREL);
- Offset = DAG.getLoad(MVT::i32, dl,
- DAG.getEntryNode(), TGA, MachinePointerInfo(),
- false, false, 0);
- } else {
- // Local Exec TLS Model
- SDVTList VTs = DAG.getVTList(MVT::i32);
- SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_TPREL_HI);
- SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
- MipsII::MO_TPREL_LO);
- SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
- SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
- Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
- }
+ }
- SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
- return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
+ SDValue Offset;
+ if (GV->isDeclaration()) {
+ // Initial Exec TLS Model
+ SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_GOTTPREL);
+ Offset = DAG.getLoad(MVT::i32, dl,
+ DAG.getEntryNode(), TGA, MachinePointerInfo(),
+ false, false, 0);
+ } else {
+ // Local Exec TLS Model
+ SDVTList VTs = DAG.getVTList(MVT::i32);
+ SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_HI);
+ SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, MVT::i32, 0,
+ MipsII::MO_TPREL_LO);
+ SDValue Hi = DAG.getNode(MipsISD::TprelHi, dl, VTs, &TGAHi, 1);
+ SDValue Lo = DAG.getNode(MipsISD::TprelLo, dl, MVT::i32, TGALo);
+ Offset = DAG.getNode(ISD::ADD, dl, MVT::i32, Hi, Lo);
}
+
+ SDValue ThreadPointer = DAG.getNode(MipsISD::ThreadPointer, dl, PtrVT);
+ return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
SDValue MipsTargetLowering::
@@ -1550,8 +1545,8 @@ SDValue MipsTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG)
SDValue MipsTargetLowering::
LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
- unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
- assert((Depth == 0) &&
+ // Only a depth of zero (the current frame) is supported.
+ assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
"Frame address can only be determined for current frame.");
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -1770,6 +1765,10 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (IsPIC && !MipsFI->getGPFI())
MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true));
+ // Get the frame index of the stack frame object that points to the location
+ // of dynamically allocated area on the stack.
+ int DynAllocFI = MipsFI->getDynAllocFI();
+
// Update size of the maximum argument space.
// For O32, a minimum of four words (16 bytes) of argument space is
// allocated.
@@ -1781,14 +1780,17 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (MaxCallFrameSize < NextStackOffset) {
MipsFI->setMaxCallFrameSize(NextStackOffset);
- if (IsPIC) {
- // $gp restore slot must be aligned.
- unsigned StackAlignment = TFL->getStackAlignment();
- NextStackOffset = (NextStackOffset + StackAlignment - 1) /
- StackAlignment * StackAlignment;
- int GPFI = MipsFI->getGPFI();
- MFI->setObjectOffset(GPFI, NextStackOffset);
- }
+ // Set the offsets relative to $sp of the $gp restore slot and dynamically
+ // allocated stack space. These offsets must be aligned to a boundary
+ // determined by the stack alignment of the ABI.
+ unsigned StackAlignment = TFL->getStackAlignment();
+ NextStackOffset = (NextStackOffset + StackAlignment - 1) /
+ StackAlignment * StackAlignment;
+
+ if (IsPIC)
+ MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset);
+
+ MFI->setObjectOffset(DynAllocFI, NextStackOffset);
}
// With EABI is it possible to have 16 args on registers.
@@ -1912,7 +1914,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (LoadSymAddr) {
// Load callee address
Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee);
- SDValue LoadValue = DAG.getLoad(MVT::i32, dl, Chain, Callee,
+ SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee,
MachinePointerInfo::getGOT(),
false, false, 0);
@@ -1922,9 +1924,6 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo);
} else
Callee = LoadValue;
-
- // Use chain output from LoadValue
- Chain = LoadValue.getValue(1);
}
// copy to T9
@@ -1965,7 +1964,8 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
// Create the CALLSEQ_END node.
- Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NextStackOffset, true),
+ Chain = DAG.getCALLSEQ_END(Chain,
+ DAG.getIntPtrConstant(NextStackOffset, true),
DAG.getIntPtrConstant(0, true), InFlag);
InFlag = Chain.getValue(1);
@@ -2332,14 +2332,16 @@ MipsTargetLowering::getSingleConstraintMatchWeight(
return weight;
}
-/// getRegClassForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
-/// return a list of registers that can be used to satisfy the constraint.
-/// This should only be used for C_RegisterClass constraints.
+/// Given a register class constraint, like 'r', if it corresponds directly
+/// to an LLVM register class, return register 0 and the register class
+/// pointer.
std::pair<unsigned, const TargetRegisterClass*> MipsTargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
{
if (Constraint.size() == 1) {
switch (Constraint[0]) {
+ case 'd': // Address register. Same as 'r' unless generating MIPS16 code.
+ case 'y': // Same as 'r'. Exists for compatibility.
case 'r':
return std::make_pair(0U, Mips::CPURegsRegisterClass);
case 'f':
@@ -2348,55 +2350,12 @@ getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const
if (VT == MVT::f64)
if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
return std::make_pair(0U, Mips::AFGR64RegisterClass);
+ break;
}
}
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-/// Given a register class constraint, like 'r', if this corresponds directly
-/// to an LLVM register class, return a register of 0 and the register class
-/// pointer.
-std::vector<unsigned> MipsTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const
-{
- if (Constraint.size() != 1)
- return std::vector<unsigned>();
-
- switch (Constraint[0]) {
- default : break;
- case 'r':
- // GCC Mips Constraint Letters
- case 'd':
- case 'y':
- return make_vector<unsigned>(Mips::T0, Mips::T1, Mips::T2, Mips::T3,
- Mips::T4, Mips::T5, Mips::T6, Mips::T7, Mips::S0, Mips::S1,
- Mips::S2, Mips::S3, Mips::S4, Mips::S5, Mips::S6, Mips::S7,
- Mips::T8, 0);
-
- case 'f':
- if (VT == MVT::f32) {
- if (Subtarget->isSingleFloat())
- return make_vector<unsigned>(Mips::F2, Mips::F3, Mips::F4, Mips::F5,
- Mips::F6, Mips::F7, Mips::F8, Mips::F9, Mips::F10, Mips::F11,
- Mips::F20, Mips::F21, Mips::F22, Mips::F23, Mips::F24,
- Mips::F25, Mips::F26, Mips::F27, Mips::F28, Mips::F29,
- Mips::F30, Mips::F31, 0);
- else
- return make_vector<unsigned>(Mips::F2, Mips::F4, Mips::F6, Mips::F8,
- Mips::F10, Mips::F20, Mips::F22, Mips::F24, Mips::F26,
- Mips::F28, Mips::F30, 0);
- }
-
- if (VT == MVT::f64)
- if ((!Subtarget->isSingleFloat()) && (!Subtarget->isFP64bit()))
- return make_vector<unsigned>(Mips::D1, Mips::D2, Mips::D3, Mips::D4,
- Mips::D5, Mips::D10, Mips::D11, Mips::D12, Mips::D13,
- Mips::D14, Mips::D15, 0);
- }
- return std::vector<unsigned>();
-}
-
bool
MipsTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Mips target isn't yet aware of offsets.
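
Note (sketch): with the DynAlloc node, the code selected for an alloca takes
roughly the following shape; the concrete offset is filled in later by
LowerCall via setObjectOffset(DynAllocFI, NextStackOffset):

  // subu  $sp, $sp, $size      ; the Sub value copied into $sp above
  // addiu $dst, $sp, <offset>  ; DynAlloc pseudo selected from MipsISD::DynAlloc;
  //                            ; <offset> sits just above the outgoing-argument
  //                            ; area, rounded up to the stack alignment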
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index fbcedfd..bda26a2 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -79,7 +79,9 @@ namespace llvm {
BuildPairF64,
ExtractElementF64,
- WrapperPIC
+ WrapperPIC,
+
+ DynAlloc
};
}
@@ -167,10 +169,6 @@ namespace llvm {
getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const;
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
/// isFPImmLegal - Returns true if the target can instruction select the
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index be044fa..0a7a7f2 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -14,18 +14,27 @@
#include "MipsInstrInfo.h"
#include "MipsTargetMachine.h"
#include "MipsMachineFunction.h"
-#include "llvm/ADT/STLExtras.h"
+#include "InstPrinter/MipsInstPrinter.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
#include "MipsGenInstrInfo.inc"
using namespace llvm;
MipsInstrInfo::MipsInstrInfo(MipsTargetMachine &tm)
- : TargetInstrInfoImpl(MipsInsts, array_lengthof(MipsInsts)),
+ : MipsGenInstrInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+const MipsRegisterInfo &MipsInstrInfo::getRegisterInfo() const {
+ return RI;
+}
+
static bool isZeroImm(const MachineOperand &op) {
return op.isImm() && op.getImm() == 0;
}
@@ -40,10 +49,10 @@ isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
if ((MI->getOpcode() == Mips::LW) || (MI->getOpcode() == Mips::LWC1) ||
(MI->getOpcode() == Mips::LDC1)) {
- if ((MI->getOperand(2).isFI()) && // is a stack slot
- (MI->getOperand(1).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(1)))) {
- FrameIndex = MI->getOperand(2).getIndex();
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
}
@@ -61,10 +70,10 @@ isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const
{
if ((MI->getOpcode() == Mips::SW) || (MI->getOpcode() == Mips::SWC1) ||
(MI->getOpcode() == Mips::SDC1)) {
- if ((MI->getOperand(2).isFI()) && // is a stack slot
- (MI->getOperand(1).isImm()) && // the imm is zero
- (isZeroImm(MI->getOperand(1)))) {
- FrameIndex = MI->getOperand(2).getIndex();
+ if ((MI->getOperand(1).isFI()) && // is a stack slot
+ (MI->getOperand(2).isImm()) && // the imm is zero
+ (isZeroImm(MI->getOperand(2)))) {
+ FrameIndex = MI->getOperand(1).getIndex();
return MI->getOperand(0).getReg();
}
}
@@ -161,25 +170,25 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (RC == Mips::CPURegsRegisterClass)
BuildMI(MBB, I, DL, get(Mips::SW)).addReg(SrcReg, getKillRegState(isKill))
- .addImm(0).addFrameIndex(FI);
+ .addFrameIndex(FI).addImm(0);
else if (RC == Mips::FGR32RegisterClass)
BuildMI(MBB, I, DL, get(Mips::SWC1)).addReg(SrcReg, getKillRegState(isKill))
- .addImm(0).addFrameIndex(FI);
+ .addFrameIndex(FI).addImm(0);
else if (RC == Mips::AFGR64RegisterClass) {
if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
BuildMI(MBB, I, DL, get(Mips::SDC1))
.addReg(SrcReg, getKillRegState(isKill))
- .addImm(0).addFrameIndex(FI);
+ .addFrameIndex(FI).addImm(0);
} else {
const TargetRegisterInfo *TRI =
MBB.getParent()->getTarget().getRegisterInfo();
const unsigned *SubSet = TRI->getSubRegisters(SrcReg);
BuildMI(MBB, I, DL, get(Mips::SWC1))
.addReg(SubSet[0], getKillRegState(isKill))
- .addImm(0).addFrameIndex(FI);
+ .addFrameIndex(FI).addImm(0);
BuildMI(MBB, I, DL, get(Mips::SWC1))
.addReg(SubSet[1], getKillRegState(isKill))
- .addImm(4).addFrameIndex(FI);
+ .addFrameIndex(FI).addImm(4);
}
} else
llvm_unreachable("Register class not handled!");
@@ -195,25 +204,34 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
if (RC == Mips::CPURegsRegisterClass)
- BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addImm(0).addFrameIndex(FI);
+ BuildMI(MBB, I, DL, get(Mips::LW), DestReg).addFrameIndex(FI).addImm(0);
else if (RC == Mips::FGR32RegisterClass)
- BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addImm(0).addFrameIndex(FI);
+ BuildMI(MBB, I, DL, get(Mips::LWC1), DestReg).addFrameIndex(FI).addImm(0);
else if (RC == Mips::AFGR64RegisterClass) {
if (!TM.getSubtarget<MipsSubtarget>().isMips1()) {
- BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addImm(0).addFrameIndex(FI);
+ BuildMI(MBB, I, DL, get(Mips::LDC1), DestReg).addFrameIndex(FI).addImm(0);
} else {
const TargetRegisterInfo *TRI =
MBB.getParent()->getTarget().getRegisterInfo();
const unsigned *SubSet = TRI->getSubRegisters(DestReg);
BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[0])
- .addImm(0).addFrameIndex(FI);
+ .addFrameIndex(FI).addImm(0);
BuildMI(MBB, I, DL, get(Mips::LWC1), SubSet[1])
- .addImm(4).addFrameIndex(FI);
+ .addFrameIndex(FI).addImm(4);
}
} else
llvm_unreachable("Register class not handled!");
}
+MachineInstr*
+MipsInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
+ uint64_t Offset, const MDNode *MDPtr,
+ DebugLoc DL) const {
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(Mips::DBG_VALUE))
+ .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr);
+ return &*MIB;
+}
+
//===----------------------------------------------------------------------===//
// Branch Analysis
//===----------------------------------------------------------------------===//
@@ -341,8 +359,8 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB,
const SmallVectorImpl<MachineOperand>& Cond)
const {
unsigned Opc = Cond[0].getImm();
- const TargetInstrDesc &TID = get(Opc);
- MachineInstrBuilder MIB = BuildMI(&MBB, DL, TID);
+ const MCInstrDesc &MCID = get(Opc);
+ MachineInstrBuilder MIB = BuildMI(&MBB, DL, MCID);
for (unsigned i = 1; i < Cond.size(); ++i)
MIB.addReg(Cond[i].getReg());
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index abf6773..4421c48 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -19,103 +19,15 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "MipsRegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "MipsGenInstrInfo.inc"
+
namespace llvm {
namespace Mips {
-
- // Mips Branch Codes
- enum FPBranchCode {
- BRANCH_F,
- BRANCH_T,
- BRANCH_FL,
- BRANCH_TL,
- BRANCH_INVALID
- };
-
- // Mips Condition Codes
- enum CondCode {
- // To be used with float branch True
- FCOND_F,
- FCOND_UN,
- FCOND_OEQ,
- FCOND_UEQ,
- FCOND_OLT,
- FCOND_ULT,
- FCOND_OLE,
- FCOND_ULE,
- FCOND_SF,
- FCOND_NGLE,
- FCOND_SEQ,
- FCOND_NGL,
- FCOND_LT,
- FCOND_NGE,
- FCOND_LE,
- FCOND_NGT,
-
- // To be used with float branch False
- // This conditions have the same mnemonic as the
- // above ones, but are used with a branch False;
- FCOND_T,
- FCOND_OR,
- FCOND_UNE,
- FCOND_ONE,
- FCOND_UGE,
- FCOND_OGE,
- FCOND_UGT,
- FCOND_OGT,
- FCOND_ST,
- FCOND_GLE,
- FCOND_SNE,
- FCOND_GL,
- FCOND_NLT,
- FCOND_GE,
- FCOND_NLE,
- FCOND_GT
- };
-
/// GetOppositeBranchOpc - Return the inverse of the specified
/// opcode, e.g. turning BEQ to BNE.
unsigned GetOppositeBranchOpc(unsigned Opc);
-
- /// MipsCCToString - Map each FP condition code to its string
- inline static const char *MipsFCCToString(Mips::CondCode CC)
- {
- switch (CC) {
- default: llvm_unreachable("Unknown condition code");
- case FCOND_F:
- case FCOND_T: return "f";
- case FCOND_UN:
- case FCOND_OR: return "un";
- case FCOND_OEQ:
- case FCOND_UNE: return "eq";
- case FCOND_UEQ:
- case FCOND_ONE: return "ueq";
- case FCOND_OLT:
- case FCOND_UGE: return "olt";
- case FCOND_ULT:
- case FCOND_OGE: return "ult";
- case FCOND_OLE:
- case FCOND_UGT: return "ole";
- case FCOND_ULE:
- case FCOND_OGT: return "ule";
- case FCOND_SF:
- case FCOND_ST: return "sf";
- case FCOND_NGLE:
- case FCOND_GLE: return "ngle";
- case FCOND_SEQ:
- case FCOND_SNE: return "seq";
- case FCOND_NGL:
- case FCOND_GL: return "ngl";
- case FCOND_LT:
- case FCOND_NLT: return "lt";
- case FCOND_NGE:
- case FCOND_GE: return "nge";
- case FCOND_LE:
- case FCOND_NLE: return "le";
- case FCOND_NGT:
- case FCOND_GT: return "ngt";
- }
- }
}
/// MipsII - This namespace holds all of the target specific flags that
@@ -164,7 +76,7 @@ namespace MipsII {
};
}
-class MipsInstrInfo : public TargetInstrInfoImpl {
+class MipsInstrInfo : public MipsGenInstrInfo {
MipsTargetMachine &TM;
const MipsRegisterInfo RI;
public:
@@ -174,7 +86,7 @@ public:
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
- virtual const MipsRegisterInfo &getRegisterInfo() const { return RI; }
+ virtual const MipsRegisterInfo &getRegisterInfo() const;
/// isLoadFromStackSlot - If the specified machine instruction is a direct
/// load from a stack slot, return the virtual or physical register number of
@@ -224,6 +136,11 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
+ virtual MachineInstr* emitFrameIndexDebugValue(MachineFunction &MF,
+ int FrameIx, uint64_t Offset,
+ const MDNode *MDPtr,
+ DebugLoc DL) const;
+
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index 329a002..d1a0587 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -39,6 +39,9 @@ def SDT_MipsDivRem : SDTypeProfile<0, 2,
def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
+def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, iPTR>]>;
+
// Call
def MipsJmpLink : SDNode<"MipsISD::JmpLink",SDT_MipsJmpLink,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
@@ -99,6 +102,10 @@ def MipsDivRemU : SDNode<"MipsISD::DivRemU", SDT_MipsDivRem,
def MipsWrapperPIC : SDNode<"MipsISD::WrapperPIC", SDTIntUnaryOp>;
+// Pointer to dynamically allocated stack area.
+def MipsDynAlloc : SDNode<"MipsISD::DynAlloc", SDT_MipsDynAlloc,
+ [SDNPHasChain, SDNPInGlue]>;
+
//===----------------------------------------------------------------------===//
// Mips Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
@@ -127,7 +134,12 @@ def uimm16 : Operand<i32> {
// Address operand
def mem : Operand<i32> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops simm16, CPURegs);
+ let MIOperandInfo = (ops CPURegs, simm16);
+}
+
+def mem_ea : Operand<i32> {
+ let PrintMethod = "printMemOperandEA";
+ let MIOperandInfo = (ops CPURegs, simm16);
}
// Transformation Function - get the lower 16 bits.
@@ -344,7 +356,7 @@ class MoveToLOHI<bits<6> func, string instr_asm>:
!strconcat(instr_asm, "\t$src"), [], IIHiLo>;
class EffectiveAddress<string instr_asm> :
- FI<0x09, (outs CPURegs:$dst), (ins mem:$addr),
+ FI<0x09, (outs CPURegs:$dst), (ins mem_ea:$addr),
instr_asm, [(set CPURegs:$dst, addr:$addr)], IIAlu>;
// Count Leading Ones/Zeros in Word
@@ -412,7 +424,7 @@ def ATMACRO : MipsPseudo<(outs), (ins), ".set\tat", []>;
 // are used, we have the same behavior, but also get a bunch of warnings
// from the assembler.
def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>;
-def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc\n", []>;
+def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>;
let usesCustomInserter = 1 in {
def ATOMIC_LOAD_ADD_I8 : MipsPseudo<
@@ -673,7 +685,13 @@ let addr=0 in
 // instructions. The same does not happen for stack address copies, so an
// add op with mem ComplexPattern is used and the stack address copy
// can be matched. It's similar to Sparc LEA_ADDRi
-def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, ${addr:stackloc}">;
+def LEA_ADDiu : EffectiveAddress<"addiu\t$dst, $addr">;
+
+// DynAlloc node points to dynamically allocated stack space.
+// $sp is added to the list of implicitly used registers to prevent dead code
+// elimination from removing instructions that modify $sp.
+let Uses = [SP] in
+def DynAlloc : EffectiveAddress<"addiu\t$dst, $addr">;
// MADD*/MSUB*
def MADD : MArithR<0, "madd", MipsMAdd, 1>;
@@ -852,6 +870,9 @@ def : Pat<(setge CPURegs:$lhs, immSExt16:$rhs),
def : Pat<(setuge CPURegs:$lhs, immSExt16:$rhs),
(XORi (SLTiu CPURegs:$lhs, immSExt16:$rhs), 1)>;
+// select MipsDynAlloc
+def : Pat<(MipsDynAlloc addr:$f), (DynAlloc addr:$f)>;
+
//===----------------------------------------------------------------------===//
// Floating Point Support
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp
new file mode 100644
index 0000000..f5cc3aa
--- /dev/null
+++ b/lib/Target/Mips/MipsMCInstLower.cpp
@@ -0,0 +1,118 @@
+//===-- MipsMCInstLower.cpp - Convert Mips MachineInstr to MCInst ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower Mips MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MipsMCInstLower.h"
+#include "MipsAsmPrinter.h"
+#include "MipsInstrInfo.h"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Target/Mangler.h"
+using namespace llvm;
+
+MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf,
+ MipsAsmPrinter &asmprinter)
+ : Ctx(mf.getContext()), Mang(mang), AsmPrinter(asmprinter) {}
+
+MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy) const {
+ MipsMCSymbolRefExpr::VariantKind Kind;
+ const MCSymbol *Symbol;
+ int Offset = 0;
+
+ switch(MO.getTargetFlags()) {
+ default: assert(0 && "Invalid target flag!");
+ case MipsII::MO_NO_FLAG: Kind = MipsMCSymbolRefExpr::VK_Mips_None; break;
+ case MipsII::MO_GPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break;
+ case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break;
+ case MipsII::MO_GOT: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break;
+ case MipsII::MO_ABS_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break;
+ case MipsII::MO_ABS_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break;
+ case MipsII::MO_TLSGD: Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break;
+ case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break;
+ case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break;
+ case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break;
+ }
+
+ switch (MOTy) {
+ case MachineOperand::MO_MachineBasicBlock:
+ Symbol = MO.getMBB()->getSymbol();
+ break;
+
+ case MachineOperand::MO_GlobalAddress:
+ Symbol = Mang->getSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_BlockAddress:
+ Symbol = AsmPrinter.GetBlockAddressSymbol(MO.getBlockAddress());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ Symbol = AsmPrinter.GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ Symbol = AsmPrinter.GetJTISymbol(MO.getIndex());
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ Symbol = AsmPrinter.GetCPISymbol(MO.getIndex());
+ if (MO.getOffset())
+ Offset = MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+
+ return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset,
+ Ctx));
+}
+
+void MipsMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI->getOpcode());
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ MCOperand MCOp;
+ MachineOperandType MOTy = MO.getType();
+
+ switch (MOTy) {
+ default:
+ MI->dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit()) continue;
+ MCOp = MCOperand::CreateReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::CreateImm(MO.getImm());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_JumpTableIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_BlockAddress:
+ MCOp = LowerSymbolOperand(MO, MOTy);
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h
new file mode 100644
index 0000000..ec5201b
--- /dev/null
+++ b/lib/Target/Mips/MipsMCInstLower.h
@@ -0,0 +1,43 @@
+//===-- MipsMCInstLower.h - Lower MachineInstr to MCInst -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCINSTLOWER_H
+#define MIPSMCINSTLOWER_H
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+ class MCAsmInfo;
+ class MCContext;
+ class MCInst;
+ class MCOperand;
+ class MCSymbol;
+ class MachineInstr;
+ class MachineFunction;
+ class Mangler;
+ class MipsAsmPrinter;
+
+/// MipsMCInstLower - This class is used to lower a MachineInstr into an
+/// MCInst.
+class LLVM_LIBRARY_VISIBILITY MipsMCInstLower {
+ typedef MachineOperand::MachineOperandType MachineOperandType;
+ MCContext &Ctx;
+ Mangler *Mang;
+ MipsAsmPrinter &AsmPrinter;
+public:
+ MipsMCInstLower(Mangler *mang, const MachineFunction &MF,
+ MipsAsmPrinter &asmprinter);
+ void Lower(const MachineInstr *MI, MCInst &OutMI) const;
+private:
+ MCOperand LowerSymbolOperand(const MachineOperand &MO,
+ MachineOperandType MOTy) const;
+};
+}
+
+#endif
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.cpp b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
new file mode 100644
index 0000000..9a2bdae
--- /dev/null
+++ b/lib/Target/Mips/MipsMCSymbolRefExpr.cpp
@@ -0,0 +1,63 @@
+//===-- MipsMCSymbolRefExpr.cpp - Mips specific MC expression classes -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mipsmcsymbolrefexpr"
+#include "MipsMCSymbolRefExpr.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbol.h"
+using namespace llvm;
+
+const MipsMCSymbolRefExpr*
+MipsMCSymbolRefExpr::Create(VariantKind Kind, const MCSymbol *Symbol,
+ int Offset, MCContext &Ctx) {
+ return new (Ctx) MipsMCSymbolRefExpr(Kind, Symbol, Offset);
+}
+
+void MipsMCSymbolRefExpr::PrintImpl(raw_ostream &OS) const {
+ switch (Kind) {
+ default: assert(0 && "Invalid kind!");
+ case VK_Mips_None: break;
+ case VK_Mips_GPREL: OS << "%gp_rel("; break;
+ case VK_Mips_GOT_CALL: OS << "%call16("; break;
+ case VK_Mips_GOT: OS << "%got("; break;
+ case VK_Mips_ABS_HI: OS << "%hi("; break;
+ case VK_Mips_ABS_LO: OS << "%lo("; break;
+ case VK_Mips_TLSGD: OS << "%tlsgd("; break;
+ case VK_Mips_GOTTPREL: OS << "%gottprel("; break;
+ case VK_Mips_TPREL_HI: OS << "%tprel_hi("; break;
+ case VK_Mips_TPREL_LO: OS << "%tprel_lo("; break;
+ }
+
+ OS << *Symbol;
+
+ if (Offset) {
+ if (Offset > 0)
+ OS << '+';
+ OS << Offset;
+ }
+
+ if (Kind != VK_Mips_None)
+ OS << ')';
+}
+
+bool
+MipsMCSymbolRefExpr::EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const {
+ return false;
+}
+
+void MipsMCSymbolRefExpr::AddValueSymbols(MCAssembler *Asm) const {
+ Asm->getOrCreateSymbolData(*Symbol);
+}
+
+const MCSection *MipsMCSymbolRefExpr::FindAssociatedSection() const {
+ return Symbol->isDefined() ? &Symbol->getSection() : NULL;
+}
+
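Note: PrintImpl wraps the symbol in the relocation operator and keeps any
offset inside the parentheses, e.g. "%hi(foo+4)". A self-contained model of
the formatting for the VK_Mips_ABS_HI case (symbol names hypothetical):

  #include <iostream>
  #include <string>

  static std::string printHi(const std::string &Sym, int Offset) {
    std::string S = "%hi(" + Sym;
    if (Offset > 0)
      S += '+';                       // negative offsets carry their own '-'
    if (Offset)
      S += std::to_string(Offset);
    return S + ")";
  }

  int main() {
    std::cout << printHi("foo", 4) << '\n';    // %hi(foo+4)
    std::cout << printHi("bar", -8) << '\n';   // %hi(bar-8)
    return 0;
  }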
diff --git a/lib/Target/Mips/MipsMCSymbolRefExpr.h b/lib/Target/Mips/MipsMCSymbolRefExpr.h
new file mode 100644
index 0000000..3e69596
--- /dev/null
+++ b/lib/Target/Mips/MipsMCSymbolRefExpr.h
@@ -0,0 +1,62 @@
+//===-- MipsMCSymbolRefExpr.h - Mips specific MCSymbolRefExpr class -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MIPSMCSYMBOLREFEXPR_H
+#define MIPSMCSYMBOLREFEXPR_H
+#include "llvm/MC/MCExpr.h"
+
+namespace llvm {
+
+class MipsMCSymbolRefExpr : public MCTargetExpr {
+public:
+ enum VariantKind {
+ VK_Mips_None,
+ VK_Mips_GPREL,
+ VK_Mips_GOT_CALL,
+ VK_Mips_GOT,
+ VK_Mips_ABS_HI,
+ VK_Mips_ABS_LO,
+ VK_Mips_TLSGD,
+ VK_Mips_GOTTPREL,
+ VK_Mips_TPREL_HI,
+ VK_Mips_TPREL_LO
+ };
+
+private:
+ const VariantKind Kind;
+ const MCSymbol *Symbol;
+ int Offset;
+
+ explicit MipsMCSymbolRefExpr(VariantKind _Kind, const MCSymbol *_Symbol,
+ int _Offset)
+ : Kind(_Kind), Symbol(_Symbol), Offset(_Offset) {}
+
+public:
+ static const MipsMCSymbolRefExpr *Create(VariantKind Kind,
+ const MCSymbol *Symbol, int Offset,
+ MCContext &Ctx);
+
+ void PrintImpl(raw_ostream &OS) const;
+ bool EvaluateAsRelocatableImpl(MCValue &Res,
+ const MCAsmLayout *Layout) const;
+ void AddValueSymbols(MCAssembler *) const;
+ const MCSection *FindAssociatedSection() const;
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+ static bool classof(const MipsMCSymbolRefExpr *) { return true; }
+
+ int getOffset() const { return Offset; }
+ void setOffset(int O) { Offset = O; }
+};
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index df40e6c..dbb7a67 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -27,6 +27,7 @@ namespace llvm {
class MipsFunctionInfo : public MachineFunctionInfo {
private:
+ MachineFunction& MF;
/// SRetReturnReg - Some subtargets require that sret lowering includes
/// returning the value of the returned struct in a register. This field
/// holds the virtual register into which the sret argument is passed.
@@ -47,6 +48,7 @@ private:
// LowerCall except for the frame object for restoring $gp.
std::pair<int, int> InArgFIRange, OutArgFIRange;
int GPFI; // Index of the frame object for restoring $gp
+ mutable int DynAllocFI; // Frame index of dynamically allocated stack area.
unsigned MaxCallFrameSize;
/// AtomicFrameIndex - To implement atomic.swap and atomic.cmp.swap
@@ -55,10 +57,10 @@ private:
int AtomicFrameIndex;
public:
MipsFunctionInfo(MachineFunction& MF)
- : SRetReturnReg(0), GlobalBaseReg(0),
+ : MF(MF), SRetReturnReg(0), GlobalBaseReg(0),
VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)),
- OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), MaxCallFrameSize(0),
- AtomicFrameIndex(-1)
+ OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0),
+ MaxCallFrameSize(0), AtomicFrameIndex(-1)
{}
bool isInArgFI(int FI) const {
@@ -81,6 +83,16 @@ public:
bool needGPSaveRestore() const { return getGPFI(); }
bool isGPFI(int FI) const { return GPFI && GPFI == FI; }
+ // The first call to this function creates a frame object for the
+ // dynamically allocated stack area.
+ int getDynAllocFI() const {
+ if (!DynAllocFI)
+ DynAllocFI = MF.getFrameInfo()->CreateFixedObject(4, 0, true);
+
+ return DynAllocFI;
+ }
+ bool isDynAllocFI(int FI) const { return DynAllocFI && DynAllocFI == FI; }
+
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
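
Note: getDynAllocFI() creates its fixed frame object lazily, so functions that
never allocate dynamic stack space pay nothing. The two call sites added by
this patch use it as follows (sketch):

  // In LowerDYNAMIC_STACKALLOC: the pointer handed back to the program.
  SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy());

  // In LowerCall: keep the object just above the outgoing-argument area.
  MFI->setObjectOffset(MipsFI->getDynAllocFI(), NextStackOffset);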
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index b0984af..24390da 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -35,13 +35,16 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/DebugInfo.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "MipsGenRegisterInfo.inc"
using namespace llvm;
MipsRegisterInfo::MipsRegisterInfo(const MipsSubtarget &ST,
const TargetInstrInfo &tii)
- : MipsGenRegisterInfo(Mips::ADJCALLSTACKDOWN, Mips::ADJCALLSTACKUP),
- Subtarget(ST), TII(tii) {}
+ : MipsGenRegisterInfo(), Subtarget(ST), TII(tii) {}
/// getRegisterNumbering - Given the enum value for some register, e.g.
/// Mips::RA, return the number that it corresponds to (e.g. 31).
@@ -176,28 +179,6 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
<< "spOffset : " << spOffset << "\n"
<< "stackSize : " << stackSize << "\n");
- int Offset;
-
- // Calculate final offset.
- // - There is no need to change the offset if the frame object is an outgoing
- // argument or a $gp restore location,
- // - If the frame object is any of the following, its offset must be adjusted
- // by adding the size of the stack:
- // incoming argument, callee-saved register location or local variable.
- if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex))
- Offset = spOffset;
- else
- Offset = spOffset + stackSize;
-
- Offset += MI.getOperand(i-1).getImm();
-
- DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
-
- unsigned NewReg = 0;
- int NewImm = 0;
- MachineBasicBlock &MBB = *MI.getParent();
- bool ATUsed;
- unsigned FrameReg;
const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
int MinCSFI = 0;
int MaxCSFI = -1;
@@ -213,42 +194,54 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
// 3. Locations for callee-saved registers.
// Everything else is referenced relative to whatever register
// getFrameRegister() returns.
- if (MipsFI->isOutArgFI(FrameIndex) ||
+ unsigned FrameReg;
+
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isDynAllocFI(FrameIndex) ||
(FrameIndex >= MinCSFI && FrameIndex <= MaxCSFI))
FrameReg = Mips::SP;
else
FrameReg = getFrameRegister(MF);
- // Offset fits in the 16-bit field
- if (Offset < 0x8000 && Offset >= -0x8000) {
- NewReg = FrameReg;
- NewImm = Offset;
- ATUsed = false;
- }
- else {
- const TargetInstrInfo *TII = MF.getTarget().getInstrInfo();
+ // Calculate final offset.
+ // - There is no need to change the offset if the frame object is one of the
+ // following: an outgoing argument, a pointer to dynamically allocated
+ // stack space, or a $gp restore location,
+ // - If the frame object is any of the following, its offset must be adjusted
+ // by adding the size of the stack:
+ // incoming argument, callee-saved register location or local variable.
+ int Offset;
+
+ if (MipsFI->isOutArgFI(FrameIndex) || MipsFI->isGPFI(FrameIndex) ||
+ MipsFI->isDynAllocFI(FrameIndex))
+ Offset = spOffset;
+ else
+ Offset = spOffset + stackSize;
+
+ Offset += MI.getOperand(i+1).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ // If MI is not a debug value, make sure Offset fits in the 16-bit immediate
+ // field.
+ if (!MI.isDebugValue() && (Offset >= 0x8000 || Offset < -0x8000)) {
+ MachineBasicBlock &MBB = *MI.getParent();
DebugLoc DL = II->getDebugLoc();
- int ImmLo = (short)(Offset & 0xffff);
int ImmHi = (((unsigned)Offset & 0xffff0000) >> 16) +
((Offset & 0x8000) != 0);
 // FIXME: change this when mips goes MC.
- BuildMI(MBB, II, DL, TII->get(Mips::NOAT));
- BuildMI(MBB, II, DL, TII->get(Mips::LUi), Mips::AT).addImm(ImmHi);
- BuildMI(MBB, II, DL, TII->get(Mips::ADDu), Mips::AT).addReg(FrameReg)
- .addReg(Mips::AT);
- NewReg = Mips::AT;
- NewImm = ImmLo;
-
- ATUsed = true;
- }
+ BuildMI(MBB, II, DL, TII.get(Mips::NOAT));
+ BuildMI(MBB, II, DL, TII.get(Mips::LUi), Mips::AT).addImm(ImmHi);
+ BuildMI(MBB, II, DL, TII.get(Mips::ADDu), Mips::AT).addReg(FrameReg)
+ .addReg(Mips::AT);
+ FrameReg = Mips::AT;
+ Offset = (short)(Offset & 0xffff);
- // FIXME: change this when mips goes MC".
- if (ATUsed)
BuildMI(MBB, ++II, MI.getDebugLoc(), TII.get(Mips::ATMACRO));
+ }
- MI.getOperand(i).ChangeToRegister(NewReg, false);
- MI.getOperand(i-1).ChangeToImmediate(NewImm);
+ MI.getOperand(i).ChangeToRegister(FrameReg, false);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
}
unsigned MipsRegisterInfo::
@@ -283,5 +276,3 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const {
int MipsRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
return MipsGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
}
-
-#include "MipsGenRegisterInfo.inc"
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 76b0035..646369b 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -16,7 +16,9 @@
#include "Mips.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "MipsGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "MipsGenRegisterInfo.inc"
namespace llvm {
class MipsSubtarget;
diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td
index e97d450..f0db518 100644
--- a/lib/Target/Mips/MipsRegisterInfo.td
+++ b/lib/Target/Mips/MipsRegisterInfo.td
@@ -157,15 +157,15 @@ let Namespace = "Mips" in {
// Register Classes
//===----------------------------------------------------------------------===//
-def CPURegs : RegisterClass<"Mips", [i32], 32,
+def CPURegs : RegisterClass<"Mips", [i32], 32, (add
// Return Values and Arguments
- [V0, V1, A0, A1, A2, A3,
+ V0, V1, A0, A1, A2, A3,
// Not preserved across procedure calls
T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
// Callee save
S0, S1, S2, S3, S4, S5, S6, S7,
// Reserved
- ZERO, AT, K0, K1, GP, SP, FP, RA]>;
+ ZERO, AT, K0, K1, GP, SP, FP, RA)>;
// 64bit fp:
// * FGR64 - 32 64-bit registers
@@ -174,33 +174,25 @@ def CPURegs : RegisterClass<"Mips", [i32], 32,
// 32bit fp:
// * FGR32 - 16 32-bit even registers
// * FGR32 - 32 32-bit registers (single float only mode)
-def FGR32 : RegisterClass<"Mips", [f32], 32,
- // Return Values and Arguments
- [F0, F1, F2, F3, F12, F13, F14, F15,
- // Not preserved across procedure calls
- F4, F5, F6, F7, F8, F9, F10, F11, F16, F17, F18, F19,
- // Callee save
- F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30,
- // Reserved
- F31]>;
+def FGR32 : RegisterClass<"Mips", [f32], 32, (sequence "F%u", 0, 31)>;
-def AFGR64 : RegisterClass<"Mips", [f64], 64,
+def AFGR64 : RegisterClass<"Mips", [f64], 64, (add
// Return Values and Arguments
- [D0, D1, D6, D7,
+ D0, D1, D6, D7,
// Not preserved across procedure calls
D2, D3, D4, D5, D8, D9,
// Callee save
D10, D11, D12, D13, D14,
// Reserved
- D15]> {
+ D15)> {
let SubRegClasses = [(FGR32 sub_fpeven, sub_fpodd)];
}
// Condition Register for floating point operations
-def CCR : RegisterClass<"Mips", [i32], 32, [FCR31]>;
+def CCR : RegisterClass<"Mips", [i32], 32, (add FCR31)>;
// Hi/Lo Registers
-def HILO : RegisterClass<"Mips", [i32], 32, [HI, LO]>;
+def HILO : RegisterClass<"Mips", [i32], 32, (add HI, LO)>;
// Hardware registers
-def HWRegs : RegisterClass<"Mips", [i32], 32, [HWR29]>;
+def HWRegs : RegisterClass<"Mips", [i32], 32, (add HWR29)>;
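
The (sequence "F%u", 0, 31) dag above is shorthand for the thirty-two-entry list the old definition spelled out: TableGen applies the printf-style pattern to every index in the inclusive range. A one-loop illustration of the expansion:

#include <cstdio>

// Expand (sequence "F%u", 0, 31) by hand: apply the pattern to each index
// from 0 through 31 inclusive, yielding F0, F1, ..., F31.
int main() {
  for (unsigned i = 0; i <= 31; ++i)
    std::printf("F%u%s", i, i == 31 ? "\n" : ", ");
  return 0;
}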
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 70747f5d..6eee333 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -7,27 +7,38 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the Mips specific subclass of TargetSubtarget.
+// This file implements the Mips specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "MipsSubtarget.h"
#include "Mips.h"
-#include "MipsGenSubtarget.inc"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "MipsGenSubtargetInfo.inc"
+
using namespace llvm;
-MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &FS,
- bool little) :
+MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little) :
+ MipsGenSubtargetInfo(TT, CPU, FS),
MipsArchVersion(Mips1), MipsABI(O32), IsLittle(little), IsSingleFloat(false),
IsFP64bit(false), IsGP64bit(false), HasVFPU(false), IsLinux(true),
HasSEInReg(false), HasCondMov(false), HasMulDivAdd(false), HasMinMax(false),
HasSwap(false), HasBitCount(false)
{
- std::string CPU = "mips1";
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "mips1";
MipsArchVersion = Mips1;
// Parse features string.
- ParseSubtargetFeatures(FS, CPU);
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
// Is the target system Linux ?
if (TT.find("linux") == std::string::npos)
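
Note the ordering the new constructor establishes: the CPU name is defaulted to "mips1" before ParseSubtargetFeatures and getInstrItineraryForCPU run, since both key off the resolved name. A minimal sketch of that pattern, where parseFeatures and itineraryFor are hypothetical stand-ins for the tblgen-generated hooks:

#include <iostream>
#include <string>

// Hypothetical stand-ins for the tblgen-generated subtarget hooks.
static void parseFeatures(const std::string &CPU, const std::string &FS) {
  std::cout << "features for " << CPU
            << (FS.empty() ? std::string() : " + " + FS) << "\n";
}
static void itineraryFor(const std::string &CPU) {
  std::cout << "itinerary for " << CPU << "\n";
}

static void initSubtarget(std::string CPU, const std::string &FS) {
  if (CPU.empty())
    CPU = "mips1";     // fall back to the baseline CPU, as above
  parseFeatures(CPU, FS);
  itineraryFor(CPU);   // both hooks rely on the resolved CPU name
}

int main() {
  initSubtarget("", "");        // defaults to mips1
  initSubtarget("mips32", "");  // explicit CPU wins
  return 0;
}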
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 096bbed..533d4af 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -7,21 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the Mips specific subclass of TargetSubtarget.
+// This file declares the Mips specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef MIPSSUBTARGET_H
#define MIPSSUBTARGET_H
-#include "llvm/Target/TargetSubtarget.h"
-#include "llvm/Target/TargetMachine.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "MipsGenSubtargetInfo.inc"
+
namespace llvm {
+class StringRef;
-class MipsSubtarget : public TargetSubtarget {
+class MipsSubtarget : public MipsGenSubtargetInfo {
public:
enum MipsABIEnum {
@@ -92,12 +95,12 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
- MipsSubtarget(const std::string &TT, const std::string &FS, bool little);
+ MipsSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool little);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool isMips1() const { return MipsArchVersion == Mips1; }
bool isMips32() const { return MipsArchVersion >= Mips32; }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index cfbb92c..20b9f4e 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "Mips.h"
-#include "MipsMCAsmInfo.h"
#include "MipsTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetRegistry.h"
@@ -22,8 +21,6 @@ extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
RegisterTargetMachine<MipsTargetMachine> X(TheMipsTarget);
RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
- RegisterAsmInfo<MipsMCAsmInfo> A(TheMipsTarget);
- RegisterAsmInfo<MipsMCAsmInfo> B(TheMipselTarget);
}
// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
@@ -34,10 +31,11 @@ extern "C" void LLVMInitializeMipsTarget() {
// an easier handling.
// Using CodeModel::Large enables different CALL behavior.
MipsTargetMachine::
-MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
+MipsTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU, const std::string &FS,
bool isLittle=false):
- LLVMTargetMachine(T, TT),
- Subtarget(TT, FS, isLittle),
+ LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, isLittle),
DataLayout(isLittle ?
std::string("e-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32") :
std::string("E-p:32:32:32-i8:8:32-i16:16:32-i64:64:64-n32")),
@@ -55,8 +53,8 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
MipselTargetMachine::
MipselTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS) :
- MipsTargetMachine(T, TT, FS, true) {}
+ const std::string &CPU, const std::string &FS) :
+ MipsTargetMachine(T, TT, CPU, FS, true) {}
// Install an instruction selector pass using
// the ISelDag to gen Mips code.
diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h
index 102dd85..a021af2 100644
--- a/lib/Target/Mips/MipsTargetMachine.h
+++ b/lib/Target/Mips/MipsTargetMachine.h
@@ -35,7 +35,8 @@ namespace llvm {
MipsSelectionDAGInfo TSInfo;
public:
MipsTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS, bool isLittle);
+ const std::string &CPU, const std::string &FS,
+ bool isLittle);
virtual const MipsInstrInfo *getInstrInfo() const
{ return &InstrInfo; }
@@ -73,7 +74,7 @@ namespace llvm {
class MipselTargetMachine : public MipsTargetMachine {
public:
MipselTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
};
} // End llvm namespace
diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt
index 331266d..ce08916 100644
--- a/lib/Target/PTX/CMakeLists.txt
+++ b/lib/Target/PTX/CMakeLists.txt
@@ -1,13 +1,11 @@
set(LLVM_TARGET_DEFINITIONS PTX.td)
tablegen(PTXGenAsmWriter.inc -gen-asm-writer)
+tablegen(PTXGenCallingConv.inc -gen-callingconv)
tablegen(PTXGenDAGISel.inc -gen-dag-isel)
-tablegen(PTXGenInstrInfo.inc -gen-instr-desc)
-tablegen(PTXGenInstrNames.inc -gen-instr-enums)
-tablegen(PTXGenRegisterInfo.inc -gen-register-desc)
-tablegen(PTXGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(PTXGenRegisterNames.inc -gen-register-enums)
-tablegen(PTXGenSubtarget.inc -gen-subtarget)
+tablegen(PTXGenInstrInfo.inc -gen-instr-info)
+tablegen(PTXGenRegisterInfo.inc -gen-register-info)
+tablegen(PTXGenSubtargetInfo.inc -gen-subtarget)
add_llvm_target(PTXCodeGen
PTXAsmPrinter.cpp
@@ -15,7 +13,6 @@ add_llvm_target(PTXCodeGen
PTXISelLowering.cpp
PTXInstrInfo.cpp
PTXFrameLowering.cpp
- PTXMCAsmInfo.cpp
PTXMCAsmStreamer.cpp
PTXMFInfoExtract.cpp
PTXRegisterInfo.cpp
@@ -24,3 +21,4 @@ add_llvm_target(PTXCodeGen
)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..df0f63f
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMPTXDesc
+ PTXMCTargetDesc.cpp
+ PTXMCAsmInfo.cpp
+ )
diff --git a/lib/Target/PTX/MCTargetDesc/Makefile b/lib/Target/PTX/MCTargetDesc/Makefile
new file mode 100644
index 0000000..35f5a7b
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PTX/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPTXDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PTX/PTXMCAsmInfo.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
index b670abd..efefead 100644
--- a/lib/Target/PTX/PTXMCAsmInfo.cpp
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.cpp
@@ -12,10 +12,15 @@
//===----------------------------------------------------------------------===//
#include "PTXMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
using namespace llvm;
PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::ptx64)
+ PointerSize = 8;
+
CommentString = "//";
PrivateGlobalPrefix = "$L__";
diff --git a/lib/Target/PTX/PTXMCAsmInfo.h b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
index 03f5d66..03f5d66 100644
--- a/lib/Target/PTX/PTXMCAsmInfo.h
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCAsmInfo.h
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
new file mode 100644
index 0000000..23f70bd
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.cpp
@@ -0,0 +1,60 @@
+//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PTXMCTargetDesc.h"
+#include "PTXMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PTXGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PTXGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPTXMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPTXMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializePTXMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
+}
+
+static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPTXMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializePTXMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
+ createPTXMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
+ createPTXMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializePTXMCAsmInfo() {
+ RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
+ RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
+}
diff --git a/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
new file mode 100644
index 0000000..1003b0b
--- /dev/null
+++ b/lib/Target/PTX/MCTargetDesc/PTXMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PTX specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PTXMCTARGETDESC_H
+#define PTXMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePTX32Target;
+extern Target ThePTX64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PTX registers.
+#define GET_REGINFO_ENUM
+#include "PTXGenRegisterInfo.inc"
+
+// Defines symbolic names for the PTX instructions.
+#define GET_INSTRINFO_ENUM
+#include "PTXGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PTXGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/PTX/Makefile b/lib/Target/PTX/Makefile
index 2c40d69..93dd38a 100644
--- a/lib/Target/PTX/Makefile
+++ b/lib/Target/PTX/Makefile
@@ -13,14 +13,12 @@ TARGET = PTX
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = PTXGenAsmWriter.inc \
+ PTXGenCallingConv.inc \
PTXGenDAGISel.inc \
PTXGenInstrInfo.inc \
- PTXGenInstrNames.inc \
PTXGenRegisterInfo.inc \
- PTXGenRegisterInfo.h.inc \
- PTXGenRegisterNames.inc \
- PTXGenSubtarget.inc
+ PTXGenSubtargetInfo.inc
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PTX/PTX.h b/lib/Target/PTX/PTX.h
index ec2be92..28cab24 100644
--- a/lib/Target/PTX/PTX.h
+++ b/lib/Target/PTX/PTX.h
@@ -15,6 +15,7 @@
#ifndef PTX_H
#define PTX_H
+#include "MCTargetDesc/PTXMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -42,14 +43,6 @@ namespace llvm {
FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
- extern Target ThePTX32Target;
- extern Target ThePTX64Target;
} // namespace llvm;
-// Defines symbolic names for PTX registers.
-#include "PTXGenRegisterNames.inc"
-
-// Defines symbolic names for the PTX instructions.
-#include "PTXGenInstrNames.inc"
-
#endif // PTX_H
diff --git a/lib/Target/PTX/PTX.td b/lib/Target/PTX/PTX.td
index 231866a..f6fbe9f 100644
--- a/lib/Target/PTX/PTX.td
+++ b/lib/Target/PTX/PTX.td
@@ -16,7 +16,7 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// Subtarget Features.
+// Subtarget Features
//===----------------------------------------------------------------------===//
//===- Architectural Features ---------------------------------------------===//
@@ -30,34 +30,54 @@ def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
//===- PTX Version --------------------------------------------------------===//
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
- "Use PTX Language Version 2.0",
- []>;
+ "Use PTX Language Version 2.0">;
def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
- "Use PTX Language Version 2.1",
- [FeaturePTX20]>;
+ "Use PTX Language Version 2.1">;
def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
- "Use PTX Language Version 2.2",
- [FeaturePTX21]>;
+ "Use PTX Language Version 2.2">;
def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
- "Use PTX Language Version 2.3",
- [FeaturePTX22]>;
-
-//===- PTX Shader Model ---------------------------------------------------===//
-
-def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
- "Enable Shader Model 1.0 compliance">;
-def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
- "Enable Shader Model 1.3 compliance",
- [FeatureSM10, FeatureDouble]>;
-def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
- "Enable Shader Model 2.0 compliance",
- [FeatureSM13]>;
+ "Use PTX Language Version 2.3">;
+
+//===- PTX Target ---------------------------------------------------------===//
+
+def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
+ "Use Shader Model 1.0">;
+def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
+ "Use Shader Model 1.1">;
+def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
+ "Use Shader Model 1.2">;
+def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
+ "Use Shader Model 1.3">;
+def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
+ "Use Shader Model 2.0">;
+def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
+ "Use Shader Model 2.1">;
+def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
+ "Use Shader Model 2.2">;
+def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
+ "Use Shader Model 2.3">;
+
+def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
+ "PTX_COMPUTE_1_0",
+ "Use Compute Compatibility 1.0">;
+def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
+ "PTX_COMPUTE_1_1",
+ "Use Compute Compatibility 1.1">;
+def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
+ "PTX_COMPUTE_1_2",
+ "Use Compute Compatibility 1.2">;
+def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
+ "PTX_COMPUTE_1_3",
+ "Use Compute Compatibility 1.3">;
+def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
+ "PTX_COMPUTE_2_0",
+ "Use Compute Compatibility 2.0">;
//===----------------------------------------------------------------------===//
-// PTX supported processors.
+// PTX supported processors
//===----------------------------------------------------------------------===//
class Proc<string Name, list<SubtargetFeature> Features>
@@ -65,6 +85,27 @@ class Proc<string Name, list<SubtargetFeature> Features>
def : Proc<"generic", []>;
+// Processor definitions for compute/shader models
+def : Proc<"compute_10", [FeatureCOMPUTE10]>;
+def : Proc<"compute_11", [FeatureCOMPUTE11]>;
+def : Proc<"compute_12", [FeatureCOMPUTE12]>;
+def : Proc<"compute_13", [FeatureCOMPUTE13]>;
+def : Proc<"compute_20", [FeatureCOMPUTE20]>;
+def : Proc<"sm_10", [FeatureSM10]>;
+def : Proc<"sm_11", [FeatureSM11]>;
+def : Proc<"sm_12", [FeatureSM12]>;
+def : Proc<"sm_13", [FeatureSM13]>;
+def : Proc<"sm_20", [FeatureSM20]>;
+def : Proc<"sm_21", [FeatureSM21]>;
+def : Proc<"sm_22", [FeatureSM22]>;
+def : Proc<"sm_23", [FeatureSM23]>;
+
+// Processor definitions for common GPU architectures
+def : Proc<"g80", [FeatureSM10]>;
+def : Proc<"gt200", [FeatureSM13]>;
+def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
+def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
@@ -72,6 +113,12 @@ def : Proc<"generic", []>;
include "PTXRegisterInfo.td"
//===----------------------------------------------------------------------===//
+// Calling Conventions
+//===----------------------------------------------------------------------===//
+
+include "PTXCallingConv.td"
+
+//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
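
Each processor definition above binds one -mcpu name to a single PTXTarget value through its feature; the GPU names are aliases for the matching shader models. A flattened view of a few of those bindings (strings only, not a real API):

#include <iostream>
#include <map>
#include <string>

// A few of the -mcpu bindings defined above, flattened into a lookup table.
int main() {
  const std::map<std::string, std::string> Procs = {
      {"sm_13",      "PTX_SM_1_3"},
      {"sm_20",      "PTX_SM_2_0"},
      {"compute_20", "PTX_COMPUTE_2_0"},
      {"g80",        "PTX_SM_1_0"},   // GPU alias for sm_10
      {"fermi",      "PTX_SM_2_0"},   // GPU alias for sm_20, plus FeatureDouble
  };
  for (const auto &P : Procs)
    std::cout << P.first << " -> " << P.second << "\n";
  return 0;
}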
diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp
index 29c4781..2848d54 100644
--- a/lib/Target/PTX/PTXAsmPrinter.cpp
+++ b/lib/Target/PTX/PTXAsmPrinter.cpp
@@ -22,9 +22,12 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
@@ -34,6 +37,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -62,8 +66,13 @@ public:
const char *Modifier = 0);
void printParamOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
const char *Modifier = 0);
+ void printReturnOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
+ const char *Modifier = 0);
void printPredicateOperand(const MachineInstr *MI, raw_ostream &O);
+ unsigned GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName);
+
// autogen'd.
void printInstruction(const MachineInstr *MI, raw_ostream &OS);
static const char *getRegisterName(unsigned RegNo);
@@ -71,20 +80,23 @@ public:
private:
void EmitVariableDeclaration(const GlobalVariable *gv);
void EmitFunctionDeclaration();
+
+ StringMap<unsigned> SourceIdMap;
}; // class PTXAsmPrinter
} // namespace
static const char PARAM_PREFIX[] = "__param_";
+static const char RETURN_PREFIX[] = "__ret_";
static const char *getRegisterTypeName(unsigned RegNo) {
#define TEST_REGCLS(cls, clsstr) \
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
- TEST_REGCLS(Preds, pred);
- TEST_REGCLS(RRegu16, u16);
- TEST_REGCLS(RRegu32, u32);
- TEST_REGCLS(RRegu64, u64);
- TEST_REGCLS(RRegf32, f32);
- TEST_REGCLS(RRegf64, f64);
+ TEST_REGCLS(RegPred, pred);
+ TEST_REGCLS(RegI16, b16);
+ TEST_REGCLS(RegI32, b32);
+ TEST_REGCLS(RegI64, b64);
+ TEST_REGCLS(RegF32, b32);
+ TEST_REGCLS(RegF64, b64);
#undef TEST_REGCLS
llvm_unreachable("Not in any register class!");
@@ -162,6 +174,27 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
(ST.supportsDouble() ? ""
: ", map_f64_to_f32")));
+ // .address_size directive is optional, but it must immediately follow
+ // the .target directive if present within a module
+ if (ST.supportsPTX23()) {
+ std::string addrSize = ST.is64Bit() ? "64" : "32";
+ OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize));
+ }
+
+ OutStreamer.AddBlankLine();
+
+ // Define any .file directives
+ DebugInfoFinder DbgFinder;
+ DbgFinder.processModule(M);
+
+ for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(),
+ E = DbgFinder.compile_unit_end(); I != E; ++I) {
+ DICompileUnit DIUnit(*I);
+ StringRef FN = DIUnit.getFilename();
+ StringRef Dir = DIUnit.getDirectory();
+ GetOrCreateSourceID(FN, Dir);
+ }
+
OutStreamer.AddBlankLine();
// declare global variables
@@ -194,6 +227,21 @@ void PTXAsmPrinter::EmitFunctionBodyStart() {
def += ';';
OutStreamer.EmitRawText(Twine(def));
}
+
+ const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
+ DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
+ << " frame object(s)\n");
+ for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
+ DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
+ if (FrameInfo->getObjectSize(i) > 0) {
+ std::string def = "\t.reg .b";
+ def += utostr(FrameInfo->getObjectSize(i)*8); // Convert to bits
+ def += " s";
+ def += utostr(i);
+ def += ";";
+ OutStreamer.EmitRawText(Twine(def));
+ }
+ }
}
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
@@ -202,6 +250,54 @@ void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
raw_string_ostream OS(str);
+ DebugLoc DL = MI->getDebugLoc();
+ if (!DL.isUnknown()) {
+
+ const MDNode *S = DL.getScope(MF->getFunction()->getContext());
+
+ // This is taken from DwarfDebug.cpp, which is conveniently not a public
+ // LLVM class.
+ StringRef Fn;
+ StringRef Dir;
+ unsigned Src = 1;
+ if (S) {
+ DIDescriptor Scope(S);
+ if (Scope.isCompileUnit()) {
+ DICompileUnit CU(S);
+ Fn = CU.getFilename();
+ Dir = CU.getDirectory();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
+ Dir = F.getDirectory();
+ } else if (Scope.isSubprogram()) {
+ DISubprogram SP(S);
+ Fn = SP.getFilename();
+ Dir = SP.getDirectory();
+ } else if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(S);
+ Fn = DB.getFilename();
+ Dir = DB.getDirectory();
+ } else
+ assert(0 && "Unexpected scope info");
+
+ Src = GetOrCreateSourceID(Fn, Dir);
+ }
+ OutStreamer.EmitDwarfLocDirective(Src, DL.getLine(), DL.getCol(),
+ 0, 0, 0, Fn);
+
+ const MCDwarfLoc& MDL = OutContext.getCurrentDwarfLoc();
+
+ OS << "\t.loc ";
+ OS << utostr(MDL.getFileNum());
+ OS << " ";
+ OS << utostr(MDL.getLine());
+ OS << " ";
+ OS << utostr(MDL.getColumn());
+ OS << "\n";
+ }
+
+
// Emit predicate
printPredicateOperand(MI, OS);
@@ -275,6 +371,11 @@ void PTXAsmPrinter::printParamOperand(const MachineInstr *MI, int opNum,
OS << PARAM_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
}
+void PTXAsmPrinter::printReturnOperand(const MachineInstr *MI, int opNum,
+ raw_ostream &OS, const char *Modifier) {
+ OS << RETURN_PREFIX << (int) MI->getOperand(opNum).getImm() + 1;
+}
+
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
// Check to see if this is a special global used by LLVM, if so, emit it.
if (EmitSpecialLLVMGlobal(gv))
@@ -311,7 +412,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
decl += ".b8 ";
decl += gvsym->getName();
decl += "[";
-
+
if (elementTy->isArrayTy())
{
assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
@@ -320,7 +421,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
elementTy = arrayTy->getElementType();
unsigned numElements = arrayTy->getNumElements();
-
+
while (elementTy->isArrayTy()) {
arrayTy = dyn_cast<const ArrayType>(elementTy);
@@ -336,17 +437,17 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
// Compute the size of the array, in bytes.
uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3)
* numElements;
-
+
decl += utostr(arraySize);
}
-
+
decl += "]";
-
+
// handle string constants (assume ConstantArray means string)
-
+
if (gv->hasInitializer())
{
- Constant *C = gv->getInitializer();
+ const Constant *C = gv->getInitializer();
if (const ConstantArray *CA = dyn_cast<ConstantArray>(C))
{
decl += " = {";
@@ -354,10 +455,11 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
{
if (i > 0) decl += ",";
-
- decl += "0x" + utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
+
+ decl += "0x" +
+ utohexstr(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
}
-
+
decl += "}";
}
}
@@ -393,17 +495,25 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
const bool isKernel = MFI->isKernel();
- unsigned reg;
+ const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
std::string decl = isKernel ? ".entry" : ".func";
- // Print return register
- reg = MFI->retReg();
- if (!isKernel && reg != PTX::NoRegister) {
- decl += " (.reg ."; // FIXME: could it return in .param space?
- decl += getRegisterTypeName(reg);
- decl += " ";
- decl += getRegisterName(reg);
+ unsigned cnt = 0;
+
+ if (!isKernel) {
+ decl += " (";
+ for (PTXMachineFunctionInfo::ret_iterator
+ i = MFI->retRegBegin(), e = MFI->retRegEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i);
+ decl += " ";
+ decl += getRegisterName(*i);
+ }
decl += ")";
}
@@ -411,40 +521,31 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
decl += " ";
decl += CurrentFnSym->getName().str();
- // Print parameter list
- if (!MFI->argRegEmpty()) {
- decl += " (";
- if (isKernel) {
- unsigned cnt = 0;
- for(PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
- i != e; ++i) {
- reg = *i;
- assert(reg != PTX::NoRegister && "Not a valid register!");
- if (i != b)
- decl += ", ";
- decl += ".param .";
- decl += getRegisterTypeName(reg);
- decl += " ";
- decl += PARAM_PREFIX;
- decl += utostr(++cnt);
- }
+ decl += " (";
+
+ cnt = 0;
+
+ // Print parameters
+ for (PTXMachineFunctionInfo::reg_iterator
+ i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
+ i != e; ++i) {
+ if (i != b) {
+ decl += ", ";
+ }
+ if (isKernel || ST.useParamSpaceForDeviceArgs()) {
+ decl += ".param .b";
+ decl += utostr(*i);
+ decl += " ";
+ decl += PARAM_PREFIX;
+ decl += utostr(++cnt);
} else {
- for (PTXMachineFunctionInfo::reg_iterator
- i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i;
- i != e; ++i) {
- reg = *i;
- assert(reg != PTX::NoRegister && "Not a valid register!");
- if (i != b)
- decl += ", ";
- decl += ".reg .";
- decl += getRegisterTypeName(reg);
- decl += " ";
- decl += getRegisterName(reg);
- }
+ decl += ".reg .";
+ decl += getRegisterTypeName(*i);
+ decl += " ";
+ decl += getRegisterName(*i);
}
- decl += ")";
}
+ decl += ")";
OutStreamer.EmitRawText(Twine(decl));
}
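
For the param-space path the function info stores only each argument's width in bits, and the loop above renders one ".param .b<bits> __param_<n>" entry per argument. A standalone sketch of that rendering (the function name and widths are invented for illustration):

#include <cstdio>
#include <vector>

// Render a parameter list the way the param-space branch above does:
// ".param .b<bits> __param_<n>", with n counting from 1.
int main() {
  const std::vector<unsigned> ArgBits = {32, 64, 32}; // e.g. i32, f64, f32
  std::printf(".func example (");
  for (unsigned i = 0; i != ArgBits.size(); ++i)
    std::printf("%s.param .b%u __param_%u", i ? ", " : "", ArgBits[i], i + 1);
  std::printf(")\n");
  return 0;
}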
@@ -468,6 +569,33 @@ printPredicateOperand(const MachineInstr *MI, raw_ostream &O) {
}
}
+unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
+ StringRef DirName) {
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>", StringRef());
+
+ // MCStream expects full path name as filename.
+  // MCStreamer expects full path name as filename.
+ SmallString<128> FullPathName = DirName;
+ sys::path::append(FullPathName, FileName);
+ // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
+ return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
+ }
+
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
+
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
+
+ // Print out a .file directive to specify files for .loc directives.
+ OutStreamer.EmitDwarfFileDirective(SrcId, Entry.getKey());
+
+ return SrcId;
+}
+
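
GetOrCreateSourceID above interns file names into stable, 1-based ids and emits a .file directive only on first sight; zero is reserved to mean "no entry yet". The same scheme with std::unordered_map standing in for llvm::StringMap:

#include <iostream>
#include <string>
#include <unordered_map>

// Intern file names into stable 1-based ids; 0 is reserved for "unset".
static unsigned getOrCreateSourceId(const std::string &FileName) {
  static std::unordered_map<std::string, unsigned> Ids;
  unsigned &Id = Ids[FileName];  // default-constructs to 0 on first lookup
  if (Id == 0) {
    Id = Ids.size();             // the map already holds the new entry
    std::cout << ".file " << Id << " \"" << FileName << "\"\n";
  }
  return Id;
}

int main() {
  getOrCreateSourceId("kernel.cu"); // emits .file 1
  getOrCreateSourceId("util.cu");   // emits .file 2
  getOrCreateSourceId("kernel.cu"); // cached; emits nothing
  return 0;
}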
#include "PTXGenAsmWriter.inc"
// Force static initialization.
diff --git a/lib/Target/PTX/PTXCallingConv.td b/lib/Target/PTX/PTXCallingConv.td
new file mode 100644
index 0000000..3e3ff48
--- /dev/null
+++ b/lib/Target/PTX/PTXCallingConv.td
@@ -0,0 +1,29 @@
+
+//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PTX architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Formal Parameter Calling Convention
+def CC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[P12, P13, P14, P15, P16, P17, P18, P19, P20, P21, P22, P23, P24, P25, P26, P27, P28, P29, P30, P31, P32, P33, P34, P35, P36, P37, P38, P39, P40, P41, P42, P43, P44, P45, P46, P47, P48, P49, P50, P51, P52, P53, P54, P55, P56, P57, P58, P59, P60, P61, P62, P63, P64, P65, P66, P67, P68, P69, P70, P71, P72, P73, P74, P75, P76, P77, P78, P79, P80, P81, P82, P83, P84, P85, P86, P87, P88, P89, P90, P91, P92, P93, P94, P95, P96, P97, P98, P99, P100, P101, P102, P103, P104, P105, P106, P107, P108, P109, P110, P111, P112, P113, P114, P115, P116, P117, P118, P119, P120, P121, P122, P123, P124, P125, P126, P127]>>,
+ CCIfType<[i16], CCAssignToReg<[RH12, RH13, RH14, RH15, RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23, RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31, RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39, RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47, RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55, RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63, RH64, RH65, RH66, RH67, RH68, RH69, RH70, RH71, RH72, RH73, RH74, RH75, RH76, RH77, RH78, RH79, RH80, RH81, RH82, RH83, RH84, RH85, RH86, RH87, RH88, RH89, RH90, RH91, RH92, RH93, RH94, RH95, RH96, RH97, RH98, RH99, RH100, RH101, RH102, RH103, RH104, RH105, RH106, RH107, RH108, RH109, RH110, RH111, RH112, RH113, RH114, RH115, RH116, RH117, RH118, RH119, RH120, RH121, RH122, RH123, RH124, RH125, RH126, RH127]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[R12, R13, R14, R15, R16, R17, R18, R19, R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31, R32, R33, R34, R35, R36, R37, R38, R39, R40, R41, R42, R43, R44, R45, R46, R47, R48, R49, R50, R51, R52, R53, R54, R55, R56, R57, R58, R59, R60, R61, R62, R63, R64, R65, R66, R67, R68, R69, R70, R71, R72, R73, R74, R75, R76, R77, R78, R79, R80, R81, R82, R83, R84, R85, R86, R87, R88, R89, R90, R91, R92, R93, R94, R95, R96, R97, R98, R99, R100, R101, R102, R103, R104, R105, R106, R107, R108, R109, R110, R111, R112, R113, R114, R115, R116, R117, R118, R119, R120, R121, R122, R123, R124, R125, R126, R127]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[RD12, RD13, RD14, RD15, RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23, RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31, RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39, RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47, RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55, RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63, RD64, RD65, RD66, RD67, RD68, RD69, RD70, RD71, RD72, RD73, RD74, RD75, RD76, RD77, RD78, RD79, RD80, RD81, RD82, RD83, RD84, RD85, RD86, RD87, RD88, RD89, RD90, RD91, RD92, RD93, RD94, RD95, RD96, RD97, RD98, RD99, RD100, RD101, RD102, RD103, RD104, RD105, RD106, RD107, RD108, RD109, RD110, RD111, RD112, RD113, RD114, RD115, RD116, RD117, RD118, RD119, RD120, RD121, RD122, RD123, RD124, RD125, RD126, RD127]>>
+]>;
+
+// PTX Return Value Calling Convention
+def RetCC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[P0, P1, P2, P3, P4, P5, P6, P7, P8, P9, P10, P11]>>,
+ CCIfType<[i16], CCAssignToReg<[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7, RH8, RH9, RH10, RH11]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[R0, R1, R2, R3, R4, R5, R6, R7, R8, R9, R10, R11]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7, RD8, RD9, RD10, RD11]>>
+]>;
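
The split above reserves the first twelve registers of each bank (indices 0-11) for return values and hands formal parameters the rest (12-127). A toy mapping for the i32/f32 bank, assuming the R naming used above:

#include <cstdio>

// Toy view of the i32/f32 bank split above: returns take R0-R11,
// formal parameters take R12-R127.
static int returnReg(unsigned i) { return i < 12 ? int(i) : -1; }
static int paramReg(unsigned i)  { return i < 116 ? int(12 + i) : -1; }

int main() {
  std::printf("1st i32 return -> R%d\n", returnReg(0)); // R0
  std::printf("3rd i32 param  -> R%d\n", paramReg(2));  // R14
  return 0;
}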
diff --git a/lib/Target/PTX/PTXISelDAGToDAG.cpp b/lib/Target/PTX/PTXISelDAGToDAG.cpp
index b3c85da..9adfa62 100644
--- a/lib/Target/PTX/PTXISelDAGToDAG.cpp
+++ b/lib/Target/PTX/PTXISelDAGToDAG.cpp
@@ -15,6 +15,7 @@
#include "PTXTargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/DerivedTypes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -41,8 +42,6 @@ class PTXDAGToDAGISel : public SelectionDAGISel {
#include "PTXGenDAGISel.inc"
private:
- SDNode *SelectREAD_PARAM(SDNode *Node);
-
  // We need this only because we can't match instruction BRAdp
// pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
SDNode *SelectBRCOND(SDNode *Node);
@@ -67,8 +66,6 @@ PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
switch (Node->getOpcode()) {
- case PTXISD::READ_PARAM:
- return SelectREAD_PARAM(Node);
case ISD::BRCOND:
return SelectBRCOND(Node);
default:
@@ -76,37 +73,6 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
}
}
-SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
- SDValue index = Node->getOperand(1);
- DebugLoc dl = Node->getDebugLoc();
- unsigned opcode;
-
- if (index.getOpcode() != ISD::TargetConstant)
- llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
-
- if (Node->getValueType(0) == MVT::i16) {
- opcode = PTX::LDpiU16;
- }
- else if (Node->getValueType(0) == MVT::i32) {
- opcode = PTX::LDpiU32;
- }
- else if (Node->getValueType(0) == MVT::i64) {
- opcode = PTX::LDpiU64;
- }
- else if (Node->getValueType(0) == MVT::f32) {
- opcode = PTX::LDpiF32;
- }
- else if (Node->getValueType(0) == MVT::f64) {
- opcode = PTX::LDpiF64;
- }
- else {
- llvm_unreachable("Unknown parameter type for ld.param");
- }
-
- return PTXInstrInfo::
- GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index);
-}
-
SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
assert(Node->getNumOperands() >= 3);
diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp
index e9b1d8c..6fcf710 100644
--- a/lib/Target/PTX/PTXISelLowering.cpp
+++ b/lib/Target/PTX/PTXISelLowering.cpp
@@ -15,7 +15,9 @@
#include "PTXISelLowering.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
+#include "PTXSubtarget.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -24,49 +26,80 @@
using namespace llvm;
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "PTXGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// TargetLowering Implementation
+//===----------------------------------------------------------------------===//
+
PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
- addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
- addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
- addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
- addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
- addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
- addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
+ addRegisterClass(MVT::i1, PTX::RegPredRegisterClass);
+ addRegisterClass(MVT::i16, PTX::RegI16RegisterClass);
+ addRegisterClass(MVT::i32, PTX::RegI32RegisterClass);
+ addRegisterClass(MVT::i64, PTX::RegI64RegisterClass);
+ addRegisterClass(MVT::f32, PTX::RegF32RegisterClass);
+ addRegisterClass(MVT::f64, PTX::RegF64RegisterClass);
setBooleanContents(ZeroOrOneBooleanContent);
+ setMinFunctionAlignment(2);
- setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
-
- setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
- setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+ ////////////////////////////////////
+ /////////// Expansion //////////////
+ ////////////////////////////////////
+
+ // (any/zero/sign) extload => load + (any/zero/sign) extend
- // Turn i16 (z)extload into load + (z)extend
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
-
- // Turn f32 extload into load + fextend
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
- // Turn f64 truncstore into trunc + store.
- setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+ // f32 extload => load + fextend
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+
+ // f64 truncstore => trunc + store
+
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ // sign_extend_inreg => sign_extend
+
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+
+ // br_cc => brcond
- // Customize translation of memory addresses
- setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
-
- // Expand BR_CC into BRCOND
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
- // Expand SELECT_CC into SETCC
+ // select_cc => setcc
+
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
- // need to lower SETCC of Preds into bitwise logic
+ ////////////////////////////////////
+ //////////// Legal /////////////////
+ ////////////////////////////////////
+
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ ////////////////////////////////////
+ //////////// Custom ////////////////
+ ////////////////////////////////////
+
+ // customise setcc to use bitwise logic if possible
+
setOperationAction(ISD::SETCC, MVT::i1, Custom);
- setMinFunctionAlignment(2);
+ // customize translation of memory addresses
+
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// Compute derived properties from the register classes
computeRegisterProperties();
@@ -93,8 +126,10 @@ const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
llvm_unreachable("Unknown opcode");
case PTXISD::COPY_ADDRESS:
return "PTXISD::COPY_ADDRESS";
- case PTXISD::READ_PARAM:
- return "PTXISD::READ_PARAM";
+ case PTXISD::LOAD_PARAM:
+ return "PTXISD::LOAD_PARAM";
+ case PTXISD::STORE_PARAM:
+ return "PTXISD::STORE_PARAM";
case PTXISD::EXIT:
return "PTXISD::EXIT";
case PTXISD::RET:
@@ -113,18 +148,18 @@ SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue Op2 = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-
+
// Look for X == 0, X == 1, X != 0, or X != 1
// We can simplify these to bitwise logic
-
+
if (Op1.getOpcode() == ISD::Constant &&
(cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
cast<ConstantSDNode>(Op1)->isNullValue()) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
- return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
+ return DAG.getNode(ISD::AND, dl, MVT::i1, Op0, Op1);
}
-
+
return DAG.getNode(ISD::SETCC, dl, MVT::i1, Op0, Op1, Op2);
}
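
Of the patterns named in the comment, the and-rewrite is exact for the X == 1 case over i1, since for a 1-bit value (X == 1), (X & 1), and X itself all agree. A truth-table check of that identity:

#include <cassert>

// For 1-bit X: (X == 1) equals (X & 1) equals X.
int main() {
  for (unsigned X = 0; X <= 1; ++X)
    assert(((X == 1) ? 1u : 0u) == (X & 1u));
  return 0;
}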
@@ -149,27 +184,6 @@ LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
-namespace {
-struct argmap_entry {
- MVT::SimpleValueType VT;
- TargetRegisterClass *RC;
- TargetRegisterClass::iterator loc;
-
- argmap_entry(MVT::SimpleValueType _VT, TargetRegisterClass *_RC)
- : VT(_VT), RC(_RC), loc(_RC->begin()) {}
-
- void reset() { loc = RC->begin(); }
- bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
-} argmap[] = {
- argmap_entry(MVT::i1, PTX::PredsRegisterClass),
- argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
- argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
- argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
- argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
- argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
-};
-} // end anonymous namespace
-
SDValue PTXTargetLowering::
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -181,6 +195,7 @@ SDValue PTXTargetLowering::
if (isVarArg) llvm_unreachable("PTX does not support varargs");
MachineFunction &MF = DAG.getMachineFunction();
+ const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();
PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
switch (CallConv) {
@@ -195,44 +210,76 @@ SDValue PTXTargetLowering::
break;
}
- // Make sure we don't add argument registers twice
- if (MFI->isDoneAddArg())
- llvm_unreachable("cannot add argument registers twice");
-
- // Reset argmap before allocation
- for (struct argmap_entry *i = argmap, *e = argmap + array_lengthof(argmap);
- i != e; ++ i)
- i->reset();
-
- for (int i = 0, e = Ins.size(); i != e; ++ i) {
- MVT::SimpleValueType VT = Ins[i].VT.SimpleTy;
-
- struct argmap_entry *entry = std::find(argmap,
- argmap + array_lengthof(argmap), VT);
- if (entry == argmap + array_lengthof(argmap))
- llvm_unreachable("Type of argument is not supported");
-
- if (MFI->isKernel() && entry->RC == PTX::PredsRegisterClass)
- llvm_unreachable("cannot pass preds to kernel");
-
- MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
-
- unsigned preg = *++(entry->loc); // allocate start from register 1
- unsigned vreg = RegInfo.createVirtualRegister(entry->RC);
- RegInfo.addLiveIn(preg, vreg);
-
- MFI->addArgReg(preg);
-
- SDValue inval;
- if (MFI->isKernel())
- inval = DAG.getNode(PTXISD::READ_PARAM, dl, VT, Chain,
- DAG.getTargetConstant(i, MVT::i32));
- else
- inval = DAG.getCopyFromReg(Chain, dl, vreg, VT);
- InVals.push_back(inval);
+ // We do one of two things here:
+ // IsKernel || SM >= 2.0 -> Use param space for arguments
+ // SM < 2.0 -> Use registers for arguments
+ if (MFI->isKernel() || ST.useParamSpaceForDeviceArgs()) {
+ // We just need to emit the proper LOAD_PARAM ISDs
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+
+ assert((!MFI->isKernel() || Ins[i].VT != MVT::i1) &&
+ "Kernels cannot take pred operands");
+
+ SDValue ArgValue = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
+ DAG.getTargetConstant(i, MVT::i32));
+ InVals.push_back(ArgValue);
+
+ // Instead of storing a physical register in our argument list, we just
+ // store the total size of the parameter, in bits. The ASM printer
+ // knows how to process this.
+ MFI->addArgReg(Ins[i].VT.getStoreSizeInBits());
+ }
+ }
+ else {
+ // For device functions, we use the PTX calling convention to do register
+ // assignments then create CopyFromReg ISDs for the allocated registers
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_PTX);
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+
+ CCValAssign& VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass* TRC = 0;
+
+ assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
+
+ // Determine which register class we need
+ if (RegVT == MVT::i1) {
+ TRC = PTX::RegPredRegisterClass;
+ }
+ else if (RegVT == MVT::i16) {
+ TRC = PTX::RegI16RegisterClass;
+ }
+ else if (RegVT == MVT::i32) {
+ TRC = PTX::RegI32RegisterClass;
+ }
+ else if (RegVT == MVT::i64) {
+ TRC = PTX::RegI64RegisterClass;
+ }
+ else if (RegVT == MVT::f32) {
+ TRC = PTX::RegF32RegisterClass;
+ }
+ else if (RegVT == MVT::f64) {
+ TRC = PTX::RegF64RegisterClass;
+ }
+ else {
+ llvm_unreachable("Unknown parameter type");
+ }
+
+ unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
+ MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg);
+
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ InVals.push_back(ArgValue);
+
+ MFI->addArgReg(VA.getLocReg());
+ }
}
-
- MFI->doneAddArg();
return Chain;
}
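
The branch above encodes a simple policy: kernels always take their arguments from .param space, and device functions do too once the target allows it, which is what ST.useParamSpaceForDeviceArgs() reports (SM >= 2.0 on this backend). Isolated as a predicate, with the SM major version standing in for the subtarget query:

#include <cassert>

// Kernels always use .param space for arguments; device functions use it
// only when the target supports it (SM major >= 2 standing in for
// ST.useParamSpaceForDeviceArgs()).
static bool useParamSpace(bool IsKernel, unsigned SMMajor) {
  return IsKernel || SMMajor >= 2;
}

int main() {
  assert(useParamSpace(true, 1));   // kernel on sm_1x -> param space
  assert(!useParamSpace(false, 1)); // device fn on sm_1x -> registers
  assert(useParamSpace(false, 2));  // device fn on sm_2x -> param space
  return 0;
}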
@@ -254,51 +301,47 @@ SDValue PTXTargetLowering::
assert(Outs.size() == 0 && "Kernel must return void.");
return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
case CallingConv::PTX_Device:
- assert(Outs.size() <= 1 && "Can at most return one value.");
+ //assert(Outs.size() <= 1 && "Can at most return one value.");
break;
}
- // PTX_Device
-
- // return void
- if (Outs.size() == 0)
- return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+ MachineFunction& MF = DAG.getMachineFunction();
+ PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
SDValue Flag;
- unsigned reg;
- if (Outs[0].VT == MVT::i16) {
- reg = PTX::RH0;
- }
- else if (Outs[0].VT == MVT::i32) {
- reg = PTX::R0;
- }
- else if (Outs[0].VT == MVT::i64) {
- reg = PTX::RD0;
- }
- else if (Outs[0].VT == MVT::f32) {
- reg = PTX::F0;
- }
- else {
- assert(Outs[0].VT == MVT::f64 && "Can return only basic types");
- reg = PTX::FD0;
- }
+ // Even though we could use the .param space for return arguments for
+ // device functions if SM >= 2.0 and the number of return arguments is
+ // only 1, we just always use registers since this makes the codegen
+ // easier.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
+ getTargetMachine(), RVLocs, *DAG.getContext());
- MachineFunction &MF = DAG.getMachineFunction();
- PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
- MFI->setRetReg(reg);
+ CCInfo.AnalyzeReturn(Outs, RetCC_PTX);
+
+ for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
+ CCValAssign& VA = RVLocs[i];
- // If this is the first return lowered for this function, add the regs to the
- // liveout set for the function
- if (DAG.getMachineFunction().getRegInfo().liveout_empty())
- DAG.getMachineFunction().getRegInfo().addLiveOut(reg);
+ assert(VA.isRegLoc() && "CCValAssign must be RegLoc");
- // Copy the result values into the output registers
- Chain = DAG.getCopyToReg(Chain, dl, reg, OutVals[0], Flag);
+ unsigned Reg = VA.getLocReg();
- // Guarantee that all emitted copies are stuck together,
- // avoiding something bad
- Flag = Chain.getValue(1);
+ DAG.getMachineFunction().getRegInfo().addLiveOut(Reg);
- return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+ Chain = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together,
+ // avoiding something bad
+ Flag = Chain.getValue(1);
+
+ MFI->addRetReg(Reg);
+ }
+
+ if (Flag.getNode() == 0) {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
+ }
+ else {
+ return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain, Flag);
+ }
}
diff --git a/lib/Target/PTX/PTXISelLowering.h b/lib/Target/PTX/PTXISelLowering.h
index 225c000..4318541 100644
--- a/lib/Target/PTX/PTXISelLowering.h
+++ b/lib/Target/PTX/PTXISelLowering.h
@@ -24,12 +24,13 @@ class PTXTargetMachine;
namespace PTXISD {
enum NodeType {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
- READ_PARAM,
+ LOAD_PARAM,
+ STORE_PARAM,
EXIT,
RET,
COPY_ADDRESS
};
-} // namespace PTXISD
+} // namespace PTXISD
class PTXTargetLowering : public TargetLowering {
public:
@@ -40,7 +41,7 @@ class PTXTargetLowering : public TargetLowering {
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
-
+
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
@@ -58,9 +59,9 @@ class PTXTargetLowering : public TargetLowering {
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl,
SelectionDAG &DAG) const;
-
+
virtual MVT::SimpleValueType getSetCCResultType(EVT VT) const;
-
+
private:
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
}; // class PTXTargetLowering
diff --git a/lib/Target/PTX/PTXInstrFormats.td b/lib/Target/PTX/PTXInstrFormats.td
index e4e0999..8cee351 100644
--- a/lib/Target/PTX/PTXInstrFormats.td
+++ b/lib/Target/PTX/PTXInstrFormats.td
@@ -9,7 +9,7 @@
// PTX Predicate operand, default to (0, 0) = (zero-reg, always).
// Leave PrintMethod empty; predicate printing is defined elsewhere.
-def pred : PredicateOperand<OtherVT, (ops Preds, i32imm),
+def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
(ops (i1 zero_reg), (i32 0))>;
let Namespace = "PTX" in {
diff --git a/lib/Target/PTX/PTXInstrInfo.cpp b/lib/Target/PTX/PTXInstrInfo.cpp
index a12a6d0..425265a 100644
--- a/lib/Target/PTX/PTXInstrInfo.cpp
+++ b/lib/Target/PTX/PTXInstrInfo.cpp
@@ -18,27 +18,29 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
+#define GET_INSTRINFO_CTOR
#include "PTXGenInstrInfo.inc"
+using namespace llvm;
+
PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
- : TargetInstrInfoImpl(PTXInsts, array_lengthof(PTXInsts)),
+ : PTXGenInstrInfo(),
RI(_TM, *this), TM(_TM) {}
static const struct map_entry {
const TargetRegisterClass *cls;
const int opcode;
} map[] = {
- { &PTX::RRegu16RegClass, PTX::MOVU16rr },
- { &PTX::RRegu32RegClass, PTX::MOVU32rr },
- { &PTX::RRegu64RegClass, PTX::MOVU64rr },
- { &PTX::RRegf32RegClass, PTX::MOVF32rr },
- { &PTX::RRegf64RegClass, PTX::MOVF64rr },
- { &PTX::PredsRegClass, PTX::MOVPREDrr }
+ { &PTX::RegI16RegClass, PTX::MOVU16rr },
+ { &PTX::RegI32RegClass, PTX::MOVU32rr },
+ { &PTX::RegI64RegClass, PTX::MOVU64rr },
+ { &PTX::RegF32RegClass, PTX::MOVF32rr },
+ { &PTX::RegF64RegClass, PTX::MOVF64rr },
+ { &PTX::RegPredRegClass, PTX::MOVPREDrr }
};
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
@@ -47,8 +49,8 @@ void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
bool KillSrc) const {
for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i) {
if (map[i].cls->contains(DstReg, SrcReg)) {
- const TargetInstrDesc &TID = get(map[i].opcode);
- MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).
addReg(SrcReg, getKillRegState(KillSrc));
AddDefaultPredicate(MI);
return;
@@ -69,8 +71,8 @@ bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
for (int i = 0, e = sizeof(map)/sizeof(map[0]); i != e; ++ i)
if (DstRC == map[i].cls) {
- const TargetInstrDesc &TID = get(map[i].opcode);
- MachineInstr *MI = BuildMI(MBB, I, DL, TID, DstReg).addReg(SrcReg);
+ const MCInstrDesc &MCID = get(map[i].opcode);
+ MachineInstr *MI = BuildMI(MBB, I, DL, MCID, DstReg).addReg(SrcReg);
AddDefaultPredicate(MI);
return true;
}
@@ -155,7 +157,7 @@ DefinesPredicate(MachineInstr *MI,
const MachineOperand &MO = MI->getOperand(0);
- if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::PredsRegClass)
+ if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
return false;
Pred.push_back(MO);
@@ -178,13 +180,13 @@ AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator iter = MBB.end();
const MachineInstr& instLast1 = *--iter;
- const TargetInstrDesc &desc1 = instLast1.getDesc();
+ const MCInstrDesc &desc1 = instLast1.getDesc();
// for special case that MBB has only 1 instruction
const bool IsSizeOne = MBB.size() == 1;
// if IsSizeOne is true, *--iter and instLast2 are invalid
// we put a dummy value in instLast2 and desc2 since they are used
const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
- const TargetInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
+ const MCInstrDesc &desc2 = IsSizeOne ? desc1 : instLast2.getDesc();
DEBUG(dbgs() << "\n");
DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
@@ -288,6 +290,77 @@ InsertBranch(MachineBasicBlock &MBB,
}
}
+// Memory operand folding for spills
+void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+ DEBUG(dbgs() << "storeRegToStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKSTOREI16;
+ } else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKSTOREI32;
+ } else if (RC == PTX::RegI64RegisterClass) {
+    OpCode = PTX::STACKSTOREI64;
+ } else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKSTOREF32;
+ } else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKSTOREF64;
+ } else {
+ llvm_unreachable("Unknown PTX register class!");
+ }
+
+ // Build the store instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addFrameIndex(FrameIdx);
+ MIB.addReg(SrcReg);
+
+ AddDefaultPredicate(MIB);
+}
+
+void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const {
+ MachineInstr& MI = *MII;
+ DebugLoc DL = MI.getDebugLoc();
+
+  DEBUG(dbgs() << "loadRegFromStackSlot: " << MI);
+
+ int OpCode;
+
+ // Select the appropriate opcode based on the register class
+ if (RC == PTX::RegI16RegisterClass) {
+ OpCode = PTX::STACKLOADI16;
+ } else if (RC == PTX::RegI32RegisterClass) {
+ OpCode = PTX::STACKLOADI32;
+ } else if (RC == PTX::RegI64RegisterClass) {
+    OpCode = PTX::STACKLOADI64;
+ } else if (RC == PTX::RegF32RegisterClass) {
+ OpCode = PTX::STACKLOADF32;
+ } else if (RC == PTX::RegF64RegisterClass) {
+ OpCode = PTX::STACKLOADF64;
+ } else {
+ llvm_unreachable("Unknown PTX register class!");
+ }
+
+ // Build the load instruction (really a mov)
+ MachineInstrBuilder MIB = BuildMI(MBB, MII, DL, get(OpCode));
+ MIB.addReg(DestReg);
+ MIB.addFrameIndex(FrameIdx);
+
+ AddDefaultPredicate(MIB);
+}
+
// static helper routines
MachineSDNode *PTXInstrInfo::
@@ -316,7 +389,7 @@ void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
}
bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
- const TargetInstrDesc &desc = inst.getDesc();
+ const MCInstrDesc &desc = inst.getDesc();
return desc.isTerminator() || desc.isBranch() || desc.isIndirectBranch();
}
diff --git a/lib/Target/PTX/PTXInstrInfo.h b/lib/Target/PTX/PTXInstrInfo.h
index a04be77..871f1ac 100644
--- a/lib/Target/PTX/PTXInstrInfo.h
+++ b/lib/Target/PTX/PTXInstrInfo.h
@@ -17,6 +17,9 @@
#include "PTXRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "PTXGenInstrInfo.inc"
+
namespace llvm {
class PTXTargetMachine;
@@ -24,7 +27,7 @@ class MachineSDNode;
class SDValue;
class SelectionDAG;
-class PTXInstrInfo : public TargetInstrInfoImpl {
+class PTXInstrInfo : public PTXGenInstrInfo {
private:
const PTXRegisterInfo RI;
PTXTargetMachine &TM;
@@ -84,6 +87,29 @@ public:
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
+ // Memory operand folding for spills
+ // TODO: Implement this eventually and get rid of storeRegToStackSlot and
+ // loadRegFromStackSlot. Doing so would also get rid of the "stack"
+ // registers we currently use to spill, though I doubt the overall effect
+ // on ptxas output would be large. I have yet to see a case where ptxas
+ // fails to see through the "stack" register usage, so it generates
+ // efficient code anyway.
+ // virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ // MachineInstr* MI,
+ // const SmallVectorImpl<unsigned> &Ops,
+ // int FrameIndex) const;
+
+ virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass* RC,
+ const TargetRegisterInfo* TRI) const;
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MII,
+ unsigned DestReg, int FrameIdx,
+ const TargetRegisterClass *RC,
+ const TargetRegisterInfo *TRI) const;
+
// static helper routines
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td
index d5d08be..6bfe906 100644
--- a/lib/Target/PTX/PTXInstrInfo.td
+++ b/lib/Target/PTX/PTXInstrInfo.td
@@ -26,10 +26,10 @@ def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
// Shader Model Support
-def SupportsSM13 : Predicate<"getSubtarget().supportsSM13()">;
-def DoesNotSupportSM13 : Predicate<"!getSubtarget().supportsSM13()">;
-def SupportsSM20 : Predicate<"getSubtarget().supportsSM20()">;
-def DoesNotSupportSM20 : Predicate<"!getSubtarget().supportsSM20()">;
+def FDivNeedsRoundingMode : Predicate<"getSubtarget().fdivNeedsRoundingMode()">;
+def FDivNoRoundingMode : Predicate<"!getSubtarget().fdivNeedsRoundingMode()">;
+def FMadNeedsRoundingMode : Predicate<"getSubtarget().fmadNeedsRoundingMode()">;
+def FMadNoRoundingMode : Predicate<"!getSubtarget().fmadNeedsRoundingMode()">;
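The renamed predicates decouple instruction selection from specific shader models: the .td file now asks a question ("does fdiv need a rounding mode?") and the subtarget answers it. A plausible sketch of the PTXSubtarget side, assuming it records the shader model as an ordered enum (the member and enum names here are illustrative guesses, not lifted from this patch):

    // Hedged sketch of the subtarget helpers referenced by the predicates.
    bool fdivNeedsRoundingMode() const {
      return PTXShaderModel >= PTX_SM_1_3; // SM 1.3+ must spell out .rn on div
    }
    bool fmadNeedsRoundingMode() const {
      return PTXShaderModel >= PTX_SM_1_3; // likewise for mad
    }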
// PTX Version Support
def SupportsPTX21 : Predicate<"getSubtarget().supportsPTX21()">;
@@ -143,11 +143,11 @@ def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
// Address operands
def MEMri32 : Operand<i32> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RRegu32, i32imm);
+ let MIOperandInfo = (ops RegI32, i32imm);
}
def MEMri64 : Operand<i64> {
let PrintMethod = "printMemOperand";
- let MIOperandInfo = (ops RRegu64, i64imm);
+ let MIOperandInfo = (ops RegI64, i64imm);
}
def MEMii32 : Operand<i32> {
let PrintMethod = "printMemOperand";
@@ -163,6 +163,10 @@ def MEMpi : Operand<i32> {
let PrintMethod = "printParamOperand";
let MIOperandInfo = (ops i32imm);
}
+def MEMret : Operand<i32> {
+ let PrintMethod = "printReturnOperand";
+ let MIOperandInfo = (ops i32imm);
+}
// Branch & call targets have OtherVT type.
def brtarget : Operand<OtherVT>;
@@ -180,181 +184,190 @@ def PTXsra : SDNode<"ISD::SRA", SDTIntBinOp>;
def PTXexit
: SDNode<"PTXISD::EXIT", SDTNone, [SDNPHasChain]>;
def PTXret
- : SDNode<"PTXISD::RET", SDTNone, [SDNPHasChain]>;
+ : SDNode<"PTXISD::RET", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def PTXcopyaddress
: SDNode<"PTXISD::COPY_ADDRESS", SDTypeProfile<1, 1, []>, []>;
+// Load/store .param space
+def PTXloadparam
+ : SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+def PTXstoreparam
+ : SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
+ [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
+
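LOAD_PARAM produces a value from a parameter index, STORE_PARAM consumes one, and both thread chain and glue so the stores feeding the now variadic, glue-consuming PTXret (amended above) cannot be reordered or dropped. A hedged sketch of how lowering could build these nodes; DAG, dl, Chain, and the index values are assumed context, and the glue outputs of LOAD_PARAM are elided:

    // Hedged sketch: read an incoming parameter as a LOAD_PARAM node.
    SDValue ArgVal = DAG.getNode(PTXISD::LOAD_PARAM, dl, ArgVT, Chain,
                                 DAG.getTargetConstant(ParamIdx, MVT::i32));
    // Hedged sketch: write a return value into the .param space before ret.
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, DAG.getTargetConstant(RetIdx, MVT::i32), RetVal };
    Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, VTs, Ops, 3);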
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
//===- Floating-Point Instructions - 2 Operand Form -----------------------===//
multiclass PTX_FLOAT_2OP<string opcstr, SDNode opnode> {
- def rr32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a),
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
!strconcat(opcstr, ".f32\t$d, $a"),
- [(set RRegf32:$d, (opnode RRegf32:$a))]>;
- def ri32 : InstPTX<(outs RRegf32:$d),
+ [(set RegF32:$d, (opnode RegF32:$a))]>;
+ def ri32 : InstPTX<(outs RegF32:$d),
(ins f32imm:$a),
!strconcat(opcstr, ".f32\t$d, $a"),
- [(set RRegf32:$d, (opnode fpimm:$a))]>;
- def rr64 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a),
+ [(set RegF32:$d, (opnode fpimm:$a))]>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
!strconcat(opcstr, ".f64\t$d, $a"),
- [(set RRegf64:$d, (opnode RRegf64:$a))]>;
- def ri64 : InstPTX<(outs RRegf64:$d),
+ [(set RegF64:$d, (opnode RegF64:$a))]>;
+ def ri64 : InstPTX<(outs RegF64:$d),
(ins f64imm:$a),
!strconcat(opcstr, ".f64\t$d, $a"),
- [(set RRegf64:$d, (opnode fpimm:$a))]>;
+ [(set RegF64:$d, (opnode fpimm:$a))]>;
}
//===- Floating-Point Instructions - 3 Operand Form -----------------------===//
multiclass PTX_FLOAT_3OP<string opcstr, SDNode opnode> {
- def rr32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, RRegf32:$b),
+ def rr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
!strconcat(opcstr, ".f32\t$d, $a, $b"),
- [(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
- def ri32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, f32imm:$b),
+ [(set RegF32:$d, (opnode RegF32:$a, RegF32:$b))]>;
+ def ri32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
!strconcat(opcstr, ".f32\t$d, $a, $b"),
- [(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
- def rr64 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a, RRegf64:$b),
+ [(set RegF32:$d, (opnode RegF32:$a, fpimm:$b))]>;
+ def rr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
!strconcat(opcstr, ".f64\t$d, $a, $b"),
- [(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
- def ri64 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a, f64imm:$b),
+ [(set RegF64:$d, (opnode RegF64:$a, RegF64:$b))]>;
+ def ri64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
!strconcat(opcstr, ".f64\t$d, $a, $b"),
- [(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
+ [(set RegF64:$d, (opnode RegF64:$a, fpimm:$b))]>;
}
//===- Floating-Point Instructions - 4 Operand Form -----------------------===//
multiclass PTX_FLOAT_4OP<string opcstr, SDNode opnode1, SDNode opnode2> {
- def rrr32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, RRegf32:$b, RRegf32:$c),
+ def rrr32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b, RegF32:$c),
!strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
- [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
- RRegf32:$b),
- RRegf32:$c))]>;
- def rri32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, RRegf32:$b, f32imm:$c),
+ [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
+ RegF32:$b),
+ RegF32:$c))]>;
+ def rri32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b, f32imm:$c),
!strconcat(opcstr, ".f32\t$d, $a, $b, $c"),
- [(set RRegf32:$d, (opnode2 (opnode1 RRegf32:$a,
- RRegf32:$b),
+ [(set RegF32:$d, (opnode2 (opnode1 RegF32:$a,
+ RegF32:$b),
fpimm:$c))]>;
- def rrr64 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a, RRegf64:$b, RRegf64:$c),
+ def rrr64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b, RegF64:$c),
!strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
- [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
- RRegf64:$b),
- RRegf64:$c))]>;
- def rri64 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a, RRegf64:$b, f64imm:$c),
+ [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
+ RegF64:$b),
+ RegF64:$c))]>;
+ def rri64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b, f64imm:$c),
!strconcat(opcstr, ".f64\t$d, $a, $b, $c"),
- [(set RRegf64:$d, (opnode2 (opnode1 RRegf64:$a,
- RRegf64:$b),
+ [(set RegF64:$d, (opnode2 (opnode1 RegF64:$a,
+ RegF64:$b),
fpimm:$c))]>;
}
multiclass INT3<string opcstr, SDNode opnode> {
- def rr16 : InstPTX<(outs RRegu16:$d),
- (ins RRegu16:$a, RRegu16:$b),
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, ".u16\t$d, $a, $b"),
- [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
- def ri16 : InstPTX<(outs RRegu16:$d),
- (ins RRegu16:$a, i16imm:$b),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
!strconcat(opcstr, ".u16\t$d, $a, $b"),
- [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
- def rr32 : InstPTX<(outs RRegu32:$d),
- (ins RRegu32:$a, RRegu32:$b),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
!strconcat(opcstr, ".u32\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
- def ri32 : InstPTX<(outs RRegu32:$d),
- (ins RRegu32:$a, i32imm:$b),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
!strconcat(opcstr, ".u32\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
- def rr64 : InstPTX<(outs RRegu64:$d),
- (ins RRegu64:$a, RRegu64:$b),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
!strconcat(opcstr, ".u64\t$d, $a, $b"),
- [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
- def ri64 : InstPTX<(outs RRegu64:$d),
- (ins RRegu64:$a, i64imm:$b),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
!strconcat(opcstr, ".u64\t$d, $a, $b"),
- [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}
multiclass PTX_LOGIC<string opcstr, SDNode opnode> {
- def ripreds : InstPTX<(outs Preds:$d),
- (ins Preds:$a, i1imm:$b),
+ def ripreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, i1imm:$b),
!strconcat(opcstr, ".pred\t$d, $a, $b"),
- [(set Preds:$d, (opnode Preds:$a, imm:$b))]>;
- def rrpreds : InstPTX<(outs Preds:$d),
- (ins Preds:$a, Preds:$b),
+ [(set RegPred:$d, (opnode RegPred:$a, imm:$b))]>;
+ def rrpreds : InstPTX<(outs RegPred:$d),
+ (ins RegPred:$a, RegPred:$b),
!strconcat(opcstr, ".pred\t$d, $a, $b"),
- [(set Preds:$d, (opnode Preds:$a, Preds:$b))]>;
- def rr16 : InstPTX<(outs RRegu16:$d),
- (ins RRegu16:$a, RRegu16:$b),
+ [(set RegPred:$d, (opnode RegPred:$a, RegPred:$b))]>;
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, ".b16\t$d, $a, $b"),
- [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
- def ri16 : InstPTX<(outs RRegu16:$d),
- (ins RRegu16:$a, i16imm:$b),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
!strconcat(opcstr, ".b16\t$d, $a, $b"),
- [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
- def rr32 : InstPTX<(outs RRegu32:$d),
- (ins RRegu32:$a, RRegu32:$b),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
!strconcat(opcstr, ".b32\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
- def ri32 : InstPTX<(outs RRegu32:$d),
- (ins RRegu32:$a, i32imm:$b),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
!strconcat(opcstr, ".b32\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
- def rr64 : InstPTX<(outs RRegu64:$d),
- (ins RRegu64:$a, RRegu64:$b),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
!strconcat(opcstr, ".b64\t$d, $a, $b"),
- [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
- def ri64 : InstPTX<(outs RRegu64:$d),
- (ins RRegu64:$a, i64imm:$b),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
!strconcat(opcstr, ".b64\t$d, $a, $b"),
- [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
}
multiclass INT3ntnc<string opcstr, SDNode opnode> {
- def rr16 : InstPTX<(outs RRegu16:$d),
- (ins RRegu16:$a, RRegu16:$b),
+ def rr16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, RegI16:$b),
!strconcat(opcstr, "16\t$d, $a, $b"),
- [(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
- def rr32 : InstPTX<(outs RRegu32:$d),
- (ins RRegu32:$a, RRegu32:$b),
+ [(set RegI16:$d, (opnode RegI16:$a, RegI16:$b))]>;
+ def rr32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, RegI32:$b),
!strconcat(opcstr, "32\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
- def rr64 : InstPTX<(outs RRegu64:$d),
- (ins RRegu64:$a, RRegu64:$b),
+ [(set RegI32:$d, (opnode RegI32:$a, RegI32:$b))]>;
+ def rr64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, RegI64:$b),
!strconcat(opcstr, "64\t$d, $a, $b"),
- [(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
- def ri16 : InstPTX<(outs RRegu16:$d),
- (ins RRegu16:$a, i16imm:$b),
+ [(set RegI64:$d, (opnode RegI64:$a, RegI64:$b))]>;
+ def ri16 : InstPTX<(outs RegI16:$d),
+ (ins RegI16:$a, i16imm:$b),
!strconcat(opcstr, "16\t$d, $a, $b"),
- [(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
- def ri32 : InstPTX<(outs RRegu32:$d),
- (ins RRegu32:$a, i32imm:$b),
+ [(set RegI16:$d, (opnode RegI16:$a, imm:$b))]>;
+ def ri32 : InstPTX<(outs RegI32:$d),
+ (ins RegI32:$a, i32imm:$b),
!strconcat(opcstr, "32\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
- def ri64 : InstPTX<(outs RRegu64:$d),
- (ins RRegu64:$a, i64imm:$b),
+ [(set RegI32:$d, (opnode RegI32:$a, imm:$b))]>;
+ def ri64 : InstPTX<(outs RegI64:$d),
+ (ins RegI64:$a, i64imm:$b),
!strconcat(opcstr, "64\t$d, $a, $b"),
- [(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
- def ir16 : InstPTX<(outs RRegu16:$d),
- (ins i16imm:$a, RRegu16:$b),
+ [(set RegI64:$d, (opnode RegI64:$a, imm:$b))]>;
+ def ir16 : InstPTX<(outs RegI16:$d),
+ (ins i16imm:$a, RegI16:$b),
!strconcat(opcstr, "16\t$d, $a, $b"),
- [(set RRegu16:$d, (opnode imm:$a, RRegu16:$b))]>;
- def ir32 : InstPTX<(outs RRegu32:$d),
- (ins i32imm:$a, RRegu32:$b),
+ [(set RegI16:$d, (opnode imm:$a, RegI16:$b))]>;
+ def ir32 : InstPTX<(outs RegI32:$d),
+ (ins i32imm:$a, RegI32:$b),
!strconcat(opcstr, "32\t$d, $a, $b"),
- [(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
- def ir64 : InstPTX<(outs RRegu64:$d),
- (ins i64imm:$a, RRegu64:$b),
+ [(set RegI32:$d, (opnode imm:$a, RegI32:$b))]>;
+ def ir64 : InstPTX<(outs RegI64:$d),
+ (ins i64imm:$a, RegI64:$b),
!strconcat(opcstr, "64\t$d, $a, $b"),
- [(set RRegu64:$d, (opnode imm:$a, RRegu64:$b))]>;
+ [(set RegI64:$d, (opnode imm:$a, RegI64:$b))]>;
}
multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
@@ -362,63 +375,63 @@ multiclass PTX_SETP_I<RegisterClass RC, string regclsname, Operand immcls,
// TODO support 5-operand format: p|q, a, b, c
def rr
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
!strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
- [(set Preds:$p, (setcc RC:$a, RC:$b, cmp))]>;
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, cmp))]>;
def ri
- : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b),
!strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
- [(set Preds:$p, (setcc RC:$a, imm:$b, cmp))]>;
+ [(set RegPred:$p, (setcc RC:$a, imm:$b, cmp))]>;
def rr_and_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_and_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
def rr_or_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_or_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
def rr_xor_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), Preds:$c))]>;
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), RegPred:$c))]>;
def ri_xor_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), Preds:$c))]>;
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), RegPred:$c))]>;
def rr_and_not_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (and (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
def ri_and_not_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (and (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (and (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
def rr_or_not_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (or (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
def ri_or_not_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (or (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (or (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
def rr_xor_not_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (xor (setcc RC:$a, RC:$b, cmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, cmp), (not RegPred:$c)))]>;
def ri_xor_not_r
- : InstPTX<(outs Preds:$p), (ins RC:$a, immcls:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, immcls:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (xor (setcc RC:$a, imm:$b, cmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (xor (setcc RC:$a, imm:$b, cmp), (not RegPred:$c)))]>;
}
multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
@@ -426,74 +439,74 @@ multiclass PTX_SETP_FP<RegisterClass RC, string regclsname,
// TODO support 5-operand format: p|q, a, b, c
def rr_u
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
!strconcat("setp.", cmpstr, "u.", regclsname, "\t$p, $a, $b"),
- [(set Preds:$p, (setcc RC:$a, RC:$b, ucmp))]>;
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ucmp))]>;
def rr_o
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b),
!strconcat("setp.", cmpstr, ".", regclsname, "\t$p, $a, $b"),
- [(set Preds:$p, (setcc RC:$a, RC:$b, ocmp))]>;
+ [(set RegPred:$p, (setcc RC:$a, RC:$b, ocmp))]>;
def rr_and_r_u
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
def rr_and_r_o
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
def rr_or_r_u
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
def rr_or_r_o
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
def rr_xor_r_u
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), Preds:$c))]>;
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), RegPred:$c))]>;
def rr_xor_r_o
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, $c"),
- [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), Preds:$c))]>;
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), RegPred:$c))]>;
def rr_and_not_r_u
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, "u.and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (and (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
def rr_and_not_r_o
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".and.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (and (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (and (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
def rr_or_not_r_u
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, "u.or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (or (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
def rr_or_not_r_o
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".or.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (or (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (or (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
def rr_xor_not_r_u
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, "u.xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ucmp), (not RegPred:$c)))]>;
def rr_xor_not_r_o
- : InstPTX<(outs Preds:$p), (ins RC:$a, RC:$b, Preds:$c),
+ : InstPTX<(outs RegPred:$p), (ins RC:$a, RC:$b, RegPred:$c),
!strconcat("setp.", cmpstr, ".xor.", regclsname, "\t$p, $a, $b, !$c"),
- [(set Preds:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not Preds:$c)))]>;
+ [(set RegPred:$p, (xor (setcc RC:$a, RC:$b, ocmp), (not RegPred:$c)))]>;
}
multiclass PTX_SELP<RegisterClass RC, string regclsname> {
def rr
- : InstPTX<(outs RC:$r), (ins Preds:$a, RC:$b, RC:$c),
+ : InstPTX<(outs RC:$r), (ins RegPred:$a, RC:$b, RC:$c),
!strconcat("selp.", regclsname, "\t$r, $b, $c, $a"),
- [(set RC:$r, (select Preds:$a, RC:$b, RC:$c))]>;
+ [(set RC:$r, (select RegPred:$a, RC:$b, RC:$c))]>;
}
multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
@@ -524,11 +537,11 @@ multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_lo
}
multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
- defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
- defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
- defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
- defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
- defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
+ defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
+ defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
+ defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
+ defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
+ defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
}
multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
@@ -559,11 +572,11 @@ multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_st
}
multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
- defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
- defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
- defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
- defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
- defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
+ defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
+ defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
+ defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
+ defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
+ defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
}
//===----------------------------------------------------------------------===//
@@ -584,44 +597,59 @@ defm REM : INT3<"rem", urem>;
defm FNEG : PTX_FLOAT_2OP<"neg", fneg>;
// Standard Binary Operations
-defm FADD : PTX_FLOAT_3OP<"add", fadd>;
-defm FSUB : PTX_FLOAT_3OP<"sub", fsub>;
-defm FMUL : PTX_FLOAT_3OP<"mul", fmul>;
-
-// TODO: Allow user selection of rounding modes for fdiv.
-// For division, we need to have f32 and f64 differently.
-// For f32, we just always use .approx since it is supported on all hardware
-// for PTX 1.4+, which is our minimum target.
-def FDIVrr32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, RRegf32:$b),
- "div.approx.f32\t$d, $a, $b",
- [(set RRegf32:$d, (fdiv RRegf32:$a, RRegf32:$b))]>;
-def FDIVri32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a, f32imm:$b),
- "div.approx.f32\t$d, $a, $b",
- [(set RRegf32:$d, (fdiv RRegf32:$a, fpimm:$b))]>;
-
-// For f64, we must specify a rounding for sm 1.3+ but *not* for sm 1.0.
-def FDIVrr64SM13 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a, RRegf64:$b),
+defm FADD : PTX_FLOAT_3OP<"add.rn", fadd>;
+defm FSUB : PTX_FLOAT_3OP<"sub.rn", fsub>;
+defm FMUL : PTX_FLOAT_3OP<"mul.rn", fmul>;
+
+// For floating-point division:
+// SM 1.3+ requires an explicit rounding mode (we use .rn) for f32 and f64,
+// while SM 1.0 must *not* specify one.
+
+// TODO:
+// - Allow user selection of rounding modes for fdiv
+// - Add support for -prec-div=false (.approx)
+
+def FDIVrr32SM13 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ "div.rn.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVri32SM13 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ "div.rn.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVrr32SM10 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, RegF32:$b),
+ "div.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, RegF32:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+def FDIVri32SM10 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a, f32imm:$b),
+ "div.f32\t$d, $a, $b",
+ [(set RegF32:$d, (fdiv RegF32:$a, fpimm:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+
+def FDIVrr64SM13 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
"div.rn.f64\t$d, $a, $b",
- [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
- Requires<[SupportsSM13]>;
-def FDIVri64SM13 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a, f64imm:$b),
+ [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVri64SM13 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
"div.rn.f64\t$d, $a, $b",
- [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
- Requires<[SupportsSM13]>;
-def FDIVrr64SM10 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a, RRegf64:$b),
+ [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
+ Requires<[FDivNeedsRoundingMode]>;
+def FDIVrr64SM10 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, RegF64:$b),
"div.f64\t$d, $a, $b",
- [(set RRegf64:$d, (fdiv RRegf64:$a, RRegf64:$b))]>,
- Requires<[DoesNotSupportSM13]>;
-def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a, f64imm:$b),
+ [(set RegF64:$d, (fdiv RegF64:$a, RegF64:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
+def FDIVri64SM10 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a, f64imm:$b),
"div.f64\t$d, $a, $b",
- [(set RRegf64:$d, (fdiv RRegf64:$a, fpimm:$b))]>,
- Requires<[DoesNotSupportSM13]>;
+ [(set RegF64:$d, (fdiv RegF64:$a, fpimm:$b))]>,
+ Requires<[FDivNoRoundingMode]>;
@@ -633,40 +661,42 @@ def FDIVri64SM10 : InstPTX<(outs RRegf64:$d),
// In the short term, mad is supported on all PTX versions and we use a
// default rounding mode no matter what shader model or PTX version.
// TODO: Allow the rounding mode to be selectable through llc.
-defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>, Requires<[SupportsSM13, SupportsFMA]>;
-defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>, Requires<[DoesNotSupportSM13, SupportsFMA]>;
+defm FMADSM13 : PTX_FLOAT_4OP<"mad.rn", fmul, fadd>,
+ Requires<[FMadNeedsRoundingMode, SupportsFMA]>;
+defm FMAD : PTX_FLOAT_4OP<"mad", fmul, fadd>,
+ Requires<[FMadNoRoundingMode, SupportsFMA]>;
///===- Floating-Point Intrinsic Instructions -----------------------------===//
-def FSQRT32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a),
+def FSQRT32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
"sqrt.rn.f32\t$d, $a",
- [(set RRegf32:$d, (fsqrt RRegf32:$a))]>;
+ [(set RegF32:$d, (fsqrt RegF32:$a))]>;
-def FSQRT64 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a),
+def FSQRT64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
"sqrt.rn.f64\t$d, $a",
- [(set RRegf64:$d, (fsqrt RRegf64:$a))]>;
+ [(set RegF64:$d, (fsqrt RegF64:$a))]>;
-def FSIN32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a),
+def FSIN32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
"sin.approx.f32\t$d, $a",
- [(set RRegf32:$d, (fsin RRegf32:$a))]>;
+ [(set RegF32:$d, (fsin RegF32:$a))]>;
-def FSIN64 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a),
+def FSIN64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
"sin.approx.f64\t$d, $a",
- [(set RRegf64:$d, (fsin RRegf64:$a))]>;
+ [(set RegF64:$d, (fsin RegF64:$a))]>;
-def FCOS32 : InstPTX<(outs RRegf32:$d),
- (ins RRegf32:$a),
+def FCOS32 : InstPTX<(outs RegF32:$d),
+ (ins RegF32:$a),
"cos.approx.f32\t$d, $a",
- [(set RRegf32:$d, (fcos RRegf32:$a))]>;
+ [(set RegF32:$d, (fcos RegF32:$a))]>;
-def FCOS64 : InstPTX<(outs RRegf64:$d),
- (ins RRegf64:$a),
+def FCOS64 : InstPTX<(outs RegF64:$d),
+ (ins RegF64:$a),
"cos.approx.f64\t$d, $a",
- [(set RRegf64:$d, (fcos RRegf64:$a))]>;
+ [(set RegF64:$d, (fcos RegF64:$a))]>;
///===- Comparison and Selection Instructions -----------------------------===//
@@ -675,56 +705,68 @@ def FCOS64 : InstPTX<(outs RRegf64:$d),
// Compare u16
-defm SETPEQu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETEQ, "eq">;
-defm SETPNEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETNE, "ne">;
-defm SETPLTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULT, "lt">;
-defm SETPLEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETULE, "le">;
-defm SETPGTu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGT, "gt">;
-defm SETPGEu16 : PTX_SETP_I<RRegu16, "u16", i16imm, SETUGE, "ge">;
+defm SETPEQu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETEQ, "eq">;
+defm SETPNEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETNE, "ne">;
+defm SETPLTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULT, "lt">;
+defm SETPLEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETULE, "le">;
+defm SETPGTu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGT, "gt">;
+defm SETPGEu16 : PTX_SETP_I<RegI16, "u16", i16imm, SETUGE, "ge">;
+defm SETPLTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLT, "lt">;
+defm SETPLEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETLE, "le">;
+defm SETPGTs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGT, "gt">;
+defm SETPGEs16 : PTX_SETP_I<RegI16, "s16", i16imm, SETGE, "ge">;
// Compare u32
-defm SETPEQu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETEQ, "eq">;
-defm SETPNEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETNE, "ne">;
-defm SETPLTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULT, "lt">;
-defm SETPLEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETULE, "le">;
-defm SETPGTu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGT, "gt">;
-defm SETPGEu32 : PTX_SETP_I<RRegu32, "u32", i32imm, SETUGE, "ge">;
+defm SETPEQu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETEQ, "eq">;
+defm SETPNEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETNE, "ne">;
+defm SETPLTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULT, "lt">;
+defm SETPLEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETULE, "le">;
+defm SETPGTu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGT, "gt">;
+defm SETPGEu32 : PTX_SETP_I<RegI32, "u32", i32imm, SETUGE, "ge">;
+defm SETPLTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLT, "lt">;
+defm SETPLEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETLE, "le">;
+defm SETPGTs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGT, "gt">;
+defm SETPGEs32 : PTX_SETP_I<RegI32, "s32", i32imm, SETGE, "ge">;
// Compare u64
-defm SETPEQu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETEQ, "eq">;
-defm SETPNEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETNE, "ne">;
-defm SETPLTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULT, "lt">;
-defm SETPLEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETULE, "le">;
-defm SETPGTu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGT, "gt">;
-defm SETPGEu64 : PTX_SETP_I<RRegu64, "u64", i64imm, SETUGE, "ge">;
+defm SETPEQu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETEQ, "eq">;
+defm SETPNEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETNE, "ne">;
+defm SETPLTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULT, "lt">;
+defm SETPLEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETULE, "le">;
+defm SETPGTu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGT, "gt">;
+defm SETPGEu64 : PTX_SETP_I<RegI64, "u64", i64imm, SETUGE, "ge">;
+defm SETPLTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLT, "lt">;
+defm SETPLEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETLE, "le">;
+defm SETPGTs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGT, "gt">;
+defm SETPGEs64 : PTX_SETP_I<RegI64, "s64", i64imm, SETGE, "ge">;
// Compare f32
-defm SETPEQf32 : PTX_SETP_FP<RRegf32, "f32", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf32 : PTX_SETP_FP<RRegf32, "f32", SETUNE, SETONE, "ne">;
-defm SETPLTf32 : PTX_SETP_FP<RRegf32, "f32", SETULT, SETOLT, "lt">;
-defm SETPLEf32 : PTX_SETP_FP<RRegf32, "f32", SETULE, SETOLE, "le">;
-defm SETPGTf32 : PTX_SETP_FP<RRegf32, "f32", SETUGT, SETOGT, "gt">;
-defm SETPGEf32 : PTX_SETP_FP<RRegf32, "f32", SETUGE, SETOGE, "ge">;
+defm SETPEQf32 : PTX_SETP_FP<RegF32, "f32", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf32 : PTX_SETP_FP<RegF32, "f32", SETUNE, SETONE, "ne">;
+defm SETPLTf32 : PTX_SETP_FP<RegF32, "f32", SETULT, SETOLT, "lt">;
+defm SETPLEf32 : PTX_SETP_FP<RegF32, "f32", SETULE, SETOLE, "le">;
+defm SETPGTf32 : PTX_SETP_FP<RegF32, "f32", SETUGT, SETOGT, "gt">;
+defm SETPGEf32 : PTX_SETP_FP<RegF32, "f32", SETUGE, SETOGE, "ge">;
// Compare f64
-defm SETPEQf64 : PTX_SETP_FP<RRegf64, "f64", SETUEQ, SETOEQ, "eq">;
-defm SETPNEf64 : PTX_SETP_FP<RRegf64, "f64", SETUNE, SETONE, "ne">;
-defm SETPLTf64 : PTX_SETP_FP<RRegf64, "f64", SETULT, SETOLT, "lt">;
-defm SETPLEf64 : PTX_SETP_FP<RRegf64, "f64", SETULE, SETOLE, "le">;
-defm SETPGTf64 : PTX_SETP_FP<RRegf64, "f64", SETUGT, SETOGT, "gt">;
-defm SETPGEf64 : PTX_SETP_FP<RRegf64, "f64", SETUGE, SETOGE, "ge">;
+defm SETPEQf64 : PTX_SETP_FP<RegF64, "f64", SETUEQ, SETOEQ, "eq">;
+defm SETPNEf64 : PTX_SETP_FP<RegF64, "f64", SETUNE, SETONE, "ne">;
+defm SETPLTf64 : PTX_SETP_FP<RegF64, "f64", SETULT, SETOLT, "lt">;
+defm SETPLEf64 : PTX_SETP_FP<RegF64, "f64", SETULE, SETOLE, "le">;
+defm SETPGTf64 : PTX_SETP_FP<RegF64, "f64", SETUGT, SETOGT, "gt">;
+defm SETPGEf64 : PTX_SETP_FP<RegF64, "f64", SETUGE, SETOGE, "ge">;
// .selp
-defm PTX_SELPu16 : PTX_SELP<RRegu16, "u16">;
-defm PTX_SELPu32 : PTX_SELP<RRegu32, "u32">;
-defm PTX_SELPu64 : PTX_SELP<RRegu64, "u64">;
-defm PTX_SELPf32 : PTX_SELP<RRegf32, "f32">;
-defm PTX_SELPf64 : PTX_SELP<RRegf64, "f64">;
+defm PTX_SELPu16 : PTX_SELP<RegI16, "u16">;
+defm PTX_SELPu32 : PTX_SELP<RegI32, "u32">;
+defm PTX_SELPu64 : PTX_SELP<RegI64, "u64">;
+defm PTX_SELPf32 : PTX_SELP<RegF32, "f32">;
+defm PTX_SELPf64 : PTX_SELP<RegF64, "f64">;
///===- Logic and Shift Instructions --------------------------------------===//
@@ -740,47 +782,47 @@ defm XOR : PTX_LOGIC<"xor", xor>;
let neverHasSideEffects = 1 in {
def MOVPREDrr
- : InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
+ : InstPTX<(outs RegPred:$d), (ins RegPred:$a), "mov.pred\t$d, $a", []>;
def MOVU16rr
- : InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
+ : InstPTX<(outs RegI16:$d), (ins RegI16:$a), "mov.u16\t$d, $a", []>;
def MOVU32rr
- : InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
+ : InstPTX<(outs RegI32:$d), (ins RegI32:$a), "mov.u32\t$d, $a", []>;
def MOVU64rr
- : InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
+ : InstPTX<(outs RegI64:$d), (ins RegI64:$a), "mov.u64\t$d, $a", []>;
def MOVF32rr
- : InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
+ : InstPTX<(outs RegF32:$d), (ins RegF32:$a), "mov.f32\t$d, $a", []>;
def MOVF64rr
- : InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
+ : InstPTX<(outs RegF64:$d), (ins RegF64:$a), "mov.f64\t$d, $a", []>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOVPREDri
- : InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
- [(set Preds:$d, imm:$a)]>;
+ : InstPTX<(outs RegPred:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
+ [(set RegPred:$d, imm:$a)]>;
def MOVU16ri
- : InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
- [(set RRegu16:$d, imm:$a)]>;
+ : InstPTX<(outs RegI16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
+ [(set RegI16:$d, imm:$a)]>;
def MOVU32ri
- : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
- [(set RRegu32:$d, imm:$a)]>;
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, imm:$a)]>;
def MOVU64ri
- : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
- [(set RRegu64:$d, imm:$a)]>;
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, imm:$a)]>;
def MOVF32ri
- : InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
- [(set RRegf32:$d, fpimm:$a)]>;
+ : InstPTX<(outs RegF32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
+ [(set RegF32:$d, fpimm:$a)]>;
def MOVF64ri
- : InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
- [(set RRegf64:$d, fpimm:$a)]>;
+ : InstPTX<(outs RegF64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
+ [(set RegF64:$d, fpimm:$a)]>;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def MOVaddr32
- : InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
- [(set RRegu32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
+ [(set RegI32:$d, (PTXcopyaddress tglobaladdr:$a))]>;
def MOVaddr64
- : InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
- [(set RRegu64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
+ : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
+ [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>;
}
// Loads
@@ -789,17 +831,48 @@ defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
defm LDl : PTX_LD_ALL<"ld.local", load_local>;
defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
-// This is a special instruction that is manually inserted for kernel parameters
-def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
- "ld.param.u16\t$d, [$a]", []>;
-def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
- "ld.param.u32\t$d, [$a]", []>;
-def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
- "ld.param.u64\t$d, [$a]", []>;
-def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
- "ld.param.f32\t$d, [$a]", []>;
-def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
- "ld.param.f64\t$d, [$a]", []>;
+// These instructions are used to load values from and store values to the
+// .param space for device and kernel parameters.
+
+let hasSideEffects = 1 in {
+ def LDpiPred : InstPTX<(outs RegPred:$d), (ins MEMpi:$a),
+ "ld.param.pred\t$d, [$a]",
+ [(set RegPred:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU16 : InstPTX<(outs RegI16:$d), (ins MEMpi:$a),
+ "ld.param.u16\t$d, [$a]",
+ [(set RegI16:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU32 : InstPTX<(outs RegI32:$d), (ins MEMpi:$a),
+ "ld.param.u32\t$d, [$a]",
+ [(set RegI32:$d, (PTXloadparam timm:$a))]>;
+ def LDpiU64 : InstPTX<(outs RegI64:$d), (ins MEMpi:$a),
+ "ld.param.u64\t$d, [$a]",
+ [(set RegI64:$d, (PTXloadparam timm:$a))]>;
+ def LDpiF32 : InstPTX<(outs RegF32:$d), (ins MEMpi:$a),
+ "ld.param.f32\t$d, [$a]",
+ [(set RegF32:$d, (PTXloadparam timm:$a))]>;
+ def LDpiF64 : InstPTX<(outs RegF64:$d), (ins MEMpi:$a),
+ "ld.param.f64\t$d, [$a]",
+ [(set RegF64:$d, (PTXloadparam timm:$a))]>;
+
+ def STpiPred : InstPTX<(outs), (ins MEMret:$d, RegPred:$a),
+ "st.param.pred\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegPred:$a)]>;
+ def STpiU16 : InstPTX<(outs), (ins MEMret:$d, RegI16:$a),
+ "st.param.u16\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI16:$a)]>;
+ def STpiU32 : InstPTX<(outs), (ins MEMret:$d, RegI32:$a),
+ "st.param.u32\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI32:$a)]>;
+ def STpiU64 : InstPTX<(outs), (ins MEMret:$d, RegI64:$a),
+ "st.param.u64\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegI64:$a)]>;
+ def STpiF32 : InstPTX<(outs), (ins MEMret:$d, RegF32:$a),
+ "st.param.f32\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegF32:$a)]>;
+ def STpiF64 : InstPTX<(outs), (ins MEMret:$d, RegF64:$a),
+ "st.param.f64\t[$d], $a",
+ [(PTXstoreparam timm:$d, RegF64:$a)]>;
+}
// Stores
defm STg : PTX_ST_ALL<"st.global", store_global>;
@@ -811,136 +884,174 @@ defm STs : PTX_ST_ALL<"st.shared", store_shared>;
// TODO: Do something with st.param if/when it is needed.
// Conversion to pred
-
+// PTX does not directly support converting to a predicate type, so we fake it
+// by performing a greater-than test between the value and zero. This follows
+// the C convention that any non-zero value is equivalent to 'true'.
def CVT_pred_u16
- : InstPTX<(outs Preds:$d), (ins RRegu16:$a), "cvt.pred.u16\t$d, $a",
- [(set Preds:$d, (trunc RRegu16:$a))]>;
+ : InstPTX<(outs RegPred:$d), (ins RegI16:$a), "setp.gt.u16\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI16:$a))]>;
def CVT_pred_u32
- : InstPTX<(outs Preds:$d), (ins RRegu32:$a), "cvt.pred.u32\t$d, $a",
- [(set Preds:$d, (trunc RRegu32:$a))]>;
+ : InstPTX<(outs RegPred:$d), (ins RegI32:$a), "setp.gt.u32\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI32:$a))]>;
def CVT_pred_u64
- : InstPTX<(outs Preds:$d), (ins RRegu64:$a), "cvt.pred.u64\t$d, $a",
- [(set Preds:$d, (trunc RRegu64:$a))]>;
+ : InstPTX<(outs RegPred:$d), (ins RegI64:$a), "setp.gt.u64\t$d, $a, 0",
+ [(set RegPred:$d, (trunc RegI64:$a))]>;
def CVT_pred_f32
- : InstPTX<(outs Preds:$d), (ins RRegf32:$a), "cvt.rni.pred.f32\t$d, $a",
- [(set Preds:$d, (fp_to_uint RRegf32:$a))]>;
+ : InstPTX<(outs RegPred:$d), (ins RegF32:$a), "setp.gt.f32\t$d, $a, 0",
+ [(set RegPred:$d, (fp_to_uint RegF32:$a))]>;
def CVT_pred_f64
- : InstPTX<(outs Preds:$d), (ins RRegf64:$a), "cvt.rni.pred.f64\t$d, $a",
- [(set Preds:$d, (fp_to_uint RRegf64:$a))]>;
+ : InstPTX<(outs RegPred:$d), (ins RegF64:$a), "setp.gt.f64\t$d, $a, 0",
+ [(set RegPred:$d, (fp_to_uint RegF64:$a))]>;
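To make the convention concrete (an editorial note, not part of the patch): the chosen lowering treats any value greater than zero as true, which matches C's non-zero-is-true reading but is not a bit-exact trunc. In C++ terms:

    // What the emitted setp.gt.u32 computes for an i32 input x:
    bool p = (x > 0u);
    // What a bit-exact 'trunc i32 %x to i1' would compute instead:
    bool bitExact = (x & 1u) != 0;
    // The two differ for even non-zero inputs, e.g. x == 2.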
// Conversion to u16
+// PTX does not directly support converting a predicate to a value, so we
+// use a select instruction to select either 0 or 1 (integer or fp) based
+// on the truth value of the predicate.
+def CVT_u16_preda
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (anyext RegPred:$a))]>;
def CVT_u16_pred
- : InstPTX<(outs RRegu16:$d), (ins Preds:$a), "cvt.u16.pred\t$d, $a",
- [(set RRegu16:$d, (zext Preds:$a))]>;
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (zext RegPred:$a))]>;
+
+def CVT_u16_preds
+ : InstPTX<(outs RegI16:$d), (ins RegPred:$a), "selp.u16\t$d, 1, 0, $a",
+ [(set RegI16:$d, (sext RegPred:$a))]>;
def CVT_u16_u32
- : InstPTX<(outs RRegu16:$d), (ins RRegu32:$a), "cvt.u16.u32\t$d, $a",
- [(set RRegu16:$d, (trunc RRegu32:$a))]>;
+ : InstPTX<(outs RegI16:$d), (ins RegI32:$a), "cvt.u16.u32\t$d, $a",
+ [(set RegI16:$d, (trunc RegI32:$a))]>;
def CVT_u16_u64
- : InstPTX<(outs RRegu16:$d), (ins RRegu64:$a), "cvt.u16.u64\t$d, $a",
- [(set RRegu16:$d, (trunc RRegu64:$a))]>;
+ : InstPTX<(outs RegI16:$d), (ins RegI64:$a), "cvt.u16.u64\t$d, $a",
+ [(set RegI16:$d, (trunc RegI64:$a))]>;
def CVT_u16_f32
- : InstPTX<(outs RRegu16:$d), (ins RRegf32:$a), "cvt.rni.u16.f32\t$d, $a",
- [(set RRegu16:$d, (fp_to_uint RRegf32:$a))]>;
+ : InstPTX<(outs RegI16:$d), (ins RegF32:$a), "cvt.rzi.u16.f32\t$d, $a",
+ [(set RegI16:$d, (fp_to_uint RegF32:$a))]>;
def CVT_u16_f64
- : InstPTX<(outs RRegu16:$d), (ins RRegf64:$a), "cvt.rni.u16.f64\t$d, $a",
- [(set RRegu16:$d, (fp_to_uint RRegf64:$a))]>;
+ : InstPTX<(outs RegI16:$d), (ins RegF64:$a), "cvt.rzi.u16.f64\t$d, $a",
+ [(set RegI16:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to u32
def CVT_u32_pred
- : InstPTX<(outs RRegu32:$d), (ins Preds:$a), "cvt.u32.pred\t$d, $a",
- [(set RRegu32:$d, (zext Preds:$a))]>;
+ : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
+ [(set RegI32:$d, (zext RegPred:$a))]>;
+
+def CVT_u32_b16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RegI32:$d, (anyext RegI16:$a))]>;
def CVT_u32_u16
- : InstPTX<(outs RRegu32:$d), (ins RRegu16:$a), "cvt.u32.u16\t$d, $a",
- [(set RRegu32:$d, (zext RRegu16:$a))]>;
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.u16\t$d, $a",
+ [(set RegI32:$d, (zext RegI16:$a))]>;
+
+def CVT_u32_preds
+ : InstPTX<(outs RegI32:$d), (ins RegPred:$a), "selp.u32\t$d, 1, 0, $a",
+ [(set RegI32:$d, (sext RegPred:$a))]>;
+
+def CVT_u32_s16
+ : InstPTX<(outs RegI32:$d), (ins RegI16:$a), "cvt.u32.s16\t$d, $a",
+ [(set RegI32:$d, (sext RegI16:$a))]>;
def CVT_u32_u64
- : InstPTX<(outs RRegu32:$d), (ins RRegu64:$a), "cvt.u32.u64\t$d, $a",
- [(set RRegu32:$d, (trunc RRegu64:$a))]>;
+ : InstPTX<(outs RegI32:$d), (ins RegI64:$a), "cvt.u32.u64\t$d, $a",
+ [(set RegI32:$d, (trunc RegI64:$a))]>;
def CVT_u32_f32
- : InstPTX<(outs RRegu32:$d), (ins RRegf32:$a), "cvt.rni.u32.f32\t$d, $a",
- [(set RRegu32:$d, (fp_to_uint RRegf32:$a))]>;
+ : InstPTX<(outs RegI32:$d), (ins RegF32:$a), "cvt.rzi.u32.f32\t$d, $a",
+ [(set RegI32:$d, (fp_to_uint RegF32:$a))]>;
def CVT_u32_f64
- : InstPTX<(outs RRegu32:$d), (ins RRegf64:$a), "cvt.rni.u32.f64\t$d, $a",
- [(set RRegu32:$d, (fp_to_uint RRegf64:$a))]>;
+ : InstPTX<(outs RegI32:$d), (ins RegF64:$a), "cvt.rzi.u32.f64\t$d, $a",
+ [(set RegI32:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to u64
def CVT_u64_pred
- : InstPTX<(outs RRegu64:$d), (ins Preds:$a), "cvt.u64.pred\t$d, $a",
- [(set RRegu64:$d, (zext Preds:$a))]>;
+ : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
+ [(set RegI64:$d, (zext RegPred:$a))]>;
+
+def CVT_u64_preds
+ : InstPTX<(outs RegI64:$d), (ins RegPred:$a), "selp.u64\t$d, 1, 0, $a",
+ [(set RegI64:$d, (sext RegPred:$a))]>;
def CVT_u64_u16
- : InstPTX<(outs RRegu64:$d), (ins RRegu16:$a), "cvt.u64.u16\t$d, $a",
- [(set RRegu64:$d, (zext RRegu16:$a))]>;
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.u16\t$d, $a",
+ [(set RegI64:$d, (zext RegI16:$a))]>;
+
+def CVT_u64_s16
+ : InstPTX<(outs RegI64:$d), (ins RegI16:$a), "cvt.u64.s16\t$d, $a",
+ [(set RegI64:$d, (sext RegI16:$a))]>;
def CVT_u64_u32
- : InstPTX<(outs RRegu64:$d), (ins RRegu32:$a), "cvt.u64.u32\t$d, $a",
- [(set RRegu64:$d, (zext RRegu32:$a))]>;
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.u32\t$d, $a",
+ [(set RegI64:$d, (zext RegI32:$a))]>;
+
+def CVT_u64_s32
+ : InstPTX<(outs RegI64:$d), (ins RegI32:$a), "cvt.u64.s32\t$d, $a",
+ [(set RegI64:$d, (sext RegI32:$a))]>;
def CVT_u64_f32
- : InstPTX<(outs RRegu64:$d), (ins RRegf32:$a), "cvt.rni.u64.f32\t$d, $a",
- [(set RRegu64:$d, (fp_to_uint RRegf32:$a))]>;
+ : InstPTX<(outs RegI64:$d), (ins RegF32:$a), "cvt.rzi.u64.f32\t$d, $a",
+ [(set RegI64:$d, (fp_to_uint RegF32:$a))]>;
def CVT_u64_f64
- : InstPTX<(outs RRegu64:$d), (ins RRegf64:$a), "cvt.rni.u64.f64\t$d, $a",
- [(set RRegu64:$d, (fp_to_uint RRegf64:$a))]>;
+ : InstPTX<(outs RegI64:$d), (ins RegF64:$a), "cvt.rzi.u64.f64\t$d, $a",
+ [(set RegI64:$d, (fp_to_uint RegF64:$a))]>;
// Conversion to f32
def CVT_f32_pred
- : InstPTX<(outs RRegf32:$d), (ins Preds:$a), "cvt.rn.f32.pred\t$d, $a",
- [(set RRegf32:$d, (uint_to_fp Preds:$a))]>;
+ : InstPTX<(outs RegF32:$d), (ins RegPred:$a),
+ "selp.f32\t$d, 0F3F800000, 0F00000000, $a", // 1.0
+ [(set RegF32:$d, (uint_to_fp RegPred:$a))]>;
def CVT_f32_u16
- : InstPTX<(outs RRegf32:$d), (ins RRegu16:$a), "cvt.rn.f32.u16\t$d, $a",
- [(set RRegf32:$d, (uint_to_fp RRegu16:$a))]>;
+ : InstPTX<(outs RegF32:$d), (ins RegI16:$a), "cvt.rn.f32.u16\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI16:$a))]>;
def CVT_f32_u32
- : InstPTX<(outs RRegf32:$d), (ins RRegu32:$a), "cvt.rn.f32.u32\t$d, $a",
- [(set RRegf32:$d, (uint_to_fp RRegu32:$a))]>;
+ : InstPTX<(outs RegF32:$d), (ins RegI32:$a), "cvt.rn.f32.u32\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI32:$a))]>;
def CVT_f32_u64
- : InstPTX<(outs RRegf32:$d), (ins RRegu64:$a), "cvt.rn.f32.u64\t$d, $a",
- [(set RRegf32:$d, (uint_to_fp RRegu64:$a))]>;
+ : InstPTX<(outs RegF32:$d), (ins RegI64:$a), "cvt.rn.f32.u64\t$d, $a",
+ [(set RegF32:$d, (uint_to_fp RegI64:$a))]>;
def CVT_f32_f64
- : InstPTX<(outs RRegf32:$d), (ins RRegf64:$a), "cvt.rn.f32.f64\t$d, $a",
- [(set RRegf32:$d, (fround RRegf64:$a))]>;
+ : InstPTX<(outs RegF32:$d), (ins RegF64:$a), "cvt.rn.f32.f64\t$d, $a",
+ [(set RegF32:$d, (fround RegF64:$a))]>;
// Conversion to f64
def CVT_f64_pred
- : InstPTX<(outs RRegf64:$d), (ins Preds:$a), "cvt.rn.f64.pred\t$d, $a",
- [(set RRegf64:$d, (uint_to_fp Preds:$a))]>;
+ : InstPTX<(outs RegF64:$d), (ins RegPred:$a),
+ "selp.f64\t$d, 0D3F80000000000000, 0D0000000000000000, $a", // 1.0
+ [(set RegF64:$d, (uint_to_fp RegPred:$a))]>;
def CVT_f64_u16
- : InstPTX<(outs RRegf64:$d), (ins RRegu16:$a), "cvt.rn.f64.u16\t$d, $a",
- [(set RRegf64:$d, (uint_to_fp RRegu16:$a))]>;
+ : InstPTX<(outs RegF64:$d), (ins RegI16:$a), "cvt.rn.f64.u16\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI16:$a))]>;
def CVT_f64_u32
- : InstPTX<(outs RRegf64:$d), (ins RRegu32:$a), "cvt.rn.f64.u32\t$d, $a",
- [(set RRegf64:$d, (uint_to_fp RRegu32:$a))]>;
+ : InstPTX<(outs RegF64:$d), (ins RegI32:$a), "cvt.rn.f64.u32\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI32:$a))]>;
def CVT_f64_u64
- : InstPTX<(outs RRegf64:$d), (ins RRegu64:$a), "cvt.rn.f64.u64\t$d, $a",
- [(set RRegf64:$d, (uint_to_fp RRegu64:$a))]>;
+ : InstPTX<(outs RegF64:$d), (ins RegI64:$a), "cvt.rn.f64.u64\t$d, $a",
+ [(set RegF64:$d, (uint_to_fp RegI64:$a))]>;
def CVT_f64_f32
- : InstPTX<(outs RRegf64:$d), (ins RRegf32:$a), "cvt.f64.f32\t$d, $a",
- [(set RRegf64:$d, (fextend RRegf32:$a))]>;
+ : InstPTX<(outs RegF64:$d), (ins RegF32:$a), "cvt.f64.f32\t$d, $a",
+ [(set RegF64:$d, (fextend RegF32:$a))]>;
///===- Control Flow Instructions -----------------------------------------===//
@@ -951,7 +1062,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
let isBranch = 1, isTerminator = 1 in {
// FIXME: The pattern part is blank because I cannot (or do not yet know
- // how to) use the first operand of PredicateOperand (a Preds register) here
+ // how to) use the first operand of PredicateOperand (a RegPred register) here
def BRAdp
: InstPTX<(outs), (ins brtarget:$d), "bra\t$d",
[/*(brcond pred:$_p, bb:$d)*/]>;
@@ -962,6 +1073,30 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : InstPTX<(outs), (ins), "ret", [(PTXret)]>;
}
+///===- Spill Instructions ------------------------------------------------===//
+// Special instructions used for stack spilling
+def STACKSTOREI16 : InstPTX<(outs), (ins i32imm:$d, RegI16:$a),
+ "mov.u16\ts$d, $a", []>;
+def STACKSTOREI32 : InstPTX<(outs), (ins i32imm:$d, RegI32:$a),
+ "mov.u32\ts$d, $a", []>;
+def STACKSTOREI64 : InstPTX<(outs), (ins i32imm:$d, RegI64:$a),
+ "mov.u64\ts$d, $a", []>;
+def STACKSTOREF32 : InstPTX<(outs), (ins i32imm:$d, RegF32:$a),
+ "mov.f32\ts$d, $a", []>;
+def STACKSTOREF64 : InstPTX<(outs), (ins i32imm:$d, RegF64:$a),
+ "mov.f64\ts$d, $a", []>;
+
+def STACKLOADI16 : InstPTX<(outs), (ins RegI16:$d, i32imm:$a),
+ "mov.u16\t$d, s$a", []>;
+def STACKLOADI32 : InstPTX<(outs), (ins RegI32:$d, i32imm:$a),
+ "mov.u32\t$d, s$a", []>;
+def STACKLOADI64 : InstPTX<(outs), (ins RegI64:$d, i32imm:$a),
+ "mov.u64\t$d, s$a", []>;
+def STACKLOADF32 : InstPTX<(outs), (ins RegF32:$d, i32imm:$a),
+ "mov.f32\t$d, s$a", []>;
+def STACKLOADF64 : InstPTX<(outs), (ins RegF64:$d, i32imm:$a),
+ "mov.f64\t$d, s$a", []>;
+
///===- Intrinsic Instructions --------------------------------------------===//
include "PTXIntrinsicInstrInfo.td"
diff --git a/lib/Target/PTX/PTXIntrinsicInstrInfo.td b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
index 320934a..8d97909 100644
--- a/lib/Target/PTX/PTXIntrinsicInstrInfo.td
+++ b/lib/Target/PTX/PTXIntrinsicInstrInfo.td
@@ -14,14 +14,14 @@
// PTX Special Purpose Register Accessor Intrinsics
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
- : InstPTX<(outs RRegu64:$d), (ins),
+ : InstPTX<(outs RegI64:$d), (ins),
!strconcat("mov.u64\t$d, %", regname),
- [(set RRegu64:$d, (intop))]>;
+ [(set RegI64:$d, (intop))]>;
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
- : InstPTX<(outs RRegu32:$d), (ins),
+ : InstPTX<(outs RegI32:$d), (ins),
!strconcat("mov.u32\t$d, %", regname),
- [(set RRegu32:$d, (intop))]>;
+ [(set RegI32:$d, (intop))]>;
// TODO Add read vector-version of special registers
diff --git a/lib/Target/PTX/PTXMCAsmStreamer.cpp b/lib/Target/PTX/PTXMCAsmStreamer.cpp
index 1574670..b13a3da 100644
--- a/lib/Target/PTX/PTXMCAsmStreamer.cpp
+++ b/lib/Target/PTX/PTXMCAsmStreamer.cpp
@@ -23,7 +23,6 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;
@@ -115,7 +114,8 @@ public:
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label);
+ const MCSymbol *Label,
+ unsigned PointerSize);
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
@@ -260,7 +260,8 @@ void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label) {
+ const MCSymbol *Label,
+ unsigned PointerSize) {
report_fatal_error("Unimplemented.");
}
@@ -367,7 +368,7 @@ void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
int64_t IntValue;
if (!Value->EvaluateAsAbsolute(IntValue))
report_fatal_error("Don't know how to emit this value.");
- if (getContext().getTargetAsmInfo().isLittleEndian()) {
+ if (getContext().getAsmInfo().isLittleEndian()) {
EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
} else {
diff --git a/lib/Target/PTX/PTXMFInfoExtract.cpp b/lib/Target/PTX/PTXMFInfoExtract.cpp
index c5e1910..6fe9e6c 100644
--- a/lib/Target/PTX/PTXMFInfoExtract.cpp
+++ b/lib/Target/PTX/PTXMFInfoExtract.cpp
@@ -54,8 +54,6 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "******** PTX FUNCTION LOCAL VAR REG DEF ********\n");
- unsigned retreg = MFI->retReg();
-
DEBUG(dbgs()
<< "PTX::NoRegister == " << PTX::NoRegister << "\n"
<< "PTX::NUM_TARGET_REGS == " << PTX::NUM_TARGET_REGS << "\n");
@@ -68,15 +66,13 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
// FIXME: This is a slow linear scanning
for (unsigned reg = PTX::NoRegister + 1; reg < PTX::NUM_TARGET_REGS; ++reg)
if (MRI.isPhysRegUsed(reg) &&
- reg != retreg &&
+ !MFI->isRetReg(reg) &&
(MFI->isKernel() || !MFI->isArgReg(reg)))
MFI->addLocalVarReg(reg);
// Notify MachineFunctionInfo that I've done adding local var reg
MFI->doneAddLocalVar();
- DEBUG(dbgs() << "Return Reg: " << retreg << "\n");
-
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
i = MFI->argRegBegin(), e = MFI->argRegEnd();
i != e; ++i)
diff --git a/lib/Target/PTX/PTXMachineFunctionInfo.h b/lib/Target/PTX/PTXMachineFunctionInfo.h
index 81df1c2..9d65f5b 100644
--- a/lib/Target/PTX/PTXMachineFunctionInfo.h
+++ b/lib/Target/PTX/PTXMachineFunctionInfo.h
@@ -15,6 +15,7 @@
#define PTX_MACHINE_FUNCTION_INFO_H
#include "PTX.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineFunction.h"
namespace llvm {
@@ -25,7 +26,7 @@ class PTXMachineFunctionInfo : public MachineFunctionInfo {
private:
bool is_kernel;
std::vector<unsigned> reg_arg, reg_local_var;
- unsigned reg_ret;
+ std::vector<unsigned> reg_ret;
bool _isDoneAddArg;
public:
@@ -39,19 +40,22 @@ public:
void addArgReg(unsigned reg) { reg_arg.push_back(reg); }
void addLocalVarReg(unsigned reg) { reg_local_var.push_back(reg); }
- void setRetReg(unsigned reg) { reg_ret = reg; }
+ void addRetReg(unsigned reg) {
+ if (!isRetReg(reg)) {
+ reg_ret.push_back(reg);
+ }
+ }
void doneAddArg(void) {
_isDoneAddArg = true;
}
void doneAddLocalVar(void) {}
- bool isDoneAddArg(void) { return _isDoneAddArg; }
-
bool isKernel() const { return is_kernel; }
typedef std::vector<unsigned>::const_iterator reg_iterator;
typedef std::vector<unsigned>::const_reverse_iterator reg_reverse_iterator;
+ typedef std::vector<unsigned>::const_iterator ret_iterator;
bool argRegEmpty() const { return reg_arg.empty(); }
int getNumArg() const { return reg_arg.size(); }
@@ -64,12 +68,19 @@ public:
reg_iterator localVarRegBegin() const { return reg_local_var.begin(); }
reg_iterator localVarRegEnd() const { return reg_local_var.end(); }
- unsigned retReg() const { return reg_ret; }
+ bool retRegEmpty() const { return reg_ret.empty(); }
+ int getNumRet() const { return reg_ret.size(); }
+ ret_iterator retRegBegin() const { return reg_ret.begin(); }
+ ret_iterator retRegEnd() const { return reg_ret.end(); }
bool isArgReg(unsigned reg) const {
return std::find(reg_arg.begin(), reg_arg.end(), reg) != reg_arg.end();
}
+ bool isRetReg(unsigned reg) const {
+ return std::find(reg_ret.begin(), reg_ret.end(), reg) != reg_ret.end();
+ }
+
bool isLocalVarReg(unsigned reg) const {
return std::find(reg_local_var.begin(), reg_local_var.end(), reg)
!= reg_local_var.end();
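
With reg_ret now a vector, callers iterate return registers the same way they already iterate argument registers. An illustrative fragment, assuming an MFI pointer in scope as in PTXMFInfoExtract.cpp:

// Dump every return register, replacing the old single "Return Reg" print.
for (PTXMachineFunctionInfo::ret_iterator
       i = MFI->retRegBegin(), e = MFI->retRegEnd(); i != e; ++i)
  dbgs() << "Return Reg: " << *i << "\n";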
diff --git a/lib/Target/PTX/PTXRegisterInfo.cpp b/lib/Target/PTX/PTXRegisterInfo.cpp
index 0f3e7bc..cb56ea9 100644
--- a/lib/Target/PTX/PTXRegisterInfo.cpp
+++ b/lib/Target/PTX/PTXRegisterInfo.cpp
@@ -13,7 +13,39 @@
#include "PTX.h"
#include "PTXRegisterInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "PTXGenRegisterInfo.inc"
using namespace llvm;
-#include "PTXGenRegisterInfo.inc"
+PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
+ const TargetInstrInfo &TII)
+ : PTXGenRegisterInfo() {
+}
+
+void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj,
+ RegScavenger *RS) const {
+ unsigned Index;
+ MachineInstr& MI = *II;
+
+ Index = 0;
+ while (!MI.getOperand(Index).isFI()) {
+ ++Index;
+ assert(Index < MI.getNumOperands() &&
+ "Instr does not have a FrameIndex operand!");
+ }
+
+ int FrameIndex = MI.getOperand(Index).getIndex();
+
+ DEBUG(dbgs() << "eliminateFrameIndex: " << MI);
+ DEBUG(dbgs() << "- SPAdj: " << SPAdj << "\n");
+ DEBUG(dbgs() << "- FrameIndex: " << FrameIndex << "\n");
+
+ // By this point, the frame index already reflects stack-slot re-use assignments
+ MI.getOperand(Index).ChangeToImmediate(FrameIndex);
+}
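
The rewrite above is what lets the new STACKLOAD* patterns from PTXInstrInfo.td print: once the frame-index operand becomes a plain immediate, "mov.u32\t$d, s$a" renders slot N as the local stack symbol sN. The same fold as a small self-contained helper, a sketch assuming only the MachineOperand API:

#include <cassert>
#include "llvm/CodeGen/MachineInstr.h"

// Retype the first frame-index operand of MI as an immediate slot number.
static void foldFrameIndex(llvm::MachineInstr &MI) {
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    llvm::MachineOperand &Op = MI.getOperand(i);
    if (!Op.isFI())
      continue;
    Op.ChangeToImmediate(Op.getIndex());  // FI -> immediate, in place
    return;
  }
  assert(0 && "Instr does not have a FrameIndex operand!");
}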
diff --git a/lib/Target/PTX/PTXRegisterInfo.h b/lib/Target/PTX/PTXRegisterInfo.h
index dc56352..0b63cb6 100644
--- a/lib/Target/PTX/PTXRegisterInfo.h
+++ b/lib/Target/PTX/PTXRegisterInfo.h
@@ -17,7 +17,8 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/BitVector.h"
-#include "PTXGenRegisterInfo.h.inc"
+#define GET_REGINFO_HEADER
+#include "PTXGenRegisterInfo.inc"
namespace llvm {
class PTXTargetMachine;
@@ -25,7 +26,7 @@ class MachineFunction;
struct PTXRegisterInfo : public PTXGenRegisterInfo {
PTXRegisterInfo(PTXTargetMachine &TM,
- const TargetInstrInfo &TII) {}
+ const TargetInstrInfo &TII);
virtual const unsigned
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
@@ -38,11 +39,9 @@ struct PTXRegisterInfo : public PTXGenRegisterInfo {
return Reserved; // reserve no regs
}
- virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
+ virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj,
- RegScavenger *RS = NULL) const {
- llvm_unreachable("PTX does not support general function call");
- }
+ RegScavenger *RS = NULL) const;
virtual unsigned getFrameRegister(const MachineFunction &MF) const {
llvm_unreachable("PTX does not have a frame register");
diff --git a/lib/Target/PTX/PTXRegisterInfo.td b/lib/Target/PTX/PTXRegisterInfo.td
index f616141..1313d24 100644
--- a/lib/Target/PTX/PTXRegisterInfo.td
+++ b/lib/Target/PTX/PTXRegisterInfo.td
@@ -1,3 +1,4 @@
+
//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
//
// The LLVM Compiler Infrastructure
@@ -21,16 +22,16 @@ class PTXReg<string n> : Register<n> {
///===- Predicate Registers -----------------------------------------------===//
-def P0 : PTXReg<"p0">;
-def P1 : PTXReg<"p1">;
-def P2 : PTXReg<"p2">;
-def P3 : PTXReg<"p3">;
-def P4 : PTXReg<"p4">;
-def P5 : PTXReg<"p5">;
-def P6 : PTXReg<"p6">;
-def P7 : PTXReg<"p7">;
-def P8 : PTXReg<"p8">;
-def P9 : PTXReg<"p9">;
+def P0 : PTXReg<"p0">;
+def P1 : PTXReg<"p1">;
+def P2 : PTXReg<"p2">;
+def P3 : PTXReg<"p3">;
+def P4 : PTXReg<"p4">;
+def P5 : PTXReg<"p5">;
+def P6 : PTXReg<"p6">;
+def P7 : PTXReg<"p7">;
+def P8 : PTXReg<"p8">;
+def P9 : PTXReg<"p9">;
def P10 : PTXReg<"p10">;
def P11 : PTXReg<"p11">;
def P12 : PTXReg<"p12">;
@@ -85,19 +86,83 @@ def P60 : PTXReg<"p60">;
def P61 : PTXReg<"p61">;
def P62 : PTXReg<"p62">;
def P63 : PTXReg<"p63">;
+def P64 : PTXReg<"p64">;
+def P65 : PTXReg<"p65">;
+def P66 : PTXReg<"p66">;
+def P67 : PTXReg<"p67">;
+def P68 : PTXReg<"p68">;
+def P69 : PTXReg<"p69">;
+def P70 : PTXReg<"p70">;
+def P71 : PTXReg<"p71">;
+def P72 : PTXReg<"p72">;
+def P73 : PTXReg<"p73">;
+def P74 : PTXReg<"p74">;
+def P75 : PTXReg<"p75">;
+def P76 : PTXReg<"p76">;
+def P77 : PTXReg<"p77">;
+def P78 : PTXReg<"p78">;
+def P79 : PTXReg<"p79">;
+def P80 : PTXReg<"p80">;
+def P81 : PTXReg<"p81">;
+def P82 : PTXReg<"p82">;
+def P83 : PTXReg<"p83">;
+def P84 : PTXReg<"p84">;
+def P85 : PTXReg<"p85">;
+def P86 : PTXReg<"p86">;
+def P87 : PTXReg<"p87">;
+def P88 : PTXReg<"p88">;
+def P89 : PTXReg<"p89">;
+def P90 : PTXReg<"p90">;
+def P91 : PTXReg<"p91">;
+def P92 : PTXReg<"p92">;
+def P93 : PTXReg<"p93">;
+def P94 : PTXReg<"p94">;
+def P95 : PTXReg<"p95">;
+def P96 : PTXReg<"p96">;
+def P97 : PTXReg<"p97">;
+def P98 : PTXReg<"p98">;
+def P99 : PTXReg<"p99">;
+def P100 : PTXReg<"p100">;
+def P101 : PTXReg<"p101">;
+def P102 : PTXReg<"p102">;
+def P103 : PTXReg<"p103">;
+def P104 : PTXReg<"p104">;
+def P105 : PTXReg<"p105">;
+def P106 : PTXReg<"p106">;
+def P107 : PTXReg<"p107">;
+def P108 : PTXReg<"p108">;
+def P109 : PTXReg<"p109">;
+def P110 : PTXReg<"p110">;
+def P111 : PTXReg<"p111">;
+def P112 : PTXReg<"p112">;
+def P113 : PTXReg<"p113">;
+def P114 : PTXReg<"p114">;
+def P115 : PTXReg<"p115">;
+def P116 : PTXReg<"p116">;
+def P117 : PTXReg<"p117">;
+def P118 : PTXReg<"p118">;
+def P119 : PTXReg<"p119">;
+def P120 : PTXReg<"p120">;
+def P121 : PTXReg<"p121">;
+def P122 : PTXReg<"p122">;
+def P123 : PTXReg<"p123">;
+def P124 : PTXReg<"p124">;
+def P125 : PTXReg<"p125">;
+def P126 : PTXReg<"p126">;
+def P127 : PTXReg<"p127">;
-///===- 16-bit Integer Registers ------------------------------------------===//
+///===- 16-Bit Registers --------------------------------------------------===//
-def RH0 : PTXReg<"rh0">;
-def RH1 : PTXReg<"rh1">;
-def RH2 : PTXReg<"rh2">;
-def RH3 : PTXReg<"rh3">;
-def RH4 : PTXReg<"rh4">;
-def RH5 : PTXReg<"rh5">;
-def RH6 : PTXReg<"rh6">;
-def RH7 : PTXReg<"rh7">;
-def RH8 : PTXReg<"rh8">;
-def RH9 : PTXReg<"rh9">;
+def RH0 : PTXReg<"rh0">;
+def RH1 : PTXReg<"rh1">;
+def RH2 : PTXReg<"rh2">;
+def RH3 : PTXReg<"rh3">;
+def RH4 : PTXReg<"rh4">;
+def RH5 : PTXReg<"rh5">;
+def RH6 : PTXReg<"rh6">;
+def RH7 : PTXReg<"rh7">;
+def RH8 : PTXReg<"rh8">;
+def RH9 : PTXReg<"rh9">;
def RH10 : PTXReg<"rh10">;
def RH11 : PTXReg<"rh11">;
def RH12 : PTXReg<"rh12">;
@@ -152,20 +217,83 @@ def RH60 : PTXReg<"rh60">;
def RH61 : PTXReg<"rh61">;
def RH62 : PTXReg<"rh62">;
def RH63 : PTXReg<"rh63">;
+def RH64 : PTXReg<"rh64">;
+def RH65 : PTXReg<"rh65">;
+def RH66 : PTXReg<"rh66">;
+def RH67 : PTXReg<"rh67">;
+def RH68 : PTXReg<"rh68">;
+def RH69 : PTXReg<"rh69">;
+def RH70 : PTXReg<"rh70">;
+def RH71 : PTXReg<"rh71">;
+def RH72 : PTXReg<"rh72">;
+def RH73 : PTXReg<"rh73">;
+def RH74 : PTXReg<"rh74">;
+def RH75 : PTXReg<"rh75">;
+def RH76 : PTXReg<"rh76">;
+def RH77 : PTXReg<"rh77">;
+def RH78 : PTXReg<"rh78">;
+def RH79 : PTXReg<"rh79">;
+def RH80 : PTXReg<"rh80">;
+def RH81 : PTXReg<"rh81">;
+def RH82 : PTXReg<"rh82">;
+def RH83 : PTXReg<"rh83">;
+def RH84 : PTXReg<"rh84">;
+def RH85 : PTXReg<"rh85">;
+def RH86 : PTXReg<"rh86">;
+def RH87 : PTXReg<"rh87">;
+def RH88 : PTXReg<"rh88">;
+def RH89 : PTXReg<"rh89">;
+def RH90 : PTXReg<"rh90">;
+def RH91 : PTXReg<"rh91">;
+def RH92 : PTXReg<"rh92">;
+def RH93 : PTXReg<"rh93">;
+def RH94 : PTXReg<"rh94">;
+def RH95 : PTXReg<"rh95">;
+def RH96 : PTXReg<"rh96">;
+def RH97 : PTXReg<"rh97">;
+def RH98 : PTXReg<"rh98">;
+def RH99 : PTXReg<"rh99">;
+def RH100 : PTXReg<"rh100">;
+def RH101 : PTXReg<"rh101">;
+def RH102 : PTXReg<"rh102">;
+def RH103 : PTXReg<"rh103">;
+def RH104 : PTXReg<"rh104">;
+def RH105 : PTXReg<"rh105">;
+def RH106 : PTXReg<"rh106">;
+def RH107 : PTXReg<"rh107">;
+def RH108 : PTXReg<"rh108">;
+def RH109 : PTXReg<"rh109">;
+def RH110 : PTXReg<"rh110">;
+def RH111 : PTXReg<"rh111">;
+def RH112 : PTXReg<"rh112">;
+def RH113 : PTXReg<"rh113">;
+def RH114 : PTXReg<"rh114">;
+def RH115 : PTXReg<"rh115">;
+def RH116 : PTXReg<"rh116">;
+def RH117 : PTXReg<"rh117">;
+def RH118 : PTXReg<"rh118">;
+def RH119 : PTXReg<"rh119">;
+def RH120 : PTXReg<"rh120">;
+def RH121 : PTXReg<"rh121">;
+def RH122 : PTXReg<"rh122">;
+def RH123 : PTXReg<"rh123">;
+def RH124 : PTXReg<"rh124">;
+def RH125 : PTXReg<"rh125">;
+def RH126 : PTXReg<"rh126">;
+def RH127 : PTXReg<"rh127">;
+///===- 32-Bit Registers --------------------------------------------------===//
-///===- 32-bit Integer Registers ------------------------------------------===//
-
-def R0 : PTXReg<"r0">;
-def R1 : PTXReg<"r1">;
-def R2 : PTXReg<"r2">;
-def R3 : PTXReg<"r3">;
-def R4 : PTXReg<"r4">;
-def R5 : PTXReg<"r5">;
-def R6 : PTXReg<"r6">;
-def R7 : PTXReg<"r7">;
-def R8 : PTXReg<"r8">;
-def R9 : PTXReg<"r9">;
+def R0 : PTXReg<"r0">;
+def R1 : PTXReg<"r1">;
+def R2 : PTXReg<"r2">;
+def R3 : PTXReg<"r3">;
+def R4 : PTXReg<"r4">;
+def R5 : PTXReg<"r5">;
+def R6 : PTXReg<"r6">;
+def R7 : PTXReg<"r7">;
+def R8 : PTXReg<"r8">;
+def R9 : PTXReg<"r9">;
def R10 : PTXReg<"r10">;
def R11 : PTXReg<"r11">;
def R12 : PTXReg<"r12">;
@@ -220,20 +348,83 @@ def R60 : PTXReg<"r60">;
def R61 : PTXReg<"r61">;
def R62 : PTXReg<"r62">;
def R63 : PTXReg<"r63">;
+def R64 : PTXReg<"r64">;
+def R65 : PTXReg<"r65">;
+def R66 : PTXReg<"r66">;
+def R67 : PTXReg<"r67">;
+def R68 : PTXReg<"r68">;
+def R69 : PTXReg<"r69">;
+def R70 : PTXReg<"r70">;
+def R71 : PTXReg<"r71">;
+def R72 : PTXReg<"r72">;
+def R73 : PTXReg<"r73">;
+def R74 : PTXReg<"r74">;
+def R75 : PTXReg<"r75">;
+def R76 : PTXReg<"r76">;
+def R77 : PTXReg<"r77">;
+def R78 : PTXReg<"r78">;
+def R79 : PTXReg<"r79">;
+def R80 : PTXReg<"r80">;
+def R81 : PTXReg<"r81">;
+def R82 : PTXReg<"r82">;
+def R83 : PTXReg<"r83">;
+def R84 : PTXReg<"r84">;
+def R85 : PTXReg<"r85">;
+def R86 : PTXReg<"r86">;
+def R87 : PTXReg<"r87">;
+def R88 : PTXReg<"r88">;
+def R89 : PTXReg<"r89">;
+def R90 : PTXReg<"r90">;
+def R91 : PTXReg<"r91">;
+def R92 : PTXReg<"r92">;
+def R93 : PTXReg<"r93">;
+def R94 : PTXReg<"r94">;
+def R95 : PTXReg<"r95">;
+def R96 : PTXReg<"r96">;
+def R97 : PTXReg<"r97">;
+def R98 : PTXReg<"r98">;
+def R99 : PTXReg<"r99">;
+def R100 : PTXReg<"r100">;
+def R101 : PTXReg<"r101">;
+def R102 : PTXReg<"r102">;
+def R103 : PTXReg<"r103">;
+def R104 : PTXReg<"r104">;
+def R105 : PTXReg<"r105">;
+def R106 : PTXReg<"r106">;
+def R107 : PTXReg<"r107">;
+def R108 : PTXReg<"r108">;
+def R109 : PTXReg<"r109">;
+def R110 : PTXReg<"r110">;
+def R111 : PTXReg<"r111">;
+def R112 : PTXReg<"r112">;
+def R113 : PTXReg<"r113">;
+def R114 : PTXReg<"r114">;
+def R115 : PTXReg<"r115">;
+def R116 : PTXReg<"r116">;
+def R117 : PTXReg<"r117">;
+def R118 : PTXReg<"r118">;
+def R119 : PTXReg<"r119">;
+def R120 : PTXReg<"r120">;
+def R121 : PTXReg<"r121">;
+def R122 : PTXReg<"r122">;
+def R123 : PTXReg<"r123">;
+def R124 : PTXReg<"r124">;
+def R125 : PTXReg<"r125">;
+def R126 : PTXReg<"r126">;
+def R127 : PTXReg<"r127">;
+///===- 64-Bit Registers --------------------------------------------------===//
-///===- 64-bit Integer Registers ------------------------------------------===//
-
-def RD0 : PTXReg<"rd0">;
-def RD1 : PTXReg<"rd1">;
-def RD2 : PTXReg<"rd2">;
-def RD3 : PTXReg<"rd3">;
-def RD4 : PTXReg<"rd4">;
-def RD5 : PTXReg<"rd5">;
-def RD6 : PTXReg<"rd6">;
-def RD7 : PTXReg<"rd7">;
-def RD8 : PTXReg<"rd8">;
-def RD9 : PTXReg<"rd9">;
+def RD0 : PTXReg<"rd0">;
+def RD1 : PTXReg<"rd1">;
+def RD2 : PTXReg<"rd2">;
+def RD3 : PTXReg<"rd3">;
+def RD4 : PTXReg<"rd4">;
+def RD5 : PTXReg<"rd5">;
+def RD6 : PTXReg<"rd6">;
+def RD7 : PTXReg<"rd7">;
+def RD8 : PTXReg<"rd8">;
+def RD9 : PTXReg<"rd9">;
def RD10 : PTXReg<"rd10">;
def RD11 : PTXReg<"rd11">;
def RD12 : PTXReg<"rd12">;
@@ -288,204 +479,77 @@ def RD60 : PTXReg<"rd60">;
def RD61 : PTXReg<"rd61">;
def RD62 : PTXReg<"rd62">;
def RD63 : PTXReg<"rd63">;
-
-
-///===- 32-bit Floating-Point Registers -----------------------------------===//
-
-def F0 : PTXReg<"f0">;
-def F1 : PTXReg<"f1">;
-def F2 : PTXReg<"f2">;
-def F3 : PTXReg<"f3">;
-def F4 : PTXReg<"f4">;
-def F5 : PTXReg<"f5">;
-def F6 : PTXReg<"f6">;
-def F7 : PTXReg<"f7">;
-def F8 : PTXReg<"f8">;
-def F9 : PTXReg<"f9">;
-def F10 : PTXReg<"f10">;
-def F11 : PTXReg<"f11">;
-def F12 : PTXReg<"f12">;
-def F13 : PTXReg<"f13">;
-def F14 : PTXReg<"f14">;
-def F15 : PTXReg<"f15">;
-def F16 : PTXReg<"f16">;
-def F17 : PTXReg<"f17">;
-def F18 : PTXReg<"f18">;
-def F19 : PTXReg<"f19">;
-def F20 : PTXReg<"f20">;
-def F21 : PTXReg<"f21">;
-def F22 : PTXReg<"f22">;
-def F23 : PTXReg<"f23">;
-def F24 : PTXReg<"f24">;
-def F25 : PTXReg<"f25">;
-def F26 : PTXReg<"f26">;
-def F27 : PTXReg<"f27">;
-def F28 : PTXReg<"f28">;
-def F29 : PTXReg<"f29">;
-def F30 : PTXReg<"f30">;
-def F31 : PTXReg<"f31">;
-def F32 : PTXReg<"f32">;
-def F33 : PTXReg<"f33">;
-def F34 : PTXReg<"f34">;
-def F35 : PTXReg<"f35">;
-def F36 : PTXReg<"f36">;
-def F37 : PTXReg<"f37">;
-def F38 : PTXReg<"f38">;
-def F39 : PTXReg<"f39">;
-def F40 : PTXReg<"f40">;
-def F41 : PTXReg<"f41">;
-def F42 : PTXReg<"f42">;
-def F43 : PTXReg<"f43">;
-def F44 : PTXReg<"f44">;
-def F45 : PTXReg<"f45">;
-def F46 : PTXReg<"f46">;
-def F47 : PTXReg<"f47">;
-def F48 : PTXReg<"f48">;
-def F49 : PTXReg<"f49">;
-def F50 : PTXReg<"f50">;
-def F51 : PTXReg<"f51">;
-def F52 : PTXReg<"f52">;
-def F53 : PTXReg<"f53">;
-def F54 : PTXReg<"f54">;
-def F55 : PTXReg<"f55">;
-def F56 : PTXReg<"f56">;
-def F57 : PTXReg<"f57">;
-def F58 : PTXReg<"f58">;
-def F59 : PTXReg<"f59">;
-def F60 : PTXReg<"f60">;
-def F61 : PTXReg<"f61">;
-def F62 : PTXReg<"f62">;
-def F63 : PTXReg<"f63">;
-
-
-///===- 64-bit Floating-Point Registers -----------------------------------===//
-
-def FD0 : PTXReg<"fd0">;
-def FD1 : PTXReg<"fd1">;
-def FD2 : PTXReg<"fd2">;
-def FD3 : PTXReg<"fd3">;
-def FD4 : PTXReg<"fd4">;
-def FD5 : PTXReg<"fd5">;
-def FD6 : PTXReg<"fd6">;
-def FD7 : PTXReg<"fd7">;
-def FD8 : PTXReg<"fd8">;
-def FD9 : PTXReg<"fd9">;
-def FD10 : PTXReg<"fd10">;
-def FD11 : PTXReg<"fd11">;
-def FD12 : PTXReg<"fd12">;
-def FD13 : PTXReg<"fd13">;
-def FD14 : PTXReg<"fd14">;
-def FD15 : PTXReg<"fd15">;
-def FD16 : PTXReg<"fd16">;
-def FD17 : PTXReg<"fd17">;
-def FD18 : PTXReg<"fd18">;
-def FD19 : PTXReg<"fd19">;
-def FD20 : PTXReg<"fd20">;
-def FD21 : PTXReg<"fd21">;
-def FD22 : PTXReg<"fd22">;
-def FD23 : PTXReg<"fd23">;
-def FD24 : PTXReg<"fd24">;
-def FD25 : PTXReg<"fd25">;
-def FD26 : PTXReg<"fd26">;
-def FD27 : PTXReg<"fd27">;
-def FD28 : PTXReg<"fd28">;
-def FD29 : PTXReg<"fd29">;
-def FD30 : PTXReg<"fd30">;
-def FD31 : PTXReg<"fd31">;
-def FD32 : PTXReg<"fd32">;
-def FD33 : PTXReg<"fd33">;
-def FD34 : PTXReg<"fd34">;
-def FD35 : PTXReg<"fd35">;
-def FD36 : PTXReg<"fd36">;
-def FD37 : PTXReg<"fd37">;
-def FD38 : PTXReg<"fd38">;
-def FD39 : PTXReg<"fd39">;
-def FD40 : PTXReg<"fd40">;
-def FD41 : PTXReg<"fd41">;
-def FD42 : PTXReg<"fd42">;
-def FD43 : PTXReg<"fd43">;
-def FD44 : PTXReg<"fd44">;
-def FD45 : PTXReg<"fd45">;
-def FD46 : PTXReg<"f4d6">;
-def FD47 : PTXReg<"fd47">;
-def FD48 : PTXReg<"fd48">;
-def FD49 : PTXReg<"fd49">;
-def FD50 : PTXReg<"fd50">;
-def FD51 : PTXReg<"fd51">;
-def FD52 : PTXReg<"fd52">;
-def FD53 : PTXReg<"fd53">;
-def FD54 : PTXReg<"fd54">;
-def FD55 : PTXReg<"fd55">;
-def FD56 : PTXReg<"fd56">;
-def FD57 : PTXReg<"fd57">;
-def FD58 : PTXReg<"fd58">;
-def FD59 : PTXReg<"fd59">;
-def FD60 : PTXReg<"fd60">;
-def FD61 : PTXReg<"fd61">;
-def FD62 : PTXReg<"fd62">;
-def FD63 : PTXReg<"fd63">;
-
+def RD64 : PTXReg<"rd64">;
+def RD65 : PTXReg<"rd65">;
+def RD66 : PTXReg<"rd66">;
+def RD67 : PTXReg<"rd67">;
+def RD68 : PTXReg<"rd68">;
+def RD69 : PTXReg<"rd69">;
+def RD70 : PTXReg<"rd70">;
+def RD71 : PTXReg<"rd71">;
+def RD72 : PTXReg<"rd72">;
+def RD73 : PTXReg<"rd73">;
+def RD74 : PTXReg<"rd74">;
+def RD75 : PTXReg<"rd75">;
+def RD76 : PTXReg<"rd76">;
+def RD77 : PTXReg<"rd77">;
+def RD78 : PTXReg<"rd78">;
+def RD79 : PTXReg<"rd79">;
+def RD80 : PTXReg<"rd80">;
+def RD81 : PTXReg<"rd81">;
+def RD82 : PTXReg<"rd82">;
+def RD83 : PTXReg<"rd83">;
+def RD84 : PTXReg<"rd84">;
+def RD85 : PTXReg<"rd85">;
+def RD86 : PTXReg<"rd86">;
+def RD87 : PTXReg<"rd87">;
+def RD88 : PTXReg<"rd88">;
+def RD89 : PTXReg<"rd89">;
+def RD90 : PTXReg<"rd90">;
+def RD91 : PTXReg<"rd91">;
+def RD92 : PTXReg<"rd92">;
+def RD93 : PTXReg<"rd93">;
+def RD94 : PTXReg<"rd94">;
+def RD95 : PTXReg<"rd95">;
+def RD96 : PTXReg<"rd96">;
+def RD97 : PTXReg<"rd97">;
+def RD98 : PTXReg<"rd98">;
+def RD99 : PTXReg<"rd99">;
+def RD100 : PTXReg<"rd100">;
+def RD101 : PTXReg<"rd101">;
+def RD102 : PTXReg<"rd102">;
+def RD103 : PTXReg<"rd103">;
+def RD104 : PTXReg<"rd104">;
+def RD105 : PTXReg<"rd105">;
+def RD106 : PTXReg<"rd106">;
+def RD107 : PTXReg<"rd107">;
+def RD108 : PTXReg<"rd108">;
+def RD109 : PTXReg<"rd109">;
+def RD110 : PTXReg<"rd110">;
+def RD111 : PTXReg<"rd111">;
+def RD112 : PTXReg<"rd112">;
+def RD113 : PTXReg<"rd113">;
+def RD114 : PTXReg<"rd114">;
+def RD115 : PTXReg<"rd115">;
+def RD116 : PTXReg<"rd116">;
+def RD117 : PTXReg<"rd117">;
+def RD118 : PTXReg<"rd118">;
+def RD119 : PTXReg<"rd119">;
+def RD120 : PTXReg<"rd120">;
+def RD121 : PTXReg<"rd121">;
+def RD122 : PTXReg<"rd122">;
+def RD123 : PTXReg<"rd123">;
+def RD124 : PTXReg<"rd124">;
+def RD125 : PTXReg<"rd125">;
+def RD126 : PTXReg<"rd126">;
+def RD127 : PTXReg<"rd127">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
-
-def Preds : RegisterClass<"PTX", [i1], 8,
- [P0, P1, P2, P3, P4, P5, P6, P7,
- P8, P9, P10, P11, P12, P13, P14, P15,
- P16, P17, P18, P19, P20, P21, P22, P23,
- P24, P25, P26, P27, P28, P29, P30, P31,
- P32, P33, P34, P35, P36, P37, P38, P39,
- P40, P41, P42, P43, P44, P45, P46, P47,
- P48, P49, P50, P51, P52, P53, P54, P55,
- P56, P57, P58, P59, P60, P61, P62, P63]>;
-
-def RRegu16 : RegisterClass<"PTX", [i16], 16,
- [RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
- RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
- RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
- RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31,
- RH32, RH33, RH34, RH35, RH36, RH37, RH38, RH39,
- RH40, RH41, RH42, RH43, RH44, RH45, RH46, RH47,
- RH48, RH49, RH50, RH51, RH52, RH53, RH54, RH55,
- RH56, RH57, RH58, RH59, RH60, RH61, RH62, RH63]>;
-
-def RRegu32 : RegisterClass<"PTX", [i32], 32,
- [R0, R1, R2, R3, R4, R5, R6, R7,
- R8, R9, R10, R11, R12, R13, R14, R15,
- R16, R17, R18, R19, R20, R21, R22, R23,
- R24, R25, R26, R27, R28, R29, R30, R31,
- R32, R33, R34, R35, R36, R37, R38, R39,
- R40, R41, R42, R43, R44, R45, R46, R47,
- R48, R49, R50, R51, R52, R53, R54, R55,
- R56, R57, R58, R59, R60, R61, R62, R63]>;
-
-def RRegu64 : RegisterClass<"PTX", [i64], 64,
- [RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
- RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
- RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
- RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31,
- RD32, RD33, RD34, RD35, RD36, RD37, RD38, RD39,
- RD40, RD41, RD42, RD43, RD44, RD45, RD46, RD47,
- RD48, RD49, RD50, RD51, RD52, RD53, RD54, RD55,
- RD56, RD57, RD58, RD59, RD60, RD61, RD62, RD63]>;
-
-def RRegf32 : RegisterClass<"PTX", [f32], 32,
- [F0, F1, F2, F3, F4, F5, F6, F7,
- F8, F9, F10, F11, F12, F13, F14, F15,
- F16, F17, F18, F19, F20, F21, F22, F23,
- F24, F25, F26, F27, F28, F29, F30, F31,
- F32, F33, F34, F35, F36, F37, F38, F39,
- F40, F41, F42, F43, F44, F45, F46, F47,
- F48, F49, F50, F51, F52, F53, F54, F55,
- F56, F57, F58, F59, F60, F61, F62, F63]>;
-
-def RRegf64 : RegisterClass<"PTX", [f64], 64,
- [FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7,
- FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15,
- FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23,
- FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31,
- FD32, FD33, FD34, FD35, FD36, FD37, FD38, FD39,
- FD40, FD41, FD42, FD43, FD44, FD45, FD46, FD47,
- FD48, FD49, FD50, FD51, FD52, FD53, FD54, FD55,
- FD56, FD57, FD58, FD59, FD60, FD61, FD62, FD63]>;
+def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%u", 0, 127)>;
+def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%u", 0, 127)>;
+def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%u", 0, 127)>;
+def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%u", 0, 127)>;
+def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%u", 0, 127)>;
+def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%u", 0, 127)>;
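
Two structural changes land here besides the renames: each register file grows from 64 to 128 registers, and the separate F0-F63/FD0-FD63 floating-point files disappear, so RegF32 and RegF64 now alias the integer R/RD registers (hence the shared "R%u"/"RD%u" sequences). The explicit 64-entry member lists also give way to TableGen's (sequence) set expander, which is what keeps the new 128-register classes to one line each.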
diff --git a/lib/Target/PTX/PTXSubtarget.cpp b/lib/Target/PTX/PTXSubtarget.cpp
index e8a1dfe..8ec646e 100644
--- a/lib/Target/PTX/PTXSubtarget.cpp
+++ b/lib/Target/PTX/PTXSubtarget.cpp
@@ -7,32 +7,51 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the PTX specific subclass of TargetSubtarget.
+// This file implements the PTX specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "PTXSubtarget.h"
+#include "PTX.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PTXGenSubtargetInfo.inc"
using namespace llvm;
-PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS,
- bool is64Bit)
- : PTXShaderModel(PTX_SM_1_0),
+PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PTXGenSubtargetInfo(TT, CPU, FS),
+ PTXTarget(PTX_COMPUTE_1_0),
PTXVersion(PTX_VERSION_2_0),
SupportsDouble(false),
SupportsFMA(true),
- Is64Bit(is64Bit) {
- std::string TARGET = "generic";
- ParseSubtargetFeatures(FS, TARGET);
+ Is64Bit(is64Bit) {
+ std::string TARGET = CPU;
+ if (TARGET.empty())
+ TARGET = "generic";
+ ParseSubtargetFeatures(TARGET, FS);
}
std::string PTXSubtarget::getTargetString() const {
- switch(PTXShaderModel) {
- default: llvm_unreachable("Unknown shader model");
+ switch(PTXTarget) {
+ default: llvm_unreachable("Unknown PTX target");
case PTX_SM_1_0: return "sm_10";
+ case PTX_SM_1_1: return "sm_11";
+ case PTX_SM_1_2: return "sm_12";
case PTX_SM_1_3: return "sm_13";
case PTX_SM_2_0: return "sm_20";
+ case PTX_SM_2_1: return "sm_21";
+ case PTX_SM_2_2: return "sm_22";
+ case PTX_SM_2_3: return "sm_23";
+ case PTX_COMPUTE_1_0: return "compute_10";
+ case PTX_COMPUTE_1_1: return "compute_11";
+ case PTX_COMPUTE_1_2: return "compute_12";
+ case PTX_COMPUTE_1_3: return "compute_13";
+ case PTX_COMPUTE_2_0: return "compute_20";
}
}
@@ -45,5 +64,3 @@ std::string PTXSubtarget::getPTXVersionString() const {
case PTX_VERSION_2_3: return "2.3";
}
}
-
-#include "PTXGenSubtarget.inc"
diff --git a/lib/Target/PTX/PTXSubtarget.h b/lib/Target/PTX/PTXSubtarget.h
index 59fa696..0921f1f 100644
--- a/lib/Target/PTX/PTXSubtarget.h
+++ b/lib/Target/PTX/PTXSubtarget.h
@@ -7,26 +7,44 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the PTX specific subclass of TargetSubtarget.
+// This file declares the PTX specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_SUBTARGET_H
#define PTX_SUBTARGET_H
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define GET_SUBTARGETINFO_HEADER
+#include "PTXGenSubtargetInfo.inc"
namespace llvm {
- class PTXSubtarget : public TargetSubtarget {
- private:
+class StringRef;
+
+ class PTXSubtarget : public PTXGenSubtargetInfo {
+ public:
/**
* Enumeration of Shader Models supported by the back-end.
*/
- enum PTXShaderModelEnum {
+ enum PTXTargetEnum {
+ PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
+ PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
+ PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
+ PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
+ PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
+ PTX_LAST_COMPUTE,
+
PTX_SM_1_0, /*< Shader Model 1.0 */
+ PTX_SM_1_1, /*< Shader Model 1.1 */
+ PTX_SM_1_2, /*< Shader Model 1.2 */
PTX_SM_1_3, /*< Shader Model 1.3 */
- PTX_SM_2_0 /*< Shader Model 2.0 */
+ PTX_SM_2_0, /*< Shader Model 2.0 */
+ PTX_SM_2_1, /*< Shader Model 2.1 */
+ PTX_SM_2_2, /*< Shader Model 2.2 */
+ PTX_SM_2_3, /*< Shader Model 2.3 */
+ PTX_LAST_SM
};
/**
@@ -41,24 +59,30 @@ namespace llvm {
PTX_VERSION_2_3 /*< PTX Version 2.3 */
};
+ private:
+
/// Shader Model supported on the target GPU.
- PTXShaderModelEnum PTXShaderModel;
+ PTXTargetEnum PTXTarget;
/// PTX Language Version.
PTXVersionEnum PTXVersion;
// The native .f64 type is supported on the hardware.
bool SupportsDouble;
-
- // Support the fused-multiply add (FMA) and multiply-add (MAD) instructions
+
+ // Support the fused-multiply add (FMA) and multiply-add (MAD)
+ // instructions
bool SupportsFMA;
-
+
// Use .u64 instead of .u32 for addresses.
bool Is64Bit;
public:
- PTXSubtarget(const std::string &TT, const std::string &FS, bool is64Bit);
+ PTXSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
+
+ // Target architecture accessors
std::string getTargetString() const;
std::string getPTXVersionString() const;
@@ -68,10 +92,6 @@ namespace llvm {
bool is64Bit() const { return Is64Bit; }
bool supportsFMA() const { return SupportsFMA; }
-
- bool supportsSM13() const { return PTXShaderModel >= PTX_SM_1_3; }
-
- bool supportsSM20() const { return PTXShaderModel >= PTX_SM_2_0; }
bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
@@ -79,8 +99,22 @@ namespace llvm {
bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ bool fdivNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool fmadNeedsRoundingMode() const {
+ return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ bool useParamSpaceForDeviceArgs() const {
+ return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
+ (PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
+ }
+
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
}; // class PTXSubtarget
} // namespace llvm
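
The new capability predicates lean on the enum layout above: compute profiles sort before the PTX_LAST_COMPUTE sentinel and shader models before PTX_LAST_SM, so each query is a pair of half-open range tests. A reduced sketch of the idiom (most enumerators elided):

enum PTXTargetEnum {
  PTX_COMPUTE_1_0, PTX_COMPUTE_1_3, PTX_LAST_COMPUTE,  // sentinel
  PTX_SM_1_0, PTX_SM_1_3, PTX_SM_2_0, PTX_LAST_SM      // sentinel
};

// True iff the target is sm_13/compute_13 or newer within its family,
// matching fdivNeedsRoundingMode()/fmadNeedsRoundingMode() above.
static bool needsRoundingMode(PTXTargetEnum T) {
  return (T >= PTX_SM_1_3 && T < PTX_LAST_SM) ||
         (T >= PTX_COMPUTE_1_3 && T < PTX_LAST_COMPUTE);
}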
diff --git a/lib/Target/PTX/PTXTargetMachine.cpp b/lib/Target/PTX/PTXTargetMachine.cpp
index 1b737c9..ab926e0 100644
--- a/lib/Target/PTX/PTXTargetMachine.cpp
+++ b/lib/Target/PTX/PTXTargetMachine.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "PTX.h"
-#include "PTXMCAsmInfo.h"
#include "PTXTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetRegistry.h"
@@ -35,9 +34,6 @@ extern "C" void LLVMInitializePTXTarget() {
RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
- RegisterAsmInfo<PTXMCAsmInfo> Z(ThePTX32Target);
- RegisterAsmInfo<PTXMCAsmInfo> W(ThePTX64Target);
-
TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
}
@@ -52,11 +48,12 @@ namespace {
// DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine::PTXTargetMachine(const Target &T,
const std::string &TT,
+ const std::string &CPU,
const std::string &FS,
bool is64Bit)
- : LLVMTargetMachine(T, TT),
+ : LLVMTargetMachine(T, TT, CPU, FS),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
- Subtarget(TT, FS, is64Bit),
+ Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
InstrInfo(*this),
TLInfo(*this) {
@@ -64,14 +61,16 @@ PTXTargetMachine::PTXTargetMachine(const Target &T,
PTX32TargetMachine::PTX32TargetMachine(const Target &T,
const std::string& TT,
+ const std::string& CPU,
const std::string& FS)
- : PTXTargetMachine(T, TT, FS, false) {
+ : PTXTargetMachine(T, TT, CPU, FS, false) {
}
PTX64TargetMachine::PTX64TargetMachine(const Target &T,
const std::string& TT,
+ const std::string& CPU,
const std::string& FS)
- : PTXTargetMachine(T, TT, FS, true) {
+ : PTXTargetMachine(T, TT, CPU, FS, true) {
}
bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
diff --git a/lib/Target/PTX/PTXTargetMachine.h b/lib/Target/PTX/PTXTargetMachine.h
index 149be8e..ae42153 100644
--- a/lib/Target/PTX/PTXTargetMachine.h
+++ b/lib/Target/PTX/PTXTargetMachine.h
@@ -33,7 +33,8 @@ class PTXTargetMachine : public LLVMTargetMachine {
public:
PTXTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS, bool is64Bit);
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
virtual const TargetData *getTargetData() const { return &DataLayout; }
@@ -61,14 +62,14 @@ class PTX32TargetMachine : public PTXTargetMachine {
public:
PTX32TargetMachine(const Target &T, const std::string &TT,
- const std::string& FS);
+ const std::string& CPU, const std::string& FS);
}; // class PTX32TargetMachine
class PTX64TargetMachine : public PTXTargetMachine {
public:
PTX64TargetMachine(const Target &T, const std::string &TT,
- const std::string& FS);
+ const std::string& CPU, const std::string& FS);
}; // class PTX64TargetMachine
} // namespace llvm
diff --git a/lib/Target/PTX/generate-register-td.py b/lib/Target/PTX/generate-register-td.py
new file mode 100755
index 0000000..1528690
--- /dev/null
+++ b/lib/Target/PTX/generate-register-td.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+##===- generate-register-td.py --------------------------------*-python-*--===##
+##
+## The LLVM Compiler Infrastructure
+##
+## This file is distributed under the University of Illinois Open Source
+## License. See LICENSE.TXT for details.
+##
+##===----------------------------------------------------------------------===##
+##
+## This file describes the PTX register file generator.
+##
+##===----------------------------------------------------------------------===##
+
+from sys import argv, exit, stdout
+from functools import reduce  # reduce is a builtin only on Python 2
+
+
+if len(argv) != 5:
+ print('Usage: generate-register-td.py <num_preds> <num_16> <num_32> <num_64>')
+ exit(1)
+
+try:
+ num_pred = int(argv[1])
+ num_16bit = int(argv[2])
+ num_32bit = int(argv[3])
+ num_64bit = int(argv[4])
+except ValueError:
+ print('ERROR: Invalid integer parameter')
+ exit(1)
+
+## Print the register definition file
+td_file = open('PTXRegisterInfo.td', 'w')
+
+td_file.write('''
+//===- PTXRegisterInfo.td - PTX Register defs ----------------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the PTX register file
+//===----------------------------------------------------------------------===//
+
+class PTXReg<string n> : Register<n> {
+ let Namespace = "PTX";
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+''')
+
+
+# Print predicate registers
+td_file.write('\n///===- Predicate Registers -----------------------------------------------===//\n\n')
+for r in range(0, num_pred):
+ td_file.write('def P%d : PTXReg<"p%d">;\n' % (r, r))
+
+# Print 16-bit registers
+td_file.write('\n///===- 16-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_16bit):
+ td_file.write('def RH%d : PTXReg<"rh%d">;\n' % (r, r))
+
+# Print 32-bit registers
+td_file.write('\n///===- 32-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_32bit):
+ td_file.write('def R%d : PTXReg<"r%d">;\n' % (r, r))
+
+# Print 64-bit registers
+td_file.write('\n///===- 64-Bit Registers --------------------------------------------------===//\n\n')
+for r in range(0, num_64bit):
+ td_file.write('def RD%d : PTXReg<"rd%d">;\n' % (r, r))
+
+
+td_file.write('''
+//===----------------------------------------------------------------------===//
+// Register classes
+//===----------------------------------------------------------------------===//
+''')
+
+
+# Print register classes
+
+td_file.write('def RegPred : RegisterClass<"PTX", [i1], 8, (sequence "P%%u", 0, %d)>;\n' % (num_pred-1))
+td_file.write('def RegI16 : RegisterClass<"PTX", [i16], 16, (sequence "RH%%u", 0, %d)>;\n' % (num_16bit-1))
+td_file.write('def RegI32 : RegisterClass<"PTX", [i32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
+td_file.write('def RegI64 : RegisterClass<"PTX", [i64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
+td_file.write('def RegF32 : RegisterClass<"PTX", [f32], 32, (sequence "R%%u", 0, %d)>;\n' % (num_32bit-1))
+td_file.write('def RegF64 : RegisterClass<"PTX", [f64], 64, (sequence "RD%%u", 0, %d)>;\n' % (num_64bit-1))
+
+
+td_file.close()
+
+## Now write the PTXCallingConv.td file
+td_file = open('PTXCallingConv.td', 'w')
+
+# Reserve 10% of the available registers for return values, and the other 90%
+# for parameters
+num_ret_pred = int(0.1 * num_pred)
+num_ret_16bit = int(0.1 * num_16bit)
+num_ret_32bit = int(0.1 * num_32bit)
+num_ret_64bit = int(0.1 * num_64bit)
+num_param_pred = num_pred - num_ret_pred
+num_param_16bit = num_16bit - num_ret_16bit
+num_param_32bit = num_32bit - num_ret_32bit
+num_param_64bit = num_64bit - num_ret_64bit
+
+param_regs_pred = [('P%d' % (i+num_ret_pred)) for i in range(0, num_param_pred)]
+ret_regs_pred = ['P%d' % i for i in range(0, num_ret_pred)]
+param_regs_16bit = [('RH%d' % (i+num_ret_16bit)) for i in range(0, num_param_16bit)]
+ret_regs_16bit = ['RH%d' % i for i in range(0, num_ret_16bit)]
+param_regs_32bit = [('R%d' % (i+num_ret_32bit)) for i in range(0, num_param_32bit)]
+ret_regs_32bit = ['R%d' % i for i in range(0, num_ret_32bit)]
+param_regs_64bit = [('RD%d' % (i+num_ret_64bit)) for i in range(0, num_param_64bit)]
+ret_regs_64bit = ['RD%d' % i for i in range(0, num_ret_64bit)]
+
+param_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_pred)
+ret_list_pred = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_pred)
+param_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_16bit)
+ret_list_16bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_16bit)
+param_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_32bit)
+ret_list_32bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_32bit)
+param_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), param_regs_64bit)
+ret_list_64bit = reduce(lambda x, y: '%s, %s' % (x, y), ret_regs_64bit)
+
+td_file.write('''
+//===--- PTXCallingConv.td - Calling Conventions -----------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This describes the calling conventions for the PTX architecture.
+//
+//===----------------------------------------------------------------------===//
+
+// PTX Formal Parameter Calling Convention
+def CC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[%s]>>,
+ CCIfType<[i16], CCAssignToReg<[%s]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[%s]>>
+]>;
+
+// PTX Return Value Calling Convention
+def RetCC_PTX : CallingConv<[
+ CCIfType<[i1], CCAssignToReg<[%s]>>,
+ CCIfType<[i16], CCAssignToReg<[%s]>>,
+ CCIfType<[i32,f32], CCAssignToReg<[%s]>>,
+ CCIfType<[i64,f64], CCAssignToReg<[%s]>>
+]>;
+''' % (param_list_pred, param_list_16bit, param_list_32bit, param_list_64bit,
+ ret_list_pred, ret_list_16bit, ret_list_32bit, ret_list_64bit))
+
+
+td_file.close()
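
One follow-up worth noting on the 10%/90% split: with the 128-register files the script was presumably run with (matching the checked-in PTXRegisterInfo.td above), int(0.1 * 128) = 12 registers of each class go to return values and the remaining 116 to parameters, so for example R0-R11 land in RetCC_PTX and R12-R127 in CC_PTX.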
diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt
index f282579..d1dda37 100644
--- a/lib/Target/PowerPC/CMakeLists.txt
+++ b/lib/Target/PowerPC/CMakeLists.txt
@@ -1,16 +1,13 @@
set(LLVM_TARGET_DEFINITIONS PPC.td)
-tablegen(PPCGenInstrNames.inc -gen-instr-enums)
-tablegen(PPCGenRegisterNames.inc -gen-register-enums)
tablegen(PPCGenAsmWriter.inc -gen-asm-writer)
tablegen(PPCGenCodeEmitter.inc -gen-emitter)
tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter)
-tablegen(PPCGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(PPCGenRegisterInfo.inc -gen-register-desc)
-tablegen(PPCGenInstrInfo.inc -gen-instr-desc)
+tablegen(PPCGenRegisterInfo.inc -gen-register-info)
+tablegen(PPCGenInstrInfo.inc -gen-instr-info)
tablegen(PPCGenDAGISel.inc -gen-dag-isel)
tablegen(PPCGenCallingConv.inc -gen-callingconv)
-tablegen(PPCGenSubtarget.inc -gen-subtarget)
+tablegen(PPCGenSubtargetInfo.inc -gen-subtarget)
add_llvm_target(PowerPCCodeGen
PPCAsmBackend.cpp
@@ -23,7 +20,6 @@ add_llvm_target(PowerPCCodeGen
PPCISelLowering.cpp
PPCFrameLowering.cpp
PPCJITInfo.cpp
- PPCMCAsmInfo.cpp
PPCMCCodeEmitter.cpp
PPCMCInstLower.cpp
PPCPredicates.cpp
@@ -35,3 +31,4 @@ add_llvm_target(PowerPCCodeGen
add_subdirectory(InstPrinter)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index adfa0aa..d022a44 100644
--- a/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -19,14 +19,12 @@
namespace llvm {
class MCOperand;
-class TargetMachine;
class PPCInstPrinter : public MCInstPrinter {
// 0 -> AIX, 1 -> Darwin.
unsigned SyntaxVariant;
public:
- PPCInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI,
- unsigned syntaxVariant)
+ PPCInstPrinter(const MCAsmInfo &MAI, unsigned syntaxVariant)
: MCInstPrinter(MAI), SyntaxVariant(syntaxVariant) {}
bool isDarwinSyntax() const {
diff --git a/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..a1b8166
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMPowerPCDesc
+ PPCMCTargetDesc.cpp
+ PPCMCAsmInfo.cpp
+ )
diff --git a/lib/Target/PowerPC/MCTargetDesc/Makefile b/lib/Target/PowerPC/MCTargetDesc/Makefile
new file mode 100644
index 0000000..9db6662
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/PowerPC/TargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMPowerPCDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
index 2d5c880..b6dca83 100644
--- a/lib/Target/PowerPC/PPCMCAsmInfo.cpp
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp
@@ -15,6 +15,10 @@
using namespace llvm;
PPCMCAsmInfoDarwin::PPCMCAsmInfoDarwin(bool is64Bit) {
+ if (is64Bit)
+ PointerSize = 8;
+ IsLittleEndian = false;
+
PCSymbol = ".";
CommentString = ";";
ExceptionsType = ExceptionHandling::DwarfCFI;
diff --git a/lib/Target/PowerPC/PPCMCAsmInfo.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
index 96ae6fb..96ae6fb 100644
--- a/lib/Target/PowerPC/PPCMCAsmInfo.h
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
new file mode 100644
index 0000000..02b887f
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -0,0 +1,70 @@
+//===-- PPCMCTargetDesc.cpp - PowerPC Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PPCMCTargetDesc.h"
+#include "PPCMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "PPCGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "PPCGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createPPCMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitPPCMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializePowerPCMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(ThePPC32Target, createPPCMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(ThePPC64Target, createPPCMCInstrInfo);
+}
+
+
+static MCSubtargetInfo *createPPCMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitPPCMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializePowerPCMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC32Target,
+ createPPCMCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(ThePPC64Target,
+ createPPCMCSubtargetInfo);
+}
+
+static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+ bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
+ if (TheTriple.isOSDarwin())
+ return new PPCMCAsmInfoDarwin(isPPC64);
+ return new PPCLinuxMCAsmInfo(isPPC64);
+
+}
+
+extern "C" void LLVMInitializePowerPCMCAsmInfo() {
+ RegisterMCAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
+ RegisterMCAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
+}
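
A hedged sketch of the consumer side of these registrations, assuming the LLVMInitializePowerPCMC* entry points above have already run and using only the TargetRegistry factory accessors of this era:

#include <string>
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;

static void queryPPCDesc() {
  std::string Error;
  const Target *T = TargetRegistry::lookupTarget("powerpc-apple-darwin", Error);
  if (!T) return;
  // Both calls dispatch to the factories registered above and hand back
  // tables generated into PPCGenInstrInfo.inc / PPCGenSubtargetInfo.inc.
  const MCInstrInfo *MII = T->createMCInstrInfo();
  const MCSubtargetInfo *STI =
      T->createMCSubtargetInfo("powerpc-apple-darwin", /*CPU=*/"", /*FS=*/"");
  delete MII;
  delete STI;
}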
diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
new file mode 100644
index 0000000..cee2350
--- /dev/null
+++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- PPCMCTargetDesc.h - PowerPC Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides PowerPC specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PPCMCTARGETDESC_H
+#define PPCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target ThePPC32Target;
+extern Target ThePPC64Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for PowerPC registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "PPCGenRegisterInfo.inc"
+
+// Defines symbolic names for the PowerPC instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "PPCGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "PPCGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/PowerPC/Makefile b/lib/Target/PowerPC/Makefile
index 030defe..1617b26 100644
--- a/lib/Target/PowerPC/Makefile
+++ b/lib/Target/PowerPC/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMPowerPCCodeGen
TARGET = PPC
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = PPCGenInstrNames.inc PPCGenRegisterNames.inc \
+BUILT_SOURCES = PPCGenRegisterInfo.inc \
PPCGenAsmWriter.inc PPCGenCodeEmitter.inc \
- PPCGenRegisterInfo.h.inc PPCGenRegisterInfo.inc \
PPCGenInstrInfo.inc PPCGenDAGISel.inc \
- PPCGenSubtarget.inc PPCGenCallingConv.inc \
+ PPCGenSubtargetInfo.inc PPCGenCallingConv.inc \
PPCGenMCCodeEmitter.inc
-DIRS = InstPrinter TargetInfo
+DIRS = InstPrinter TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/PowerPC/PPC.h b/lib/Target/PowerPC/PPC.h
index 92672b5..7191dd1 100644
--- a/lib/Target/PowerPC/PPC.h
+++ b/lib/Target/PowerPC/PPC.h
@@ -15,6 +15,7 @@
#ifndef LLVM_TARGET_POWERPC_H
#define LLVM_TARGET_POWERPC_H
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include <string>
// GCC #defines PPC on Linux but we use it as our namespace name
@@ -31,6 +32,8 @@ namespace llvm {
class MCInst;
class MCCodeEmitter;
class MCContext;
+ class MCInstrInfo;
+ class MCSubtargetInfo;
class TargetMachine;
class TargetAsmBackend;
@@ -38,16 +41,14 @@ namespace llvm {
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCJITCodeEmitterPass(PPCTargetMachine &TM,
JITCodeEmitter &MCE);
- MCCodeEmitter *createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+ MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
MCContext &Ctx);
TargetAsmBackend *createPPCAsmBackend(const Target &, const std::string &);
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
- extern Target ThePPC32Target;
- extern Target ThePPC64Target;
-
namespace PPCII {
/// Target Operand Flag enum.
@@ -81,13 +82,4 @@ namespace llvm {
} // end namespace llvm;
-// Defines symbolic names for PowerPC registers. This defines a mapping from
-// register name to register number.
-//
-#include "PPCGenRegisterNames.inc"
-
-// Defines symbolic names for the PowerPC instructions.
-//
-#include "PPCGenInstrNames.inc"
-
#endif
diff --git a/lib/Target/PowerPC/PPCAsmBackend.cpp b/lib/Target/PowerPC/PPCAsmBackend.cpp
index f562a3f..4b8cbb7 100644
--- a/lib/Target/PowerPC/PPCAsmBackend.cpp
+++ b/lib/Target/PowerPC/PPCAsmBackend.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCValue.h"
#include "llvm/Object/MachOFormat.h"
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
@@ -23,6 +24,11 @@ public:
PPCMachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {}
};
class PPCAsmBackend : public TargetAsmBackend {
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index b795db9..9de2200 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -680,10 +680,9 @@ static AsmPrinter *createPPCAsmPrinterPass(TargetMachine &tm,
}
static MCInstPrinter *createPPCMCInstPrinter(const Target &T,
- TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
- return new PPCInstPrinter(TM, MAI, SyntaxVariant);
+ return new PPCInstPrinter(MAI, SyntaxVariant);
}
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 74ecff5..cddc9d8 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -73,12 +73,12 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode,
}
Opcode = ~Opcode;
- const TargetInstrDesc &TID = TII.get(Opcode);
+ const MCInstrDesc &MCID = TII.get(Opcode);
- isLoad = TID.mayLoad();
- isStore = TID.mayStore();
+ isLoad = MCID.mayLoad();
+ isStore = MCID.mayStore();
- uint64_t TSFlags = TID.TSFlags;
+ uint64_t TSFlags = MCID.TSFlags;
isFirst = TSFlags & PPCII::PPC970_First;
isSingle = TSFlags & PPCII::PPC970_Single;
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 511bb22..2176c02 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -610,6 +610,9 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
unsigned Imm;
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
+
if (isInt32Immediate(N->getOperand(1), Imm)) {
// We can codegen setcc op, imm very efficiently compared to a brcond.
// Check for those cases here.
@@ -624,6 +627,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
return CurDAG->SelectNodeTo(N, PPC::RLWINM, MVT::i32, Ops, 4);
}
case ISD::SETNE: {
+ if (isPPC64) break;
SDValue AD =
SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U)), 0);
@@ -647,6 +651,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
switch (CC) {
default: break;
case ISD::SETEQ:
+ if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(1)), 0);
return CurDAG->SelectNodeTo(N, PPC::ADDZE, MVT::i32,
@@ -655,6 +660,7 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) {
getI32Imm(0)), 0),
Op.getValue(1));
case ISD::SETNE: {
+ if (isPPC64) break;
Op = SDValue(CurDAG->getMachineNode(PPC::NOR, dl, MVT::i32, Op, Op), 0);
SDNode *AD = CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
Op, getI32Imm(~0U));
@@ -996,22 +1002,25 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
}
case ISD::SELECT_CC: {
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+ EVT PtrVT = CurDAG->getTargetLoweringInfo().getPointerTy();
+ bool isPPC64 = (PtrVT == MVT::i64);
// Handle the setcc cases here. select_cc lhs, 0, 1, 0, cc
- if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
- if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
- if (N1C->isNullValue() && N3C->isNullValue() &&
- N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
- // FIXME: Implement this optzn for PPC64.
- N->getValueType(0) == MVT::i32) {
- SDNode *Tmp =
- CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
- N->getOperand(0), getI32Imm(~0U));
- return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
- SDValue(Tmp, 0), N->getOperand(0),
- SDValue(Tmp, 1));
- }
+ if (!isPPC64)
+ if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N->getOperand(2)))
+ if (ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N->getOperand(3)))
+ if (N1C->isNullValue() && N3C->isNullValue() &&
+ N2C->getZExtValue() == 1ULL && CC == ISD::SETNE &&
+ // FIXME: Implement this optzn for PPC64.
+ N->getValueType(0) == MVT::i32) {
+ SDNode *Tmp =
+ CurDAG->getMachineNode(PPC::ADDIC, dl, MVT::i32, MVT::Glue,
+ N->getOperand(0), getI32Imm(~0U));
+ return CurDAG->SelectNodeTo(N, PPC::SUBFE, MVT::i32,
+ SDValue(Tmp, 0), N->getOperand(0),
+ SDValue(Tmp, 1));
+ }
SDValue CCReg = SelectCC(N->getOperand(0), N->getOperand(1), CC, dl);
unsigned BROpc = getPredicateForSetCC(CC);
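
For context on why the fold is now fenced off on PPC64: the ADDIC/SUBFE pair computes (x != 0) branchlessly through the carry bit, and the recipe as written is 32-bit only. A C model of the identity it relies on, under ordinary unsigned wrap-around:

#include <cstdint>

// Models: addic t, x, -1 (CA = carry out), then subfe r, t, x
// (r = x + ~t + CA). The sum collapses to CA, i.e. (x != 0).
static uint32_t setne_zero(uint32_t X) {
  const uint64_t Sum = static_cast<uint64_t>(X) + 0xFFFFFFFFu;  // x + (-1)
  const uint32_t CA = static_cast<uint32_t>(Sum >> 32);         // carry out
  const uint32_t T = static_cast<uint32_t>(Sum);                // t = x - 1
  return X + ~T + CA;  // == CA == (X != 0 ? 1 : 0)
}

// An i64 flavor would need the 64-bit carry ops (cf. the FIXME above),
// hence the new isPPC64 early-outs rather than a PPC64 variant.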
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index dbb184c..9741a39 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -125,10 +125,12 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
setOperationAction(ISD::FPOW , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
setOperationAction(ISD::FPOW , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
@@ -215,10 +217,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
setOperationAction(ISD::VASTART , MVT::Other, Custom);
// VAARG is custom lowered with the 32-bit SVR4 ABI.
- if ( TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
- && !TM.getSubtarget<PPCSubtarget>().isPPC64())
+ if (TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ && !TM.getSubtarget<PPCSubtarget>().isPPC64()) {
setOperationAction(ISD::VAARG, MVT::Other, Custom);
- else
+ setOperationAction(ISD::VAARG, MVT::i64, Custom);
+ } else
setOperationAction(ISD::VAARG, MVT::Other, Expand);
// Use the default implementation.
@@ -1262,9 +1265,107 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
const PPCSubtarget &Subtarget) const {
+ SDNode *Node = Op.getNode();
+ EVT VT = Node->getValueType(0);
+ EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
+ SDValue InChain = Node->getOperand(0);
+ SDValue VAListPtr = Node->getOperand(1);
+ const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
+ DebugLoc dl = Node->getDebugLoc();
+
+ assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
+
+ // gpr_index
+ SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ VAListPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = GprIndex.getValue(1);
+
+ if (VT == MVT::i64) {
+ // Check if GprIndex is even
+ SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
+ DAG.getConstant(0, MVT::i32), ISD::SETNE);
+ SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
+ DAG.getConstant(1, MVT::i32));
+ // Align GprIndex to be even if it isn't
+ GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
+ GprIndex);
+ }
+
+ // fpr index is 1 byte after gpr
+ SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(1, MVT::i32));
+
+ // fpr
+ SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
+ FprPtr, MachinePointerInfo(SV), MVT::i8,
+ false, false, 0);
+ InChain = FprIndex.getValue(1);
+
+ SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(8, MVT::i32));
+
+ SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
+ DAG.getConstant(4, MVT::i32));
- llvm_unreachable("VAARG not yet implemented for the SVR4 ABI!");
- return SDValue(); // Not reached
+ // areas
+ SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
+ MachinePointerInfo(), false, false, 0);
+ InChain = OverflowArea.getValue(1);
+
+ SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
+ MachinePointerInfo(), false, false, 0);
+ InChain = RegSaveArea.getValue(1);
+
+ // select overflow_area if index >= 8
+ SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(8, MVT::i32), ISD::SETLT);
+
+ // adjustment constant gpr_index * 4/8
+ SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ // OurReg = RegSaveArea + RegConstant
+ SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
+ RegConstant);
+
+ // Floating types are 32 bytes into RegSaveArea
+ if (VT.isFloatingPoint())
+ OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
+ DAG.getConstant(32, MVT::i32));
+
+ // increase {f,g}pr_index by 1 (or 2 if VT is i64)
+ SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
+ VT.isInteger() ? GprIndex : FprIndex,
+ DAG.getConstant(VT == MVT::i64 ? 2 : 1,
+ MVT::i32));
+
+ InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
+ VT.isInteger() ? VAListPtr : FprPtr,
+ MachinePointerInfo(SV),
+ MVT::i8, false, false, 0);
+
+ // determine if we should load from reg_save_area or overflow_area
+ SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
+
+ // increase overflow_area by 4/8 if gpr/fpr >= 8
+ SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
+ DAG.getConstant(VT.isInteger() ? 4 : 8,
+ MVT::i32));
+
+ OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
+ OverflowAreaPlusN);
+
+ InChain = DAG.getTruncStore(InChain, dl, OverflowArea,
+ OverflowAreaPtr,
+ MachinePointerInfo(),
+ MVT::i32, false, false, 0);
+
+ return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0);
}
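For orientation, the DAG sequence above encodes roughly the following logic over the 32-bit SVR4 va_list record (gpr index at byte 0, fpr index at byte 1, overflow area pointer at byte 4, register save area pointer at byte 8). This is a hedged C++ sketch with illustrative names, not the committed code:

    #include <cstdint>

    // Layout of the SVR4 32-bit va_list record the lowering manipulates.
    struct VAList32 {
      uint8_t  gpr;            // byte 0: next GPR index (0..8)
      uint8_t  fpr;            // byte 1: next FPR index (0..8)
      uint16_t reserved;
      char    *overflow_area;  // byte 4: arguments spilled to the stack
      char    *reg_save_area;  // byte 8: 8 GPR words, then 8 FPR doublewords
    };

    static void *nextArg(VAList32 &VL, bool IsInt, bool IsI64) {
      unsigned Idx = IsInt ? VL.gpr : VL.fpr;
      if (IsI64 && (Idx & 1))
        ++Idx;                           // i64 must start in an even GPR pair
      void *P;
      if (Idx < 8) {                     // register save area still has room
        P = VL.reg_save_area + Idx * (IsInt ? 4 : 8) + (IsInt ? 0 : 32);
      } else {                           // fall back to the overflow area
        P = VL.overflow_area;
        VL.overflow_area += IsInt ? 4 : 8;
      }
      Idx += IsI64 ? 2 : 1;              // mirrors the IndexPlus1 trunc-store
      if (IsInt) VL.gpr = uint8_t(Idx); else VL.fpr = uint8_t(Idx);
      return P;
    }

Since i64 is not a legal type on PPC32, the MVT::i64 flavor reaches this code through the ReplaceNodeResults hook added further down rather than through LowerOperation.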
SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op,
@@ -1870,7 +1971,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
MachinePointerInfo(),
@@ -1889,7 +1994,11 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// to memory. ArgVal will be address of the beginning of
// the object.
if (GPR_idx != Num_GPR_Regs) {
- unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
+ unsigned VReg;
+ if (isPPC64)
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
+ else
+ VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
@@ -2902,6 +3011,12 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
&MemOpChains[0], MemOpChains.size());
+ // Set CR6 to true if this is a vararg call.
+ if (isVarArg) {
+ SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
+ RegsToPass.push_back(std::make_pair(unsigned(PPC::CR1EQ), SetCR));
+ }
+
// Build a sequence of copy-to-reg nodes chained together with token chain
// and flag operands which copy the outgoing args into the appropriate regs.
SDValue InFlag;
@@ -2911,13 +3026,6 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee,
InFlag = Chain.getValue(1);
}
- // Set CR6 to true if this is a vararg call.
- if (isVarArg) {
- SDValue SetCR(DAG.getMachineNode(PPC::CRSET, dl, MVT::i32), 0);
- Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
- InFlag = Chain.getValue(1);
- }
-
if (isTailCall)
PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp,
false, TailCallArguments);
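For background on the move: under the 32-bit SVR4 ABI, CR bit 6 (PPC::CR1EQ) tells a varargs callee whether floating-point arguments arrived in registers; hoisting the CRSET copy into RegsToPass lets it travel through the same glued copy-to-reg chain as the argument registers instead of being emitted after them. Setting the bit on every vararg call is conservative but safe. A minimal caller that exercises the rule, for illustration only:

    #include <cstdio>

    int main() {
      std::printf("%f\n", 1.0);  // vararg call passing an FP argument in a register
      return 0;
    }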
@@ -4422,11 +4530,27 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
SmallVectorImpl<SDValue>&Results,
SelectionDAG &DAG) const {
+ const TargetMachine &TM = getTargetMachine();
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
default:
assert(false && "Do not know how to custom type legalize this operation!");
return;
+ case ISD::VAARG: {
+ if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
+ || TM.getSubtarget<PPCSubtarget>().isPPC64())
+ return;
+
+ EVT VT = N->getValueType(0);
+
+ if (VT == MVT::i64) {
+ SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);
+
+ Results.push_back(NewNode);
+ Results.push_back(NewNode.getValue(1));
+ }
+ return;
+ }
case ISD::FP_ROUND_INREG: {
assert(N->getValueType(0) == MVT::ppcf128);
assert(N->getOperand(0).getValueType() == MVT::ppcf128);
@@ -4676,7 +4800,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
.addReg(TmpReg).addReg(MaskReg);
BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
.addReg(Tmp3Reg).addReg(Tmp2Reg);
- BuildMI(BB, dl, TII->get(PPC::STWCX))
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
.addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
BuildMI(BB, dl, TII->get(PPC::BCC))
.addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
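The STWCX -> STDCX change matters because the store-conditional width has to match the load-reserve that opens the loop (presumably LDARX on the 64-bit path, outside this hunk); a mismatched pair never succeeds. A hedged inline-asm rendering of the loop shape, with illustrative names and assuming a GCC-style ppc64 toolchain, not actual compiler output:

    // ldarx/stdcx. retry loop for a 64-bit atomic OR.
    static inline void AtomicOr64(long *P, long V) {
      long Tmp;
      asm volatile("1: ldarx  %0,0,%2\n"   // load doubleword and reserve
                   "   or     %0,%0,%3\n"  // apply the binary operation
                   "   stdcx. %0,0,%2\n"   // store iff the reservation held
                   "   bne-   1b"          // reservation lost: retry
                   : "=&r"(Tmp), "+m"(*P)
                   : "r"(P), "r"(V)
                   : "cr0", "memory");
    }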
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 53b0491..143444f 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -12,22 +12,26 @@
//===----------------------------------------------------------------------===//
#include "PPCInstrInfo.h"
+#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPredicates.h"
-#include "PPCGenInstrInfo.inc"
#include "PPCTargetMachine.h"
#include "PPCHazardRecognizers.h"
-#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/ADT/STLExtras.h"
+
+#define GET_INSTRINFO_CTOR
+#include "PPCGenInstrInfo.inc"
namespace llvm {
extern cl::opt<bool> EnablePPC32RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
@@ -37,8 +41,8 @@ extern cl::opt<bool> EnablePPC64RS; // FIXME (64-bit): See PPCRegisterInfo.cpp.
using namespace llvm;
PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm)
- : TargetInstrInfoImpl(PPCInsts, array_lengthof(PPCInsts)), TM(tm),
- RI(*TM.getSubtargetImpl(), *this) {}
+ : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for
/// this target when scheduling the DAG.
@@ -120,7 +124,7 @@ PPCInstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
// destination register as well.
if (Reg0 == Reg1) {
// Must be a two-address instruction!
- assert(MI->getDesc().getOperandConstraint(0, TOI::TIED_TO) &&
+ assert(MI->getDesc().getOperandConstraint(0, MCOI::TIED_TO) &&
"Expecting a two-address instruction!");
Reg2IsKill = false;
ChangeReg0 = true;
@@ -315,12 +319,12 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
else
llvm_unreachable("Impossible reg-to-reg copy");
- const TargetInstrDesc &TID = get(Opc);
- if (TID.getNumOperands() == 3)
- BuildMI(MBB, I, DL, TID, DestReg)
+ const MCInstrDesc &MCID = get(Opc);
+ if (MCID.getNumOperands() == 3)
+ BuildMI(MBB, I, DL, MCID, DestReg)
.addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));
else
- BuildMI(MBB, I, DL, TID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));
}
bool
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index b5249ae..90bacc9 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -18,6 +18,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "PPCRegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "PPCGenInstrInfo.inc"
+
namespace llvm {
/// PPCII - This namespace holds all of the PowerPC target-specific
@@ -61,7 +64,7 @@ enum PPC970_Unit {
} // end namespace PPCII
-class PPCInstrInfo : public TargetInstrInfoImpl {
+class PPCInstrInfo : public PPCGenInstrInfo {
PPCTargetMachine &TM;
const PPCRegisterInfo RI;
diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp
index 78383e0..4590f00 100644
--- a/lib/Target/PowerPC/PPCJITInfo.cpp
+++ b/lib/Target/PowerPC/PPCJITInfo.cpp
@@ -87,7 +87,7 @@ asm(
// FIXME: could shrink frame
// Set up a proper stack frame
// FIXME Layout
- // PowerPC64 ABI linkage - 24 bytes
+ // PowerPC32 ABI linkage - 24 bytes
// parameters - 32 bytes
// 13 double registers - 104 bytes
// 8 int registers - 32 bytes
@@ -205,11 +205,27 @@ void PPC32CompilationCallback() {
#if (defined(__POWERPC__) || defined (__ppc__) || defined(_POWER)) && \
defined(__ppc64__)
+#ifdef __ELF__
+asm(
+ ".text\n"
+ ".align 2\n"
+ ".globl PPC64CompilationCallback\n"
+ ".section \".opd\",\"aw\"\n"
+ ".align 3\n"
+"PPC64CompilationCallback:\n"
+ ".quad .L.PPC64CompilationCallback,.TOC.@tocbase,0\n"
+ ".size PPC64CompilationCallback,24\n"
+ ".previous\n"
+ ".align 4\n"
+ ".type PPC64CompilationCallback,@function\n"
+".L.PPC64CompilationCallback:\n"
+#else
asm(
".text\n"
".align 2\n"
".globl _PPC64CompilationCallback\n"
"_PPC64CompilationCallback:\n"
+#endif
// Make space for 8 ints r[3-10] and 13 doubles f[1-13] and the
// FIXME: need to save v[0-19] for altivec?
// Set up a proper stack frame
@@ -218,49 +234,55 @@ asm(
// parameters - 64 bytes
// 13 double registers - 104 bytes
// 8 int registers - 64 bytes
- "mflr r0\n"
- "std r0, 16(r1)\n"
- "stdu r1, -280(r1)\n"
+ "mflr 0\n"
+ "std 0, 16(1)\n"
+ "stdu 1, -280(1)\n"
// Save all int arg registers
- "std r10, 272(r1)\n" "std r9, 264(r1)\n"
- "std r8, 256(r1)\n" "std r7, 248(r1)\n"
- "std r6, 240(r1)\n" "std r5, 232(r1)\n"
- "std r4, 224(r1)\n" "std r3, 216(r1)\n"
+ "std 10, 272(1)\n" "std 9, 264(1)\n"
+ "std 8, 256(1)\n" "std 7, 248(1)\n"
+ "std 6, 240(1)\n" "std 5, 232(1)\n"
+ "std 4, 224(1)\n" "std 3, 216(1)\n"
// Save all call-clobbered FP regs.
- "stfd f13, 208(r1)\n" "stfd f12, 200(r1)\n"
- "stfd f11, 192(r1)\n" "stfd f10, 184(r1)\n"
- "stfd f9, 176(r1)\n" "stfd f8, 168(r1)\n"
- "stfd f7, 160(r1)\n" "stfd f6, 152(r1)\n"
- "stfd f5, 144(r1)\n" "stfd f4, 136(r1)\n"
- "stfd f3, 128(r1)\n" "stfd f2, 120(r1)\n"
- "stfd f1, 112(r1)\n"
+ "stfd 13, 208(1)\n" "stfd 12, 200(1)\n"
+ "stfd 11, 192(1)\n" "stfd 10, 184(1)\n"
+ "stfd 9, 176(1)\n" "stfd 8, 168(1)\n"
+ "stfd 7, 160(1)\n" "stfd 6, 152(1)\n"
+ "stfd 5, 144(1)\n" "stfd 4, 136(1)\n"
+ "stfd 3, 128(1)\n" "stfd 2, 120(1)\n"
+ "stfd 1, 112(1)\n"
// Arguments to Compilation Callback:
// r3 - our lr (address of the call instruction in stub plus 4)
// r4 - stub's lr (address of instruction that called the stub plus 4)
// r5 - is64Bit - always 1.
- "mr r3, r0\n"
- "ld r2, 280(r1)\n" // stub's frame
- "ld r4, 16(r2)\n" // stub's lr
- "li r5, 1\n" // 1 == 64 bit
+ "mr 3, 0\n" // return address (still in r0)
+ "ld 5, 280(1)\n" // stub's frame
+ "ld 4, 16(5)\n" // stub's lr
+ "li 5, 1\n" // 1 == 64 bit
+#ifdef __ELF__
+ "bl PPCCompilationCallbackC\n"
+ "nop\n"
+#else
"bl _PPCCompilationCallbackC\n"
- "mtctr r3\n"
+#endif
+ "mtctr 3\n"
// Restore all int arg registers
- "ld r10, 272(r1)\n" "ld r9, 264(r1)\n"
- "ld r8, 256(r1)\n" "ld r7, 248(r1)\n"
- "ld r6, 240(r1)\n" "ld r5, 232(r1)\n"
- "ld r4, 224(r1)\n" "ld r3, 216(r1)\n"
+ "ld 10, 272(1)\n" "ld 9, 264(1)\n"
+ "ld 8, 256(1)\n" "ld 7, 248(1)\n"
+ "ld 6, 240(1)\n" "ld 5, 232(1)\n"
+ "ld 4, 224(1)\n" "ld 3, 216(1)\n"
// Restore all FP arg registers
- "lfd f13, 208(r1)\n" "lfd f12, 200(r1)\n"
- "lfd f11, 192(r1)\n" "lfd f10, 184(r1)\n"
- "lfd f9, 176(r1)\n" "lfd f8, 168(r1)\n"
- "lfd f7, 160(r1)\n" "lfd f6, 152(r1)\n"
- "lfd f5, 144(r1)\n" "lfd f4, 136(r1)\n"
- "lfd f3, 128(r1)\n" "lfd f2, 120(r1)\n"
- "lfd f1, 112(r1)\n"
+ "lfd 13, 208(1)\n" "lfd 12, 200(1)\n"
+ "lfd 11, 192(1)\n" "lfd 10, 184(1)\n"
+ "lfd 9, 176(1)\n" "lfd 8, 168(1)\n"
+ "lfd 7, 160(1)\n" "lfd 6, 152(1)\n"
+ "lfd 5, 144(1)\n" "lfd 4, 136(1)\n"
+ "lfd 3, 128(1)\n" "lfd 2, 120(1)\n"
+ "lfd 1, 112(1)\n"
// Pop 3 frames off the stack and branch to target
- "ld r1, 280(r1)\n"
- "ld r2, 16(r1)\n"
- "mtlr r2\n"
+ "ld 1, 280(1)\n"
+ "ld 0, 16(1)\n"
+ "mtlr 0\n"
+ // XXX: any special TOC handling in the ELF case for JIT?
"bctr\n"
);
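The .opd stanza added above emits a ppc64 ELF (v1 ABI) function descriptor: the .quad lays down the code entry point, the TOC base, and an implicit zero environment pointer, which is why the symbol's size is 24. A hedged C++ rendering of that record, with illustrative field names:

    #include <cstdint>

    // 24-byte ELFv1 function descriptor, as emitted into .opd above.
    struct FunctionDescriptor {
      uint64_t Entry;  // .L.PPC64CompilationCallback, the actual code address
      uint64_t TOC;    // .TOC.@tocbase for the callee
      uint64_t Env;    // environment pointer, 0 here
    };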
#else
diff --git a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
index 65c2c82..cf73d86 100644
--- a/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
+++ b/lib/Target/PowerPC/PPCMCCodeEmitter.cpp
@@ -28,12 +28,10 @@ namespace {
class PPCMCCodeEmitter : public MCCodeEmitter {
PPCMCCodeEmitter(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const PPCMCCodeEmitter &); // DO NOT IMPLEMENT
- const TargetMachine &TM;
- MCContext &Ctx;
public:
- PPCMCCodeEmitter(TargetMachine &tm, MCContext &ctx)
- : TM(tm), Ctx(ctx) {
+ PPCMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx) {
}
~PPCMCCodeEmitter() {}
@@ -79,9 +77,10 @@ public:
} // end anonymous namespace
-MCCodeEmitter *llvm::createPPCMCCodeEmitter(const Target &, TargetMachine &TM,
+MCCodeEmitter *llvm::createPPCMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
MCContext &Ctx) {
- return new PPCMCCodeEmitter(TM, Ctx);
+ return new PPCMCCodeEmitter(MCII, STI, Ctx);
}
unsigned PPCMCCodeEmitter::
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index 3374e9b..9c2428b 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -44,6 +44,9 @@
#include "llvm/ADT/STLExtras.h"
#include <cstdlib>
+#define GET_REGINFO_TARGET_DESC
+#include "PPCGenRegisterInfo.inc"
+
// FIXME (64-bit): Eventually enable by default.
namespace llvm {
cl::opt<bool> EnablePPC32RS("enable-ppc32-regscavenger",
@@ -110,8 +113,7 @@ unsigned PPCRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
PPCRegisterInfo::PPCRegisterInfo(const PPCSubtarget &ST,
const TargetInstrInfo &tii)
- : PPCGenRegisterInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP),
- Subtarget(ST), TII(tii) {
+ : PPCGenRegisterInfo(), Subtarget(ST), TII(tii) {
ImmToIdxMap[PPC::LD] = PPC::LDX; ImmToIdxMap[PPC::STD] = PPC::STDX;
ImmToIdxMap[PPC::LBZ] = PPC::LBZX; ImmToIdxMap[PPC::STB] = PPC::STBX;
ImmToIdxMap[PPC::LHZ] = PPC::LHZX; ImmToIdxMap[PPC::LHA] = PPC::LHAX;
@@ -504,6 +506,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
unsigned Reg = findScratchRegister(II, RS, RC, SPAdj);
unsigned SrcReg = MI.getOperand(0).getReg();
+ bool LP64 = Subtarget.isPPC64();
// We need to store the CR in the low 4-bits of the saved value. First, issue
// an MFCRpseud to save all of the CRBits and, if needed, kill the SrcReg.
@@ -520,7 +523,7 @@ void PPCRegisterInfo::lowerCRSpilling(MachineBasicBlock::iterator II,
.addImm(0)
.addImm(31);
- addFrameReference(BuildMI(MBB, II, dl, TII.get(PPC::STW))
+ addFrameReference(BuildMI(MBB, II, dl, TII.get(LP64 ? PPC::STW8 : PPC::STW))
.addReg(Reg, getKillRegState(MI.getOperand(1).getImm())),
FrameIndex);
@@ -709,5 +712,3 @@ int PPCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
return PPCGenRegisterInfo::getLLVMRegNumFull(RegNum, Flavour);
}
-
-#include "PPCGenRegisterInfo.inc"
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 48c2562..33fe5eb 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -16,9 +16,11 @@
#define POWERPC32_REGISTERINFO_H
#include "PPC.h"
-#include "PPCGenRegisterInfo.h.inc"
#include <map>
+#define GET_REGINFO_HEADER
+#include "PPCGenRegisterInfo.inc"
+
namespace llvm {
class PPCSubtarget;
class TargetInstrInfo;
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 3c01901..1acdf4e 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -276,15 +276,13 @@ def RM: SPR<512, "**ROUNDING MODE**">;
/// Register classes
// Allocate volatiles first
// then nonvolatiles in reverse order since stmw/lmw save from rN to r31
-def GPRC : RegisterClass<"PPC", [i32], 32,
- [R2, R3, R4, R5, R6, R7, R8, R9, R10, R11, R12,
- R30, R29, R28, R27, R26, R25, R24, R23, R22, R21, R20, R19, R18, R17,
- R16, R15, R14, R13, R31, R0, R1, LR]>;
+def GPRC : RegisterClass<"PPC", [i32], 32, (add (sequence "R%u", 2, 12),
+ (sequence "R%u", 30, 13),
+ R31, R0, R1, LR)>;
-def G8RC : RegisterClass<"PPC", [i64], 64,
- [X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X12,
- X30, X29, X28, X27, X26, X25, X24, X23, X22, X21, X20, X19, X18, X17,
- X16, X15, X14, X31, X13, X0, X1, LR8]>;
+def G8RC : RegisterClass<"PPC", [i64], 64, (add (sequence "X%u", 2, 12),
+ (sequence "X%u", 30, 14),
+ X31, X13, X0, X1, LR8)>;
// Allocate volatiles first, then non-volatiles in reverse order. With the SVR4
// ABI the size of the Floating-point register save area is determined by the
@@ -293,41 +291,36 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
// previous stack frame. By allocating non-volatiles in reverse order we make
// sure that the Floating-point register save area is always as small as
// possible because there aren't any unused spill slots.
-def F8RC : RegisterClass<"PPC", [f64], 64, [F0, F1, F2, F3, F4, F5, F6, F7,
- F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23,
- F22, F21, F20, F19, F18, F17, F16, F15, F14]>;
-def F4RC : RegisterClass<"PPC", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7,
- F8, F9, F10, F11, F12, F13, F31, F30, F29, F28, F27, F26, F25, F24, F23,
- F22, F21, F20, F19, F18, F17, F16, F15, F14]>;
+def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
+ (sequence "F%u", 31, 14))>;
+def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v4f32], 128,
- [V2, V3, V4, V5, V0, V1,
- V6, V7, V8, V9, V10, V11, V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
- V29, V28, V27, V26, V25, V24, V23, V22, V21, V20]>;
+ (add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
+ V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
+ V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
def CRBITRC : RegisterClass<"PPC", [i32], 32,
- [CR0LT, CR0GT, CR0EQ, CR0UN,
- CR1LT, CR1GT, CR1EQ, CR1UN,
- CR2LT, CR2GT, CR2EQ, CR2UN,
- CR3LT, CR3GT, CR3EQ, CR3UN,
- CR4LT, CR4GT, CR4EQ, CR4UN,
- CR5LT, CR5GT, CR5EQ, CR5UN,
- CR6LT, CR6GT, CR6EQ, CR6UN,
- CR7LT, CR7GT, CR7EQ, CR7UN
- ]>
+ (add CR0LT, CR0GT, CR0EQ, CR0UN,
+ CR1LT, CR1GT, CR1EQ, CR1UN,
+ CR2LT, CR2GT, CR2EQ, CR2UN,
+ CR3LT, CR3GT, CR3EQ, CR3UN,
+ CR4LT, CR4GT, CR4EQ, CR4UN,
+ CR5LT, CR5GT, CR5EQ, CR5UN,
+ CR6LT, CR6GT, CR6EQ, CR6UN,
+ CR7LT, CR7GT, CR7EQ, CR7UN)>
{
let CopyCost = -1;
}
-def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2,
- CR3, CR4]>
-{
+def CRRC : RegisterClass<"PPC", [i32], 32, (add CR0, CR1, CR5, CR6,
+ CR7, CR2, CR3, CR4)> {
let SubRegClasses = [(CRBITRC sub_lt, sub_gt, sub_eq, sub_un)];
}
-def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>;
-def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>;
-def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>;
-def CARRYRC : RegisterClass<"PPC", [i32], 32, [CARRY]> {
+def CTRRC : RegisterClass<"PPC", [i32], 32, (add CTR)>;
+def CTRRC8 : RegisterClass<"PPC", [i64], 64, (add CTR8)>;
+def VRSAVERC : RegisterClass<"PPC", [i32], 32, (add VRSAVE)>;
+def CARRYRC : RegisterClass<"PPC", [i32], 32, (add CARRY)> {
let CopyCost = -1;
}
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 5f3aa23..5ea9b0f 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the PPC specific subclass of TargetSubtarget.
+// This file implements the PPC specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
@@ -15,8 +15,13 @@
#include "PPC.h"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
-#include "PPCGenSubtarget.inc"
+#include "llvm/Target/TargetRegistry.h"
#include <cstdlib>
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "PPCGenSubtargetInfo.inc"
+
using namespace llvm;
#if defined(__APPLE__)
@@ -57,9 +62,10 @@ static const char *GetCurrentPowerPCCPU() {
#endif
-PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
- bool is64Bit)
- : StackAlignment(16)
+PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit)
+ : PPCGenSubtargetInfo(TT, CPU, FS)
+ , StackAlignment(16)
, DarwinDirective(PPC::DIR_NONE)
, IsGigaProcessor(false)
, Has64BitSupport(false)
@@ -73,13 +79,19 @@ PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &FS,
, TargetTriple(TT) {
// Determine default and user specified characteristics
- std::string CPU = "generic";
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "generic";
#if defined(__APPLE__)
- CPU = GetCurrentPowerPCCPU();
+ if (CPUName == "generic")
+ CPUName = GetCurrentPowerPCCPU();
#endif
// Parse features string.
- ParseSubtargetFeatures(FS, CPU);
+ ParseSubtargetFeatures(CPUName, FS);
+
+ // Initialize scheduling itinerary for the specified CPU.
+ InstrItins = getInstrItineraryForCPU(CPUName);
// If we are generating code for ppc64, verify that options make sense.
if (is64Bit) {
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 8fd1a44..e028de6 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -7,23 +7,26 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the PowerPC specific subclass of TargetSubtarget.
+// This file declares the PowerPC specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef POWERPCSUBTARGET_H
#define POWERPCSUBTARGET_H
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Target/TargetInstrItineraries.h"
-#include "llvm/Target/TargetSubtarget.h"
-
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "PPCGenSubtargetInfo.inc"
+
// GCC #defines PPC on Linux but we use it as our namespace name
#undef PPC
namespace llvm {
+class StringRef;
namespace PPC {
// -m directive values.
@@ -43,7 +46,7 @@ namespace PPC {
class GlobalValue;
class TargetMachine;
-class PPCSubtarget : public TargetSubtarget {
+class PPCSubtarget : public PPCGenSubtargetInfo {
protected:
/// stackAlignment - The minimum alignment known to hold for the stack frame on
/// entry to the function and which must be maintained by every function.
@@ -73,13 +76,12 @@ public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- PPCSubtarget(const std::string &TT, const std::string &FS, bool is64Bit);
+ PPCSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
-
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// SetJITMode - This is called to inform the subtarget info that we are
/// producing code for the JIT.
@@ -104,7 +106,7 @@ public:
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
- : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128-n32";
+ : "E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32";
}
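The practical effect of the string change is that f64 and i64 get natural 8-byte ABI alignment on 32-bit PowerPC instead of 4-byte. A quick way to observe it, sketched against the TargetData API of this era (the include paths and standalone context are assumptions):

    #include "llvm/LLVMContext.h"
    #include "llvm/Type.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    unsigned ppc32DoubleABIAlign() {
      LLVMContext Ctx;
      // The corrected ppc32 layout string from above.
      TargetData TD("E-p:32:32-f64:64:64-i64:64:64-f128:64:128-n32");
      return TD.getABITypeAlignment(Type::getDoubleTy(Ctx));  // now 8, was 4
    }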
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index d27e54e..e0ea5ad 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "PPCMCAsmInfo.h"
#include "PPCTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/MC/MCStreamer.h"
@@ -21,15 +20,6 @@
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
- bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
- if (TheTriple.isOSDarwin())
- return new PPCMCAsmInfoDarwin(isPPC64);
- return new PPCLinuxMCAsmInfo(isPPC64);
-
-}
-
// This is duplicated code. Refactor this.
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
@@ -48,9 +38,6 @@ extern "C" void LLVMInitializePowerPCTarget() {
RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
- RegisterAsmInfoFn C(ThePPC32Target, createMCAsmInfo);
- RegisterAsmInfoFn D(ThePPC64Target, createMCAsmInfo);
-
// Register the MC Code Emitter
TargetRegistry::RegisterCodeEmitter(ThePPC32Target, createPPCMCCodeEmitter);
TargetRegistry::RegisterCodeEmitter(ThePPC64Target, createPPCMCCodeEmitter);
@@ -67,9 +54,10 @@ extern "C" void LLVMInitializePowerPCTarget() {
PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS, is64Bit),
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, is64Bit),
DataLayout(Subtarget.getTargetDataString()), InstrInfo(*this),
FrameLowering(Subtarget), JITInfo(*this, is64Bit),
TLInfo(*this), TSInfo(*this),
@@ -88,14 +76,16 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const std::string &TT,
bool PPCTargetMachine::getEnableTailMergeDefault() const { return false; }
PPC32TargetMachine::PPC32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : PPCTargetMachine(T, TT, FS, false) {
+ : PPCTargetMachine(T, TT, CPU, FS, false) {
}
PPC64TargetMachine::PPC64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : PPCTargetMachine(T, TT, FS, true) {
+ : PPCTargetMachine(T, TT, CPU, FS, true) {
}
diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h
index 2d24989..baf07e3 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.h
+++ b/lib/Target/PowerPC/PPCTargetMachine.h
@@ -41,7 +41,8 @@ class PPCTargetMachine : public LLVMTargetMachine {
public:
PPCTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS, bool is64Bit);
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
virtual const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const PPCFrameLowering *getFrameLowering() const {
@@ -77,7 +78,7 @@ public:
class PPC32TargetMachine : public PPCTargetMachine {
public:
PPC32TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
};
/// PPC64TargetMachine - PowerPC 64-bit target machine.
@@ -85,7 +86,7 @@ public:
class PPC64TargetMachine : public PPCTargetMachine {
public:
PPC64TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
};
} // end namespace llvm
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index fcec368..4cc9534 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -870,11 +870,6 @@ rshift_gt (unsigned int a)
bar ();
}
-void neg_eq_cst(unsigned int a) {
-if (-a == 123)
-bar();
-}
-
All should simplify to a single comparison. All of these are
currently not optimized with "clang -emit-llvm-bc | opt
-std-compile-opts".
@@ -1767,7 +1762,6 @@ case it chooses instead to keep the max operation obvious.
//===---------------------------------------------------------------------===//
-Switch lowering generates less than ideal code for the following switch:
define void @a(i32 %x) nounwind {
entry:
switch i32 %x, label %if.end [
@@ -1788,19 +1782,15 @@ declare void @foo()
Generated code on x86-64 (other platforms give similar results):
a:
cmpl $5, %edi
- ja .LBB0_2
- movl %edi, %eax
- movl $47, %ecx
- btq %rax, %rcx
- jb .LBB0_3
+ ja .LBB0_2
+ cmpl $4, %edi
+ jne .LBB0_3
.LBB0_2:
ret
.LBB0_3:
jmp foo # TAILCALL
-The movl+movl+btq+jb could be simplified to a cmpl+jne.
-
-Or, if we wanted to be really clever, we could simplify the whole thing to
+If we wanted to be really clever, we could simplify the whole thing to
something like the following, which eliminates a branch:
xorl $1, %edi
cmpl $4, %edi
diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt
index 6839234..c77ded4 100644
--- a/lib/Target/Sparc/CMakeLists.txt
+++ b/lib/Target/Sparc/CMakeLists.txt
@@ -1,13 +1,10 @@
set(LLVM_TARGET_DEFINITIONS Sparc.td)
-tablegen(SparcGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(SparcGenRegisterNames.inc -gen-register-enums)
-tablegen(SparcGenRegisterInfo.inc -gen-register-desc)
-tablegen(SparcGenInstrNames.inc -gen-instr-enums)
-tablegen(SparcGenInstrInfo.inc -gen-instr-desc)
+tablegen(SparcGenRegisterInfo.inc -gen-register-info)
+tablegen(SparcGenInstrInfo.inc -gen-instr-info)
tablegen(SparcGenAsmWriter.inc -gen-asm-writer)
tablegen(SparcGenDAGISel.inc -gen-dag-isel)
-tablegen(SparcGenSubtarget.inc -gen-subtarget)
+tablegen(SparcGenSubtargetInfo.inc -gen-subtarget)
tablegen(SparcGenCallingConv.inc -gen-callingconv)
add_llvm_target(SparcCodeGen
@@ -18,7 +15,6 @@ add_llvm_target(SparcCodeGen
SparcISelDAGToDAG.cpp
SparcISelLowering.cpp
SparcFrameLowering.cpp
- SparcMCAsmInfo.cpp
SparcRegisterInfo.cpp
SparcSubtarget.cpp
SparcTargetMachine.cpp
@@ -26,3 +22,4 @@ add_llvm_target(SparcCodeGen
)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/Sparc/DelaySlotFiller.cpp b/lib/Target/Sparc/DelaySlotFiller.cpp
index 4b12852..dab35e5 100644
--- a/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -298,7 +298,7 @@ bool Filler::isDelayFiller(MachineBasicBlock &MBB,
return false;
if (candidate->getOpcode() == SP::UNIMP)
return true;
- const TargetInstrDesc &prevdesc = (--candidate)->getDesc();
+ const MCInstrDesc &prevdesc = (--candidate)->getDesc();
return prevdesc.hasDelaySlot();
}
diff --git a/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..1e8c029
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,4 @@
+add_llvm_library(LLVMSparcDesc
+ SparcMCTargetDesc.cpp
+ SparcMCAsmInfo.cpp
+ )
diff --git a/lib/Target/Sparc/MCTargetDesc/Makefile b/lib/Target/Sparc/MCTargetDesc/Makefile
new file mode 100644
index 0000000..abcbe2d
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/Sparc/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSparcDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
index d37d6d2..6a7e090 100644
--- a/lib/Target/Sparc/SparcMCAsmInfo.cpp
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp
@@ -12,9 +12,16 @@
//===----------------------------------------------------------------------===//
#include "SparcMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+
using namespace llvm;
SparcELFMCAsmInfo::SparcELFMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::sparcv9)
+ PointerSize = 8;
+
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
Data64bitsDirective = 0; // .xword is only supported by V9.
diff --git a/lib/Target/Sparc/SparcMCAsmInfo.h b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
index 0cb6827..0cb6827 100644
--- a/lib/Target/Sparc/SparcMCAsmInfo.h
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
new file mode 100644
index 0000000..cb92a2b
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -0,0 +1,57 @@
+//===-- SparcMCTargetDesc.cpp - Sparc Target Descriptions --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SparcMCTargetDesc.h"
+#include "SparcMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SparcGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SparcGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSparcMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSparcMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeSparcMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheSparcTarget, createSparcMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSparcMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSparcMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeSparcMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheSparcTarget,
+ createSparcMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeSparcMCAsmInfo() {
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> X(TheSparcTarget);
+ RegisterMCAsmInfo<SparcELFMCAsmInfo> Y(TheSparcV9Target);
+}
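Once these hooks are registered, MC-level clients obtain the generated tables through TargetRegistry instead of going through a TargetMachine. A hedged sketch of the consumer side, with error handling elided and API names as introduced by this refactor:

    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Target/TargetRegistry.h"
    #include <string>
    using namespace llvm;

    void buildSparcMCTables() {
      std::string Err;
      const Target *T = TargetRegistry::lookupTarget("sparc-unknown-linux-gnu", Err);
      if (!T) return;
      MCInstrInfo *MII = T->createMCInstrInfo();  // dispatches to the hook above
      MCSubtargetInfo *STI =
          T->createMCSubtargetInfo("sparc-unknown-linux-gnu", "v8", "");
      // ... use the tables ...
      delete STI;
      delete MII;
    }

The SystemZ MCTargetDesc files added later in this patch follow the identical pattern.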
diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
new file mode 100644
index 0000000..2fd9e3f
--- /dev/null
+++ b/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -0,0 +1,41 @@
+//===-- SparcMCTargetDesc.h - Sparc Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides Sparc specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SPARCMCTARGETDESC_H
+#define SPARCMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSparcTarget;
+extern Target TheSparcV9Target;
+
+} // End llvm namespace
+
+// Defines symbolic names for Sparc registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "SparcGenRegisterInfo.inc"
+
+// Defines symbolic names for the Sparc instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "SparcGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SparcGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/Sparc/Makefile b/lib/Target/Sparc/Makefile
index 27942c5..4b81ada 100644
--- a/lib/Target/Sparc/Makefile
+++ b/lib/Target/Sparc/Makefile
@@ -12,12 +12,11 @@ LIBRARYNAME = LLVMSparcCodeGen
TARGET = Sparc
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = SparcGenRegisterInfo.h.inc SparcGenRegisterNames.inc \
- SparcGenRegisterInfo.inc SparcGenInstrNames.inc \
- SparcGenInstrInfo.inc SparcGenAsmWriter.inc \
- SparcGenDAGISel.inc SparcGenSubtarget.inc SparcGenCallingConv.inc
+BUILT_SOURCES = SparcGenRegisterInfo.inc SparcGenInstrInfo.inc \
+ SparcGenAsmWriter.inc SparcGenDAGISel.inc \
+ SparcGenSubtargetInfo.inc SparcGenCallingConv.inc
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/Sparc/Sparc.h b/lib/Target/Sparc/Sparc.h
index a37920d..7b2c614 100644
--- a/lib/Target/Sparc/Sparc.h
+++ b/lib/Target/Sparc/Sparc.h
@@ -15,6 +15,7 @@
#ifndef TARGET_SPARC_H
#define TARGET_SPARC_H
+#include "MCTargetDesc/SparcMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
@@ -28,21 +29,8 @@ namespace llvm {
FunctionPass *createSparcDelaySlotFillerPass(TargetMachine &TM);
FunctionPass *createSparcFPMoverPass(TargetMachine &TM);
- extern Target TheSparcTarget;
- extern Target TheSparcV9Target;
-
} // end namespace llvm;
-// Defines symbolic names for Sparc registers. This defines a mapping from
-// register name to register number.
-//
-#include "SparcGenRegisterNames.inc"
-
-// Defines symbolic names for the Sparc instructions.
-//
-#include "SparcGenInstrNames.inc"
-
-
namespace llvm {
// Enums corresponding to Sparc condition codes, both icc's and fcc's. These
// values must be kept in sync with the ones in the .td file.
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 0b4612d..6f30d3f 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -1,4 +1,3 @@
-
//===-- SparcISelLowering.cpp - Sparc DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
@@ -755,9 +754,11 @@ SparcTargetLowering::SparcTargetLowering(TargetMachine &TM)
setOperationAction(ISD::FSIN , MVT::f64, Expand);
setOperationAction(ISD::FCOS , MVT::f64, Expand);
setOperationAction(ISD::FREM , MVT::f64, Expand);
+ setOperationAction(ISD::FMA , MVT::f64, Expand);
setOperationAction(ISD::FSIN , MVT::f32, Expand);
setOperationAction(ISD::FCOS , MVT::f32, Expand);
setOperationAction(ISD::FREM , MVT::f32, Expand);
+ setOperationAction(ISD::FMA , MVT::f32, Expand);
setOperationAction(ISD::CTPOP, MVT::i32, Expand);
setOperationAction(ISD::CTTZ , MVT::i32, Expand);
setOperationAction(ISD::CTLZ , MVT::i32, Expand);
@@ -1265,26 +1266,6 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
-std::vector<unsigned> SparcTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
- if (Constraint.size() != 1)
- return std::vector<unsigned>();
-
- switch (Constraint[0]) {
- default: break;
- case 'r':
- return make_vector<unsigned>(SP::L0, SP::L1, SP::L2, SP::L3,
- SP::L4, SP::L5, SP::L6, SP::L7,
- SP::I0, SP::I1, SP::I2, SP::I3,
- SP::I4, SP::I5,
- SP::O0, SP::O1, SP::O2, SP::O3,
- SP::O4, SP::O5, SP::O7, 0);
- }
-
- return std::vector<unsigned>();
-}
-
bool
SparcTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// The Sparc target isn't yet aware of offsets.
diff --git a/lib/Target/Sparc/SparcISelLowering.h b/lib/Target/Sparc/SparcISelLowering.h
index 9ea6e16..8a1886a 100644
--- a/lib/Target/Sparc/SparcISelLowering.h
+++ b/lib/Target/Sparc/SparcISelLowering.h
@@ -65,9 +65,6 @@ namespace llvm {
ConstraintType getConstraintType(const std::string &Constraint) const;
std::pair<unsigned, const TargetRegisterClass*>
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index afa3c1f..4e3ddf8 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -12,19 +12,23 @@
//===----------------------------------------------------------------------===//
#include "SparcInstrInfo.h"
-#include "SparcSubtarget.h"
#include "Sparc.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
+#include "SparcMachineFunctionInfo.h"
+#include "SparcSubtarget.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+
+#define GET_INSTRINFO_CTOR
#include "SparcGenInstrInfo.inc"
-#include "SparcMachineFunctionInfo.h"
+
using namespace llvm;
SparcInstrInfo::SparcInstrInfo(SparcSubtarget &ST)
- : TargetInstrInfoImpl(SparcInsts, array_lengthof(SparcInsts)),
+ : SparcGenInstrInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
RI(ST, *this), Subtarget(ST) {
}
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index b2d24f5..eda64ef 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -17,6 +17,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "SparcRegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "SparcGenInstrInfo.inc"
+
namespace llvm {
/// SPII - This namespace holds all of the target specific flags that
@@ -31,7 +34,7 @@ namespace SPII {
};
}
-class SparcInstrInfo : public TargetInstrInfoImpl {
+class SparcInstrInfo : public SparcGenInstrInfo {
const SparcRegisterInfo RI;
const SparcSubtarget& Subtarget;
public:
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 9fcf028..0acdd2c 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -23,12 +23,15 @@
#include "llvm/Type.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SparcGenRegisterInfo.inc"
+
using namespace llvm;
SparcRegisterInfo::SparcRegisterInfo(SparcSubtarget &st,
const TargetInstrInfo &tii)
- : SparcGenRegisterInfo(SP::ADJCALLSTACKDOWN, SP::ADJCALLSTACKUP),
- Subtarget(st), TII(tii) {
+ : SparcGenRegisterInfo(), Subtarget(st), TII(tii) {
}
const unsigned* SparcRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF)
@@ -135,6 +138,3 @@ int SparcRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
int SparcRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
return SparcGenRegisterInfo::getLLVMRegNumFull(DwarfRegNo,0);
}
-
-#include "SparcGenRegisterInfo.inc"
-
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 56c8068..ec9e63a 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -15,7 +15,9 @@
#define SPARCREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
-#include "SparcGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "SparcGenRegisterInfo.inc"
namespace llvm {
diff --git a/lib/Target/Sparc/SparcRegisterInfo.td b/lib/Target/Sparc/SparcRegisterInfo.td
index 0729818..cf92829 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/lib/Target/Sparc/SparcRegisterInfo.td
@@ -139,23 +139,21 @@ def D15 : Rd<30, "F30", [F30, F31]>, DwarfRegNum<[87]>;
// FIXME: the register order should be defined in terms of the preferred
// allocation order...
//
-def IntRegs : RegisterClass<"SP", [i32], 32, [L0, L1, L2, L3, L4, L5, L6, L7,
- I0, I1, I2, I3, I4, I5,
- O0, O1, O2, O3, O4, O5, O7,
- G1,
- // Non-allocatable regs:
- G2, G3, G4, // FIXME: OK for use only in
- // applications, not libraries.
- O6, // stack ptr
- I6, // frame ptr
- I7, // return address
- G0, // constant zero
- G5, G6, G7 // reserved for kernel
- ]>;
+def IntRegs : RegisterClass<"SP", [i32], 32,
+ (add L0, L1, L2, L3, L4, L5, L6,
+ L7, I0, I1, I2, I3, I4, I5,
+ O0, O1, O2, O3, O4, O5, O7,
+ G1,
+ // Non-allocatable regs:
+ G2, G3, G4, // FIXME: OK for use only in
+ // applications, not libraries.
+ O6, // stack ptr
+ I6, // frame ptr
+ I7, // return address
+ G0, // constant zero
+ G5, G6, G7 // reserved for kernel
+ )>;
-def FPRegs : RegisterClass<"SP", [f32], 32, [F0, F1, F2, F3, F4, F5, F6, F7, F8,
- F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22,
- F23, F24, F25, F26, F27, F28, F29, F30, F31]>;
+def FPRegs : RegisterClass<"SP", [f32], 32, (sequence "F%u", 0, 31)>;
-def DFPRegs : RegisterClass<"SP", [f64], 64, [D0, D1, D2, D3, D4, D5, D6, D7,
- D8, D9, D10, D11, D12, D13, D14, D15]>;
+def DFPRegs : RegisterClass<"SP", [f64], 64, (sequence "D%u", 0, 15)>;
diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp
index ce11af1..de647e8 100644
--- a/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/lib/Target/Sparc/SparcSubtarget.cpp
@@ -7,28 +7,38 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the SPARC specific subclass of TargetSubtarget.
+// This file implements the SPARC specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "SparcSubtarget.h"
-#include "SparcGenSubtarget.inc"
+#include "Sparc.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SparcGenSubtargetInfo.inc"
+
using namespace llvm;
-SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &FS,
- bool is64Bit) :
+SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64Bit) :
+ SparcGenSubtargetInfo(TT, CPU, FS),
IsV9(false),
V8DeprecatedInsts(false),
IsVIS(false),
Is64Bit(is64Bit) {
// Determine default and user specified characteristics
- const char *CPU = "v8";
- if (is64Bit) {
- CPU = "v9";
- IsV9 = true;
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+ if (is64Bit)
+ CPUName = "v9";
+ else
+ CPUName = "v8";
}
+ IsV9 = CPUName == "v9";
// Parse features string.
- ParseSubtargetFeatures(FS, CPU);
+ ParseSubtargetFeatures(CPUName, FS);
}
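The net behavior of the new defaulting, illustrated with hypothetical constructions (SparcSubtarget.h is a private header, so this only builds inside the LLVM tree):

    #include "SparcSubtarget.h"
    using namespace llvm;

    void sparcCPUDefaults() {
      // Empty CPU string: 32-bit targets default to "v8", so isV9() is false.
      SparcSubtarget S32("sparc-unknown-linux-gnu", /*CPU=*/"", /*FS=*/"", false);
      // Empty CPU string on a 64-bit target defaults to "v9", so isV9() is true.
      SparcSubtarget S64("sparcv9-unknown-linux-gnu", "", "", true);
      // An explicit "v9" CPU on a 32-bit target now also sets IsV9.
      SparcSubtarget S32V9("sparc-unknown-linux-gnu", "v9", "", false);
    }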
diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h
index cec0ab4..00a04c3 100644
--- a/lib/Target/Sparc/SparcSubtarget.h
+++ b/lib/Target/Sparc/SparcSubtarget.h
@@ -7,26 +7,31 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the SPARC specific subclass of TargetSubtarget.
+// This file declares the SPARC specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef SPARC_SUBTARGET_H
#define SPARC_SUBTARGET_H
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "SparcGenSubtargetInfo.inc"
+
namespace llvm {
+class StringRef;
-class SparcSubtarget : public TargetSubtarget {
+class SparcSubtarget : public SparcGenSubtargetInfo {
bool IsV9;
bool V8DeprecatedInsts;
bool IsVIS;
bool Is64Bit;
public:
- SparcSubtarget(const std::string &TT, const std::string &FS, bool is64bit);
+ SparcSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS, bool is64bit);
bool isV9() const { return IsV9; }
bool isVIS() const { return IsVIS; }
@@ -34,8 +39,7 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool is64Bit() const { return Is64Bit; }
std::string getDataLayout() const {
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index b84eab5..cbe6d87 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "Sparc.h"
-#include "SparcMCAsmInfo.h"
#include "SparcTargetMachine.h"
#include "llvm/PassManager.h"
#include "llvm/Target/TargetRegistry.h"
@@ -21,18 +20,15 @@ extern "C" void LLVMInitializeSparcTarget() {
// Register the target.
RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget);
RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
-
- RegisterAsmInfo<SparcELFMCAsmInfo> A(TheSparcTarget);
- RegisterAsmInfo<SparcELFMCAsmInfo> B(TheSparcV9Target);
-
}
/// SparcTargetMachine ctor - Create an ILP32 architecture model
///
SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS, bool is64bit)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS, is64bit),
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, is64bit),
DataLayout(Subtarget.getDataLayout()),
TLInfo(*this), TSInfo(*this), InstrInfo(Subtarget),
FrameLowering(Subtarget) {
@@ -56,12 +52,14 @@ bool SparcTargetMachine::addPreEmitPass(PassManagerBase &PM,
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T,
const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : SparcTargetMachine(T, TT, FS, false) {
+ : SparcTargetMachine(T, TT, CPU, FS, false) {
}
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T,
const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : SparcTargetMachine(T, TT, FS, true) {
+ : SparcTargetMachine(T, TT, CPU, FS, true) {
}
diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h
index c4bb6bd..799fc49 100644
--- a/lib/Target/Sparc/SparcTargetMachine.h
+++ b/lib/Target/Sparc/SparcTargetMachine.h
@@ -34,7 +34,8 @@ class SparcTargetMachine : public LLVMTargetMachine {
SparcFrameLowering FrameLowering;
public:
SparcTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS, bool is64bit);
+ const std::string &CPU, const std::string &FS,
+ bool is64bit);
virtual const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const TargetFrameLowering *getFrameLowering() const {
@@ -62,7 +63,7 @@ public:
class SparcV8TargetMachine : public SparcTargetMachine {
public:
SparcV8TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
};
/// SparcV9TargetMachine - Sparc 64-bit target machine
@@ -70,7 +71,7 @@ public:
class SparcV9TargetMachine : public SparcTargetMachine {
public:
SparcV9TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
};
} // end namespace llvm
diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt
index 1f5d355..f4bdbd8 100644
--- a/lib/Target/SystemZ/CMakeLists.txt
+++ b/lib/Target/SystemZ/CMakeLists.txt
@@ -1,14 +1,11 @@
set(LLVM_TARGET_DEFINITIONS SystemZ.td)
-tablegen(SystemZGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(SystemZGenRegisterNames.inc -gen-register-enums)
-tablegen(SystemZGenRegisterInfo.inc -gen-register-desc)
-tablegen(SystemZGenInstrNames.inc -gen-instr-enums)
-tablegen(SystemZGenInstrInfo.inc -gen-instr-desc)
+tablegen(SystemZGenRegisterInfo.inc -gen-register-info)
+tablegen(SystemZGenInstrInfo.inc -gen-instr-info)
tablegen(SystemZGenAsmWriter.inc -gen-asm-writer)
tablegen(SystemZGenDAGISel.inc -gen-dag-isel)
tablegen(SystemZGenCallingConv.inc -gen-callingconv)
-tablegen(SystemZGenSubtarget.inc -gen-subtarget)
+tablegen(SystemZGenSubtargetInfo.inc -gen-subtarget)
add_llvm_target(SystemZCodeGen
SystemZAsmPrinter.cpp
@@ -16,7 +13,6 @@ add_llvm_target(SystemZCodeGen
SystemZISelLowering.cpp
SystemZInstrInfo.cpp
SystemZFrameLowering.cpp
- SystemZMCAsmInfo.cpp
SystemZRegisterInfo.cpp
SystemZSubtarget.cpp
SystemZTargetMachine.cpp
@@ -24,3 +20,4 @@ add_llvm_target(SystemZCodeGen
)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
diff --git a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..2ac9016
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMSystemZDesc
+ SystemZMCTargetDesc.cpp
+ SystemZMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/SystemZ/MCTargetDesc/Makefile
new file mode 100644
index 0000000..08f1a9d
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/SystemZ/MCTargetDesc/Makefile ------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMSystemZDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
index 2dc7e7b..8540546 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.cpp
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp
@@ -18,6 +18,8 @@
using namespace llvm;
SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) {
+ IsLittleEndian = false;
+ PointerSize = 8;
PrivateGlobalPrefix = ".L";
WeakRefDirective = "\t.weak\t";
PCSymbol = ".";
diff --git a/lib/Target/SystemZ/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
index a6a27e2..a6a27e2 100644
--- a/lib/Target/SystemZ/SystemZMCAsmInfo.h
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
new file mode 100644
index 0000000..5a826a6
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -0,0 +1,58 @@
+//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMCTargetDesc.h"
+#include "SystemZMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "SystemZGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "SystemZGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createSystemZMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitSystemZMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeSystemZMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+ createSystemZMCInstrInfo);
+}
+
+static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT,
+ StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitSystemZMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeSystemZMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+ createSystemZMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeSystemZMCAsmInfo() {
+ RegisterMCAsmInfo<SystemZMCAsmInfo> X(TheSystemZTarget);
+}
diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
new file mode 100644
index 0000000..e2ad5af
--- /dev/null
+++ b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -0,0 +1,38 @@
+//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides SystemZ specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SYSTEMZMCTARGETDESC_H
+#define SYSTEMZMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheSystemZTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for SystemZ registers.
+// This defines a mapping from register name to register number.
+#define GET_REGINFO_ENUM
+#include "SystemZGenRegisterInfo.inc"
+
+// Defines symbolic names for the SystemZ instructions.
+#define GET_INSTRINFO_ENUM
+#include "SystemZGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "SystemZGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile
index 6930e14..6356491 100644
--- a/lib/Target/SystemZ/Makefile
+++ b/lib/Target/SystemZ/Makefile
@@ -12,12 +12,11 @@ LIBRARYNAME = LLVMSystemZCodeGen
TARGET = SystemZ
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = SystemZGenRegisterInfo.h.inc SystemZGenRegisterNames.inc \
- SystemZGenRegisterInfo.inc SystemZGenInstrNames.inc \
- SystemZGenInstrInfo.inc SystemZGenAsmWriter.inc \
- SystemZGenDAGISel.inc SystemZGenSubtarget.inc SystemZGenCallingConv.inc
+BUILT_SOURCES = SystemZGenRegisterInfo.inc SystemZGenInstrInfo.inc \
+ SystemZGenAsmWriter.inc SystemZGenDAGISel.inc \
+ SystemZGenSubtargetInfo.inc SystemZGenCallingConv.inc
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h
index ea5240a..88960b9 100644
--- a/lib/Target/SystemZ/SystemZ.h
+++ b/lib/Target/SystemZ/SystemZ.h
@@ -15,6 +15,7 @@
#ifndef LLVM_TARGET_SystemZ_H
#define LLVM_TARGET_SystemZ_H
+#include "MCTargetDesc/SystemZMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -47,15 +48,5 @@ namespace llvm {
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
- extern Target TheSystemZTarget;
-
} // end namespace llvm;
-
-// Defines symbolic names for SystemZ registers.
-// This defines a mapping from register name to register number.
-#include "SystemZGenRegisterNames.inc"
-
-// Defines symbolic names for the SystemZ instructions.
-#include "SystemZGenInstrNames.inc"
-
#endif
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index af85df5..871c297 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -142,6 +142,8 @@ SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) :
setOperationAction(ISD::FCOS, MVT::f64, Expand);
setOperationAction(ISD::FREM, MVT::f32, Expand);
setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
// We have only 64-bit bitconverts
setOperationAction(ISD::BITCAST, MVT::f32, Expand);
diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h
index 2f2ef08..ab45ec5 100644
--- a/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -108,11 +108,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
MachineInstr *MI = MIB;
MachineFunction &MF = *MI->getParent()->getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- const TargetInstrDesc &TID = MI->getDesc();
+ const MCInstrDesc &MCID = MI->getDesc();
unsigned Flags = 0;
- if (TID.mayLoad())
+ if (MCID.mayLoad())
Flags |= MachineMemOperand::MOLoad;
- if (TID.mayStore())
+ if (MCID.mayStore())
Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index be52803..99e2730 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -16,17 +16,21 @@
#include "SystemZInstrInfo.h"
#include "SystemZMachineFunctionInfo.h"
#include "SystemZTargetMachine.h"
-#include "SystemZGenInstrInfo.inc"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/ErrorHandling.h"
+
+#define GET_INSTRINFO_CTOR
+#include "SystemZGenInstrInfo.inc"
+
using namespace llvm;
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
- : TargetInstrInfoImpl(SystemZInsts, array_lengthof(SystemZInsts)),
+ : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
RI(tm, *this), TM(tm) {
}
@@ -199,13 +203,13 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
}
bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isTerminator()) return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
// Conditional branch is a special case.
- if (TID.isBranch() && !TID.isBarrier())
+ if (MCID.isBranch() && !MCID.isBarrier())
return true;
- if (!TID.isPredicable())
+ if (!MCID.isPredicable())
return true;
return !isPredicated(MI);
}
@@ -343,7 +347,7 @@ SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
return Count;
}
-const TargetInstrDesc&
+const MCInstrDesc&
SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const {
switch (CC) {
default:
@@ -408,7 +412,7 @@ SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const {
}
}
-const TargetInstrDesc&
+const MCInstrDesc&
SystemZInstrInfo::getLongDispOpc(unsigned Opc) const {
switch (Opc) {
default:
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index 6cb7200..6a31e94 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -19,6 +19,9 @@
#include "llvm/ADT/IndexedMap.h"
#include "llvm/Target/TargetInstrInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "SystemZGenInstrInfo.inc"
+
namespace llvm {
class SystemZTargetMachine;
@@ -47,7 +50,7 @@ namespace SystemZII {
};
}
-class SystemZInstrInfo : public TargetInstrInfoImpl {
+class SystemZInstrInfo : public SystemZGenInstrInfo {
const SystemZRegisterInfo RI;
SystemZTargetMachine &TM;
public:
@@ -94,10 +97,10 @@ public:
SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const;
SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
- const TargetInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
- const TargetInstrDesc& getLongDispOpc(unsigned Opc) const;
+ const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const;
+ const MCInstrDesc& getLongDispOpc(unsigned Opc) const;
- const TargetInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
+ const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const {
if (Offset < 0 || Offset >= 4096)
return getLongDispOpc(Opc);
else
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index ed62cff..59692e8 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -25,12 +25,15 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "SystemZGenRegisterInfo.inc"
+
using namespace llvm;
SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm,
const SystemZInstrInfo &tii)
- : SystemZGenRegisterInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN),
- TM(tm), TII(tii) {
+ : SystemZGenRegisterInfo(), TM(tm), TII(tii) {
}
const unsigned*
@@ -51,10 +54,20 @@ BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const
BitVector Reserved(getNumRegs());
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
- if (TFI->hasFP(MF))
+ if (TFI->hasFP(MF)) {
+ // R11D is the frame pointer. Reserve all aliases.
Reserved.set(SystemZ::R11D);
+ Reserved.set(SystemZ::R11W);
+ Reserved.set(SystemZ::R10P);
+ Reserved.set(SystemZ::R10Q);
+ }
+
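+ // R14D (return address) and R15D (stack pointer) are always reserved,
+ // together with their 32-bit, pair, and quad aliases.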
Reserved.set(SystemZ::R14D);
Reserved.set(SystemZ::R15D);
+ Reserved.set(SystemZ::R14W);
+ Reserved.set(SystemZ::R15W);
+ Reserved.set(SystemZ::R14P);
+ Reserved.set(SystemZ::R14Q);
return Reserved;
}
@@ -143,6 +156,3 @@ int SystemZRegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
assert(0 && "What is the dwarf register number");
return -1;
}
-
-
-#include "SystemZGenRegisterInfo.inc"
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index cd8f20f..2e262e1 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -15,7 +15,9 @@
#define SystemZREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
-#include "SystemZGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "SystemZGenRegisterInfo.inc"
namespace llvm {
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td
index 9313ffd..a24cbcf 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -161,318 +161,45 @@ def F15L : FPRL<15, "f15", [F15S]>;
// Status register
def PSW : SystemZReg<"psw">;
-/// Register classes
-def GR32 : RegisterClass<"SystemZ", [i32], 32,
- // Volatile registers
- [R0W, R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W,
- // Frame pointer, sometimes allocable
- R11W,
- // Volatile, but not allocable
- R14W, R15W]>
-{
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REG32[] = {
- SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
- SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, SystemZ::R11W,
- SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
- SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
- };
- static const unsigned SystemZ_REG32_nofp[] = {
- SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
- SystemZ::R5W, SystemZ::R0W, SystemZ::R12W, /* No R11W */
- SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
- SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
- };
- GR32Class::iterator
- GR32Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG32_nofp;
- else
- return SystemZ_REG32;
- }
- GR32Class::iterator
- GR32Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG32_nofp + (sizeof(SystemZ_REG32_nofp) / sizeof(unsigned));
- else
- return SystemZ_REG32 + (sizeof(SystemZ_REG32) / sizeof(unsigned));
- }
- }];
-}
+/// Register classes.
+/// Allocate the callee-saved R6-R12 backwards. That way they can be saved
+/// together with R14 and R15 in one prolog instruction.
+def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW", 0, 5),
+ (sequence "R%uW", 15, 6))>;
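+// For reference: (sequence "R%uW", 0, 5) expands to R0W-R5W, and a
+// descending range such as (sequence "R%uW", 15, 6) walks backwards,
+// yielding R15W down to R6W -- the reversed callee-saved order described
+// above.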
/// Registers used to generate addresses. Everything except R0.
-def ADDR32 : RegisterClass<"SystemZ", [i32], 32,
- // Volatile registers
- [R1W, R2W, R3W, R4W, R5W, R6W, R7W, R8W, R9W, R10W, R12W, R13W,
- // Frame pointer, sometimes allocable
- R11W,
- // Volatile, but not allocable
- R14W, R15W]>
-{
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_ADDR32[] = {
- SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
- SystemZ::R5W, /* No R0W */ SystemZ::R12W, SystemZ::R11W,
- SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
- SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
- };
- static const unsigned SystemZ_ADDR32_nofp[] = {
- SystemZ::R1W, SystemZ::R2W, SystemZ::R3W, SystemZ::R4W,
- SystemZ::R5W, /* No R0W */ SystemZ::R12W, /* No R11W */
- SystemZ::R10W, SystemZ::R9W, SystemZ::R8W, SystemZ::R7W,
- SystemZ::R6W, SystemZ::R14W, SystemZ::R13W
- };
- ADDR32Class::iterator
- ADDR32Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_ADDR32_nofp;
- else
- return SystemZ_ADDR32;
- }
- ADDR32Class::iterator
- ADDR32Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_ADDR32_nofp + (sizeof(SystemZ_ADDR32_nofp) / sizeof(unsigned));
- else
- return SystemZ_ADDR32 + (sizeof(SystemZ_ADDR32) / sizeof(unsigned));
- }
- }];
-}
+def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>;
-def GR64 : RegisterClass<"SystemZ", [i64], 64,
- // Volatile registers
- [R0D, R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D,
- // Frame pointer, sometimes allocable
- R11D,
- // Volatile, but not allocable
- R14D, R15D]>
-{
+def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD", 0, 5),
+ (sequence "R%uD", 15, 6))> {
let SubRegClasses = [(GR32 subreg_32bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REG64[] = {
- SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
- SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, SystemZ::R11D,
- SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
- SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
- };
- static const unsigned SystemZ_REG64_nofp[] = {
- SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
- SystemZ::R5D, SystemZ::R0D, SystemZ::R12D, /* No R11D */
- SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
- SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
- };
- GR64Class::iterator
- GR64Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG64_nofp;
- else
- return SystemZ_REG64;
- }
- GR64Class::iterator
- GR64Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG64_nofp + (sizeof(SystemZ_REG64_nofp) / sizeof(unsigned));
- else
- return SystemZ_REG64 + (sizeof(SystemZ_REG64) / sizeof(unsigned));
- }
- }];
}
-def ADDR64 : RegisterClass<"SystemZ", [i64], 64,
- // Volatile registers
- [R1D, R2D, R3D, R4D, R5D, R6D, R7D, R8D, R9D, R10D, R12D, R13D,
- // Frame pointer, sometimes allocable
- R11D,
- // Volatile, but not allocable
- R14D, R15D]>
-{
+def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> {
let SubRegClasses = [(ADDR32 subreg_32bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_ADDR64[] = {
- SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
- SystemZ::R5D, /* No R0D */ SystemZ::R12D, SystemZ::R11D,
- SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
- SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
- };
- static const unsigned SystemZ_ADDR64_nofp[] = {
- SystemZ::R1D, SystemZ::R2D, SystemZ::R3D, SystemZ::R4D,
- SystemZ::R5D, /* No R0D */ SystemZ::R12D, /* No R11D */
- SystemZ::R10D, SystemZ::R9D, SystemZ::R8D, SystemZ::R7D,
- SystemZ::R6D, SystemZ::R14D, SystemZ::R13D
- };
- ADDR64Class::iterator
- ADDR64Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_ADDR64_nofp;
- else
- return SystemZ_ADDR64;
- }
- ADDR64Class::iterator
- ADDR64Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_ADDR64_nofp + (sizeof(SystemZ_ADDR64_nofp) / sizeof(unsigned));
- else
- return SystemZ_ADDR64 + (sizeof(SystemZ_ADDR64) / sizeof(unsigned));
- }
- }];
}
// Even-odd register pairs
-def GR64P : RegisterClass<"SystemZ", [v2i32], 64,
- [R0P, R2P, R4P, R6P, R8P, R10P, R12P, R14P]>
-{
+def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P,
+ R12P, R10P, R8P, R6P,
+ R14P)> {
let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REG64P[] = {
- SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, SystemZ::R10P,
- SystemZ::R8P, SystemZ::R6P };
- static const unsigned SystemZ_REG64P_nofp[] = {
- SystemZ::R0P, SystemZ::R2P, SystemZ::R4P, /* NO R10P */
- SystemZ::R8P, SystemZ::R6P };
- GR64PClass::iterator
- GR64PClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG64P_nofp;
- else
- return SystemZ_REG64P;
- }
- GR64PClass::iterator
- GR64PClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG64P_nofp + (sizeof(SystemZ_REG64P_nofp) / sizeof(unsigned));
- else
- return SystemZ_REG64P + (sizeof(SystemZ_REG64P) / sizeof(unsigned));
- }
- }];
}
-def GR128 : RegisterClass<"SystemZ", [v2i64], 128,
- [R0Q, R2Q, R4Q, R6Q, R8Q, R10Q, R12Q, R14Q]>
-{
+def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q,
+ R12Q, R10Q, R8Q, R6Q,
+ R14Q)> {
let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32),
- (GR64 subreg_even, subreg_odd)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REG128[] = {
- SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, SystemZ::R10Q,
- SystemZ::R8Q, SystemZ::R6Q };
- static const unsigned SystemZ_REG128_nofp[] = {
- SystemZ::R0Q, SystemZ::R2Q, SystemZ::R4Q, /* NO R10Q */
- SystemZ::R8Q, SystemZ::R6Q };
- GR128Class::iterator
- GR128Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG128_nofp;
- else
- return SystemZ_REG128;
- }
- GR128Class::iterator
- GR128Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- if (TFI->hasFP(MF))
- return SystemZ_REG128_nofp + (sizeof(SystemZ_REG128_nofp) / sizeof(unsigned));
- else
- return SystemZ_REG128 + (sizeof(SystemZ_REG128) / sizeof(unsigned));
- }
- }];
+ (GR64 subreg_even, subreg_odd)];
}
-def FP32 : RegisterClass<"SystemZ", [f32], 32,
- [F0S, F1S, F2S, F3S, F4S, F5S, F6S, F7S,
- F8S, F9S, F10S, F11S, F12S, F13S, F14S, F15S]> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REGFP32[] = {
- SystemZ::F0S, SystemZ::F2S, SystemZ::F4S, SystemZ::F6S,
- SystemZ::F1S, SystemZ::F3S, SystemZ::F5S, SystemZ::F7S,
- SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
- SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S };
- FP32Class::iterator
- FP32Class::allocation_order_begin(const MachineFunction &MF) const {
- return SystemZ_REGFP32;
- }
- FP32Class::iterator
- FP32Class::allocation_order_end(const MachineFunction &MF) const {
- return SystemZ_REGFP32 + (sizeof(SystemZ_REGFP32) / sizeof(unsigned));
- }
- }];
-}
+def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>;
-def FP64 : RegisterClass<"SystemZ", [f64], 64,
- [F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L,
- F8L, F9L, F10L, F11L, F12L, F13L, F14L, F15L]> {
+def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> {
let SubRegClasses = [(FP32 subreg_32bit)];
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned SystemZ_REGFP64[] = {
- SystemZ::F0L, SystemZ::F2L, SystemZ::F4L, SystemZ::F6L,
- SystemZ::F1L, SystemZ::F3L, SystemZ::F5L, SystemZ::F7L,
- SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L,
- SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L };
- FP64Class::iterator
- FP64Class::allocation_order_begin(const MachineFunction &MF) const {
- return SystemZ_REGFP64;
- }
- FP64Class::iterator
- FP64Class::allocation_order_end(const MachineFunction &MF) const {
- return SystemZ_REGFP64 + (sizeof(SystemZ_REGFP64) / sizeof(unsigned));
- }
- }];
}
// Status flags registers.
-def CCR : RegisterClass<"SystemZ", [i64], 64, [PSW]> {
+def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> {
let CopyCost = -1; // Don't allow copying of status registers.
}
diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp
index a8b5e1f..b3ed066 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -7,25 +7,32 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the SystemZ specific subclass of TargetSubtarget.
+// This file implements the SystemZ specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "SystemZSubtarget.h"
#include "SystemZ.h"
-#include "SystemZGenSubtarget.inc"
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "SystemZGenSubtargetInfo.inc"
using namespace llvm;
SystemZSubtarget::SystemZSubtarget(const std::string &TT,
+ const std::string &CPU,
const std::string &FS):
- HasZ10Insts(false) {
- std::string CPU = "z9";
+ SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) {
+ std::string CPUName = CPU;
+ if (CPUName.empty())
+ CPUName = "z9";
// Parse features string.
- ParseSubtargetFeatures(FS, CPU);
+ ParseSubtargetFeatures(CPUName, FS);
}
/// True if accessing the GV requires an extra load.
diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h
index 405d6e9..55cfd80 100644
--- a/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/lib/Target/SystemZ/SystemZSubtarget.h
@@ -7,33 +7,36 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the SystemZ specific subclass of TargetSubtarget.
+// This file declares the SystemZ specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H
#define LLVM_TARGET_SystemZ_SUBTARGET_H
-#include "llvm/Target/TargetSubtarget.h"
-
+#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "SystemZGenSubtargetInfo.inc"
+
namespace llvm {
class GlobalValue;
+class StringRef;
class TargetMachine;
-class SystemZSubtarget : public TargetSubtarget {
+class SystemZSubtarget : public SystemZGenSubtargetInfo {
bool HasZ10Insts;
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
///
- SystemZSubtarget(const std::string &TT, const std::string &FS);
+ SystemZSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool isZ10() const { return HasZ10Insts; }
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 1603899..48298cc 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -7,7 +7,6 @@
//
//===----------------------------------------------------------------------===//
-#include "SystemZMCAsmInfo.h"
#include "SystemZTargetMachine.h"
#include "SystemZ.h"
#include "llvm/PassManager.h"
@@ -17,16 +16,16 @@ using namespace llvm;
extern "C" void LLVMInitializeSystemZTarget() {
// Register the target.
RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
- RegisterAsmInfo<SystemZMCAsmInfo> Y(TheSystemZTarget);
}
/// SystemZTargetMachine ctor - Create an ILP64 architecture model
///
SystemZTargetMachine::SystemZTargetMachine(const Target &T,
const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS),
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
"-f64:64:64-f128:128:128-a0:16:16-n32:64"),
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h
index 524f83d..e40b556 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.h
+++ b/lib/Target/SystemZ/SystemZTargetMachine.h
@@ -38,7 +38,7 @@ class SystemZTargetMachine : public LLVMTargetMachine {
SystemZFrameLowering FrameLowering;
public:
SystemZTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
diff --git a/lib/Target/Target.cpp b/lib/Target/Target.cpp
index 0919fe4..a42ce54 100644
--- a/lib/Target/Target.cpp
+++ b/lib/Target/Target.cpp
@@ -97,10 +97,6 @@ unsigned long long LLVMOffsetOfElement(LLVMTargetDataRef TD, LLVMTypeRef StructT
return unwrap(TD)->getStructLayout(STy)->getElementOffset(Element);
}
-void LLVMInvalidateStructLayout(LLVMTargetDataRef TD, LLVMTypeRef StructTy) {
- unwrap(TD)->InvalidateStructLayoutInfo(unwrap<StructType>(StructTy));
-}
-
void LLVMDisposeTargetData(LLVMTargetDataRef TD) {
delete unwrap(TD);
}
diff --git a/lib/Target/TargetAsmInfo.cpp b/lib/Target/TargetAsmInfo.cpp
index 6fa5420..a97b0e8 100644
--- a/lib/Target/TargetAsmInfo.cpp
+++ b/lib/Target/TargetAsmInfo.cpp
@@ -17,11 +17,7 @@ using namespace llvm;
TargetAsmInfo::TargetAsmInfo(const TargetMachine &TM) {
TLOF = &TM.getTargetLowering()->getObjFileLowering();
- const TargetData &TD = *TM.getTargetData();
- IsLittleEndian = TD.isLittleEndian();
- PointerSize = TD.getPointerSize();
- const TargetFrameLowering &TFI = *TM.getFrameLowering();
- StackDir = TFI.getStackGrowthDirection();
+ TFI = TM.getFrameLowering();
TRI = TM.getRegisterInfo();
- TFI.getInitialFrameState(InitialFrameState);
+ TFI->getInitialFrameState(InitialFrameState);
}
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 1990bc7..17d022a 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -42,6 +42,7 @@ char TargetData::ID = 0;
//===----------------------------------------------------------------------===//
StructLayout::StructLayout(const StructType *ST, const TargetData &TD) {
+ assert(!ST->isOpaque() && "Cannot get layout of opaque structs");
StructAlignment = 0;
StructSize = 0;
NumElements = ST->getNumElements();
@@ -313,63 +314,21 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
namespace {
-class StructLayoutMap : public AbstractTypeUser {
+class StructLayoutMap {
typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
LayoutInfoTy LayoutInfo;
- void RemoveEntry(LayoutInfoTy::iterator I, bool WasAbstract) {
- I->second->~StructLayout();
- free(I->second);
- if (WasAbstract)
- I->first->removeAbstractTypeUser(this);
- LayoutInfo.erase(I);
- }
-
-
- /// refineAbstractType - The callback method invoked when an abstract type is
- /// resolved to another type. An object must override this method to update
- /// its internal state to reference NewType instead of OldType.
- ///
- virtual void refineAbstractType(const DerivedType *OldTy,
- const Type *) {
- LayoutInfoTy::iterator I = LayoutInfo.find(cast<const StructType>(OldTy));
- assert(I != LayoutInfo.end() && "Using type but not in map?");
- RemoveEntry(I, true);
- }
-
- /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware
- /// of is when a type makes the transition from being abstract (where it has
- /// clients on its AbstractTypeUsers list) to concrete (where it does not).
- /// This method notifies ATU's when this occurs for a type.
- ///
- virtual void typeBecameConcrete(const DerivedType *AbsTy) {
- LayoutInfoTy::iterator I = LayoutInfo.find(cast<const StructType>(AbsTy));
- assert(I != LayoutInfo.end() && "Using type but not in map?");
- RemoveEntry(I, true);
- }
-
public:
virtual ~StructLayoutMap() {
// Remove any layouts.
- for (LayoutInfoTy::iterator
- I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) {
- const Type *Key = I->first;
+ for (LayoutInfoTy::iterator I = LayoutInfo.begin(), E = LayoutInfo.end();
+ I != E; ++I) {
StructLayout *Value = I->second;
-
- if (Key->isAbstract())
- Key->removeAbstractTypeUser(this);
-
Value->~StructLayout();
free(Value);
}
}
- void InvalidateEntry(const StructType *Ty) {
- LayoutInfoTy::iterator I = LayoutInfo.find(Ty);
- if (I == LayoutInfo.end()) return;
- RemoveEntry(I, Ty->isAbstract());
- }
-
StructLayout *&operator[](const StructType *STy) {
return LayoutInfo[STy];
}
@@ -404,22 +363,9 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
new (L) StructLayout(Ty, *this);
- if (Ty->isAbstract())
- Ty->addAbstractTypeUser(STM);
-
return L;
}
-/// InvalidateStructLayoutInfo - TargetData speculatively caches StructLayout
-/// objects. If a TargetData object is alive when types are being refined and
-/// removed, this method must be called whenever a StructType is removed to
-/// avoid a dangling pointer in this cache.
-void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
- if (!LayoutMap) return; // No cache.
-
- static_cast<StructLayoutMap*>(LayoutMap)->InvalidateEntry(Ty);
-}
-
std::string TargetData::getStringRepresentation() const {
std::string Result;
raw_string_ostream OS(Result);
@@ -570,7 +516,7 @@ unsigned TargetData::getPreferredTypeAlignmentShift(const Type *Ty) const {
/// getIntPtrType - Return an unsigned integer type that is the same size or
/// greater to the host pointer size.
-const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
+IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
return IntegerType::get(C, getPointerSizeInBits());
}
diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp
index d4b7697..d52ecb3 100644
--- a/lib/Target/TargetInstrInfo.cpp
+++ b/lib/Target/TargetInstrInfo.cpp
@@ -12,44 +12,39 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/ErrorHandling.h"
#include <cctype>
using namespace llvm;
//===----------------------------------------------------------------------===//
-// TargetOperandInfo
+// TargetInstrInfo
//===----------------------------------------------------------------------===//
-/// getRegClass - Get the register class for the operand, handling resolution
-/// of "symbolic" pointer register classes etc. If this is not a register
-/// operand, this returns null.
-const TargetRegisterClass *
-TargetOperandInfo::getRegClass(const TargetRegisterInfo *TRI) const {
- if (isLookupPtrRegClass())
+TargetInstrInfo::~TargetInstrInfo() {
+}
+
+const TargetRegisterClass*
+TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
+ const TargetRegisterInfo *TRI) const {
+ if (OpNum >= MCID.getNumOperands())
+ return 0;
+
+ short RegClass = MCID.OpInfo[OpNum].RegClass;
+ if (MCID.OpInfo[OpNum].isLookupPtrRegClass())
return TRI->getPointerRegClass(RegClass);
+
// Instructions like INSERT_SUBREG do not have fixed register classes.
if (RegClass < 0)
return 0;
+
// Otherwise just look it up normally.
return TRI->getRegClass(RegClass);
}
-//===----------------------------------------------------------------------===//
-// TargetInstrInfo
-//===----------------------------------------------------------------------===//
-
-TargetInstrInfo::TargetInstrInfo(const TargetInstrDesc* Desc,
- unsigned numOpcodes)
- : Descriptors(Desc), NumOpcodes(numOpcodes) {
-}
-
-TargetInstrInfo::~TargetInstrInfo() {
-}
-
unsigned
TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
const MachineInstr *MI) const {
@@ -135,13 +130,13 @@ void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB,
bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isTerminator()) return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
// Conditional branch is a special case.
- if (TID.isBranch() && !TID.isBarrier())
+ if (MCID.isBranch() && !MCID.isBarrier())
return true;
- if (!TID.isPredicable())
+ if (!MCID.isPredicable())
return true;
return !isPredicated(MI);
}
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 3343384..703431b 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -35,38 +35,39 @@ using namespace llvm;
// Generic Code
//===----------------------------------------------------------------------===//
-TargetLoweringObjectFile::TargetLoweringObjectFile() : Ctx(0) {
- TextSection = 0;
- DataSection = 0;
- BSSSection = 0;
- ReadOnlySection = 0;
- StaticCtorSection = 0;
- StaticDtorSection = 0;
- LSDASection = 0;
-
- CommDirectiveSupportsAlignment = true;
- DwarfAbbrevSection = 0;
- DwarfInfoSection = 0;
- DwarfLineSection = 0;
- DwarfFrameSection = 0;
- DwarfPubNamesSection = 0;
- DwarfPubTypesSection = 0;
- DwarfDebugInlineSection = 0;
- DwarfStrSection = 0;
- DwarfLocSection = 0;
- DwarfARangesSection = 0;
- DwarfRangesSection = 0;
- DwarfMacroInfoSection = 0;
-
- IsFunctionEHFrameSymbolPrivate = true;
- SupportsWeakOmittedEHFrame = true;
+TargetLoweringObjectFile::TargetLoweringObjectFile() :
+ Ctx(0),
+ TextSection(0),
+ DataSection(0),
+ BSSSection(0),
+ ReadOnlySection(0),
+ StaticCtorSection(0),
+ StaticDtorSection(0),
+ LSDASection(0),
+ CompactUnwindSection(0),
+ DwarfAbbrevSection(0),
+ DwarfInfoSection(0),
+ DwarfLineSection(0),
+ DwarfFrameSection(0),
+ DwarfPubNamesSection(0),
+ DwarfPubTypesSection(0),
+ DwarfDebugInlineSection(0),
+ DwarfStrSection(0),
+ DwarfLocSection(0),
+ DwarfARangesSection(0),
+ DwarfRangesSection(0),
+ DwarfMacroInfoSection(0),
+ TLSExtraDataSection(0),
+ CommDirectiveSupportsAlignment(true),
+ SupportsWeakOmittedEHFrame(true),
+ IsFunctionEHFrameSymbolPrivate(true) {
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
}
static bool isSuitableForBSS(const GlobalVariable *GV) {
- Constant *C = GV->getInitializer();
+ const Constant *C = GV->getInitializer();
// Must have zero initializer.
if (!C->isNullValue())
@@ -168,7 +169,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getBSS();
}
- Constant *C = GVar->getInitializer();
+ const Constant *C = GVar->getInitializer();
// If the global is marked constant, we can put it into a mergable section,
// a mergable string section, or general .data if it contains relocations.
diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp
index 863b811..74a1f4e 100644
--- a/lib/Target/TargetMachine.cpp
+++ b/lib/Target/TargetMachine.cpp
@@ -43,7 +43,7 @@ namespace llvm {
Reloc::Model RelocationModel;
CodeModel::Model CMModel;
bool GuaranteedTailCallOpt;
- unsigned StackAlignment;
+ unsigned StackAlignmentOverride;
bool RealignStack;
bool DisableJumpTables;
bool StrongPHIElim;
@@ -183,7 +183,7 @@ EnableGuaranteedTailCallOpt("tailcallopt",
static cl::opt<unsigned, true>
OverrideStackAlignment("stack-alignment",
cl::desc("Override default stack alignment"),
- cl::location(StackAlignment),
+ cl::location(StackAlignmentOverride),
cl::init(0));
static cl::opt<bool, true>
EnableRealignStack("realign-stack",
@@ -216,8 +216,9 @@ FunctionSections("ffunction-sections",
// TargetMachine Class
//
-TargetMachine::TargetMachine(const Target &T)
- : TheTarget(T), AsmInfo(0),
+TargetMachine::TargetMachine(const Target &T,
+ StringRef TT, StringRef CPU, StringRef FS)
+ : TheTarget(T), TargetTriple(TT), TargetCPU(CPU), TargetFS(FS), AsmInfo(0),
MCRelaxAll(false),
MCNoExecStack(false),
MCSaveTempLabels(false),
diff --git a/lib/Target/TargetRegisterInfo.cpp b/lib/Target/TargetRegisterInfo.cpp
index 1c3f2dd..90a8f8d 100644
--- a/lib/Target/TargetRegisterInfo.cpp
+++ b/lib/Target/TargetRegisterInfo.cpp
@@ -20,21 +20,11 @@
using namespace llvm;
-TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterDesc *D, unsigned NR,
+TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
regclass_iterator RCB, regclass_iterator RCE,
- const char *const *subregindexnames,
- int CFSO, int CFDO,
- const unsigned* subregs, const unsigned subregsize,
- const unsigned* aliases, const unsigned aliasessize)
- : SubregHash(subregs), SubregHashSize(subregsize),
- AliasesHash(aliases), AliasesHashSize(aliasessize),
- Desc(D), SubRegIndexNames(subregindexnames), NumRegs(NR),
+ const char *const *subregindexnames)
+ : InfoDesc(ID), SubRegIndexNames(subregindexnames),
RegClassBegin(RCB), RegClassEnd(RCE) {
- assert(isPhysicalRegister(NumRegs) &&
- "Target has too many physical registers!");
-
- CallFrameSetupOpcode = CFSO;
- CallFrameDestroyOpcode = CFDO;
}
TargetRegisterInfo::~TargetRegisterInfo() {}
@@ -83,14 +73,14 @@ TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const {
/// registers for the specific register class.
static void getAllocatableSetForRC(const MachineFunction &MF,
const TargetRegisterClass *RC, BitVector &R){
- for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF),
- E = RC->allocation_order_end(MF); I != E; ++I)
- R.set(*I);
+ ArrayRef<unsigned> Order = RC->getRawAllocationOrder(MF);
+ for (unsigned i = 0; i != Order.size(); ++i)
+ R.set(Order[i]);
}
BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
const TargetRegisterClass *RC) const {
- BitVector Allocatable(NumRegs);
+ BitVector Allocatable(getNumRegs());
if (RC) {
getAllocatableSetForRC(MF, RC, Allocatable);
} else {
diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtargetInfo.cpp
index edb76f9..59ffdea 100644
--- a/lib/Target/TargetSubtarget.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -1,4 +1,4 @@
-//===-- TargetSubtarget.cpp - General Target Information -------------------==//
+//===-- TargetSubtargetInfo.cpp - General Target Information ---------------==//
//
// The LLVM Compiler Infrastructure
//
@@ -11,18 +11,18 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/ADT/SmallVector.h"
using namespace llvm;
//---------------------------------------------------------------------------
-// TargetSubtarget Class
+// TargetSubtargetInfo Class
//
-TargetSubtarget::TargetSubtarget() {}
+TargetSubtargetInfo::TargetSubtargetInfo() {}
-TargetSubtarget::~TargetSubtarget() {}
+TargetSubtargetInfo::~TargetSubtargetInfo() {}
-bool TargetSubtarget::enablePostRAScheduler(
+bool TargetSubtargetInfo::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp
index c352bfc..d45dd35 100644
--- a/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -15,9 +15,11 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -25,17 +27,15 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
namespace {
struct X86Operand;
class X86ATTAsmParser : public TargetAsmParser {
+ MCSubtargetInfo &STI;
MCAsmParser &Parser;
- TargetMachine &TM;
-
-protected:
- unsigned Is64Bit : 1;
private:
MCAsmParser &getParser() const { return Parser; }
@@ -61,6 +61,11 @@ private:
/// or %es:(%edi) in 32bit mode.
bool isDstOp(X86Operand &Op);
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
/// @name Auto-generated Matcher Functions
/// {
@@ -70,12 +75,11 @@ private:
/// }
public:
- X86ATTAsmParser(const Target &T, MCAsmParser &parser, TargetMachine &TM)
- : TargetAsmParser(T), Parser(parser), TM(TM) {
+ X86ATTAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser)
+ : TargetAsmParser(), STI(sti), Parser(parser) {
// Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(
- &TM.getSubtarget<X86Subtarget>()));
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
}
virtual bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc);
@@ -84,23 +88,6 @@ public:
virtual bool ParseDirective(AsmToken DirectiveID);
};
-
-class X86_32ATTAsmParser : public X86ATTAsmParser {
-public:
- X86_32ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
- : X86ATTAsmParser(T, Parser, TM) {
- Is64Bit = false;
- }
-};
-
-class X86_64ATTAsmParser : public X86ATTAsmParser {
-public:
- X86_64ATTAsmParser(const Target &T, MCAsmParser &Parser, TargetMachine &TM)
- : X86ATTAsmParser(T, Parser, TM) {
- Is64Bit = true;
- }
-};
-
} // end anonymous namespace
/// @name Auto-generated Match Functions
@@ -155,7 +142,7 @@ struct X86Operand : public MCParsedAsmOperand {
/// getEndLoc - Get the location of the last token of this operand.
SMLoc getEndLoc() const { return EndLoc; }
- virtual void dump(raw_ostream &OS) const {}
+ virtual void print(raw_ostream &OS) const {}
StringRef getToken() const {
assert(Kind == Token && "Invalid access!");
@@ -365,7 +352,7 @@ struct X86Operand : public MCParsedAsmOperand {
} // end anonymous namespace.
bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
- unsigned basereg = Is64Bit ? X86::RSI : X86::ESI;
+ unsigned basereg = is64BitMode() ? X86::RSI : X86::ESI;
return (Op.isMem() &&
(Op.Mem.SegReg == 0 || Op.Mem.SegReg == X86::DS) &&
@@ -375,7 +362,7 @@ bool X86ATTAsmParser::isSrcOp(X86Operand &Op) {
}
bool X86ATTAsmParser::isDstOp(X86Operand &Op) {
- unsigned basereg = Is64Bit ? X86::RDI : X86::EDI;
+ unsigned basereg = is64BitMode() ? X86::RDI : X86::EDI;
return Op.isMem() && Op.Mem.SegReg == X86::ES &&
isa<MCConstantExpr>(Op.Mem.Disp) &&
@@ -406,7 +393,7 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo,
// FIXME: This should be done using Requires<In32BitMode> and
// Requires<In64BitMode> so "eiz" usage in 64-bit instructions
// can be also checked.
- if (RegNo == X86::RIZ && !Is64Bit)
+ if (RegNo == X86::RIZ && !is64BitMode())
return Error(Tok.getLoc(), "riz register in 64-bit mode only");
// Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
@@ -710,23 +697,6 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
}
}
- // FIXME: Hack to recognize vpclmul<src1_quadword, src2_quadword>dq
- if (PatchedName.startswith("vpclmul")) {
- unsigned CLMULQuadWordSelect = StringSwitch<unsigned>(
- PatchedName.slice(7, PatchedName.size() - 2))
- .Case("lqlq", 0x00) // src1[63:0], src2[63:0]
- .Case("hqlq", 0x01) // src1[127:64], src2[63:0]
- .Case("lqhq", 0x10) // src1[63:0], src2[127:64]
- .Case("hqhq", 0x11) // src1[127:64], src2[127:64]
- .Default(~0U);
- if (CLMULQuadWordSelect != ~0U) {
- ExtraImmOp = MCConstantExpr::Create(CLMULQuadWordSelect,
- getParser().getContext());
- assert(PatchedName.endswith("dq") && "Unexpected mnemonic!");
- PatchedName = "vpclmulqdq";
- }
- }
-
Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
if (ExtraImmOp)
@@ -843,7 +813,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Transform "movs[bwl] %ds:(%esi), %es:(%edi)" into "movs[bwl]"
if (Name.startswith("movs") && Operands.size() == 3 &&
(Name == "movsb" || Name == "movsw" || Name == "movsl" ||
- (Is64Bit && Name == "movsq"))) {
+ (is64BitMode() && Name == "movsq"))) {
X86Operand &Op = *(X86Operand*)Operands.begin()[1];
X86Operand &Op2 = *(X86Operand*)Operands.begin()[2];
if (isSrcOp(Op) && isDstOp(Op2)) {
@@ -856,7 +826,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Transform "lods[bwl] %ds:(%esi),{%al,%ax,%eax,%rax}" into "lods[bwl]"
if (Name.startswith("lods") && Operands.size() == 3 &&
(Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
- Name == "lodsl" || (Is64Bit && Name == "lodsq"))) {
+ Name == "lodsl" || (is64BitMode() && Name == "lodsq"))) {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
if (isSrcOp(*Op1) && Op2->isReg()) {
@@ -886,7 +856,7 @@ ParseInstruction(StringRef Name, SMLoc NameLoc,
// Transform "stos[bwl] {%al,%ax,%eax,%rax},%es:(%edi)" into "stos[bwl]"
if (Name.startswith("stos") && Operands.size() == 3 &&
(Name == "stos" || Name == "stosb" || Name == "stosw" ||
- Name == "stosl" || (Is64Bit && Name == "stosq"))) {
+ Name == "stosl" || (is64BitMode() && Name == "stosq"))) {
X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]);
X86Operand *Op2 = static_cast<X86Operand*>(Operands[2]);
if (isDstOp(*Op2) && Op1->isReg()) {
@@ -1161,8 +1131,8 @@ extern "C" void LLVMInitializeX86AsmLexer();
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
- RegisterAsmParser<X86_32ATTAsmParser> X(TheX86_32Target);
- RegisterAsmParser<X86_64ATTAsmParser> Y(TheX86_64Target);
+ RegisterAsmParser<X86ATTAsmParser> X(TheX86_32Target);
+ RegisterAsmParser<X86ATTAsmParser> Y(TheX86_64Target);
LLVMInitializeX86AsmLexer();
}
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index b5fa94f..b112f9f 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -1,18 +1,15 @@
set(LLVM_TARGET_DEFINITIONS X86.td)
-tablegen(X86GenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(X86GenRegisterNames.inc -gen-register-enums)
-tablegen(X86GenRegisterInfo.inc -gen-register-desc)
+tablegen(X86GenRegisterInfo.inc -gen-register-info)
tablegen(X86GenDisassemblerTables.inc -gen-disassembler)
-tablegen(X86GenInstrNames.inc -gen-instr-enums)
-tablegen(X86GenInstrInfo.inc -gen-instr-desc)
+tablegen(X86GenInstrInfo.inc -gen-instr-info)
tablegen(X86GenAsmWriter.inc -gen-asm-writer)
tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1)
tablegen(X86GenAsmMatcher.inc -gen-asm-matcher)
tablegen(X86GenDAGISel.inc -gen-dag-isel)
tablegen(X86GenFastISel.inc -gen-fast-isel)
tablegen(X86GenCallingConv.inc -gen-callingconv)
-tablegen(X86GenSubtarget.inc -gen-subtarget)
+tablegen(X86GenSubtargetInfo.inc -gen-subtarget)
tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info)
set(sources
@@ -30,7 +27,6 @@ set(sources
X86InstrInfo.cpp
X86JITInfo.cpp
X86MachObjectWriter.cpp
- X86MCAsmInfo.cpp
X86MCCodeEmitter.cpp
X86MCInstLower.cpp
X86RegisterInfo.cpp
@@ -60,5 +56,6 @@ add_llvm_target(X86CodeGen ${sources})
add_subdirectory(AsmParser)
add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
+add_subdirectory(MCTargetDesc)
add_subdirectory(TargetInfo)
add_subdirectory(Utils)
diff --git a/lib/Target/X86/Disassembler/X86Disassembler.cpp b/lib/Target/X86/Disassembler/X86Disassembler.cpp
index d8a105e..4a0d2ec 100644
--- a/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -26,7 +26,8 @@
#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/raw_ostream.h"
-#include "X86GenRegisterNames.inc"
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
#include "X86GenEDInfo.inc"
using namespace llvm;
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 68247d2..c37d879 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -15,30 +15,23 @@
#define DEBUG_TYPE "asm-printer"
#include "X86ATTInstPrinter.h"
#include "X86InstComments.h"
-#include "X86Subtarget.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
-#include "X86GenInstrNames.inc"
#include <map>
using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#define GET_INSTRUCTION_NAME
#define PRINT_ALIAS_INSTR
-#include "X86GenRegisterNames.inc"
#include "X86GenAsmWriter.inc"
-#undef PRINT_ALIAS_INSTR
-#undef GET_INSTRUCTION_NAME
-X86ATTInstPrinter::X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+X86ATTInstPrinter::X86ATTInstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {
- // Initialize the set of available features.
- setAvailableFeatures(ComputeAvailableFeatures(
- &TM.getSubtarget<X86Subtarget>()));
}
void X86ATTInstPrinter::printRegName(raw_ostream &OS,
diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
index 5f939b6..5426e5c 100644
--- a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.h
@@ -19,19 +19,15 @@
namespace llvm {
class MCOperand;
-class X86Subtarget;
-class TargetMachine;
class X86ATTInstPrinter : public MCInstPrinter {
public:
- X86ATTInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI);
+ X86ATTInstPrinter(const MCAsmInfo &MAI);
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
virtual void printInst(const MCInst *MI, raw_ostream &OS);
virtual StringRef getOpcodeName(unsigned Opcode) const;
- // Methods used to print the alias of an instruction.
- unsigned ComputeAvailableFeatures(const X86Subtarget *Subtarget) const;
// Autogenerated by tblgen, returns true if we successfully printed an
// alias.
bool printAliasInstr(const MCInst *MI, raw_ostream &OS);
diff --git a/lib/Target/X86/InstPrinter/X86InstComments.cpp b/lib/Target/X86/InstPrinter/X86InstComments.cpp
index c642acc..4e28dfe 100644
--- a/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "X86InstComments.h"
-#include "X86GenInstrNames.inc"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/raw_ostream.h"
#include "../Utils/X86ShuffleDecode.h"
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 5f581ba..506e26c 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -15,13 +15,12 @@
#define DEBUG_TYPE "asm-printer"
#include "X86IntelInstPrinter.h"
#include "X86InstComments.h"
-#include "X86Subtarget.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-#include "X86GenInstrNames.inc"
#include <cctype>
using namespace llvm;
diff --git a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
index c8030c3..e84a194 100644
--- a/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
+++ b/lib/Target/X86/InstPrinter/X86IntelInstPrinter.h
@@ -20,11 +20,10 @@
namespace llvm {
class MCOperand;
-class TargetMachine;
class X86IntelInstPrinter : public MCInstPrinter {
public:
- X86IntelInstPrinter(TargetMachine &TM, const MCAsmInfo &MAI)
+ X86IntelInstPrinter(const MCAsmInfo &MAI)
: MCInstPrinter(MAI) {}
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..ca88f8f
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMX86Desc
+ X86MCTargetDesc.cpp
+ X86MCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/X86/MCTargetDesc/Makefile b/lib/Target/X86/MCTargetDesc/Makefile
new file mode 100644
index 0000000..b19774e
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/X86/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMX86Desc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index 2e1ec63..2703100 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "X86MCAsmInfo.h"
-#include "X86TargetMachine.h"
#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
@@ -45,14 +44,17 @@ static const char *const x86_asm_table[] = {
"{flags}", "",
"{dirflag}", "",
"{fpsr}", "",
+ "{fpcr}", "",
"{cc}", "cc",
0,0};
-X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
+X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) {
+ bool is64Bit = T.getArch() == Triple::x86_64;
+ if (is64Bit)
+ PointerSize = 8;
+
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
-
- bool is64Bit = Triple.getArch() == Triple::x86_64;
TextAlignFillValue = 0x90;
@@ -74,22 +76,14 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
ExceptionsType = ExceptionHandling::DwarfCFI;
}
-const MCExpr *
-X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
- unsigned Encoding,
- MCStreamer &Streamer) const {
- MCContext &Context = Streamer.getContext();
- const MCExpr *Res =
- MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
- const MCExpr *Four = MCConstantExpr::Create(4, Context);
- return MCBinaryExpr::CreateAdd(Res, Four, Context);
-}
-
X86_64MCAsmInfoDarwin::X86_64MCAsmInfoDarwin(const Triple &Triple)
: X86MCAsmInfoDarwin(Triple) {
}
X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
+ if (T.getArch() == Triple::x86_64)
+ PointerSize = 8;
+
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
@@ -114,6 +108,17 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &T) {
Data64bitsDirective = 0;
}
+const MCExpr *
+X86_64MCAsmInfoDarwin::getExprForPersonalitySymbol(const MCSymbol *Sym,
+ unsigned Encoding,
+ MCStreamer &Streamer) const {
+ MCContext &Context = Streamer.getContext();
+ const MCExpr *Res =
+ MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_GOTPCREL, Context);
+ const MCExpr *Four = MCConstantExpr::Create(4, Context);
+ return MCBinaryExpr::CreateAdd(Res, Four, Context);
+}
+
const MCSection *X86ELFMCAsmInfo::
getNonexecutableStackSection(MCContext &Ctx) const {
return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS,
diff --git a/lib/Target/X86/X86MCAsmInfo.h b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
index 2cd4c8e..2cd4c8e 100644
--- a/lib/Target/X86/X86MCAsmInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
new file mode 100644
index 0000000..b77f37b
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -0,0 +1,185 @@
+//===-- X86MCTargetDesc.cpp - X86 Target Descriptions -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86MCTargetDesc.h"
+#include "X86MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Support/Host.h"
+
+#define GET_REGINFO_MC_DESC
+#include "X86GenRegisterInfo.inc"
+
+#define GET_INSTRINFO_MC_DESC
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "X86GenSubtargetInfo.inc"
+
+using namespace llvm;
+
+
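+// ParseX86Triple - Map the arch component of the triple onto the subtarget
+// feature string: "+64bit-mode" for x86_64 triples, "-64bit-mode" otherwise
+// (this toggles the Mode64Bit subtarget feature defined in X86.td).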
+std::string X86_MC::ParseX86Triple(StringRef TT) {
+ Triple TheTriple(TT);
+ if (TheTriple.getArch() == Triple::x86_64)
+ return "+64bit-mode";
+ return "-64bit-mode";
+}
+
+/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+bool X86_MC::GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+ // gcc doesn't know that cpuid clobbers ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
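+
+// Illustrative use of GetCpuIDAndInfo (a sketch, not part of the library):
+// leaf 0 returns the highest supported leaf in EAX and the vendor string in
+// EBX, EDX, ECX, in that order.
+//
+//   unsigned EAX, EBX, ECX, EDX;
+//   if (!X86_MC::GetCpuIDAndInfo(0, &EAX, &EBX, &ECX, &EDX)) {
+//     char Vendor[13];
+//     memcpy(Vendor + 0, &EBX, 4);
+//     memcpy(Vendor + 4, &EDX, 4);
+//     memcpy(Vendor + 8, &ECX, 4);
+//     Vendor[12] = '\0'; // e.g. "GenuineIntel" or "AuthenticAMD"
+//   }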
+
+void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family,
+ unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
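+
+// Worked example: a Sandy Bridge CPU reports EAX = 0x000206A7 for leaf 1.
+//   Family = (0x206A7 >> 8) & 0xf = 6
+//   Model  = (0x206A7 >> 4) & 0xf = 0xA, and since Family == 6 the extended
+//   model bits are folded in: Model = 0xA + (0x2 << 4) = 0x2A (42).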
+
+MCSubtargetInfo *X86_MC::createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ std::string ArchFS = X86_MC::ParseX86Triple(TT);
+ if (!FS.empty()) {
+ if (!ArchFS.empty())
+ ArchFS = ArchFS + "," + FS.str();
+ else
+ ArchFS = FS;
+ }
+
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitX86MCSubtargetInfo(X, TT, CPUName, ArchFS);
+ return X;
+}
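+
+// Example (illustrative): with TT = "x86_64-apple-darwin" and FS = "+avx",
+// ParseX86Triple yields "+64bit-mode", so InitX86MCSubtargetInfo receives
+// ArchFS = "+64bit-mode,+avx"; an empty CPU falls back to the host CPU on
+// x86 hosts and to "generic" elsewhere.
+//
+//   MCSubtargetInfo *STI =
+//     X86_MC::createX86MCSubtargetInfo("x86_64-apple-darwin", "", "+avx");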
+
+// Force static initialization.
+extern "C" void LLVMInitializeX86MCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_32Target,
+ X86_MC::createX86MCSubtargetInfo);
+ TargetRegistry::RegisterMCSubtargetInfo(TheX86_64Target,
+ X86_MC::createX86MCSubtargetInfo);
+}
+
+static MCInstrInfo *createX86MCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitX86MCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeX86MCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheX86_32Target, createX86MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(TheX86_64Target, createX86MCInstrInfo);
+}
+
+static MCRegisterInfo *createX86MCRegisterInfo() {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitX86MCRegisterInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeX86MCRegInfo() {
+ TargetRegistry::RegisterMCRegInfo(TheX86_32Target, createX86MCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(TheX86_64Target, createX86MCRegisterInfo);
+}
+
+
+static MCAsmInfo *createX86MCAsmInfo(const Target &T, StringRef TT) {
+ Triple TheTriple(TT);
+
+ if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
+ if (TheTriple.getArch() == Triple::x86_64)
+ return new X86_64MCAsmInfoDarwin(TheTriple);
+ else
+ return new X86MCAsmInfoDarwin(TheTriple);
+ }
+
+ if (TheTriple.isOSWindows())
+ return new X86MCAsmInfoCOFF(TheTriple);
+
+ return new X86ELFMCAsmInfo(TheTriple);
+}
+
+extern "C" void LLVMInitializeX86MCAsmInfo() {
+ // Register the target asm info.
+ RegisterMCAsmInfoFn A(TheX86_32Target, createX86MCAsmInfo);
+ RegisterMCAsmInfoFn B(TheX86_64Target, createX86MCAsmInfo);
+}
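+
+// Illustrative client flow (a sketch against the TargetRegistry API of this
+// revision): once the LLVMInitializeX86MC* entry points above have run, MC
+// consumers can resolve everything through the registry.
+//
+//   LLVMInitializeX86MCAsmInfo();
+//   std::string Err;
+//   const Target *T = TargetRegistry::lookupTarget("x86_64-linux-gnu", Err);
+//   const MCAsmInfo *MAI = T->createAsmInfo("x86_64-linux-gnu");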
diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
new file mode 100644
index 0000000..89ea22b
--- /dev/null
+++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -0,0 +1,60 @@
+//===-- X86MCTargetDesc.h - X86 Target Descriptions -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides X86 specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef X86MCTARGETDESC_H
+#define X86MCTARGETDESC_H
+
+#include <string>
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheX86_32Target, TheX86_64Target;
+
+namespace X86_MC {
+ std::string ParseX86Triple(StringRef TT);
+
+ /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in
+ /// the specified arguments. If we can't run cpuid on the host, return true.
+ bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX);
+
+ void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model);
+
+ /// createX86MCSubtargetInfo - Create an X86 MCSubtargetInfo instance.
+ /// This is exposed so Asm parser, etc. do not need to go through
+ /// TargetRegistry.
+ MCSubtargetInfo *createX86MCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS);
+}
+
+} // End llvm namespace
+
+
+// Defines symbolic names for X86 registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "X86GenRegisterInfo.inc"
+
+// Defines symbolic names for the X86 instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "X86GenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "X86GenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/X86/Makefile b/lib/Target/X86/Makefile
index 12fb090..949661e 100644
--- a/lib/Target/X86/Makefile
+++ b/lib/Target/X86/Makefile
@@ -12,14 +12,13 @@ LIBRARYNAME = LLVMX86CodeGen
TARGET = X86
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = X86GenRegisterInfo.h.inc X86GenRegisterNames.inc \
- X86GenRegisterInfo.inc X86GenInstrNames.inc \
- X86GenInstrInfo.inc X86GenAsmWriter.inc X86GenAsmMatcher.inc \
+BUILT_SOURCES = X86GenRegisterInfo.inc X86GenInstrInfo.inc \
+ X86GenAsmWriter.inc X86GenAsmMatcher.inc \
X86GenAsmWriter1.inc X86GenDAGISel.inc \
X86GenDisassemblerTables.inc X86GenFastISel.inc \
- X86GenCallingConv.inc X86GenSubtarget.inc \
+ X86GenCallingConv.inc X86GenSubtargetInfo.inc \
X86GenEDInfo.inc
-DIRS = InstPrinter AsmParser Disassembler TargetInfo Utils
+DIRS = InstPrinter AsmParser Disassembler TargetInfo MCTargetDesc Utils
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 0ca4366..ec52dfb 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -15,6 +15,7 @@
#ifndef TARGET_X86_H
#define TARGET_X86_H
+#include "MCTargetDesc/X86MCTargetDesc.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Target/TargetMachine.h"
@@ -22,10 +23,12 @@ namespace llvm {
class FunctionPass;
class JITCodeEmitter;
+class MachineCodeEmitter;
class MCCodeEmitter;
class MCContext;
+class MCInstrInfo;
class MCObjectWriter;
-class MachineCodeEmitter;
+class MCSubtargetInfo;
class Target;
class TargetAsmBackend;
class X86TargetMachine;
@@ -57,10 +60,9 @@ FunctionPass *createSSEDomainFixPass();
FunctionPass *createX86JITCodeEmitterPass(X86TargetMachine &TM,
JITCodeEmitter &JCE);
-MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM,
- MCContext &Ctx);
-MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM,
- MCContext &Ctx);
+MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx);
TargetAsmBackend *createX86_32AsmBackend(const Target &, const std::string &);
TargetAsmBackend *createX86_64AsmBackend(const Target &, const std::string &);
@@ -84,17 +86,6 @@ MCObjectWriter *createX86MachObjectWriter(raw_ostream &OS,
uint32_t CPUType,
uint32_t CPUSubtype);
-extern Target TheX86_32Target, TheX86_64Target;
-
} // End llvm namespace
-// Defines symbolic names for X86 registers. This defines a mapping from
-// register name to register number.
-//
-#include "X86GenRegisterNames.inc"
-
-// Defines symbolic names for the X86 instructions.
-//
-#include "X86GenInstrNames.inc"
-
#endif
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 7bb9676..4ccb43f 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -17,6 +17,13 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
+// X86 Subtarget state.
+//
+
+def Mode64Bit : SubtargetFeature<"64bit-mode", "In64BitMode", "true",
+ "64-bit mode (x86_64)">;
+
+//===----------------------------------------------------------------------===//
// X86 Subtarget features.
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
index 4d7d96d..9b556a5 100644
--- a/lib/Target/X86/X86AsmBackend.cpp
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -194,6 +194,9 @@ static unsigned getRelaxedOpcodeArith(unsigned Op) {
// PUSH
case X86::PUSHi8: return X86::PUSHi32;
+ case X86::PUSHi16: return X86::PUSHi32;
+ case X86::PUSH64i8: return X86::PUSH64i32;
+ case X86::PUSH64i16: return X86::PUSH64i32;
}
}
diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp
index c2d53c4..99b4479 100644
--- a/lib/Target/X86/X86AsmPrinter.cpp
+++ b/lib/Target/X86/X86AsmPrinter.cpp
@@ -709,13 +709,12 @@ void X86AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
//===----------------------------------------------------------------------===//
static MCInstPrinter *createX86MCInstPrinter(const Target &T,
- TargetMachine &TM,
unsigned SyntaxVariant,
const MCAsmInfo &MAI) {
if (SyntaxVariant == 0)
- return new X86ATTInstPrinter(TM, MAI);
+ return new X86ATTInstPrinter(MAI);
if (SyntaxVariant == 1)
- return new X86IntelInstPrinter(TM, MAI);
+ return new X86IntelInstPrinter(MAI);
return 0;
}
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 5635175..77b9905 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -44,11 +44,11 @@ def RetCC_X86Common : CallingConv<[
// can only be used by ABI non-compliant code. This vector type is only
// supported while using the AVX target feature.
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCIfSubtarget<"hasAVX()", CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>>,
+ CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
// MMX vector types are always returned in MM0. If the target doesn't have
// MM0, it doesn't support these vector types.
- CCIfType<[x86mmx, v1i64], CCAssignToReg<[MM0]>>,
+ CCIfType<[x86mmx], CCAssignToReg<[MM0]>>,
// Long double types are always returned in ST0 (even with SSE).
CCIfType<[f80], CCAssignToReg<[ST0, ST1]>>
@@ -91,10 +91,7 @@ def RetCC_X86_64_C : CallingConv<[
CCIfType<[f32], CCAssignToReg<[XMM0, XMM1]>>,
CCIfType<[f64], CCAssignToReg<[XMM0, XMM1]>>,
- // MMX vector types are always returned in XMM0 except for v1i64 which is
- // returned in RAX. This disagrees with ABI documentation but is bug
- // compatible with gcc.
- CCIfType<[v1i64], CCAssignToReg<[RAX]>>,
+ // MMX vector types are always returned in XMM0.
CCIfType<[x86mmx], CCAssignToReg<[XMM0, XMM1]>>,
CCDelegateTo<RetCC_X86Common>
]>;
@@ -102,11 +99,7 @@ def RetCC_X86_64_C : CallingConv<[
// X86-Win64 C return-value convention.
def RetCC_X86_Win64_C : CallingConv<[
// The X86-Win64 calling convention always returns __m64 values in RAX.
- CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
-
- // And FP in XMM0 only.
- CCIfType<[f32], CCAssignToReg<[XMM0]>>,
- CCIfType<[f64], CCAssignToReg<[XMM0]>>,
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
// Otherwise, everything is the same as 'normal' X86-64 C CC.
CCDelegateTo<RetCC_X86_64_C>
@@ -150,17 +143,11 @@ def CC_X86_64_C : CallingConv<[
// The 'nest' parameter, if any, is passed in R10.
CCIfNest<CCAssignToReg<[R10]>>,
- // The first 6 v1i64 vector arguments are passed in GPRs on Darwin.
- CCIfType<[v1i64],
- CCIfSubtarget<"isTargetDarwin()",
- CCBitConvertToType<i64>>>,
-
// The first 6 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToReg<[EDI, ESI, EDX, ECX, R8D, R9D]>>,
CCIfType<[i64], CCAssignToReg<[RDI, RSI, RDX, RCX, R8 , R9 ]>>,
- // The first 8 MMX (except for v1i64) vector arguments are passed in XMM
- // registers on Darwin.
+ // The first 8 MMX vector arguments are passed in XMM registers on Darwin.
CCIfType<[x86mmx],
CCIfSubtarget<"isTargetDarwin()",
CCIfSubtarget<"hasXMMInt()",
@@ -189,10 +176,7 @@ def CC_X86_64_C : CallingConv<[
// 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCAssignToStack<32, 32>>,
-
- // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
+ CCAssignToStack<32, 32>>
]>;
// Calling convention used on Win64
@@ -210,7 +194,7 @@ def CC_X86_Win64_C : CallingConv<[
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
// The first 4 MMX vector arguments are passed in GPRs.
- CCIfType<[x86mmx, v1i64], CCBitConvertToType<i64>>,
+ CCIfType<[x86mmx], CCBitConvertToType<i64>>,
// The first 4 integer arguments are passed in integer registers.
CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
@@ -236,10 +220,7 @@ def CC_X86_Win64_C : CallingConv<[
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
- CCIfType<[f80], CCAssignToStack<0, 0>>,
-
- // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
- CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 8>>
+ CCIfType<[f80], CCAssignToStack<0, 0>>
]>;
def CC_X86_64_GHC : CallingConv<[
@@ -273,8 +254,8 @@ def CC_X86_32_Common : CallingConv<[
CCIfSubtarget<"hasXMMInt()",
CCAssignToReg<[XMM0,XMM1,XMM2]>>>>>,
- // The first 3 __m64 (except for v1i64) vector arguments are passed in mmx
- // registers if the call is not a vararg call.
+ // The first 3 __m64 vector arguments are passed in mmx registers if the
+ // call is not a vararg call.
CCIfNotVarArg<CCIfType<[x86mmx],
CCAssignToReg<[MM0, MM1, MM2]>>>,
@@ -306,7 +287,7 @@ def CC_X86_32_Common : CallingConv<[
// __m64 vectors get 8-byte stack slots that are 4-byte aligned. They are
// passed in the parameter area.
- CCIfType<[x86mmx,v1i64], CCAssignToStack<8, 4>>]>;
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>]>;
def CC_X86_32_C : CallingConv<[
// Promote i8/i16 arguments to i32.
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index 421e221..4b11db7 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -68,7 +68,7 @@ namespace {
return "X86 Machine Code Emitter";
}
- void emitInstruction(MachineInstr &MI, const TargetInstrDesc *Desc);
+ void emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc);
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -132,7 +132,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
MCE.StartMachineBasicBlock(MBB);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
- const TargetInstrDesc &Desc = I->getDesc();
+ const MCInstrDesc &Desc = I->getDesc();
emitInstruction(*I, &Desc);
// MOVPC32r is basically a call plus a pop instruction.
if (Desc.getOpcode() == X86::MOVPC32r)
@@ -150,7 +150,7 @@ bool Emitter<CodeEmitter>::runOnMachineFunction(MachineFunction &MF) {
/// size, and 3) use of X86-64 extended registers.
static unsigned determineREX(const MachineInstr &MI) {
unsigned REX = 0;
- const TargetInstrDesc &Desc = MI.getDesc();
+ const MCInstrDesc &Desc = MI.getDesc();
// Pseudo instructions do not need REX prefix byte.
if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo)
@@ -161,7 +161,7 @@ static unsigned determineREX(const MachineInstr &MI) {
unsigned NumOps = Desc.getNumOperands();
if (NumOps) {
bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
// If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
unsigned i = isTwoAddr ? 1 : 0;
@@ -598,7 +598,7 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
- const TargetInstrDesc *Desc) {
+ const MCInstrDesc *Desc) {
DEBUG(dbgs() << MI);
// If this is a pseudo instruction, lower it.
@@ -708,9 +708,9 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI,
// If this is a two-address instruction, skip one of the register operands.
unsigned NumOps = Desc->getNumOperands();
unsigned CurOp = 0;
- if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
+ if (NumOps > 1 && Desc->getOperandConstraint(1, MCOI::TIED_TO) != -1)
++CurOp;
- else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+ else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1,MCOI::TIED_TO)== 0)
// Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
--NumOps;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index f1b9972..21e163a 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -15,6 +15,7 @@
#include "X86.h"
#include "X86InstrBuilder.h"
+#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
@@ -1392,7 +1393,7 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
assert(DI->getAddress() && "Null address should be checked earlier!");
if (!X86SelectAddress(DI->getAddress(), AM))
return false;
- const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
// FIXME may need to add RegState::Debug to any registers produced,
// although ESP/EBP should be the only ones at the moment.
addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
@@ -1493,7 +1494,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
return false;
// Fast-isel doesn't know about callee-pop yet.
- if (Subtarget->IsCalleePop(isVarArg, CC))
+ if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
+ GuaranteedTailCallOpt))
return false;
// Check whether the function can return without sret-demotion.
@@ -1628,7 +1630,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
unsigned NumBytes = CCInfo.getNextStackOffset();
// Issue CALLSEQ_START
- unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
+ unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
.addImm(NumBytes);
@@ -1801,7 +1803,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
MIB.addReg(RegArgs[i]);
// Issue CALLSEQ_END
- unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
+ unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
unsigned NumBytesCallee = 0;
if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
NumBytesCallee = 4;
@@ -1846,16 +1848,19 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
// stack, but where we prefer to use the value in xmm registers, copy it
// out as F80 and use a truncate to move it from fp stack reg to xmm reg.
if ((RVLocs[i].getLocReg() == X86::ST0 ||
- RVLocs[i].getLocReg() == X86::ST1) &&
- isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
- CopyVT = MVT::f80;
- CopyReg = createResultReg(X86::RFP80RegisterClass);
+ RVLocs[i].getLocReg() == X86::ST1)) {
+ if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
+ CopyVT = MVT::f80;
+ CopyReg = createResultReg(X86::RFP80RegisterClass);
+ }
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
+ CopyReg);
+ } else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
+ CopyReg).addReg(RVLocs[i].getLocReg());
+ UsedRegs.push_back(RVLocs[i].getLocReg());
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
- CopyReg).addReg(RVLocs[i].getLocReg());
- UsedRegs.push_back(RVLocs[i].getLocReg());
-
if (CopyVT != RVLocs[i].getValVT()) {
// Round the F80 the right size, which also moves to the appropriate xmm
// register. This is accomplished by storing the F80 value in memory and
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 325d061..6eed6abd 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -37,6 +37,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/InlineAsm.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -126,10 +127,45 @@ namespace {
void bundleCFG(MachineFunction &MF);
MachineBasicBlock *MBB; // Current basic block
+
+ // The hardware keeps track of how many FP registers are live, so we have
+ // to model that exactly. Usually, each live register corresponds to an
+ // FP<n> register, but when dealing with calls, returns, and inline
+ // assembly, it is sometimes necessary to have live scratch registers.
unsigned Stack[8]; // FP<n> Registers in each stack slot...
- unsigned RegMap[8]; // Track which stack slot contains each register
unsigned StackTop; // The current top of the FP stack.
+ enum {
+ NumFPRegs = 16 // Including scratch pseudo-registers.
+ };
+
+ // For each live FP<n> register, point to its Stack[] entry.
+ // The first entries correspond to FP0-FP6, the rest are scratch registers
+ // used when we need slightly different live registers than what the
+ // register allocator thinks.
+ unsigned RegMap[NumFPRegs];
+
+ // Pending fixed registers - Inline assembly needs FP registers to appear
+ // in fixed stack slot positions. This is handled by copying FP registers
+ // to ST registers before the instruction, and copying back after the
+ // instruction.
+ //
+ // This is modeled with pending ST registers. NumPendingSTs is the number
+ // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
+ // register that holds the ST value. The ST registers are not moved into
+ // place until immediately before the instruction that needs them.
+ //
+ // It can happen that we need an ST register to be live when no FP register
+ // holds the value:
+ //
+ // %ST0 = COPY %FP4<kill>
+ //
+ // When that happens, we allocate a scratch FP register to hold the ST
+ // value. That means every register in PendingST must be live.
+
+ unsigned NumPendingSTs;
+ unsigned char PendingST[8];
+
// Set up our stack model to match the incoming registers to MBB.
void setupBlockStack();
@@ -142,13 +178,15 @@ namespace {
dbgs() << " FP" << Stack[i];
assert(RegMap[Stack[i]] == i && "Stack[] doesn't match RegMap[]!");
}
+ for (unsigned i = 0; i != NumPendingSTs; ++i)
+ dbgs() << ", ST" << i << " in FP" << unsigned(PendingST[i]);
dbgs() << "\n";
}
/// getSlot - Return the stack slot number a particular register number is
/// in.
unsigned getSlot(unsigned RegNo) const {
- assert(RegNo < 8 && "Regno out of range!");
+ assert(RegNo < NumFPRegs && "Regno out of range!");
return RegMap[RegNo];
}
@@ -160,12 +198,17 @@ namespace {
/// getScratchReg - Return an FP register that is not currently in use.
unsigned getScratchReg() {
- for (int i = 7; i >= 0; --i)
+ for (int i = NumFPRegs - 1; i >= 8; --i)
if (!isLive(i))
return i;
llvm_unreachable("Ran out of scratch FP registers");
}
+ /// isScratchReg - Returns true if RegNo is a scratch FP register.
+ bool isScratchReg(unsigned RegNo) {
+ return RegNo >= 8 && RegNo < NumFPRegs;
+ }
+
/// getStackEntry - Return the X86::FP<n> register in register ST(i).
unsigned getStackEntry(unsigned STi) const {
if (STi >= StackTop)
@@ -181,7 +224,7 @@ namespace {
// pushReg - Push the specified FP<n> register onto the stack.
void pushReg(unsigned Reg) {
- assert(Reg < 8 && "Register number out of range!");
+ assert(Reg < NumFPRegs && "Register number out of range!");
if (StackTop >= 8)
report_fatal_error("Stack overflow!");
Stack[StackTop] = Reg;
@@ -236,7 +279,7 @@ namespace {
/// Adjust the live registers to be the set in Mask.
void adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I);
- /// Shuffle the top FixCount stack entries susch that FP reg FixStack[0] is
+ /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
/// st(0), FP reg FixStack[1] is st(1) etc.
void shuffleStackTop(const unsigned char *FixStack, unsigned FixCount,
MachineBasicBlock::iterator I);
@@ -251,7 +294,14 @@ namespace {
void handleCondMovFP(MachineBasicBlock::iterator &I);
void handleSpecialFP(MachineBasicBlock::iterator &I);
- bool translateCopy(MachineInstr*);
+ // Check if a COPY instruction is using FP registers.
+ bool isFPCopy(MachineInstr *MI) {
+ unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+
+ return X86::RFP80RegClass.contains(DstReg) ||
+ X86::RFP80RegClass.contains(SrcReg);
+ }
};
char FPS::ID = 0;
}
@@ -341,6 +391,7 @@ void FPS::bundleCFG(MachineFunction &MF) {
bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
bool Changed = false;
MBB = &BB;
+ NumPendingSTs = 0;
setupBlockStack();
@@ -352,7 +403,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
if (MI->isInlineAsm())
FPInstClass = X86II::SpecialFP;
- if (MI->isCopy() && translateCopy(MI))
+ if (MI->isCopy() && isFPCopy(MI))
FPInstClass = X86II::SpecialFP;
if (FPInstClass == X86II::NotFP)
@@ -833,7 +884,7 @@ void FPS::adjustLiveRegs(unsigned Mask, MachineBasicBlock::iterator I) {
// Kill registers by popping.
if (Kills && I != MBB->begin()) {
MachineBasicBlock::iterator I2 = llvm::prior(I);
- for (;;) {
+ while (StackTop) {
unsigned KReg = getStackEntry(0);
if (!(Kills & (1 << KReg)))
break;
@@ -881,7 +932,8 @@ void FPS::shuffleStackTop(const unsigned char *FixStack,
continue;
// (Reg st0) (OldReg st0) = (Reg OldReg st0)
moveToTop(Reg, I);
- moveToTop(OldReg, I);
+ if (FixCount > 0)
+ moveToTop(OldReg, I);
}
DEBUG(dumpStack());
}
@@ -1239,141 +1291,307 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
MachineInstr *MI = I;
switch (MI->getOpcode()) {
default: llvm_unreachable("Unknown SpecialFP instruction!");
- case X86::FpGET_ST0_32:// Appears immediately after a call returning FP type!
- case X86::FpGET_ST0_64:// Appears immediately after a call returning FP type!
- case X86::FpGET_ST0_80:// Appears immediately after a call returning FP type!
- assert(StackTop == 0 && "Stack should be empty after a call!");
- pushReg(getFPReg(MI->getOperand(0)));
- break;
- case X86::FpGET_ST1_32:// Appears immediately after a call returning FP type!
- case X86::FpGET_ST1_64:// Appears immediately after a call returning FP type!
- case X86::FpGET_ST1_80:{// Appears immediately after a call returning FP type!
- // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm.
- // The pattern we expect is:
- // CALL
- // FP1 = FpGET_ST0
- // FP4 = FpGET_ST1
- //
- // At this point, we've pushed FP1 on the top of stack, so it should be
- // present if it isn't dead. If it was dead, we already emitted a pop to
- // remove it from the stack and StackTop = 0.
-
- // Push FP4 as top of stack next.
- pushReg(getFPReg(MI->getOperand(0)));
+ case TargetOpcode::COPY: {
+ // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
+ const MachineOperand &MO1 = MI->getOperand(1);
+ const MachineOperand &MO0 = MI->getOperand(0);
+ unsigned DstST = MO0.getReg() - X86::ST0;
+ unsigned SrcST = MO1.getReg() - X86::ST0;
+ bool KillsSrc = MI->killsRegister(MO1.getReg());
+
+ // ST = COPY FP. Set up a pending ST register.
+ if (DstST < 8) {
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ assert(!MO0.isDead() && "Cannot copy to dead ST register");
+
+ // Unallocated STs are marked as the nonexistent FP255.
+ while (NumPendingSTs <= DstST)
+ PendingST[NumPendingSTs++] = NumFPRegs;
+
+ // STi could still be live from a previous inline asm.
+ if (isScratchReg(PendingST[DstST])) {
+ DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST[DstST])
+ << '\n');
+ freeStackSlotBefore(MI, PendingST[DstST]);
+ }
- // If StackTop was 0 before we pushed our operand, then ST(0) must have been
- // dead. In this case, the ST(1) value is the only thing that is live, so
- // it should be on the TOS (after the pop that was emitted) and is. Just
- // continue in this case.
- if (StackTop == 1)
+ // When the source is killed, allocate a scratch FP register.
+ if (KillsSrc) {
+ unsigned Slot = getSlot(SrcFP);
+ unsigned SR = getScratchReg();
+ PendingST[DstST] = SR;
+ Stack[Slot] = SR;
+ RegMap[SR] = Slot;
+ } else
+ PendingST[DstST] = SrcFP;
break;
-
- // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
- // elements so that our accounting is correct.
- unsigned RegOnTop = getStackEntry(0);
- unsigned RegNo = getStackEntry(1);
-
- // Swap the slots the regs are in.
- std::swap(RegMap[RegNo], RegMap[RegOnTop]);
-
- // Swap stack slot contents.
- if (RegMap[RegOnTop] >= StackTop)
- report_fatal_error("Access past stack top!");
- std::swap(Stack[RegMap[RegOnTop]], Stack[StackTop-1]);
- break;
- }
- case X86::FpSET_ST0_32:
- case X86::FpSET_ST0_64:
- case X86::FpSET_ST0_80: {
- // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
- // arguments that use an st constraint. We expect a sequence of
- // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
- unsigned Op0 = getFPReg(MI->getOperand(0));
-
- if (!MI->killsRegister(X86::FP0 + Op0)) {
- // Duplicate Op0 into a temporary on the stack top.
- duplicateToTop(Op0, getScratchReg(), I);
- } else {
- // Op0 is killed, so just swap it into position.
- moveToTop(Op0, I);
}
- --StackTop; // "Forget" we have something on the top of stack!
- break;
- }
- case X86::FpSET_ST1_32:
- case X86::FpSET_ST1_64:
- case X86::FpSET_ST1_80: {
- // Set up st(1) for inline asm. We are assuming that st(0) has already been
- // set up by FpSET_ST0, and our StackTop is off by one because of it.
- unsigned Op0 = getFPReg(MI->getOperand(0));
- // Restore the actual StackTop from before Fp_SET_ST0.
- // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
- // are not enforcing the constraint.
- ++StackTop;
- unsigned RegOnTop = getStackEntry(0); // This reg must remain in st(0).
- if (!MI->killsRegister(X86::FP0 + Op0)) {
- duplicateToTop(Op0, getScratchReg(), I);
- moveToTop(RegOnTop, I);
- } else if (getSTReg(Op0) != X86::ST1) {
- // We have the wrong value at st(1). Shuffle! Untested!
- moveToTop(getStackEntry(1), I);
- moveToTop(Op0, I);
- moveToTop(RegOnTop, I);
+
+ // FP = COPY ST. Extract fixed stack value.
+ // Any instruction defining ST registers must have assigned them to a
+ // scratch register.
+ if (SrcST < 8) {
+ unsigned DstFP = getFPReg(MO0);
+ assert(!isLive(DstFP) && "Cannot copy ST to live FP register");
+ assert(NumPendingSTs > SrcST && "Cannot copy from dead ST register");
+ unsigned SrcFP = PendingST[SrcST];
+ assert(isScratchReg(SrcFP) && "Expected ST in a scratch register");
+ assert(isLive(SrcFP) && "Scratch holding ST is dead");
+
+ // DstFP steals the stack slot from SrcFP.
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
+
+ // Always treat the ST as killed.
+ PendingST[SrcST] = NumFPRegs;
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
+ break;
}
- assert(StackTop >= 2 && "Too few live registers");
- StackTop -= 2; // "Forget" both st(0) and st(1).
- break;
- }
- case X86::MOV_Fp3232:
- case X86::MOV_Fp3264:
- case X86::MOV_Fp6432:
- case X86::MOV_Fp6464:
- case X86::MOV_Fp3280:
- case X86::MOV_Fp6480:
- case X86::MOV_Fp8032:
- case X86::MOV_Fp8064:
- case X86::MOV_Fp8080: {
- const MachineOperand &MO1 = MI->getOperand(1);
- unsigned SrcReg = getFPReg(MO1);
- const MachineOperand &MO0 = MI->getOperand(0);
- unsigned DestReg = getFPReg(MO0);
- if (MI->killsRegister(X86::FP0+SrcReg)) {
+ // FP <- FP copy.
+ unsigned DstFP = getFPReg(MO0);
+ unsigned SrcFP = getFPReg(MO1);
+ assert(isLive(SrcFP) && "Cannot copy dead register");
+ if (KillsSrc) {
// If the input operand is killed, we can just change the owner of the
// incoming stack slot into the result.
- unsigned Slot = getSlot(SrcReg);
- assert(Slot < 7 && DestReg < 7 && "FpMOV operands invalid!");
- Stack[Slot] = DestReg;
- RegMap[DestReg] = Slot;
-
+ unsigned Slot = getSlot(SrcFP);
+ Stack[Slot] = DstFP;
+ RegMap[DstFP] = Slot;
} else {
- // For FMOV we just duplicate the specified value to a new stack slot.
+ // For COPY we just duplicate the specified value to a new stack slot.
// This could be made better, but would require substantial changes.
- duplicateToTop(SrcReg, DestReg, I);
+ duplicateToTop(SrcFP, DstFP, I);
}
+ break;
+ }
+
+ case X86::FpPOP_RETVAL: {
+ // The FpPOP_RETVAL instruction is used after calls that return a value on
+ // the floating point stack. We cannot model this with ST defs since CALL
+ // instructions have fixed clobber lists. This instruction is interpreted
+ // to mean that there is one more live register on the stack than we
+ // thought.
+ //
+ // This means that StackTop does not match the hardware stack between a
+ // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
+ // between CALL and FpPOP_RETVAL as long as they don't overflow the
+ // hardware stack.
+ unsigned DstFP = getFPReg(MI->getOperand(0));
+
+ // Move existing stack elements up to reflect reality.
+ assert(StackTop < 8 && "Stack overflowed before FpPOP_RETVAL");
+ if (StackTop) {
+ std::copy_backward(Stack, Stack + StackTop, Stack + StackTop + 1);
+ for (unsigned i = 0; i != NumFPRegs; ++i)
+ ++RegMap[i];
}
+ ++StackTop;
+
+ // DstFP is the new bottom of the stack.
+ Stack[0] = DstFP;
+ RegMap[DstFP] = 0;
+
+ // DstFP will be killed by processBasicBlock if this was a dead def.
break;
+ }
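+
+ // Example: if the stack already holds FP2 (Stack = {FP2}, StackTop = 1),
+ // FpPOP_RETVAL %FP5 shifts FP2 up and slots FP5 in as the new bottom:
+ // Stack = {FP5, FP2}, StackTop = 2, RegMap[FP5] = 0, RegMap[FP2] = 1.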
+
case TargetOpcode::INLINEASM: {
// The inline asm MachineInstr currently only *uses* FP registers for the
// 'f' constraint. These should be turned into the current ST(x) register
- // in the machine instr. Also, any kills should be explicitly popped after
- // the inline asm.
- unsigned Kills = 0;
+ // in the machine instr.
+ //
+ // There are special rules for x87 inline assembly. The compiler must know
+ // exactly how many registers are popped and pushed implicitly by the asm.
+ // Otherwise it is not possible to restore the stack state after the inline
+ // asm.
+ //
+ // There are 3 kinds of input operands:
+ //
+ // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
+ // popped input operand must be in a fixed stack slot, and it is either
+ // tied to an output operand, or in the clobber list. The MI has ST use
+ // and def operands for these inputs.
+ //
+ // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
+ // preserved by the inline asm. The fixed stack slots must be STn-STm
+ // following the popped inputs. A fixed input operand cannot be tied to
+ // an output or appear in the clobber list. The MI has ST use operands
+ // and no defs for these inputs.
+ //
+ // 3. Preserved inputs. These inputs use the "f" constraint which is
+ // represented as an FP register. The inline asm won't change these
+ // stack slots.
+ //
+ // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
+ // registers do not count as output operands. The inline asm changes the
+ // stack as if it popped all the popped inputs and then pushed all the
+ // output operands.
+
+ // Scan the assembly for ST registers used, defined and clobbered. We can
+ // only tell clobbers from defs by looking at the asm descriptor.
+ unsigned STUses = 0, STDefs = 0, STClobbers = 0, STDeadDefs = 0;
+ unsigned NumOps = 0;
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = MI->getNumOperands();
+ i != e && MI->getOperand(i).isImm(); i += 1 + NumOps) {
+ unsigned Flags = MI->getOperand(i).getImm();
+ NumOps = InlineAsm::getNumOperandRegisters(Flags);
+ if (NumOps != 1)
+ continue;
+ const MachineOperand &MO = MI->getOperand(i + 1);
+ if (!MO.isReg())
+ continue;
+ unsigned STReg = MO.getReg() - X86::ST0;
+ if (STReg >= 8)
+ continue;
+
+ switch (InlineAsm::getKind(Flags)) {
+ case InlineAsm::Kind_RegUse:
+ STUses |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_RegDef:
+ case InlineAsm::Kind_RegDefEarlyClobber:
+ STDefs |= (1u << STReg);
+ if (MO.isDead())
+ STDeadDefs |= (1u << STReg);
+ break;
+ case InlineAsm::Kind_Clobber:
+ STClobbers |= (1u << STReg);
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (STUses && !isMask_32(STUses))
+ MI->emitError("fixed input regs must be last on the x87 stack");
+ unsigned NumSTUses = CountTrailingOnes_32(STUses);
+
+ // Defs must be contiguous from the stack top. ST0-STn.
+ if (STDefs && !isMask_32(STDefs)) {
+ MI->emitError("output regs must be last on the x87 stack");
+ STDefs = NextPowerOf2(STDefs) - 1;
+ }
+ unsigned NumSTDefs = CountTrailingOnes_32(STDefs);
+
+ // So must the clobbered stack slots. ST0-STm, m >= n.
+ if (STClobbers && !isMask_32(STDefs | STClobbers))
+ MI->emitError("clobbers must be last on the x87 stack");
+
+ // Popped inputs are the ones that are also clobbered or defined.
+ unsigned STPopped = STUses & (STDefs | STClobbers);
+ if (STPopped && !isMask_32(STPopped))
+ MI->emitError("implicitly popped regs must be last on the x87 stack");
+ unsigned NumSTPopped = CountTrailingOnes_32(STPopped);
+
+ DEBUG(dbgs() << "Asm uses " << NumSTUses << " fixed regs, pops "
+ << NumSTPopped << ", and defines " << NumSTDefs << " regs.\n");
+
+ // Scan the instruction for FP uses corresponding to "f" constraints.
+ // Collect FP registers to kill after the instruction.
+ // Always kill all the scratch regs.
+ unsigned FPKills = ((1u << NumFPRegs) - 1) & ~0xff;
+ unsigned FPUsed = 0;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &Op = MI->getOperand(i);
if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
continue;
- assert(Op.isUse() && "Only handle inline asm uses right now");
-
+ if (!Op.isUse())
+ MI->emitError("illegal \"f\" output constraint");
unsigned FPReg = getFPReg(Op);
- Op.setReg(getSTReg(FPReg));
-
+ FPUsed |= 1U << FPReg;
+
// If we kill this operand, make sure to pop it from the stack after the
// asm. We just remember it for now, and pop them all off at the end in
// a batch.
if (Op.isKill())
- Kills |= 1U << FPReg;
+ FPKills |= 1U << FPReg;
+ }
+
+ // The popped inputs will be killed by the instruction, so duplicate them
+ // if the FP register needs to be live after the instruction, or if it is
+ // used in the instruction itself. We effectively treat the popped inputs
+ // as early clobbers.
+ for (unsigned i = 0; i < NumSTPopped; ++i) {
+ if ((FPKills & ~FPUsed) & (1u << PendingST[i]))
+ continue;
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i]) << " to avoid clobbering it.\n");
+ PendingST[i] = SR;
+ }
+
+ // Make sure we have a unique live register for every fixed use. Some of
+ // them could be undef uses, and we need to emit LD_F0 instructions.
+ for (unsigned i = 0; i < NumSTUses; ++i) {
+ if (i < NumPendingSTs && PendingST[i] < NumFPRegs) {
+ // Check for shared assignments.
+ for (unsigned j = 0; j < i; ++j) {
+ if (PendingST[j] != PendingST[i])
+ continue;
+ // STi and STj are in the same register; create a copy.
+ unsigned SR = getScratchReg();
+ duplicateToTop(PendingST[i], SR, I);
+ DEBUG(dbgs() << "Duplicating ST" << i << " in FP"
+ << unsigned(PendingST[i])
+ << " to avoid collision with ST" << j << '\n');
+ PendingST[i] = SR;
+ }
+ continue;
+ }
+ unsigned SR = getScratchReg();
+ DEBUG(dbgs() << "Emitting LD_F0 for ST" << i << " in FP" << SR << '\n');
+ BuildMI(*MBB, I, MI->getDebugLoc(), TII->get(X86::LD_F0));
+ pushReg(SR);
+ PendingST[i] = SR;
+ if (NumPendingSTs == i)
+ ++NumPendingSTs;
+ }
+ assert(NumPendingSTs >= NumSTUses && "Fixed registers should be assigned");
+
+ // Now we can rearrange the live registers to match what was requested.
+ shuffleStackTop(PendingST, NumPendingSTs, I);
+ DEBUG({dbgs() << "Before asm: "; dumpStack();});
+
+ // With the stack layout fixed, rewrite the FP registers.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || Op.getReg() < X86::FP0 || Op.getReg() > X86::FP6)
+ continue;
+ unsigned FPReg = getFPReg(Op);
+ Op.setReg(getSTReg(FPReg));
+ }
+
+ // Simulate the inline asm popping its inputs and pushing its outputs.
+ StackTop -= NumSTPopped;
+
+ // Hold the fixed output registers in scratch FP registers. They will be
+ // transferred to real FP registers by copies.
+ NumPendingSTs = 0;
+ for (unsigned i = 0; i < NumSTDefs; ++i) {
+ unsigned SR = getScratchReg();
+ pushReg(SR);
+ FPKills &= ~(1u << SR);
+ }
+ for (unsigned i = 0; i < NumSTDefs; ++i)
+ PendingST[NumPendingSTs++] = getStackEntry(i);
+ DEBUG({dbgs() << "After asm: "; dumpStack();});
+
+ // If any of the ST defs were dead, pop them immediately. Our caller only
+ // handles dead FP defs.
+ MachineBasicBlock::iterator InsertPt = MI;
+ for (unsigned i = 0; STDefs & (1u << i); ++i) {
+ if (!(STDeadDefs & (1u << i)))
+ continue;
+ freeStackSlotAfter(InsertPt, PendingST[i]);
+ PendingST[i] = NumFPRegs;
}
+ while (NumPendingSTs && PendingST[NumPendingSTs - 1] == NumFPRegs)
+ --NumPendingSTs;
// If this asm kills any FP registers (is the last use of them) we must
// explicitly emit pop instructions for them. Do this now after the asm has
@@ -1382,16 +1600,16 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
//
// Note: this might be a non-optimal pop sequence. We might be able to do
// better by trying to pop in stack order or something.
- MachineBasicBlock::iterator InsertPt = MI;
- while (Kills) {
- unsigned FPReg = CountTrailingZeros_32(Kills);
- freeStackSlotAfter(InsertPt, FPReg);
- Kills &= ~(1U << FPReg);
+ while (FPKills) {
+ unsigned FPReg = CountTrailingZeros_32(FPKills);
+ if (isLive(FPReg))
+ freeStackSlotAfter(InsertPt, FPReg);
+ FPKills &= ~(1U << FPReg);
}
// Don't delete the inline asm!
return;
}
-
+
case X86::RET:
case X86::RETI:
// If RET has an FP register use operand, pass the first one in ST(0) and
@@ -1489,33 +1707,3 @@ void FPS::handleSpecialFP(MachineBasicBlock::iterator &I) {
} else
--I;
}
-
-// Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
-bool FPS::translateCopy(MachineInstr *MI) {
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned SrcReg = MI->getOperand(1).getReg();
-
- if (DstReg == X86::ST0) {
- MI->setDesc(TII->get(X86::FpSET_ST0_80));
- MI->RemoveOperand(0);
- return true;
- }
- if (DstReg == X86::ST1) {
- MI->setDesc(TII->get(X86::FpSET_ST1_80));
- MI->RemoveOperand(0);
- return true;
- }
- if (SrcReg == X86::ST0) {
- MI->setDesc(TII->get(X86::FpGET_ST0_80));
- return true;
- }
- if (SrcReg == X86::ST1) {
- MI->setDesc(TII->get(X86::FpGET_ST1_80));
- return true;
- }
- if (X86::RFP80RegClass.contains(DstReg, SrcReg)) {
- MI->setDesc(TII->get(X86::MOV_Fp8080));
- return true;
- }
- return false;
-}
diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp
index 2e95300..ed45a9a 100644
--- a/lib/Target/X86/X86FrameLowering.cpp
+++ b/lib/Target/X86/X86FrameLowering.cpp
@@ -1,4 +1,4 @@
-//=======- X86FrameLowering.cpp - X86 Frame Information ------------*- C++ -*-====//
+//=======- X86FrameLowering.cpp - X86 Frame Information --------*- C++ -*-====//
//
// The LLVM Compiler Infrastructure
//
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
@@ -160,8 +161,10 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
Opc = isSub
? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
: (Is64Bit ? X86::POP64r : X86::POP32r);
- BuildMI(MBB, MBBI, DL, TII.get(Opc))
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
.addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
Offset -= ThisVal;
continue;
}
@@ -171,6 +174,8 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
.addReg(StackPtr)
.addImm(ThisVal);
+ if (isSub)
+ MI->setFlag(MachineInstr::FrameSetup);
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
Offset -= ThisVal;
}
@@ -409,7 +414,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
StackPtr)
.addReg(StackPtr)
- .addImm(-TailCallReturnAddrDelta);
+ .addImm(-TailCallReturnAddrDelta)
+ .setMIFlag(MachineInstr::FrameSetup);
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
@@ -447,7 +453,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Save EBP/RBP into the appropriate stack slot.
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
- .addReg(FramePtr, RegState::Kill);
+ .addReg(FramePtr, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
if (needsFrameMoves) {
// Mark the place where EBP/RBP was saved.
@@ -474,7 +481,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
// Update EBP with the new base value...
BuildMI(MBB, MBBI, DL,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
- .addReg(StackPtr);
+ .addReg(StackPtr)
+ .setMIFlag(MachineInstr::FrameSetup);
if (needsFrameMoves) {
// Mark effective beginning of when frame pointer becomes valid.
@@ -642,7 +650,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
- MachineBasicBlock &MBB) const {
+ MachineBasicBlock &MBB) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
@@ -919,7 +927,8 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
// X86RegisterInfo::emitPrologue will handle spilling of frame register.
continue;
CalleeFrameSize += SlotSize;
- BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill);
+ BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
}
X86FI->setCalleeSavedFrameSize(CalleeFrameSize);
@@ -1021,3 +1030,181 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
FrameIdx = 0;
}
}
+
+/// permuteEncode - Create the permutation encoding used with frameless
+/// stacks. It is passed the number of registers to be saved and an array of the
+/// registers saved.
+static uint32_t permuteEncode(unsigned SavedCount, unsigned Registers[6]) {
+ // The saved registers are numbered from 1 to 6. In order to encode the order
+ // in which they were saved, we re-number them according to their place in the
+ // register order. Each re-numbered value is the count of lower-numbered
+ // registers still unsaved at that point (a zero-based Lehmer-code digit,
+ // which is what the loop below computes). E.g., if we have registers
+ // {6, 2, 4, 5} saved in that order:
+ //
+ // Orig Re-Num
+ // ---- ------
+ // 6 5
+ // 2 1
+ // 4 2
+ // 5 2
+ //
+ bool Used[7] = { false, false, false, false, false, false, false };
+ uint32_t RenumRegs[6];
+ for (unsigned I = 0; I < SavedCount; ++I) {
+ uint32_t Renum = 0;
+ for (unsigned U = 1; U < 7; ++U) {
+ if (U == Registers[I])
+ break;
+ if (!Used[U])
+ ++Renum;
+ }
+
+ Used[Registers[I]] = true;
+ RenumRegs[I] = Renum;
+ }
+
+ // Take the renumbered values and encode them into a 10-bit number.
+ uint32_t permutationEncoding = 0;
+ switch (SavedCount) {
+ case 6:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 5:
+ permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
+ + 6 * RenumRegs[2] + 2 * RenumRegs[3]
+ + RenumRegs[4];
+ break;
+ case 4:
+ permutationEncoding |= 60 * RenumRegs[0] + 12 * RenumRegs[1]
+ + 3 * RenumRegs[2] + RenumRegs[3];
+ break;
+ case 3:
+ permutationEncoding |= 20 * RenumRegs[0] + 4 * RenumRegs[1]
+ + RenumRegs[2];
+ break;
+ case 2:
+ permutationEncoding |= 5 * RenumRegs[0] + RenumRegs[1];
+ break;
+ case 1:
+ permutationEncoding |= RenumRegs[0];
+ break;
+ }
+
+ return permutationEncoding;
+}
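+
+// Worked example (using the zero-based digits computed above): saving
+// {6, 2, 4, 5} renumbers to {5, 1, 2, 2}, so for SavedCount == 4 the
+// encoding is 60*5 + 12*1 + 3*2 + 2 = 320. With six saved registers the
+// sixth digit is always 0, which is why case 6 and case 5 share a formula.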
+
+uint32_t X86FrameLowering::
+getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor, bool IsEH) const {
+ uint32_t Encoding = 0;
+ int CFAOffset = 0;
+ const TargetRegisterInfo *TRI = TM.getRegisterInfo();
+ unsigned SavedRegs[6] = { 0, 0, 0, 0, 0, 0 };
+ unsigned SavedRegIdx = 0;
+ int FramePointerReg = -1;
+
+ for (ArrayRef<MCCFIInstruction>::const_iterator
+ I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
+ const MCCFIInstruction &Inst = *I;
+ MCSymbol *Label = Inst.getLabel();
+
+ // Ignore invalid labels.
+ if (Label && !Label->isDefined()) continue;
+
+ unsigned Operation = Inst.getOperation();
+ if (Operation != MCCFIInstruction::Move &&
+ Operation != MCCFIInstruction::RelMove)
+ // FIXME: We can't handle this frame just yet.
+ return 0;
+
+ const MachineLocation &Dst = Inst.getDestination();
+ const MachineLocation &Src = Inst.getSource();
+ const bool IsRelative = (Operation == MCCFIInstruction::RelMove);
+
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ if (Src.getReg() != MachineLocation::VirtualFP) {
+ // DW_CFA_def_cfa
+ assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+ if (TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::EBP &&
+ TRI->getLLVMRegNum(Src.getReg(), IsEH) != X86::RBP)
+ // The frame pointer isn't EBP/RBP. Cannot make unwind information
+ // compact.
+ return 0;
+ FramePointerReg = TRI->getCompactUnwindRegNum(Src.getReg(), IsEH);
+ } // else DW_CFA_def_cfa_offset
+
+ if (IsRelative)
+ CFAOffset += Src.getOffset();
+ else
+ CFAOffset -= Src.getOffset();
+
+ continue;
+ }
+
+ if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ // DW_CFA_def_cfa_register
+ assert(FramePointerReg == -1 && "Defining more than one frame pointer?");
+
+ if (TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::EBP &&
+ TRI->getLLVMRegNum(Dst.getReg(), IsEH) != X86::RBP)
+ // The frame pointer isn't EBP/RBP. Cannot make unwind information
+ // compact.
+ return 0;
+
+ FramePointerReg = TRI->getCompactUnwindRegNum(Dst.getReg(), IsEH);
+ if (SavedRegIdx != 1 || SavedRegs[0] != unsigned(FramePointerReg))
+ return 0;
+
+ SavedRegs[0] = 0;
+ SavedRegIdx = 0;
+ continue;
+ }
+
+ unsigned Reg = Src.getReg();
+ int Offset = Dst.getOffset();
+ if (IsRelative)
+ Offset -= CFAOffset;
+ Offset /= DataAlignmentFactor;
+
+ if (Offset < 0) {
+ // FIXME: Handle?
+ // DW_CFA_offset_extended_sf
+ return 0;
+ } else if (Reg < 64) {
+ // DW_CFA_offset + Reg
+ if (SavedRegIdx >= 6) return 0;
+ int CURegNum = TRI->getCompactUnwindRegNum(Reg, IsEH);
+ if (CURegNum == -1) return 0;
+ SavedRegs[SavedRegIdx++] = CURegNum;
+ } else {
+ // FIXME: Handle?
+ // DW_CFA_offset_extended
+ return 0;
+ }
+ }
+
+ // Bail if there are too many registers to encode.
+ if (SavedRegIdx > 6) return 0;
+
+ // Check if the offset is too big.
+ CFAOffset /= 4;
+ if ((CFAOffset & 0xFF) != CFAOffset)
+ return 0;
+ Encoding |= (CFAOffset & 0xFF) << 16; // Size encoding.
+
+ if (FramePointerReg != -1) {
+ Encoding |= 0x01000000; // EBP/RBP Unwind Frame
+ for (unsigned I = 0; I != SavedRegIdx; ++I) {
+ unsigned Reg = SavedRegs[I];
+ if (Reg == unsigned(FramePointerReg)) continue;
+ Encoding |= (Reg & 0x7) << (I * 3); // Register encoding
+ }
+ } else {
+ Encoding |= 0x02000000; // Frameless unwind with small stack
+ Encoding |= (SavedRegIdx & 0x7) << 10;
+ Encoding |= permuteEncode(SavedRegIdx, SavedRegs);
+ }
+
+ return Encoding;
+}
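+
+// A summary of the 32-bit layout assembled above, derived from the shifts in
+// this function (the authoritative definition is Darwin's compact unwind
+// format):
+//
+//   bits 24-31: frame kind (0x01 = EBP/RBP frame, 0x02 = frameless)
+//   bits 16-23: CFA offset divided by 4 (we bail out if it needs more bits)
+//   EBP frame:  saved register I occupies bits [3*I+2 : 3*I]
+//   frameless:  bits 10-12 hold the saved register count and bits 0-9 the
+//               permuteEncode() permutation number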
diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h
index d71108c..14c31ed 100644
--- a/lib/Target/X86/X86FrameLowering.h
+++ b/lib/Target/X86/X86FrameLowering.h
@@ -15,6 +15,7 @@
#define X86_FRAMELOWERING_H
#include "X86Subtarget.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
@@ -58,6 +59,9 @@ public:
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
int getFrameIndexOffset(const MachineFunction &MF, int FI) const;
+
+ uint32_t getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
+ int DataAlignmentFactor, bool IsEH) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 1fcc274..2b0f283 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -23,6 +23,7 @@
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -191,6 +192,7 @@ namespace {
SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);
SDNode *SelectAtomicLoadArith(SDNode *Node, EVT NVT);
+ bool FoldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM);
bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
@@ -546,6 +548,34 @@ void X86DAGToDAGISel::EmitFunctionEntryCode() {
EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}
+static bool isDispSafeForFrameIndex(int64_t Val) {
+ // On 64-bit platforms, we can run into an issue where a frame index
+ // includes a displacement that, when added to the explicit displacement,
+ // will overflow the displacement field. Assuming that the frame index
+ // displacement fits into a 31-bit integer (which is only slightly more
+ // aggressive than the current fundamental assumption that it fits into
+ // a 32-bit integer), a 31-bit disp should always be safe.
+ return isInt<31>(Val);
+}
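+
+// Boundary illustration: isInt<31> accepts [-2^30, 2^30-1], so
+//   isDispSafeForFrameIndex(0x3fffffff) -> true
+//   isDispSafeForFrameIndex(0x40000000) -> false, since a frame-index offset
+//   added later during frame lowering could push the combined displacement
+//   past the signed 32-bit field.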
+
+bool X86DAGToDAGISel::FoldOffsetIntoAddress(uint64_t Offset,
+ X86ISelAddressMode &AM) {
+ int64_t Val = AM.Disp + Offset;
+ CodeModel::Model M = TM.getCodeModel();
+ if (Subtarget->is64Bit()) {
+ if (!X86::isOffsetSuitableForCodeModel(Val, M,
+ AM.hasSymbolicDisplacement()))
+ return true;
+ // In addition to the checks required for a register base, check that
+ // we do not try to use an unsafe Disp with a frame index.
+ if (AM.BaseType == X86ISelAddressMode::FrameIndexBase &&
+ !isDispSafeForFrameIndex(Val))
+ return true;
+ }
+ AM.Disp = Val;
+ return false;
+
+}
bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
SDValue Address = N->getOperand(1);
@@ -595,18 +625,22 @@ bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
// must allow RIP.
!AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
- int64_t Offset = AM.Disp + G->getOffset();
- if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
+ X86ISelAddressMode Backup = AM;
AM.GV = G->getGlobal();
- AM.Disp = Offset;
AM.SymbolFlags = G->getTargetFlags();
+ if (FoldOffsetIntoAddress(G->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
} else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
- int64_t Offset = AM.Disp + CP->getOffset();
- if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
+ X86ISelAddressMode Backup = AM;
AM.CP = CP->getConstVal();
AM.Align = CP->getAlignment();
- AM.Disp = Offset;
AM.SymbolFlags = CP->getTargetFlags();
+ if (FoldOffsetIntoAddress(CP->getOffset(), AM)) {
+ AM = Backup;
+ return true;
+ }
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
AM.ES = S->getSymbol();
AM.SymbolFlags = S->getTargetFlags();
@@ -688,7 +722,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
- bool is64Bit = Subtarget->is64Bit();
DebugLoc dl = N.getDebugLoc();
DEBUG({
dbgs() << "MatchAddress: ";
@@ -698,8 +731,6 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (Depth > 5)
return MatchAddressBase(N, AM);
- CodeModel::Model M = TM.getCodeModel();
-
// If this is already a %rip relative address, we can only merge immediates
// into it. Instead of handling this in every case, we handle it here.
// RIP relative addressing: %rip + 32-bit displacement!
@@ -709,14 +740,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
// consistency.
if (!AM.ES && AM.JT != -1) return true;
- if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
- int64_t Val = AM.Disp + Cst->getSExtValue();
- if (X86::isOffsetSuitableForCodeModel(Val, M,
- AM.hasSymbolicDisplacement())) {
- AM.Disp = Val;
+ if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N))
+ if (!FoldOffsetIntoAddress(Cst->getSExtValue(), AM))
return false;
- }
- }
return true;
}
@@ -724,12 +750,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
default: break;
case ISD::Constant: {
uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- if (!is64Bit ||
- X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
- AM.hasSymbolicDisplacement())) {
- AM.Disp += Val;
+ if (!FoldOffsetIntoAddress(Val, AM))
return false;
- }
break;
}
@@ -745,8 +767,9 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
break;
case ISD::FrameIndex:
- if (AM.BaseType == X86ISelAddressMode::RegBase
- && AM.Base_Reg.getNode() == 0) {
+ if (AM.BaseType == X86ISelAddressMode::RegBase &&
+ AM.Base_Reg.getNode() == 0 &&
+ (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) {
AM.BaseType = X86ISelAddressMode::FrameIndexBase;
AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
return false;
@@ -775,16 +798,12 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
AM.IndexReg = ShVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
- uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
- if (!is64Bit ||
- X86::isOffsetSuitableForCodeModel(Disp, M,
- AM.hasSymbolicDisplacement()))
- AM.Disp = Disp;
- else
- AM.IndexReg = ShVal;
- } else {
- AM.IndexReg = ShVal;
+ uint64_t Disp = AddVal->getSExtValue() << Val;
+ if (!FoldOffsetIntoAddress(Disp, AM))
+ return false;
}
+
+ AM.IndexReg = ShVal;
return false;
}
break;
@@ -818,13 +837,8 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
Reg = MulVal.getNode()->getOperand(0);
ConstantSDNode *AddVal =
cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
- uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
- CN->getZExtValue();
- if (!is64Bit ||
- X86::isOffsetSuitableForCodeModel(Disp, M,
- AM.hasSymbolicDisplacement()))
- AM.Disp = Disp;
- else
+ uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue();
+ if (FoldOffsetIntoAddress(Disp, AM))
Reg = N.getNode()->getOperand(0);
} else {
Reg = N.getNode()->getOperand(0);
@@ -949,19 +963,11 @@ bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
if (CurDAG->isBaseWithConstantOffset(N)) {
X86ISelAddressMode Backup = AM;
ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
- uint64_t Offset = CN->getSExtValue();
// Start with the LHS as an addr mode.
if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
- // Address could not have picked a GV address for the displacement.
- AM.GV == NULL &&
- // On x86-64, the resultant disp must fit in 32-bits.
- (!is64Bit ||
- X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
- AM.hasSymbolicDisplacement()))) {
- AM.Disp += Offset;
+ !FoldOffsetIntoAddress(CN->getSExtValue(), AM))
return false;
- }
AM = Backup;
}
break;
@@ -1351,7 +1357,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
bool isInc = false, isDec = false, isSub = false, isCN = false;
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
- if (CN) {
+ if (CN && CN->getSExtValue() == (int32_t)CN->getSExtValue()) {
isCN = true;
int64_t CNVal = CN->getSExtValue();
if (CNVal == 1)
@@ -1371,6 +1377,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Val = Val.getOperand(1);
}
+ DebugLoc dl = Node->getDebugLoc();
unsigned Opc = 0;
switch (NVT.getSimpleVT().SimpleTy) {
default: return 0;
@@ -1462,7 +1469,6 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
break;
}
- DebugLoc dl = Node->getDebugLoc();
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
@@ -1579,7 +1585,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
bool isCN = false;
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
- if (CN) {
+ if (CN && (int32_t)CN->getSExtValue() == CN->getSExtValue()) {
isCN = true;
Val = CurDAG->getTargetConstant(CN->getSExtValue(), NVT);
}
@@ -1612,16 +1618,18 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadArith(SDNode *Node, EVT NVT) {
Opc = AtomicOpcTbl[Op][I32];
break;
case MVT::i64:
+ Opc = AtomicOpcTbl[Op][I64];
if (isCN) {
if (immSext8(Val.getNode()))
Opc = AtomicOpcTbl[Op][SextConstantI64];
else if (i64immSExt32(Val.getNode()))
Opc = AtomicOpcTbl[Op][ConstantI64];
- } else
- Opc = AtomicOpcTbl[Op][I64];
+ }
break;
}
+ assert(Opc != 0 && "Invalid arith lock transform!");
+
DebugLoc dl = Node->getDebugLoc();
SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
dl, NVT), 0);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 294a6a7..5096d9a 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -235,10 +235,16 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
+ setLibcallName(RTLIB::SREM_I64, "_allrem");
+ setLibcallName(RTLIB::UREM_I64, "_aullrem");
+ setLibcallName(RTLIB::MUL_I64, "_allmul");
setLibcallName(RTLIB::FPTOUINT_F64_I64, "_ftol2");
setLibcallName(RTLIB::FPTOUINT_F32_I64, "_ftol2");
setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
+ setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::C);
setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::C);
}
@@ -646,6 +652,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
}
+ // We don't support FMA.
+ setOperationAction(ISD::FMA, MVT::f64, Expand);
+ setOperationAction(ISD::FMA, MVT::f32, Expand);
+
// Long double always uses X87.
if (!UseSoftFloat) {
addRegisterClass(MVT::f80, X86::RFP80RegisterClass);
@@ -670,6 +680,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSIN , MVT::f80 , Expand);
setOperationAction(ISD::FCOS , MVT::f80 , Expand);
}
+
+ setOperationAction(ISD::FMA, MVT::f80, Expand);
}
// Always use a library call for pow.
@@ -976,7 +988,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
addRegisterClass(MVT::v32i8, X86::VR256RegisterClass);
setOperationAction(ISD::LOAD, MVT::v8f32, Legal);
- setOperationAction(ISD::LOAD, MVT::v8i32, Legal);
setOperationAction(ISD::LOAD, MVT::v4f64, Legal);
setOperationAction(ISD::LOAD, MVT::v4i64, Legal);
@@ -994,63 +1005,58 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);
setOperationAction(ISD::FNEG, MVT::v4f64, Custom);
- // Custom lower build_vector, vector_shuffle, scalar_to_vector,
- // insert_vector_elt extract_subvector and extract_vector_elt for
- // 256-bit types.
- for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-256-bit vectors
- if (!isPowerOf2_32(MVT(VT).getVectorNumElements())
- || (MVT(VT).getSizeInBits() < 256))
- continue;
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
- }
- // Custom-lower insert_subvector and extract_subvector based on
- // the result type.
+ // Custom lower several nodes for 256-bit types.
for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- ++i) {
- MVT::SimpleValueType VT = (MVT::SimpleValueType)i;
- // Do not attempt to custom lower non-256-bit vectors
- if (!isPowerOf2_32(MVT(VT).getVectorNumElements()))
+ i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
+
+ // Extract subvector is special because the value type
+ // (result) is 128-bit but the source is 256-bit wide.
+ if (VT.is128BitVector())
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, SVT, Custom);
+
+ // Do not attempt to custom lower other non-256-bit vectors
+ if (!VT.is256BitVector())
continue;
- if (MVT(VT).getSizeInBits() == 128) {
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
- }
- else if (MVT(VT).getSizeInBits() == 256) {
- setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
- }
+ setOperationAction(ISD::BUILD_VECTOR, SVT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, SVT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, SVT, Custom);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, SVT, Custom);
+ setOperationAction(ISD::INSERT_SUBVECTOR, SVT, Custom);
}
// Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
- // Don't promote loads because we need them for VPERM vector index versions.
+ for (unsigned i = (unsigned)MVT::v32i8; i != (unsigned)MVT::v4i64; ++i) {
+ MVT::SimpleValueType SVT = (MVT::SimpleValueType)i;
+ EVT VT = SVT;
- for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
- VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE;
- VT++) {
- if (!isPowerOf2_32(MVT((MVT::SimpleValueType)VT).getVectorNumElements())
- || (MVT((MVT::SimpleValueType)VT).getSizeInBits() < 256))
+ // Do not attempt to promote non-256-bit vectors
+ if (!VT.is256BitVector())
continue;
- setOperationAction(ISD::AND, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::AND, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::OR, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::OR, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::XOR, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::XOR, (MVT::SimpleValueType)VT, MVT::v4i64);
- //setOperationAction(ISD::LOAD, (MVT::SimpleValueType)VT, Promote);
- //AddPromotedToType (ISD::LOAD, (MVT::SimpleValueType)VT, MVT::v4i64);
- setOperationAction(ISD::SELECT, (MVT::SimpleValueType)VT, Promote);
- AddPromotedToType (ISD::SELECT, (MVT::SimpleValueType)VT, MVT::v4i64);
+
+ setOperationAction(ISD::AND, SVT, Promote);
+ AddPromotedToType (ISD::AND, SVT, MVT::v4i64);
+ setOperationAction(ISD::OR, SVT, Promote);
+ AddPromotedToType (ISD::OR, SVT, MVT::v4i64);
+ setOperationAction(ISD::XOR, SVT, Promote);
+ AddPromotedToType (ISD::XOR, SVT, MVT::v4i64);
+ setOperationAction(ISD::LOAD, SVT, Promote);
+ AddPromotedToType (ISD::LOAD, SVT, MVT::v4i64);
+ setOperationAction(ISD::SELECT, SVT, Promote);
+ AddPromotedToType (ISD::SELECT, SVT, MVT::v4i64);
}
}
+ // SIGN_EXTEND_INREGs are evaluated by their extend type. Handle the
+ // expansion of these nodes with custom code.
+ for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; VT++) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT, Custom);
+ }
+
// We want to custom lower some of our intrinsics.
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -1511,20 +1517,15 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
// If this is a call to a function that returns an fp value on the floating
// point stack, we must guarantee that the value is popped from the stack, so
// a CopyFromReg is not good enough - the copy instruction may be eliminated
- // if the return value is not used. We use the FpGET_ST0 instructions
+ // if the return value is not used. We use the FpPOP_RETVAL instruction
// instead.
if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1) {
// If we prefer to use the value in xmm registers, copy it out as f80 and
// use a truncate to move it from fp stack reg to xmm reg.
if (isScalarFPTypeInSSEReg(VA.getValVT())) CopyVT = MVT::f80;
- bool isST0 = VA.getLocReg() == X86::ST0;
- unsigned Opc = 0;
- if (CopyVT == MVT::f32) Opc = isST0 ? X86::FpGET_ST0_32:X86::FpGET_ST1_32;
- if (CopyVT == MVT::f64) Opc = isST0 ? X86::FpGET_ST0_64:X86::FpGET_ST1_64;
- if (CopyVT == MVT::f80) Opc = isST0 ? X86::FpGET_ST0_80:X86::FpGET_ST1_80;
SDValue Ops[] = { Chain, InFlag };
- Chain = SDValue(DAG.getMachineNode(Opc, dl, CopyVT, MVT::Other, MVT::Glue,
- Ops, 2), 1);
+ Chain = SDValue(DAG.getMachineNode(X86::FpPOP_RETVAL, dl, CopyVT,
+ MVT::Other, MVT::Glue, Ops, 2), 1);
Val = Chain.getValue(0);
// Round the f80 to the right size, which also moves it to the appropriate
@@ -1898,7 +1899,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
}
// Some CCs need callee pop.
- if (Subtarget->IsCalleePop(isVarArg, CallConv)) {
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt)) {
FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
} else {
FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
@@ -2271,6 +2272,8 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
const GlobalValue *GV = G->getGlobal();
if (!GV->hasDLLImportLinkage()) {
unsigned char OpFlags = 0;
+ bool ExtraLoad = false;
+ unsigned WrapperKind = ISD::DELETED_NODE;
// On ELF targets, in both X86-64 and X86-32 mode, direct calls to
// external symbols most go through the PLT in PIC mode. If the symbol
@@ -2288,10 +2291,28 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// unless we're building with the leopard linker or later, which
// automatically synthesizes these stubs.
OpFlags = X86II::MO_DARWIN_STUB;
+ } else if (Subtarget->isPICStyleRIPRel() &&
+ isa<Function>(GV) &&
+ cast<Function>(GV)->hasFnAttr(Attribute::NonLazyBind)) {
+ // If the function is marked as non-lazy, generate an indirect call
+ // which loads from the GOT directly. This avoids runtime overhead
+ // at the cost of eager binding (and one extra byte of encoding).
+ OpFlags = X86II::MO_GOTPCREL;
+ WrapperKind = X86ISD::WrapperRIP;
+ ExtraLoad = true;
}
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(),
G->getOffset(), OpFlags);
+
+ // Add a wrapper if needed.
+ if (WrapperKind != ISD::DELETED_NODE)
+ Callee = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Callee);
+ // Add extra indirection if needed.
+ if (ExtraLoad)
+ Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(),
+ false, false, 0);
}
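// A sketch of the intended codegen (not shown in the patch): for a
// nonlazybind function f on a RIP-relative PIC target, instead of the direct
//   callq _f                     ## lazily bound through a stub
// the wrapper plus extra load above yield
//   callq *_f@GOTPCREL(%rip)     ## call the eagerly bound GOT entry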
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
unsigned char OpFlags = 0;
@@ -2363,7 +2384,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the CALLSEQ_END node.
unsigned NumBytesForCalleeToPush;
- if (Subtarget->IsCalleePop(isVarArg, CallConv))
+ if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
NumBytesForCalleeToPush = NumBytes; // Callee pops everything
else if (!Is64Bit && !IsTailCallConvention(CallConv) && IsStructRet)
// If this is a call to a struct-return function, the callee
@@ -2485,6 +2506,10 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
if (!FINode)
return false;
FI = FINode->getIndex();
+ } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
+ FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
+ FI = FINode->getIndex();
+ Bytes = Flags.getByValSize();
} else
return false;
@@ -2536,6 +2561,11 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
if (isCalleeStructRet || isCallerStructRet)
return false;
+ // An stdcall caller is expected to clean up its arguments; the callee
+ // isn't going to do that.
+ if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
+ return false;
+
// Do not sibcall optimize vararg calls unless all arguments are passed via
// registers.
if (isVarArg && !Outs.empty()) {
@@ -2672,11 +2702,6 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
}
}
- // An stdcall caller is expected to clean up its arguments; the callee
- // isn't going to do that.
- if (!CCMatch && CallerCC==CallingConv::X86_StdCall)
- return false;
-
return true;
}
@@ -2856,6 +2881,29 @@ bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
return false;
}
+/// isCalleePop - Determines whether the callee is required to pop its
+/// own arguments. Callee pop is necessary to support tail calls.
+bool X86::isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt) {
+ if (IsVarArg)
+ return false;
+
+ switch (CallingConv) {
+ default:
+ return false;
+ case CallingConv::X86_StdCall:
+ case CallingConv::X86_FastCall:
+ case CallingConv::X86_ThisCall:
+ return !is64Bit;
+ case CallingConv::Fast:
+ case CallingConv::GHC:
+ return TailCallOpt;
+ }
+}
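// Both call sites updated earlier in this patch now spell the query the same
// way, e.g. in LowerFormalArguments:
//
//   if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, GuaranteedTailCallOpt))
//     FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
//
// For fastcc and GHC the answer deliberately depends on GuaranteedTailCallOpt:
// callee-pop is forced on only when tail calls must be supported.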
+
/// TranslateX86CC - do a one to one translation of a ISD::CondCode to the X86
/// specific condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
@@ -3790,19 +3838,24 @@ static SDValue getZeroVector(EVT VT, bool HasSSE2, SelectionDAG &DAG,
}
/// getOnesVector - Returns a vector of specified type with all bits set.
-///
+/// Always build ones vectors as <4 x i32> or <8 x i32> bitcasted to
+/// their original type, ensuring they get CSE'd.
static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
assert(VT.isVector() && "Expected a vector type");
+ assert((VT.is128BitVector() || VT.is256BitVector())
+ && "Expected a 128-bit or 256-bit vector type");
- // Always build ones vectors as <4 x i32> or <2 x i32> bitcasted to their dest
- // type. This ensures they get CSE'd.
SDValue Cst = DAG.getTargetConstant(~0U, MVT::i32);
+
SDValue Vec;
- Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
+ if (VT.is256BitVector()) {
+ SDValue Ops[] = { Cst, Cst, Cst, Cst, Cst, Cst, Cst, Cst };
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i32, Ops, 8);
+ } else
+ Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Cst, Cst, Cst, Cst);
return DAG.getNode(ISD::BITCAST, dl, VT, Vec);
}
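// A minimal sketch of what getOnesVector now produces for a 256-bit request:
// getOnesVector(MVT::v4i64, DAG, dl) builds
//   (v4i64 (bitcast (v8i32 build_vector ~0U, ..., ~0U)))
// so all-ones constants of every 256-bit element type CSE to one v8i32 node.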
-
/// NormalizeMask - V2 is a splat, modify the mask (if needed) so all elements
/// that point to V2 points to its first element.
static SDValue NormalizeMask(ShuffleVectorSDNode *SVOp, SelectionDAG &DAG) {
@@ -4417,17 +4470,17 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return ConcatVectors(Lower, Upper, DAG);
}
- // All zero's are handled with pxor in SSE2 and above, xorps in SSE1.
- // All one's are handled with pcmpeqd. In AVX, zero's are handled with
- // vpxor in 128-bit and xor{pd,ps} in 256-bit, but no 256 version of pcmpeqd
- // is present, so AllOnes is ignored.
+ // All zero's:
+ // - pxor (SSE2), xorps (SSE1), vpxor (128 AVX), xorp[s|d] (256 AVX)
+ // All one's:
+ // - pcmpeqd (SSE2 and 128 AVX), fallback to constant pools (256 AVX)
if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
- (Op.getValueType().getSizeInBits() != 256 &&
- ISD::isBuildVectorAllOnes(Op.getNode()))) {
- // Canonicalize this to <4 x i32> (SSE) to
+ ISD::isBuildVectorAllOnes(Op.getNode())) {
+ // Canonicalize this to <4 x i32> or <8 x i32> (SSE) to
// 1) ensure the zero vectors are CSE'd, and 2) ensure that i64 scalars are
// eliminated on x86-32 hosts.
- if (Op.getValueType() == MVT::v4i32)
+ if (Op.getValueType() == MVT::v4i32 ||
+ Op.getValueType() == MVT::v8i32)
return Op;
if (ISD::isBuildVectorAllOnes(Op.getNode()))
@@ -8874,8 +8927,8 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const {
}
// Lower SHL with variable shift amount.
- // Cannot lower SHL without SSE4.1 or later.
- if (!Subtarget->hasSSE41()) return SDValue();
+ // Cannot lower SHL without SSE2 or later.
+ if (!Subtarget->hasSSE2()) return SDValue();
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) {
Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
@@ -9022,13 +9075,66 @@ SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
return Sum;
}
+SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
+ SelectionDAG &DAG) const {
+ DebugLoc dl = Op.getDebugLoc();
+ SDNode *Node = Op.getNode();
+ EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
+ EVT VT = Node->getValueType(0);
+
+ if (Subtarget->hasSSE2() && VT.isVector()) {
+ unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
+ ExtraVT.getScalarType().getSizeInBits();
+ SDValue ShAmt = DAG.getConstant(BitsDiff, MVT::i32);
+
+ unsigned SHLIntrinsicsID = 0;
+ unsigned SRAIntrinsicsID = 0;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ return SDValue();
+ case MVT::v2i64: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_q;
+ SRAIntrinsicsID = 0;
+ break;
+ }
+ case MVT::v4i32: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_d;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_d;
+ break;
+ }
+ case MVT::v8i16: {
+ SHLIntrinsicsID = Intrinsic::x86_sse2_pslli_w;
+ SRAIntrinsicsID = Intrinsic::x86_sse2_psrai_w;
+ break;
+ }
+ }
+
+ SDValue Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SHLIntrinsicsID, MVT::i32),
+ Node->getOperand(0), ShAmt);
+
+ // In the case of a 1-bit sext, there is no need to shift right.
+ if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1;
+
+ if (SRAIntrinsicsID) {
+ Tmp1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
+ DAG.getConstant(SRAIntrinsicsID, MVT::i32),
+ Tmp1, ShAmt);
+ }
+ return Tmp1;
+ }
+
+ return SDValue();
+}
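// A worked example of the expansion above: sign_extend_inreg of v4i32 from
// v4i16 has BitsDiff = 32 - 16 = 16 and becomes an SSE2 shift pair,
//   t = pslld $16, x   ; Intrinsic::x86_sse2_pslli_d
//   r = psrad $16, t   ; Intrinsic::x86_sse2_psrai_d
// For v2i64 there is no packed arithmetic right shift, so only the left
// shift is issued; that suffices for the 1-bit sext caught by the early
// return.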
+
SDValue X86TargetLowering::LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const{
DebugLoc dl = Op.getDebugLoc();
- if (!Subtarget->hasSSE2()) {
+ // Go ahead and emit the fence on x86-64 even if we asked for no-sse2.
+ // There isn't any reason to disable it if the target processor supports it.
+ if (!Subtarget->hasSSE2() && !Subtarget->is64Bit()) {
SDValue Chain = Op.getOperand(0);
- SDValue Zero = DAG.getConstant(0,
- Subtarget->is64Bit() ? MVT::i64 : MVT::i32);
+ SDValue Zero = DAG.getConstant(0, MVT::i32);
SDValue Ops[] = {
DAG.getRegister(X86::ESP, MVT::i32), // Base
DAG.getTargetConstant(1, MVT::i8), // Scale
@@ -9183,6 +9289,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Should not custom lower this!");
+ case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG);
case ISD::MEMBARRIER: return LowerMEMBARRIER(Op,DAG);
case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op,DAG);
case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG);
@@ -9281,6 +9388,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
default:
assert(false && "Do not know how to custom type legalize this operation!");
return;
+ case ISD::SIGN_EXTEND_INREG:
case ISD::ADDC:
case ISD::ADDE:
case ISD::SUBC:
@@ -9415,7 +9523,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
- case X86ISD::PANDN: return "X86ISD::PANDN";
+ case X86ISD::ANDNP: return "X86ISD::ANDNP";
case X86ISD::PSIGNB: return "X86ISD::PSIGNB";
case X86ISD::PSIGNW: return "X86ISD::PSIGNW";
case X86ISD::PSIGND: return "X86ISD::PSIGND";
@@ -11766,10 +11874,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
if (R.getNode())
return R;
- // Want to form PANDN nodes, in the hopes of then easily combining them with
- // OR and AND nodes to form PBLEND/PSIGN.
+ // Want to form ANDNP nodes:
+ // 1) In the hopes of then easily combining them with OR and AND nodes
+ // to form PBLEND/PSIGN.
+ // 2) To match ANDN packed intrinsics
EVT VT = N->getValueType(0);
- if (VT != MVT::v2i64)
+ if (VT != MVT::v2i64 && VT != MVT::v4i64)
return SDValue();
SDValue N0 = N->getOperand(0);
@@ -11779,12 +11889,12 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG,
// Check LHS for vnot
if (N0.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N0.getOperand(1).getNode()))
- return DAG.getNode(X86ISD::PANDN, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N0.getOperand(0), N1);
// Check RHS for vnot
if (N1.getOpcode() == ISD::XOR &&
ISD::isBuildVectorAllOnes(N1.getOperand(1).getNode()))
- return DAG.getNode(X86ISD::PANDN, DL, VT, N1.getOperand(0), N0);
+ return DAG.getNode(X86ISD::ANDNP, DL, VT, N1.getOperand(0), N0);
return SDValue();
}
@@ -11810,10 +11920,10 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (Subtarget->hasSSSE3()) {
if (VT == MVT::v2i64) {
// Canonicalize pandn to RHS
- if (N0.getOpcode() == X86ISD::PANDN)
+ if (N0.getOpcode() == X86ISD::ANDNP)
std::swap(N0, N1);
// or (and (m, x), (pandn m, y))
- if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::PANDN) {
+ if (N0.getOpcode() == ISD::AND && N1.getOpcode() == X86ISD::ANDNP) {
SDValue Mask = N1.getOperand(0);
SDValue X = N1.getOperand(1);
SDValue Y;
@@ -11822,7 +11932,7 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG,
if (N0.getOperand(1) == Mask)
Y = N0.getOperand(0);
- // Check to see if the mask appeared in both the AND and PANDN and
+ // Check to see if the mask appeared in both the AND and ANDNP and
if (!Y.getNode())
return SDValue();
@@ -12166,8 +12276,8 @@ static SDValue PerformSETCCCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
-static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86TargetLowering *XTLI) {
- DebugLoc dl = N->getDebugLoc();
+static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG,
+ const X86TargetLowering *XTLI) {
SDValue Op0 = N->getOperand(0);
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
@@ -12178,7 +12288,8 @@ static SDValue PerformSINT_TO_FPCombine(SDNode *N, SelectionDAG &DAG, const X86T
ISD::isNON_EXTLoad(Op0.getNode()) && Op0.hasOneUse() &&
!XTLI->getSubtarget()->is64Bit() &&
!DAG.getTargetLoweringInfo().isTypeLegal(VT)) {
- SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0), Ld->getChain(), Op0, DAG);
+ SDValue FILDChain = XTLI->BuildFILD(SDValue(N, 0), Ld->getValueType(0),
+ Ld->getChain(), Op0, DAG);
DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), FILDChain.getValue(1));
return FILDChain;
}
@@ -12549,6 +12660,7 @@ X86TargetLowering::getConstraintType(const std::string &Constraint) const {
case 'y':
case 'x':
case 'Y':
+ case 'l':
return C_RegisterClass;
case 'a':
case 'b':
@@ -12832,60 +12944,6 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
-std::vector<unsigned> X86TargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const {
- if (Constraint.size() == 1) {
- // FIXME: not handling fp-stack yet!
- switch (Constraint[0]) { // GCC X86 Constraint Letters
- default: break; // Unknown constraint letter
- case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
- if (Subtarget->is64Bit()) {
- if (VT == MVT::i32)
- return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX,
- X86::ESI, X86::EDI, X86::R8D, X86::R9D,
- X86::R10D,X86::R11D,X86::R12D,
- X86::R13D,X86::R14D,X86::R15D,
- X86::EBP, X86::ESP, 0);
- else if (VT == MVT::i16)
- return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX,
- X86::SI, X86::DI, X86::R8W,X86::R9W,
- X86::R10W,X86::R11W,X86::R12W,
- X86::R13W,X86::R14W,X86::R15W,
- X86::BP, X86::SP, 0);
- else if (VT == MVT::i8)
- return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL,
- X86::SIL, X86::DIL, X86::R8B,X86::R9B,
- X86::R10B,X86::R11B,X86::R12B,
- X86::R13B,X86::R14B,X86::R15B,
- X86::BPL, X86::SPL, 0);
-
- else if (VT == MVT::i64)
- return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX,
- X86::RSI, X86::RDI, X86::R8, X86::R9,
- X86::R10, X86::R11, X86::R12,
- X86::R13, X86::R14, X86::R15,
- X86::RBP, X86::RSP, 0);
-
- break;
- }
- // 32-bit fallthrough
- case 'Q': // Q_REGS
- if (VT == MVT::i32)
- return make_vector<unsigned>(X86::EAX, X86::EDX, X86::ECX, X86::EBX, 0);
- else if (VT == MVT::i16)
- return make_vector<unsigned>(X86::AX, X86::DX, X86::CX, X86::BX, 0);
- else if (VT == MVT::i8)
- return make_vector<unsigned>(X86::AL, X86::DL, X86::CL, X86::BL, 0);
- else if (VT == MVT::i64)
- return make_vector<unsigned>(X86::RAX, X86::RDX, X86::RCX, X86::RBX, 0);
- break;
- }
- }
-
- return std::vector<unsigned>();
-}
-
std::pair<unsigned, const TargetRegisterClass*>
X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
EVT VT) const {
@@ -12895,9 +12953,35 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
// GCC Constraint Letters
switch (Constraint[0]) {
default: break;
+ // TODO: Slight differences here in allocation order and leaving
+ // RIP in the class. Do they matter any more here than they do
+ // in the normal allocation?
+ case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
+ if (Subtarget->is64Bit()) {
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, X86::GR32RegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16RegisterClass);
+ else if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8RegisterClass);
+ else if (VT == MVT::i64 || VT == MVT::f64)
+ return std::make_pair(0U, X86::GR64RegisterClass);
+ break;
+ }
+ // 32-bit fallthrough
+ case 'Q': // Q_REGS
+ if (VT == MVT::i32 || VT == MVT::f32)
+ return std::make_pair(0U, X86::GR32_ABCDRegisterClass);
+ else if (VT == MVT::i16)
+ return std::make_pair(0U, X86::GR16_ABCDRegisterClass);
+ else if (VT == MVT::i8 || VT == MVT::i1)
+ return std::make_pair(0U, X86::GR8_ABCD_LRegisterClass);
+ else if (VT == MVT::i64)
+ return std::make_pair(0U, X86::GR64_ABCDRegisterClass);
+ break;
case 'r': // GENERAL_REGS
case 'l': // INDEX_REGS
- if (VT == MVT::i8)
+ if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8RegisterClass);
if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16RegisterClass);
@@ -12905,7 +12989,7 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, X86::GR32RegisterClass);
return std::make_pair(0U, X86::GR64RegisterClass);
case 'R': // LEGACY_REGS
- if (VT == MVT::i8)
+ if (VT == MVT::i8 || VT == MVT::i1)
return std::make_pair(0U, X86::GR8_NOREXRegisterClass);
if (VT == MVT::i16)
return std::make_pair(0U, X86::GR16_NOREXRegisterClass);
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index d61a125..b603678 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -169,8 +169,8 @@ namespace llvm {
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
- /// PANDN - and with not'd value.
- PANDN,
+ /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
+ ANDNP,
/// PSIGNB/W/D - Copy integer sign.
PSIGNB, PSIGNW, PSIGND,
@@ -466,6 +466,12 @@ namespace llvm {
/// fit into displacement field of the instruction.
bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
bool hasSymbolicDisplacement = true);
+
+ /// isCalleePop - Determines whether the callee is required to pop its
+ /// own arguments. Callee pop is necessary to support tail calls.
+ bool isCalleePop(CallingConv::ID CallingConv,
+ bool is64Bit, bool IsVarArg, bool TailCallOpt);
}
//===--------------------------------------------------------------------===//
@@ -590,10 +596,6 @@ namespace llvm {
virtual ConstraintWeight getSingleConstraintMatchWeight(
AsmOperandInfo &info, const char *constraint) const;
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
-
virtual const char *LowerXConstraint(EVT ConstraintVT) const;
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
@@ -823,6 +825,7 @@ namespace llvm {
SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
// Utility functions to help LowerVECTOR_SHUFFLE
SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 1ea8071..0245e5c 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -150,11 +150,11 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
MachineInstr *MI = MIB;
MachineFunction &MF = *MI->getParent()->getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
- const TargetInstrDesc &TID = MI->getDesc();
+ const MCInstrDesc &MCID = MI->getDesc();
unsigned Flags = 0;
- if (TID.mayLoad())
+ if (MCID.mayLoad())
Flags |= MachineMemOperand::MOLoad;
- if (TID.mayStore())
+ if (MCID.mayStore())
Flags |= MachineMemOperand::MOStore;
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, Offset),
diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td
index 33534cd..adcc747 100644
--- a/lib/Target/X86/X86InstrCompiler.td
+++ b/lib/Target/X86/X86InstrCompiler.td
@@ -1368,6 +1368,11 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
// (shl x, 1) ==> (add x, x)
+// Note that if x is undef (immediate or otherwise), we could theoretically
+// end up with the two uses of x getting different values, producing a result
+// where the least significant bit is not 0. However, the probability of this
+// happening is considered low enough that this is officially not a
+// "real problem".
def : Pat<(shl GR8 :$src1, (i8 1)), (ADD8rr GR8 :$src1, GR8 :$src1)>;
def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
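// For example, (shl GR32:$x, (i8 1)) now selects to "addl %eax, %eax" rather
// than "shll $1, %eax"; at the same code size, an add can typically issue on
// more execution ports than a shift.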
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index b506f5e..7cb870f 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -112,31 +112,8 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
// a pattern) and the FPI instruction should have emission info (e.g. opcode
// encoding and asm printing info).
-// Pseudo Instructions for FP stack return values.
-def FpGET_ST0_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
-def FpGET_ST0_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
-def FpGET_ST0_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(0)
-
-// FpGET_ST1* should only be issued *after* an FpGET_ST0* has been issued when
-// there are two values live out on the stack from a call or inlineasm. This
-// magic is handled by the stackifier. It is not valid to emit FpGET_ST1* and
-// then FpGET_ST0*. In addition, it is invalid for any FP-using operations to
-// occur between them.
-def FpGET_ST1_32 : FpI_<(outs RFP32:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
-def FpGET_ST1_64 : FpI_<(outs RFP64:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
-def FpGET_ST1_80 : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>; // FPR = ST(1)
-
-let Defs = [ST0] in {
-def FpSET_ST0_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(0) = FPR
-def FpSET_ST0_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(0) = FPR
-def FpSET_ST0_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(0) = FPR
-}
-
-let Defs = [ST1] in {
-def FpSET_ST1_32 : FpI_<(outs), (ins RFP32:$src), SpecialFP, []>; // ST(1) = FPR
-def FpSET_ST1_64 : FpI_<(outs), (ins RFP64:$src), SpecialFP, []>; // ST(1) = FPR
-def FpSET_ST1_80 : FpI_<(outs), (ins RFP80:$src), SpecialFP, []>; // ST(1) = FPR
-}
+// Pseudo Instruction for FP stack return values.
+def FpPOP_RETVAL : FpI_<(outs RFP80:$dst), (ins), SpecialFP, []>;
// FpIf32, FpIf64 - Floating Point Pseudo Instruction template.
// f32 instructions can use SSE1 and are predicated on FPStackf32 == !SSE1.
@@ -147,19 +124,6 @@ class FpIf32<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
class FpIf64<dag outs, dag ins, FPFormat fp, list<dag> pattern> :
FpI_<outs, ins, fp, pattern>, Requires<[FPStackf64]>;
-// Register copies. Just copies, the shortening ones do not truncate.
-let neverHasSideEffects = 1 in {
- def MOV_Fp3232 : FpIf32<(outs RFP32:$dst), (ins RFP32:$src), SpecialFP, []>;
- def MOV_Fp3264 : FpIf32<(outs RFP64:$dst), (ins RFP32:$src), SpecialFP, []>;
- def MOV_Fp6432 : FpIf32<(outs RFP32:$dst), (ins RFP64:$src), SpecialFP, []>;
- def MOV_Fp6464 : FpIf64<(outs RFP64:$dst), (ins RFP64:$src), SpecialFP, []>;
- def MOV_Fp8032 : FpIf32<(outs RFP32:$dst), (ins RFP80:$src), SpecialFP, []>;
- def MOV_Fp3280 : FpIf32<(outs RFP80:$dst), (ins RFP32:$src), SpecialFP, []>;
- def MOV_Fp8064 : FpIf64<(outs RFP64:$dst), (ins RFP80:$src), SpecialFP, []>;
- def MOV_Fp6480 : FpIf64<(outs RFP80:$dst), (ins RFP64:$src), SpecialFP, []>;
- def MOV_Fp8080 : FpI_ <(outs RFP80:$dst), (ins RFP80:$src), SpecialFP, []>;
-}
-
// Factoring for arithmetic.
multiclass FPBinary_rr<SDNode OpNode> {
// Register op register -> register
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index 7daa264..6d89bcc 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -460,6 +460,11 @@ class AESAI<bits<8> o, Format F, dag outs, dag ins, string asm,
class CLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern>
: Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
+ OpSize, Requires<[HasCLMUL]>;
+
+class AVXCLMULIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag>pattern>
+ : Ii8<o, F, outs, ins, asm, pattern, SSEPackedInt>, TA,
OpSize, VEX_4V, Requires<[HasAVX, HasCLMUL]>;
// FMA3 Instruction Templates
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 7c9a9f7..b00109c 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -46,8 +46,8 @@ def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
-def X86pandn : SDNode<"X86ISD::PANDN",
- SDTypeProfile<1, 2, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
+def X86andnp : SDNode<"X86ISD::ANDNP",
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
def X86psignb : SDNode<"X86ISD::PSIGNB",
SDTypeProfile<1, 2, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>,
@@ -168,11 +168,13 @@ def ssmem : Operand<v4f32> {
let PrintMethod = "printf32mem";
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
def sdmem : Operand<v2f64> {
let PrintMethod = "printf64mem";
let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
//===----------------------------------------------------------------------===//
@@ -301,6 +303,7 @@ def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;
// 256-bit bitconvert pattern fragments
def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
+def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
def vzmovl_v2i64 : PatFrag<(ops node:$src),
(bitconvert (v2i64 (X86vzmovl
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index e2016eb..55b5835 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -13,7 +13,6 @@
#include "X86InstrInfo.h"
#include "X86.h"
-#include "X86GenInstrInfo.inc"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
@@ -36,6 +35,9 @@
#include "llvm/MC/MCAsmInfo.h"
#include <limits>
+#define GET_INSTRINFO_CTOR
+#include "X86GenInstrInfo.inc"
+
using namespace llvm;
static cl::opt<bool>
@@ -52,7 +54,12 @@ ReMatPICStubLoad("remat-pic-stub-load",
cl::init(false), cl::Hidden);
X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
- : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
+ : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKDOWN64
+ : X86::ADJCALLSTACKDOWN32),
+ (tm.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::ADJCALLSTACKUP64
+ : X86::ADJCALLSTACKUP32)),
TM(tm), RI(tm, *this) {
enum {
TB_NOT_REVERSABLE = 1U << 31,
@@ -293,12 +300,17 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVAPDrr, X86::MOVAPDmr, 0, 16 },
{ X86::MOVAPSrr, X86::MOVAPSmr, 0, 16 },
{ X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYmr, 0, 32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYmr, 0, 32 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYmr, 0, 32 },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
{ X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
{ X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
{ X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
{ X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
{ X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYmr, 0, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYmr, 0, 0 },
{ X86::MUL16r, X86::MUL16m, 1, 0 },
{ X86::MUL32r, X86::MUL32m, 1, 0 },
{ X86::MUL64r, X86::MUL64m, 1, 0 },
@@ -403,10 +415,13 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOV8rr, X86::MOV8rm, 0 },
{ X86::MOVAPDrr, X86::MOVAPDrm, 16 },
{ X86::MOVAPSrr, X86::MOVAPSrm, 16 },
+ { X86::VMOVAPDYrr, X86::VMOVAPDYrm, 32 },
+ { X86::VMOVAPSYrr, X86::VMOVAPSYrm, 32 },
{ X86::MOVDDUPrr, X86::MOVDDUPrm, 0 },
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, 16 },
+ { X86::VMOVDQAYrr, X86::VMOVDQAYrm, 32 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
{ X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
{ X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
@@ -417,6 +432,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
{ X86::MOVUPDrr, X86::MOVUPDrm, 16 },
{ X86::MOVUPSrr, X86::MOVUPSrm, 0 },
+ { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
+ { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 },
{ X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm, 0 },
{ X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm, 0 },
{ X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
@@ -779,6 +796,9 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::MOVAPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
return true;
@@ -800,6 +820,9 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::MOVAPSmr:
case X86::MOVAPDmr:
case X86::MOVDQAmr:
+ case X86::VMOVAPSYmr:
+ case X86::VMOVAPDYmr:
+ case X86::VMOVDQAYmr:
case X86::MMX_MOVD64mr:
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
@@ -918,6 +941,10 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVUPSrm:
case X86::MOVAPDrm:
case X86::MOVDQArm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
case X86::FsMOVAPSrm:
@@ -1689,13 +1716,13 @@ X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
}
bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
- const TargetInstrDesc &TID = MI->getDesc();
- if (!TID.isTerminator()) return false;
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (!MCID.isTerminator()) return false;
// Conditional branch is a special case.
- if (TID.isBranch() && !TID.isBarrier())
+ if (MCID.isBranch() && !MCID.isBarrier())
return true;
- if (!TID.isPredicable())
+ if (!MCID.isPredicable())
return true;
return !isPredicated(MI);
}
@@ -1789,7 +1816,6 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
.addMBB(UnCondBrIter->getOperand(0).getMBB());
BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(X86::JMP_4))
.addMBB(TargetBB);
- MBB.addSuccessor(TargetBB);
OldInst->eraseFromParent();
UnCondBrIter->eraseFromParent();
@@ -1968,6 +1994,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opc = X86::MOV8rr;
} else if (X86::VR128RegClass.contains(DestReg, SrcReg))
Opc = X86::MOVAPSrr;
+ else if (X86::VR256RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSYrr;
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
else
@@ -2057,6 +2085,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
return load ? X86::MOVAPSrm : X86::MOVAPSmr;
else
return load ? X86::MOVUPSrm : X86::MOVUPSmr;
+ case 32:
+ assert(X86::VR256RegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
+ // If the stack is suitably aligned we can use aligned loads and stores.
+ if (isStackAligned)
+ return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
+ else
+ return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
}
}
@@ -2083,7 +2118,8 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const MachineFunction &MF = *MBB.getParent();
assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
- bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ RI.canRealignStack(MF);
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx)
@@ -2115,7 +2151,8 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- bool isAligned = (RI.getStackAlignment() >= 16) || RI.canRealignStack(MF);
+ bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= 16) ||
+ RI.canRealignStack(MF);
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = MBB.findDebugLoc(MI);
addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx);
@@ -2224,7 +2261,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
bool isTwoAddrFold = false;
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
- MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
// FIXME: AsmPrinter doesn't know how to handle
// X86II::MO_GOT_ABSOLUTE_ADDRESS after folding.
@@ -2273,7 +2310,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
return NULL;
bool NarrowToMOV32rm = false;
if (Size) {
- unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
+ unsigned RCSize = getRegClass(MI->getDesc(), i, &RI)->getSize();
if (Size < RCSize) {
// Check if it's safe to fold the load. If the size of the object is
// narrower than the load width, then it's not.
@@ -2542,7 +2579,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
unsigned Opc = MI->getOpcode();
unsigned NumOps = MI->getDesc().getNumOperands();
bool isTwoAddr = NumOps > 1 &&
- MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1;
+ MI->getDesc().getOperandConstraint(1, MCOI::TIED_TO) != -1;
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
@@ -2588,9 +2625,8 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
return false;
UnfoldStore &= FoldedStore;
- const TargetInstrDesc &TID = get(Opc);
- const TargetOperandInfo &TOI = TID.OpInfo[Index];
- const TargetRegisterClass *RC = TOI.getRegClass(&RI);
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
!TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast())
@@ -2632,7 +2668,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
}
// Emit the data processing instruction.
- MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true);
+ MachineInstr *DataMI = MF.CreateMachineInstr(MCID, MI->getDebugLoc(), true);
MachineInstrBuilder MIB(DataMI);
if (FoldedStore)
@@ -2685,7 +2721,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
// Emit the store instruction.
if (UnfoldStore) {
- const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI);
+ const TargetRegisterClass *DstRC = getRegClass(MCID, 0, &RI);
std::pair<MachineInstr::mmo_iterator,
MachineInstr::mmo_iterator> MMOs =
MF.extractStoreMemRefs(MI->memoperands_begin(),
@@ -2710,9 +2746,9 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned Index = I->second.second & 0xf;
bool FoldedLoad = I->second.second & (1 << 4);
bool FoldedStore = I->second.second & (1 << 5);
- const TargetInstrDesc &TID = get(Opc);
- const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI);
- unsigned NumDefs = TID.NumDefs;
+ const MCInstrDesc &MCID = get(Opc);
+ const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI);
+ unsigned NumDefs = MCID.NumDefs;
std::vector<SDValue> AddrOps;
std::vector<SDValue> BeforeOps;
std::vector<SDValue> AfterOps;
@@ -2756,13 +2792,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// Emit the data processing instruction.
std::vector<EVT> VTs;
const TargetRegisterClass *DstRC = 0;
- if (TID.getNumDefs() > 0) {
- DstRC = TID.OpInfo[0].getRegClass(&RI);
+ if (MCID.getNumDefs() > 0) {
+ DstRC = getRegClass(MCID, 0, &RI);
VTs.push_back(*DstRC->vt_begin());
}
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs())
+ if (VT != MVT::Other && i >= (unsigned)MCID.getNumDefs())
VTs.push_back(VT);
}
if (Load)
@@ -2845,6 +2881,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
break;
}
switch (Opc2) {
@@ -2867,6 +2908,11 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
+ case X86::VMOVAPSYrm:
+ case X86::VMOVUPSYrm:
+ case X86::VMOVAPDYrm:
+ case X86::VMOVDQAYrm:
+ case X86::VMOVDQUYrm:
break;
}
@@ -3045,6 +3091,13 @@ static const unsigned ReplaceableInstrs[][3] = {
{ X86::AVX_SET0PS, X86::AVX_SET0PD, X86::AVX_SET0PI },
{ X86::VXORPSrm, X86::VXORPDrm, X86::VPXORrm },
{ X86::VXORPSrr, X86::VXORPDrr, X86::VPXORrr },
+ // AVX 256-bit support
+ { X86::VMOVAPSYmr, X86::VMOVAPDYmr, X86::VMOVDQAYmr },
+ { X86::VMOVAPSYrm, X86::VMOVAPDYrm, X86::VMOVDQAYrm },
+ { X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
+ { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
+ { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index d895023..5f2eba3 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -19,6 +19,9 @@
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
+#define GET_INSTRINFO_HEADER
+#include "X86GenInstrInfo.inc"
+
namespace llvm {
class X86RegisterInfo;
class X86TargetMachine;
@@ -611,7 +614,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) {
isLeaMem(MI, Op);
}
-class X86InstrInfo : public TargetInstrInfoImpl {
+class X86InstrInfo : public X86GenInstrInfo {
X86TargetMachine &TM;
const X86RegisterInfo RI;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 8cab808..7eb07b0 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -251,6 +251,7 @@ class X86MemOperand<string printMethod> : Operand<iPTR> {
let ParserMatchClass = X86MemAsmOperand;
}
+let OperandType = "OPERAND_MEMORY" in {
def opaque32mem : X86MemOperand<"printopaquemem">;
def opaque48mem : X86MemOperand<"printopaquemem">;
def opaque80mem : X86MemOperand<"printopaquemem">;
@@ -267,6 +268,7 @@ def f64mem : X86MemOperand<"printf64mem">;
def f80mem : X86MemOperand<"printf80mem">;
def f128mem : X86MemOperand<"printf128mem">;
def f256mem : X86MemOperand<"printf256mem">;
+}
// A version of i8mem for use on x86-64 that uses GR64_NOREX instead of
// plain GR64, so that it doesn't potentially require a REX prefix.
@@ -274,6 +276,7 @@ def i8mem_NOREX : Operand<i64> {
let PrintMethod = "printi8mem";
let MIOperandInfo = (ops GR64_NOREX, i8imm, GR64_NOREX_NOSP, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
// GPRs available for tailcall.
@@ -287,6 +290,7 @@ def i32mem_TC : Operand<i32> {
let PrintMethod = "printi32mem";
let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
// Special i64mem for addresses of load folding tail calls. These are not
@@ -297,9 +301,11 @@ def i64mem_TC : Operand<i64> {
let MIOperandInfo = (ops ptr_rc_tailcall, i8imm,
ptr_rc_tailcall, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
+ let OperandType = "OPERAND_MEMORY";
}
-let ParserMatchClass = X86AbsMemAsmOperand,
+let OperandType = "OPERAND_PCREL",
+ ParserMatchClass = X86AbsMemAsmOperand,
PrintMethod = "print_pcrel_imm" in {
def i32imm_pcrel : Operand<i32>;
def i16imm_pcrel : Operand<i16>;
@@ -317,6 +323,7 @@ def brtarget8 : Operand<OtherVT>;
def SSECC : Operand<i8> {
let PrintMethod = "printSSECC";
+ let OperandType = "OPERAND_IMMEDIATE";
}
class ImmSExtAsmOperandClass : AsmOperandClass {
@@ -363,15 +370,18 @@ def ImmSExti64i8AsmOperand : ImmSExtAsmOperandClass {
// 16-bits but only 8 bits are significant.
def i16i8imm : Operand<i16> {
let ParserMatchClass = ImmSExti16i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
// 32-bits but only 8 bits are significant.
def i32i8imm : Operand<i32> {
let ParserMatchClass = ImmSExti32i8AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
// 64-bits but only 32 bits are significant.
def i64i32imm : Operand<i64> {
let ParserMatchClass = ImmSExti64i32AsmOperand;
+ let OperandType = "OPERAND_IMMEDIATE";
}
// 64-bits but only 32 bits are significant, and those bits are treated as being
@@ -438,8 +448,10 @@ def HasFMA3 : Predicate<"Subtarget->hasFMA3()">;
def HasFMA4 : Predicate<"Subtarget->hasFMA4()">;
def FPStackf32 : Predicate<"!Subtarget->hasXMM()">;
def FPStackf64 : Predicate<"!Subtarget->hasXMMInt()">;
-def In32BitMode : Predicate<"!Subtarget->is64Bit()">, AssemblerPredicate;
-def In64BitMode : Predicate<"Subtarget->is64Bit()">, AssemblerPredicate;
+def In32BitMode : Predicate<"!Subtarget->is64Bit()">,
+ AssemblerPredicate<"!Mode64Bit">;
+def In64BitMode : Predicate<"Subtarget->is64Bit()">,
+ AssemblerPredicate<"Mode64Bit">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
@@ -669,7 +681,7 @@ def PUSH64rmm: I<0xFF, MRM6m, (outs), (ins i64mem:$src), "push{q}\t$src", []>;
}
let Defs = [RSP], Uses = [RSP], neverHasSideEffects = 1, mayStore = 1 in {
-def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i8imm:$imm),
+def PUSH64i8 : Ii8<0x6a, RawFrm, (outs), (ins i64i8imm:$imm),
"push{q}\t$imm", []>;
def PUSH64i16 : Ii16<0x68, RawFrm, (outs), (ins i16imm:$imm),
"push{q}\t$imm", []>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index b64c03a..fe11d77 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -512,6 +512,26 @@ defm VCVTSI2SDL : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, XD,
defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, XD,
VEX_4V, VEX_W;
+let Predicates = [HasAVX] in {
+ def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))),
+ (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>;
+ def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))),
+ (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>;
+
+ def : Pat<(f32 (sint_to_fp GR32:$src)),
+ (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f32 (sint_to_fp GR64:$src)),
+ (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>;
+ def : Pat<(f64 (sint_to_fp GR32:$src)),
+ (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>;
+ def : Pat<(f64 (sint_to_fp GR64:$src)),
+ (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>;
+}
+
defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32,
"cvttss2si\t{$src, $dst|$dst, $src}">, XS;
defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32,
@@ -1473,83 +1493,68 @@ let neverHasSideEffects = 1, Pattern = []<dag>, isCommutable = 0 in
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
- SDNode OpNode, int HasPat = 0,
- list<list<dag>> Pattern = []> {
+ SDNode OpNode> {
let Pattern = []<dag> in {
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- !if(HasPat, Pattern[0], // rr
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))]),
- !if(HasPat, Pattern[2], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))]), 0>,
- VEX_4V;
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>, VEX_4V;
defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- !if(HasPat, Pattern[1], // rr
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64
- VR128:$src2))))]),
- !if(HasPat, Pattern[3], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))]), 0>,
- OpSize, VEX_4V;
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
}
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- !if(HasPat, Pattern[0], // rr
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1,
- VR128:$src2)))]),
- !if(HasPat, Pattern[2], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>, TB;
+ [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB;
defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble,
!strconcat(OpcodeStr, "pd"), f128mem,
- !if(HasPat, Pattern[1], // rr
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (bc_v2i64 (v2f64
- VR128:$src2))))]),
- !if(HasPat, Pattern[3], // rm
- [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
- (memopv2i64 addr:$src2)))])>,
- TB, OpSize;
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (bc_v2i64 (v2f64 VR128:$src2))))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)),
+ (memopv2i64 addr:$src2)))]>, TB, OpSize;
}
}
/// sse12_fp_packed_logical_y - AVX 256-bit SSE 1 & 2 logical ops forms
///
-multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr> {
+multiclass sse12_fp_packed_logical_y<bits<8> opc, string OpcodeStr,
+ SDNode OpNode> {
defm PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f256mem, [], [], 0>, VEX_4V;
+ !strconcat(OpcodeStr, "ps"), f256mem,
+ [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>, VEX_4V;
defm PDY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble,
- !strconcat(OpcodeStr, "pd"), f256mem, [], [], 0>, OpSize, VEX_4V;
+ !strconcat(OpcodeStr, "pd"), f256mem,
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (bc_v4i64 (v4f64 VR256:$src2))))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)),
+ (memopv4i64 addr:$src2)))], 0>,
+ OpSize, VEX_4V;
}
// AVX 256-bit packed logical ops forms
-defm VAND : sse12_fp_packed_logical_y<0x54, "and">;
-defm VOR : sse12_fp_packed_logical_y<0x56, "or">;
-defm VXOR : sse12_fp_packed_logical_y<0x57, "xor">;
-let isCommutable = 0 in
- defm VANDN : sse12_fp_packed_logical_y<0x55, "andn">;
+defm VAND : sse12_fp_packed_logical_y<0x54, "and", and>;
+defm VOR : sse12_fp_packed_logical_y<0x56, "or", or>;
+defm VXOR : sse12_fp_packed_logical_y<0x57, "xor", xor>;
+defm VANDN : sse12_fp_packed_logical_y<0x55, "andn", X86andnp>;
defm AND : sse12_fp_packed_logical<0x54, "and", and>;
defm OR : sse12_fp_packed_logical<0x56, "or", or>;
defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
- defm ANDN : sse12_fp_packed_logical<0x55, "andn", undef /* dummy */, 1, [
- // single r+r
- [(set VR128:$dst, (X86pandn VR128:$src1, VR128:$src2))],
- // double r+r
- [],
- // single r+m
- [(set VR128:$dst, (X86pandn VR128:$src1, (memopv2i64 addr:$src2)))],
- // double r+m
- []]>;
+ defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
@@ -1991,11 +1996,11 @@ def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
// There is no AVX form for instructions below this point
def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
- "movnti\t{$src, $dst|$dst, $src}",
+ "movnti{l}\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
TB, Requires<[HasSSE2]>;
def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
- "movnti\t{$src, $dst|$dst, $src}",
+ "movnti{q}\t{$src, $dst|$dst, $src}",
[(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
TB, Requires<[HasSSE2]>;
}
@@ -2006,13 +2011,13 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
// Prefetch intrinsic.
def PREFETCHT0 : PSI<0x18, MRM1m, (outs), (ins i8mem:$src),
- "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3))]>;
+ "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))]>;
def PREFETCHT1 : PSI<0x18, MRM2m, (outs), (ins i8mem:$src),
- "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2))]>;
+ "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))]>;
def PREFETCHT2 : PSI<0x18, MRM3m, (outs), (ins i8mem:$src),
- "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1))]>;
+ "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))]>;
def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
- "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
+ "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))]>;
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>,
@@ -2037,7 +2042,10 @@ def V_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
}
// The same as done above but for AVX. The 128-bit versions are the
-// same, but re-encoded. The 256-bit does not support PI version.
+// same, but re-encoded. The 256-bit versions do not support a PI version,
+// and don't need one: on Sandy Bridge the register is set to zero at the
+// rename stage without using any execution unit, so SET0PSY and SET0PDY
+// can be used for vector integer instructions without penalty.
// FIXME: Change encoding to pseudo! This is blocked right now by the x86
// JIT implementation; it does not expand the instructions below like
// X86MCInstLower does.
@@ -2052,8 +2060,8 @@ def AVX_SET0PSY : PSI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
def AVX_SET0PDY : PDI<0x57, MRMInitReg, (outs VR256:$dst), (ins), "",
[(set VR256:$dst, (v4f64 immAllZerosV))]>, VEX_4V;
let ExeDomain = SSEPackedInt in
-def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
- [(set VR128:$dst, (v4i32 immAllZerosV))]>;
+def AVX_SET0PI : PDI<0xEF, MRMInitReg, (outs VR128:$dst), (ins), "",
+ [(set VR128:$dst, (v4i32 immAllZerosV))]>;
}
def : Pat<(v2i64 immAllZerosV), (V_SET0PI)>;
@@ -2063,6 +2071,15 @@ def : Pat<(v16i8 immAllZerosV), (V_SET0PI)>;
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
(f32 (EXTRACT_SUBREG (v4f32 VR128:$src), sub_ss))>;
+// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction with something similar to the "$src1 = $dst"
+// constraint but without the tied operands.
+def : Pat<(extloadf32 addr:$src),
+ (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)), addr:$src)>,
+ Requires<[HasAVX, OptForSpeed]>;
+
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store XCSR register
//===----------------------------------------------------------------------===//
@@ -2959,6 +2976,22 @@ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(MOVZDI2PDIrm addr:$src)>;
}
+// These are the correct encodings of the instructions so that we know how to
+// read correct assembly, even though we continue to emit the wrong ones for
+// compatibility with Darwin's buggy assembler.
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toPQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOV64toSDrr FR64:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVPQIto64rr GR64:$dst, VR128:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVSDto64rr GR64:$dst, FR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (VMOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
+ (MOVZQI2PQIrr VR128:$dst, GR64:$src), 0>;
+
//===---------------------------------------------------------------------===//
// SSE2 - Move Quadword
//===---------------------------------------------------------------------===//
@@ -3589,6 +3622,16 @@ let Predicates = [HasSSE2] in
def : Pat<(fextend (loadf32 addr:$src)),
(CVTSS2SDrm addr:$src)>;
+// FIXME: According to the Intel manual, DEST[127:64] <- SRC1[127:64], while
+// in the non-AVX version bits 127:64 aren't touched. Find a better way to
+// represent this instead of always zeroing SRC1. One possible solution is
+// to represent the instruction with something similar to the "$src1 = $dst"
+// constraint but without the tied operands.
+let Predicates = [HasAVX] in
+ def : Pat<(fextend (loadf32 addr:$src)),
+ (VCVTSS2SDrm (f32 (EXTRACT_SUBREG (AVX_SET0PS), sub_ss)),
+ addr:$src)>;
+
// bit_convert
let Predicates = [HasXMMInt] in {
def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
@@ -3625,6 +3668,19 @@ let Predicates = [HasXMMInt] in {
let Predicates = [HasAVX] in {
def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
+ def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
}
// Move scalar to XMM zero-extended
@@ -3807,6 +3863,8 @@ def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
(CVTTPS2DQrr VR128:$src)>, Requires<[HasSSE2]>;
// Use movaps / movups for SSE integer load / store (one byte shorter).
+// The instructions selected below are then converted to MOVDQA/MOVDQU
+// during the SSE domain pass.
let Predicates = [HasSSE1] in {
def : Pat<(alignedloadv4i32 addr:$src),
(MOVAPSrm addr:$src)>;
@@ -3835,8 +3893,9 @@ let Predicates = [HasSSE1] in {
(MOVUPSmr addr:$dst, VR128:$src)>;
}
-// Use vmovaps/vmovups for AVX 128-bit integer load/store (one byte shorter).
+// Use vmovaps/vmovups for AVX integer load/store.
let Predicates = [HasAVX] in {
+ // 128-bit load/store
def : Pat<(alignedloadv4i32 addr:$src),
(VMOVAPSrm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),
@@ -3862,6 +3921,24 @@ let Predicates = [HasAVX] in {
(VMOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
(VMOVUPSmr addr:$dst, VR128:$src)>;
+
+ // 256-bit load/store
+ def : Pat<(alignedloadv4i64 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv4i64 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedloadv8i32 addr:$src),
+ (VMOVAPSYrm addr:$src)>;
+ def : Pat<(loadv8i32 addr:$src),
+ (VMOVUPSYrm addr:$src)>;
+ def : Pat<(alignedstore (v4i64 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(alignedstore (v8i32 VR256:$src), addr:$dst),
+ (VMOVAPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v4i64 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
+ def : Pat<(store (v8i32 VR256:$src), addr:$dst),
+ (VMOVUPSYmr addr:$dst, VR256:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -5160,33 +5237,52 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst),
// CLMUL Instructions
//===----------------------------------------------------------------------===//
-// Only the AVX version of CLMUL instructions are described here.
-
// Carry-less Multiplication instructions
-def VPCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+let Constraints = "$src1 = $dst" in {
+def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+
+def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+ (ins VR128:$src1, i128mem:$src2, i8imm:$src3),
+ "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ []>;
+}
+
+// AVX carry-less Multiplication instructions
+def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
-def VPCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
+def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, i8imm:$src3),
"vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[]>;
-// Assembler Only
-multiclass avx_vpclmul<string asm> {
- def rr : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
-
- def rm : I<0, Pseudo, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- []>;
-}
-defm VPCLMULHQHQDQ : avx_vpclmul<"vpclmulhqhqdq">;
-defm VPCLMULHQLQDQ : avx_vpclmul<"vpclmulhqlqdq">;
-defm VPCLMULLQHQDQ : avx_vpclmul<"vpclmullqhqdq">;
-defm VPCLMULLQLQDQ : avx_vpclmul<"vpclmullqlqdq">;
+
+multiclass pclmul_alias<string asm, int immop> {
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrr VR128:$dst, VR128:$src, immop)>;
+
+ def : InstAlias<!strconcat("pclmul", asm,
+ "dq {$src, $dst|$dst, $src}"),
+ (PCLMULQDQrm VR128:$dst, i128mem:$src, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop)>;
+
+ def : InstAlias<!strconcat("vpclmul", asm,
+ "dq {$src2, $src1, $dst|$dst, $src1, $src2}"),
+ (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop)>;
+}
+defm : pclmul_alias<"hqhq", 0x11>;
+defm : pclmul_alias<"hqlq", 0x01>;
+defm : pclmul_alias<"lqhq", 0x10>;
+defm : pclmul_alias<"lqlq", 0x00>;
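+// Illustrative note: each alias above pins the PCLMULQDQ immediate, whose
+// bits 0 and 4 select the low or high quadword of each source operand; for
+// example "pclmulhqhqdq" is simply "pclmulqdq" with an immediate of 0x11.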
//===----------------------------------------------------------------------===//
// AVX Instructions
diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td
index f73cff3..31de878 100644
--- a/lib/Target/X86/X86InstrSystem.td
+++ b/lib/Target/X86/X86InstrSystem.td
@@ -411,6 +411,8 @@ let Uses = [RDX, RAX, RCX] in
let Defs = [RAX, RDI], Uses = [RDX, RDI] in
def XSTORE : I<0xc0, RawFrm, (outs), (ins), "xstore", []>, A7;
+def : InstAlias<"xstorerng", (XSTORE)>;
+
let Defs = [RSI, RDI], Uses = [RBX, RDX, RSI, RDI] in {
def XCRYPTECB : I<0xc8, RawFrm, (outs), (ins), "xcryptecb", []>, A7;
def XCRYPTCBC : I<0xd0, RawFrm, (outs), (ins), "xcryptcbc", []>, A7;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 55aceba..ce8ef49 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -18,26 +18,32 @@
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/raw_ostream.h"
+
using namespace llvm;
namespace {
class X86MCCodeEmitter : public MCCodeEmitter {
X86MCCodeEmitter(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const X86MCCodeEmitter &); // DO NOT IMPLEMENT
- const TargetMachine &TM;
- const TargetInstrInfo &TII;
+ const MCInstrInfo &MCII;
+ const MCSubtargetInfo &STI;
MCContext &Ctx;
- bool Is64BitMode;
public:
- X86MCCodeEmitter(TargetMachine &tm, MCContext &ctx, bool is64Bit)
- : TM(tm), TII(*TM.getInstrInfo()), Ctx(ctx) {
- Is64BitMode = is64Bit;
+ X86MCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti,
+ MCContext &ctx)
+ : MCII(mcii), STI(sti), Ctx(ctx) {
}
~X86MCCodeEmitter() {}
+ bool is64BitMode() const {
+ // FIXME: Can tablegen auto-generate this?
+ return (STI.getFeatureBits() & X86::Mode64Bit) != 0;
+ }
+
static unsigned GetX86RegNum(const MCOperand &MO) {
return X86RegisterInfo::getX86RegNum(MO.getReg());
}
@@ -111,7 +117,7 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
- const MCInst &MI, const TargetInstrDesc &Desc,
+ const MCInst &MI, const MCInstrDesc &Desc,
raw_ostream &OS) const;
void EmitSegmentOverridePrefix(uint64_t TSFlags, unsigned &CurByte,
@@ -119,23 +125,17 @@ public:
raw_ostream &OS) const;
void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, int MemOperand,
- const MCInst &MI, const TargetInstrDesc &Desc,
+ const MCInst &MI, const MCInstrDesc &Desc,
raw_ostream &OS) const;
};
} // end anonymous namespace
-MCCodeEmitter *llvm::createX86_32MCCodeEmitter(const Target &,
- TargetMachine &TM,
- MCContext &Ctx) {
- return new X86MCCodeEmitter(TM, Ctx, false);
-}
-
-MCCodeEmitter *llvm::createX86_64MCCodeEmitter(const Target &,
- TargetMachine &TM,
- MCContext &Ctx) {
- return new X86MCCodeEmitter(TM, Ctx, true);
+MCCodeEmitter *llvm::createX86MCCodeEmitter(const MCInstrInfo &MCII,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new X86MCCodeEmitter(MCII, STI, Ctx);
}
/// isDisp8 - Return true if this signed displacement fits in a 8-bit
@@ -245,7 +245,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
// Handle %rip relative addressing.
if (BaseReg == X86::RIP) { // [disp32+RIP] in X86-64 mode
- assert(Is64BitMode && "Rip-relative addressing requires 64-bit mode");
+ assert(is64BitMode() && "Rip-relative addressing requires 64-bit mode");
assert(IndexReg.getReg() == 0 && "Invalid rip-relative address");
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
@@ -284,7 +284,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
BaseRegNo != N86::ESP &&
// If there is no base register and we're in 64-bit mode, we need a SIB
// byte to emit an addr that is just 'disp32' (the non-RIP relative form).
- (!Is64BitMode || BaseReg != 0)) {
+ (!is64BitMode() || BaseReg != 0)) {
if (BaseReg == 0) { // [disp32] in X86-32 mode
EmitByte(ModRMByte(0, RegOpcodeField, 5), CurByte, OS);
@@ -379,7 +379,7 @@ void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
/// called VEX.
void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
int MemOperand, const MCInst &MI,
- const TargetInstrDesc &Desc,
+ const MCInstrDesc &Desc,
raw_ostream &OS) const {
bool HasVEX_4V = false;
if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
@@ -586,7 +586,7 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
/// size, and 3) use of X86-64 extended registers.
static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
- const TargetInstrDesc &Desc) {
+ const MCInstrDesc &Desc) {
unsigned REX = 0;
if (TSFlags & X86II::REX_W)
REX |= 1 << 3; // set REX.W
@@ -596,7 +596,7 @@ static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
unsigned NumOps = MI.getNumOperands();
// FIXME: MCInst should explicitize the two-addrness.
bool isTwoAddr = NumOps > 1 &&
- Desc.getOperandConstraint(1, TOI::TIED_TO) != -1;
+ Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1;
// If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix.
unsigned i = isTwoAddr ? 1 : 0;
@@ -713,7 +713,7 @@ void X86MCCodeEmitter::EmitSegmentOverridePrefix(uint64_t TSFlags,
/// Not present, it is -1.
void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
int MemOperand, const MCInst &MI,
- const TargetInstrDesc &Desc,
+ const MCInstrDesc &Desc,
raw_ostream &OS) const {
// Emit the lock opcode prefix as needed.
@@ -729,7 +729,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// Emit the address size opcode prefix as needed.
if ((TSFlags & X86II::AdSize) ||
- (MemOperand != -1 && Is64BitMode && Is32BitMemOperand(MI, MemOperand)))
+ (MemOperand != -1 && is64BitMode() && Is32BitMemOperand(MI, MemOperand)))
EmitByte(0x67, CurByte, OS);
// Emit the operand size opcode prefix as needed.
@@ -772,7 +772,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// Handle REX prefix.
// FIXME: Can this come before F2 etc to simplify emission?
- if (Is64BitMode) {
+ if (is64BitMode()) {
if (unsigned REX = DetermineREXPrefix(MI, TSFlags, Desc))
EmitByte(0x40 | REX, CurByte, OS);
}
@@ -803,7 +803,7 @@ void X86MCCodeEmitter::
EncodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups) const {
unsigned Opcode = MI.getOpcode();
- const TargetInstrDesc &Desc = TII.get(Opcode);
+ const MCInstrDesc &Desc = MCII.get(Opcode);
uint64_t TSFlags = Desc.TSFlags;
// Pseudo instructions don't get encoded.
@@ -814,9 +814,9 @@ EncodeInstruction(const MCInst &MI, raw_ostream &OS,
// FIXME: This should be handled during MCInst lowering.
unsigned NumOps = Desc.getNumOperands();
unsigned CurOp = 0;
- if (NumOps > 1 && Desc.getOperandConstraint(1, TOI::TIED_TO) != -1)
+ if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) != -1)
++CurOp;
- else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0)
+ else if (NumOps > 2 && Desc.getOperandConstraint(NumOps-1, MCOI::TIED_TO)== 0)
// Skip the last source operand that is tied_to the dest reg. e.g. LXADD32
--NumOps;
diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp
index 793156f..e385335 100644
--- a/lib/Target/X86/X86MCInstLower.cpp
+++ b/lib/Target/X86/X86MCInstLower.cpp
@@ -16,8 +16,8 @@
#include "X86MCInstLower.h"
#include "X86AsmPrinter.h"
#include "X86COFFMachineModuleInfo.h"
-#include "X86MCAsmInfo.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
diff --git a/lib/Target/X86/X86MachObjectWriter.cpp b/lib/Target/X86/X86MachObjectWriter.cpp
index 8f3dd32..3711038 100644
--- a/lib/Target/X86/X86MachObjectWriter.cpp
+++ b/lib/Target/X86/X86MachObjectWriter.cpp
@@ -8,19 +8,541 @@
//===----------------------------------------------------------------------===//
#include "X86.h"
+#include "X86FixupKinds.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCAsmLayout.h"
#include "llvm/MC/MCMachObjectWriter.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Object/MachOFormat.h"
+
using namespace llvm;
+using namespace llvm::object;
namespace {
class X86MachObjectWriter : public MCMachObjectTargetWriter {
+ void RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue);
+ void RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+
+ void RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
+ void RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue);
public:
X86MachObjectWriter(bool Is64Bit, uint32_t CPUType,
uint32_t CPUSubtype)
: MCMachObjectTargetWriter(Is64Bit, CPUType, CPUSubtype,
/*UseAggressiveSymbolFolding=*/Is64Bit) {}
+
+ void RecordRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFragment *Fragment, const MCFixup &Fixup,
+ MCValue Target, uint64_t &FixedValue) {
+ if (Writer->is64Bit())
+ RecordX86_64Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ else
+ RecordX86Relocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ }
};
}
+static bool isFixupKindRIPRel(unsigned Kind) {
+ return Kind == X86::reloc_riprel_4byte ||
+ Kind == X86::reloc_riprel_4byte_movq_load;
+}
+
+static unsigned getFixupKindLog2Size(unsigned Kind) {
+ switch (Kind) {
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ case FK_PCRel_1:
+ case FK_Data_1: return 0;
+ case FK_PCRel_2:
+ case FK_Data_2: return 1;
+ case FK_PCRel_4:
+ // FIXME: Remove these!!!
+ case X86::reloc_riprel_4byte:
+ case X86::reloc_riprel_4byte_movq_load:
+ case X86::reloc_signed_4byte:
+ case FK_Data_4: return 2;
+ case FK_Data_8: return 3;
+ }
+}
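+// For example, a 4-byte fixup such as FK_Data_4 yields 2 here, i.e. the
+// log2 of its width in bytes.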
+
+void X86MachObjectWriter::RecordX86_64Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned IsRIPRel = isFixupKindRIPRel(Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // See <reloc.h>.
+ uint32_t FixupOffset =
+ Layout.getFragmentOffset(Fragment) + Fixup.getOffset();
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ int64_t Value = 0;
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ Value = Target.getConstant();
+
+ if (IsPCRel) {
+ // Compensate for the relocation offset: Darwin x86_64 relocations only
+ // have the addend, and appear to have attempted to define it as the actual
+ // expression addend without the PCrel bias. However, instructions with
+ // data following the relocation are not accommodated (see the comment
+ // below regarding SIGNED{1,2,4}), so it isn't exactly that either.
+ Value += 1LL << Log2Size;
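+ // For instance, a 4-byte pc-relative fixup has Log2Size == 2, so the
+ // addend is biased by 1 << 2 == 4 bytes here.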
+ }
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ Type = macho::RIT_X86_64_Unsigned;
+ Index = 0;
+
+ // FIXME: I believe this is broken, I don't think the linker can understand
+ // it. I think it would require a local relocation, but I'm not sure if that
+ // would work either. The official way to get an absolute PCrel relocation
+ // is to use an absolute symbol (which we don't support yet).
+ if (IsPCRel) {
+ IsExtern = 1;
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else if (Target.getSymB()) { // A - B + constant
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData &A_SD = Asm.getSymbolData(*A);
+ const MCSymbolData *A_Base = Asm.getAtom(&A_SD);
+
+ const MCSymbol *B = &Target.getSymB()->getSymbol();
+ MCSymbolData &B_SD = Asm.getSymbolData(*B);
+ const MCSymbolData *B_Base = Asm.getAtom(&B_SD);
+
+ // Neither symbol can be modified.
+ if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None ||
+ Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported relocation of modified symbol");
+
+ // We don't support PCrel relocations of differences. Darwin 'as' doesn't
+ // implement most of these correctly.
+ if (IsPCRel)
+ report_fatal_error("unsupported pc-relative relocation of difference");
+
+ // The support for the situation where one or both of the symbols would
+ // require a local relocation is handled just like if the symbols were
+ // external. This is certainly used in the case of debug sections where the
+ // section has only temporary symbols and thus the symbols don't have base
+ // symbols. This is encoded using the section ordinal and non-extern
+ // relocation entries.
+
+ // Darwin 'as' doesn't emit correct relocations for this (it ends up with a
+ // single SIGNED relocation); reject it for now, except in the case where
+ // neither symbol has a base (A_Base and B_Base are equal because both are
+ // NULL).
+ if (A_Base == B_Base && A_Base)
+ report_fatal_error("unsupported relocation with identical base");
+
+ Value += Writer->getSymbolAddress(&A_SD, Layout) -
+ (A_Base == NULL ? 0 : Writer->getSymbolAddress(A_Base, Layout));
+ Value -= Writer->getSymbolAddress(&B_SD, Layout) -
+ (B_Base == NULL ? 0 : Writer->getSymbolAddress(B_Base, Layout));
+
+ if (A_Base) {
+ Index = A_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = A_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Unsigned;
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+
+ if (B_Base) {
+ Index = B_Base->getIndex();
+ IsExtern = 1;
+ }
+ else {
+ Index = B_SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ }
+ Type = macho::RIT_X86_64_Subtractor;
+ } else {
+ const MCSymbol *Symbol = &Target.getSymA()->getSymbol();
+ MCSymbolData &SD = Asm.getSymbolData(*Symbol);
+ const MCSymbolData *Base = Asm.getAtom(&SD);
+
+ // Relocations inside debug sections always use local relocations when
+ // possible. This seems to be done because the debugger doesn't fully
+ // understand x86_64 relocation entries, and expects to find values that
+ // have already been fixed up.
+ if (Symbol->isInSection()) {
+ const MCSectionMachO &Section = static_cast<const MCSectionMachO&>(
+ Fragment->getParent()->getSection());
+ if (Section.hasAttribute(MCSectionMachO::S_ATTR_DEBUG))
+ Base = 0;
+ }
+
+ // x86_64 almost always uses external relocations, except when there is no
+ // symbol to use as a base address (a local symbol with no preceding
+ // non-local symbol).
+ if (Base) {
+ Index = Base->getIndex();
+ IsExtern = 1;
+
+ // Add the local offset, if needed.
+ if (Base != &SD)
+ Value += Layout.getSymbolOffset(&SD) - Layout.getSymbolOffset(Base);
+ } else if (Symbol->isInSection() && !Symbol->isVariable()) {
+ // The index is the section ordinal (1-based).
+ Index = SD.getFragment()->getParent()->getOrdinal() + 1;
+ IsExtern = 0;
+ Value += Writer->getSymbolAddress(&SD, Layout);
+
+ if (IsPCRel)
+ Value -= FixupAddress + (1 << Log2Size);
+ } else if (Symbol->isVariable()) {
+ const MCExpr *Value = Symbol->getVariableValue();
+ int64_t Res;
+ bool isAbs = Value->EvaluateAsAbsolute(Res, Layout,
+ Writer->getSectionAddressMap());
+ if (isAbs) {
+ FixedValue = Res;
+ return;
+ } else {
+ report_fatal_error("unsupported relocation of variable '" +
+ Symbol->getName() + "'");
+ }
+ } else {
+ report_fatal_error("unsupported relocation of undefined symbol '" +
+ Symbol->getName() + "'");
+ }
+
+ MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind();
+ if (IsPCRel) {
+ if (IsRIPRel) {
+ if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // x86_64 distinguishes movq foo@GOTPCREL so that the linker can
+ // rewrite the movq to a leaq at link time if the symbol ends up in
+ // the same linkage unit.
+ if (unsigned(Fixup.getKind()) == X86::reloc_riprel_4byte_movq_load)
+ Type = macho::RIT_X86_64_GOTLoad;
+ else
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ Type = macho::RIT_X86_64_TLV;
+ } else if (Modifier != MCSymbolRefExpr::VK_None) {
+ report_fatal_error("unsupported symbol modifier in relocation");
+ } else {
+ Type = macho::RIT_X86_64_Signed;
+
+ // The Darwin x86_64 relocation format has a problem where it cannot
+ // encode an address (L<foo> + <constant>) which is outside the atom
+ // containing L<foo>. Generally, this shouldn't occur but it does
+ // happen when we have a RIPrel instruction with data following the
+ // relocation entry (e.g., movb $012, L0(%rip)). Even with the PCrel
+ // adjustment Darwin x86_64 uses, the offset is still negative and the
+ // linker has no way to recognize this.
+ //
+ // To work around this, Darwin uses several special relocation types
+ // to indicate the offsets. However, the specification or
+ // implementation of these seems to also be incomplete; they should
+ // adjust the addend as well based on the actual encoded instruction
+ // (the additional bias), but instead appear to just look at the final
+ // offset.
+ switch (-(Target.getConstant() + (1LL << Log2Size))) {
+ case 1: Type = macho::RIT_X86_64_Signed1; break;
+ case 2: Type = macho::RIT_X86_64_Signed2; break;
+ case 4: Type = macho::RIT_X86_64_Signed4; break;
+ }
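+ // Worked example: Target.getConstant() == -5 with Log2Size == 2 gives
+ // -(-5 + 4) == 1, so RIT_X86_64_Signed1 is chosen.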
+ }
+ } else {
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in branch "
+ "relocation");
+
+ Type = macho::RIT_X86_64_Branch;
+ }
+ } else {
+ if (Modifier == MCSymbolRefExpr::VK_GOT) {
+ Type = macho::RIT_X86_64_GOT;
+ } else if (Modifier == MCSymbolRefExpr::VK_GOTPCREL) {
+ // GOTPCREL is allowed as a modifier on non-PCrel instructions, in which
+ // case all we do is set the PCrel bit in the relocation entry; this is
+ // used with exception handling, for example. The source is required to
+ // include any necessary offset directly.
+ Type = macho::RIT_X86_64_GOT;
+ IsPCRel = 1;
+ } else if (Modifier == MCSymbolRefExpr::VK_TLVP) {
+ report_fatal_error("TLVP symbol modifier should have been rip-rel");
+ } else if (Modifier != MCSymbolRefExpr::VK_None)
+ report_fatal_error("unsupported symbol modifier in relocation");
+ else
+ Type = macho::RIT_X86_64_Unsigned;
+ }
+ }
+
+ // x86_64 always writes custom values into the fixups.
+ FixedValue = Value;
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordScatteredRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ unsigned Log2Size,
+ uint64_t &FixedValue) {
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Type = macho::RIT_Vanilla;
+
+ // See <reloc.h>.
+ const MCSymbol *A = &Target.getSymA()->getSymbol();
+ MCSymbolData *A_SD = &Asm.getSymbolData(*A);
+
+ if (!A_SD->getFragment())
+ report_fatal_error("symbol '" + A->getName() +
+ "' can not be undefined in a subtraction expression");
+
+ uint32_t Value = Writer->getSymbolAddress(A_SD, Layout);
+ uint64_t SecAddr = Writer->getSectionAddress(A_SD->getFragment()->getParent());
+ FixedValue += SecAddr;
+ uint32_t Value2 = 0;
+
+ if (const MCSymbolRefExpr *B = Target.getSymB()) {
+ MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol());
+
+ if (!B_SD->getFragment())
+ report_fatal_error("symbol '" + B->getSymbol().getName() +
+ "' can not be undefined in a subtraction expression");
+
+ // Select the appropriate difference relocation type.
+ //
+ // Note that there is no longer any semantic difference between these two
+ // relocation types from the linker's point of view; this is done solely for
+ // pedantic compatibility with 'as'.
+ Type = A_SD->isExternal() ? (unsigned)macho::RIT_Difference :
+ (unsigned)macho::RIT_Generic_LocalDifference;
+ Value2 = Writer->getSymbolAddress(B_SD, Layout);
+ FixedValue -= Writer->getSectionAddress(B_SD->getFragment()->getParent());
+ }
+
+ // Relocations are written out in reverse order, so the PAIR comes first.
+ if (Type == macho::RIT_Difference ||
+ Type == macho::RIT_Generic_LocalDifference) {
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((0 << 0) |
+ (macho::RIT_Pair << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value2;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+ }
+
+ macho::RelocationEntry MRE;
+ MRE.Word0 = ((FixupOffset << 0) |
+ (Type << 24) |
+ (Log2Size << 28) |
+ (IsPCRel << 30) |
+ macho::RF_Scattered);
+ MRE.Word1 = Value;
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordTLVPRelocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ assert(Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP &&
+ !is64Bit() &&
+ "Should only be called with a 32-bit TLVP relocation!");
+
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+ uint32_t Value = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned IsPCRel = 0;
+
+ // Get the symbol data.
+ MCSymbolData *SD_A = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+ unsigned Index = SD_A->getIndex();
+
+ // We're only going to have a second symbol in PIC mode, and it'll be a
+ // subtraction from the picbase. For 32-bit PIC the addend is the difference
+ // between the picbase and the next address. For 32-bit static code the
+ // addend is zero.
+ if (Target.getSymB()) {
+ // If this is a subtraction then we're pcrel.
+ uint32_t FixupAddress =
+ Writer->getFragmentAddress(Fragment, Layout) + Fixup.getOffset();
+ MCSymbolData *SD_B = &Asm.getSymbolData(Target.getSymB()->getSymbol());
+ IsPCRel = 1;
+ FixedValue = (FixupAddress - Writer->getSymbolAddress(SD_B, Layout) +
+ Target.getConstant());
+ FixedValue += 1ULL << Log2Size;
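+ // As with the x86_64 case, this applies the pc-relative bias; a 4-byte
+ // fixup (Log2Size == 2) advances the fixed value by 4.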
+ } else {
+ FixedValue = 0;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = Value;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (1 << 27) | // Extern
+ (macho::RIT_Generic_TLV << 28)); // Type
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
+void X86MachObjectWriter::RecordX86Relocation(MachObjectWriter *Writer,
+ const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFragment *Fragment,
+ const MCFixup &Fixup,
+ MCValue Target,
+ uint64_t &FixedValue) {
+ unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind());
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.getKind());
+
+ // If this is a 32-bit TLVP reloc it's handled a bit differently.
+ if (Target.getSymA() &&
+ Target.getSymA()->getKind() == MCSymbolRefExpr::VK_TLVP) {
+ RecordTLVPRelocation(Writer, Asm, Layout, Fragment, Fixup, Target,
+ FixedValue);
+ return;
+ }
+
+ // If this is a difference or a defined symbol plus an offset, then we need a
+ // scattered relocation entry. Differences always require scattered
+ // relocations.
+ if (Target.getSymB())
+ return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // Get the symbol data, if any.
+ MCSymbolData *SD = 0;
+ if (Target.getSymA())
+ SD = &Asm.getSymbolData(Target.getSymA()->getSymbol());
+
+ // If this is an internal relocation with an offset, it also needs a scattered
+ // relocation entry.
+ uint32_t Offset = Target.getConstant();
+ if (IsPCRel)
+ Offset += 1 << Log2Size;
+ if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD))
+ return RecordScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup,
+ Target, Log2Size, FixedValue);
+
+ // See <reloc.h>.
+ uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset();
+ unsigned Index = 0;
+ unsigned IsExtern = 0;
+ unsigned Type = 0;
+
+ if (Target.isAbsolute()) { // constant
+ // SymbolNum of 0 indicates the absolute section.
+ //
+ // FIXME: Currently, these are never generated (see code below). I cannot
+ // find a case where they are actually emitted.
+ Type = macho::RIT_Vanilla;
+ } else {
+ // Resolve constant variables.
+ if (SD->getSymbol().isVariable()) {
+ int64_t Res;
+ if (SD->getSymbol().getVariableValue()->EvaluateAsAbsolute(
+ Res, Layout, Writer->getSectionAddressMap())) {
+ FixedValue = Res;
+ return;
+ }
+ }
+
+ // Check whether we need an external or internal relocation.
+ if (Writer->doesSymbolRequireExternRelocation(SD)) {
+ IsExtern = 1;
+ Index = SD->getIndex();
+ // For external relocations, make sure to offset the fixup value to
+ // compensate for the addend of the symbol address, if it was
+ // undefined. This occurs with weak definitions, for example.
+ if (!SD->Symbol->isUndefined())
+ FixedValue -= Layout.getSymbolOffset(SD);
+ } else {
+ // The index is the section ordinal (1-based).
+ const MCSectionData &SymSD = Asm.getSectionData(
+ SD->getSymbol().getSection());
+ Index = SymSD.getOrdinal() + 1;
+ FixedValue += Writer->getSectionAddress(&SymSD);
+ }
+ if (IsPCRel)
+ FixedValue -= Writer->getSectionAddress(Fragment->getParent());
+
+ Type = macho::RIT_Vanilla;
+ }
+
+ // struct relocation_info (8 bytes)
+ macho::RelocationEntry MRE;
+ MRE.Word0 = FixupOffset;
+ MRE.Word1 = ((Index << 0) |
+ (IsPCRel << 24) |
+ (Log2Size << 25) |
+ (IsExtern << 27) |
+ (Type << 28));
+ Writer->addRelocation(Fragment->getParent(), MRE);
+}
+
MCObjectWriter *llvm::createX86MachObjectWriter(raw_ostream &OS,
bool Is64Bit,
uint32_t CPUType,
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 1ad6203..f2faf59 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -39,6 +39,10 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/CommandLine.h"
+
+#define GET_REGINFO_TARGET_DESC
+#include "X86GenRegisterInfo.inc"
+
using namespace llvm;
cl::opt<bool>
@@ -49,18 +53,11 @@ ForceStackAlign("force-align-stack",
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
const TargetInstrInfo &tii)
- : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
- X86::ADJCALLSTACKDOWN64 :
- X86::ADJCALLSTACKDOWN32,
- tm.getSubtarget<X86Subtarget>().is64Bit() ?
- X86::ADJCALLSTACKUP64 :
- X86::ADJCALLSTACKUP32),
- TM(tm), TII(tii) {
+ : X86GenRegisterInfo(), TM(tm), TII(tii) {
// Cache some information.
const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
Is64Bit = Subtarget->is64Bit();
IsWin64 = Subtarget->isTargetWin64();
- StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Is64Bit) {
SlotSize = 8;
@@ -107,6 +104,21 @@ int X86RegisterInfo::getLLVMRegNum(unsigned DwarfRegNo, bool isEH) const {
return X86GenRegisterInfo::getLLVMRegNumFull(DwarfRegNo, Flavour);
}
+/// getCompactUnwindRegNum - This function maps the register to the number for
+/// compact unwind encoding. Return -1 if the register isn't valid.
+int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const {
+ switch (getLLVMRegNum(RegNum, isEH)) {
+ case X86::EBX: case X86::RBX: return 1;
+ case X86::ECX: case X86::R12: return 2;
+ case X86::EDX: case X86::R13: return 3;
+ case X86::EDI: case X86::R14: return 4;
+ case X86::ESI: case X86::R15: return 5;
+ case X86::EBP: case X86::RBP: return 6;
+ }
+
+ return -1;
+}
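+// Note: RegNum here is a DWARF register number; getLLVMRegNum translates it
+// back to an LLVM register before the switch above, so the DWARF number for
+// RBX, for example, maps to compact unwind register 1.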
+
int
X86RegisterInfo::getSEHRegNum(unsigned i) const {
int reg = getX86RegNum(i);
@@ -495,18 +507,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::BPL);
}
- // Mark the x87 stack registers as reserved, since they don't behave normally
- // with respect to liveness. We don't fully model the effects of x87 stack
- // pushes and pops after stackification.
- Reserved.set(X86::ST0);
- Reserved.set(X86::ST1);
- Reserved.set(X86::ST2);
- Reserved.set(X86::ST3);
- Reserved.set(X86::ST4);
- Reserved.set(X86::ST5);
- Reserved.set(X86::ST6);
- Reserved.set(X86::ST7);
-
// Mark the segment registers as reserved.
Reserved.set(X86::CS);
Reserved.set(X86::SS);
@@ -517,13 +517,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Reserve the registers that only exist in 64-bit mode.
if (!Is64Bit) {
+ // These 8-bit registers are part of the x86-64 extension even though
+ // their super-registers are the old 32-bit registers.
+ Reserved.set(X86::SIL);
+ Reserved.set(X86::DIL);
+ Reserved.set(X86::BPL);
+ Reserved.set(X86::SPL);
+
for (unsigned n = 0; n != 8; ++n) {
+ // R8, R9, ...
const unsigned GPR64[] = {
X86::R8, X86::R9, X86::R10, X86::R11,
X86::R12, X86::R13, X86::R14, X86::R15
};
- for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI;
- ++AI)
+ for (const unsigned *AI = getOverlaps(GPR64[n]); unsigned Reg = *AI; ++AI)
Reserved.set(Reg);
// XMM8, XMM9, ...
@@ -550,6 +557,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
F->hasFnAttr(Attribute::StackAlignment));
@@ -608,7 +616,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering();
bool reserveCallFrame = TFI->hasReservedCallFrame(MF);
int Opcode = I->getOpcode();
- bool isDestroy = Opcode == getCallFrameDestroyOpcode();
+ bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
@@ -625,16 +633,17 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
// alignment boundary.
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign;
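+ // For example, Amount == 20 with StackAlign == 16 rounds up to 32.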
MachineInstr *New = 0;
- if (Opcode == getCallFrameSetupOpcode()) {
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
New = BuildMI(MF, DL, TII.get(getSUBriOpcode(Is64Bit, Amount)),
StackPtr)
.addReg(StackPtr)
.addImm(Amount);
} else {
- assert(Opcode == getCallFrameDestroyOpcode());
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
// Factor out the amount the callee already popped.
Amount -= CalleeAmt;
@@ -657,7 +666,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
return;
}
- if (Opcode == getCallFrameDestroyOpcode() && CalleeAmt) {
+ if (Opcode == TII.getCallFrameDestroyOpcode() && CalleeAmt) {
// If we are performing frame pointer elimination and if the callee pops
// something off the stack pointer, add it back. We do this until we have
// more advanced stack pointer tracking ability.
@@ -667,6 +676,13 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// The EFLAGS implicit def is dead.
New->getOperand(3).setIsDead();
+
+ // We are not tracking the stack pointer adjustment by the callee, so make
+ // sure we restore the stack pointer immediately after the call; there may
+ // be spill code inserted between the CALL and ADJCALLSTACKUP instructions.
+ MachineBasicBlock::iterator B = MBB.begin();
+ while (I != B && !llvm::prior(I)->getDesc().isCall())
+ --I;
MBB.insert(I, New);
}
}
@@ -713,7 +729,10 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (MI.getOperand(i+3).isImm()) {
// Offset is a 32-bit integer.
- int Offset = FIOffset + (int)(MI.getOperand(i + 3).getImm());
+ int Imm = (int)(MI.getOperand(i + 3).getImm());
+ int Offset = FIOffset + Imm;
+ assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
+ "Requesting 64-bit offset in 32-bit immediate!");
MI.getOperand(i + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
@@ -910,8 +929,6 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
}
}
-#include "X86GenRegisterInfo.inc"
-
namespace {
struct MSAH : public MachineFunctionPass {
static char ID;
@@ -920,10 +937,10 @@ namespace {
virtual bool runOnMachineFunction(MachineFunction &MF) {
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF.getTarget());
- const X86RegisterInfo *X86RI = TM->getRegisterInfo();
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
MachineRegisterInfo &RI = MF.getRegInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
- unsigned StackAlignment = X86RI->getStackAlignment();
+ unsigned StackAlignment = TFI->getStackAlignment();
// Be over-conservative: scan over all vreg defs and find whether vector
// registers are used. If yes, there is a possibility that vector register
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index dd3d3dc..a12eb12 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -15,7 +15,9 @@
#define X86REGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
-#include "X86GenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "X86GenRegisterInfo.inc"
namespace llvm {
class Type;
@@ -56,10 +58,6 @@ private:
///
unsigned SlotSize;
- /// StackAlign - Default stack alignment.
- ///
- unsigned StackAlign;
-
/// StackPtr - X86 physical register used as stack ptr.
///
unsigned StackPtr;
@@ -75,8 +73,6 @@ public:
/// register identifier.
static unsigned getX86RegNum(unsigned RegNo);
- unsigned getStackAlignment() const { return StackAlign; }
-
/// getDwarfRegNum - allows modification of X86GenRegisterInfo::getDwarfRegNum
/// (created by TableGen) for target dependencies.
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
@@ -85,6 +81,10 @@ public:
// FIXME: This should be tablegen'd like getDwarfRegNum is
int getSEHRegNum(unsigned i) const;
+ /// getCompactUnwindRegNum - This function maps the register to the number for
+ /// compact unwind encoding. Return -1 if the register isn't valid.
+ int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const;
+
/// Code Generation virtual methods...
///
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index f1d149c..203722a 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -206,15 +206,22 @@ let Namespace = "X86" in {
def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegAlias<XMM15>;
}
- // Floating point stack registers
- def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
- def ST1 : Register<"st(1)">, DwarfRegNum<[34, 13, 12]>;
- def ST2 : Register<"st(2)">, DwarfRegNum<[35, 14, 13]>;
- def ST3 : Register<"st(3)">, DwarfRegNum<[36, 15, 14]>;
- def ST4 : Register<"st(4)">, DwarfRegNum<[37, 16, 15]>;
- def ST5 : Register<"st(5)">, DwarfRegNum<[38, 17, 16]>;
- def ST6 : Register<"st(6)">, DwarfRegNum<[39, 18, 17]>;
- def ST7 : Register<"st(7)">, DwarfRegNum<[40, 19, 18]>;
+ class STRegister<string Name, list<Register> A> : Register<Name> {
+ let Aliases = A;
+ }
+
+ // Floating point stack registers. These don't map one-to-one to the FP
+ // pseudo registers, but we still mark them as aliasing FP registers. That
+ // way both kinds can be live without exceeding the stack depth. ST registers
+ // are only live around inline assembly.
+ def ST0 : STRegister<"st(0)", []>, DwarfRegNum<[33, 12, 11]>;
+ def ST1 : STRegister<"st(1)", [FP6]>, DwarfRegNum<[34, 13, 12]>;
+ def ST2 : STRegister<"st(2)", [FP5]>, DwarfRegNum<[35, 14, 13]>;
+ def ST3 : STRegister<"st(3)", [FP4]>, DwarfRegNum<[36, 15, 14]>;
+ def ST4 : STRegister<"st(4)", [FP3]>, DwarfRegNum<[37, 16, 15]>;
+ def ST5 : STRegister<"st(5)", [FP2]>, DwarfRegNum<[38, 17, 16]>;
+ def ST6 : STRegister<"st(6)", [FP1]>, DwarfRegNum<[39, 18, 17]>;
+ def ST7 : STRegister<"st(7)", [FP0]>, DwarfRegNum<[40, 19, 18]>;
// Status flags register
def EFLAGS : Register<"flags">;
@@ -279,58 +286,23 @@ let Namespace = "X86" in {
// require a REX prefix. For example, "addb %ah, %dil" and "movzbl %ah, %r8d"
// cannot be encoded.
def GR8 : RegisterClass<"X86", [i8], 8,
- [AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
- R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B]> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- static const unsigned X86_GR8_AO_64[] = {
- X86::AL, X86::CL, X86::DL, X86::SIL, X86::DIL,
- X86::R8B, X86::R9B, X86::R10B, X86::R11B,
- X86::BL, X86::R14B, X86::R15B, X86::R12B, X86::R13B, X86::BPL
- };
-
- GR8Class::iterator
- GR8Class::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR8_AO_64;
- else
- return begin();
- }
-
- GR8Class::iterator
- GR8Class::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const TargetFrameLowering *TFI = TM.getFrameLowering();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- const X86MachineFunctionInfo *MFI = MF.getInfo<X86MachineFunctionInfo>();
- // Does the function dedicate RBP / EBP to being a frame ptr?
- if (!Subtarget.is64Bit())
- // In 32-mode, none of the 8-bit registers aliases EBP or ESP.
- return begin() + 8;
- else if (TFI->hasFP(MF) || MFI->getReserveFP())
- // If so, don't allocate SPL or BPL.
- return array_endof(X86_GR8_AO_64) - 1;
- else
- // If not, just don't allocate SPL.
- return array_endof(X86_GR8_AO_64);
- }
+ (add AL, CL, DL, AH, CH, DH, BL, BH, SIL, DIL, BPL, SPL,
+ R8B, R9B, R10B, R11B, R14B, R15B, R12B, R13B)> {
+ let AltOrders = [(sub GR8, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
}];
}
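+// AltOrderSelect returns an index into AltOrders: 0 keeps the order above,
+// while 1 selects the first alternative. In 64-bit mode this drops AH, BH,
+// CH and DH, which cannot be used in instructions that require a REX prefix.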
def GR16 : RegisterClass<"X86", [i16], 16,
- [AX, CX, DX, SI, DI, BX, BP, SP,
- R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W]> {
+ (add AX, CX, DX, SI, DI, BX, BP, SP,
+ R8W, R9W, R10W, R11W, R14W, R15W, R12W, R13W)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi)];
}
def GR32 : RegisterClass<"X86", [i32], 32,
- [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
- R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP,
+ R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
@@ -338,8 +310,8 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
def GR64 : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
+ (add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+ RBX, R14, R15, R12, R13, RBP, RSP, RIP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32 sub_32bit)];
@@ -348,16 +320,13 @@ def GR64 : RegisterClass<"X86", [i64], 64,
// Segment registers for use by MOV instructions (and others) that have a
// segment register as one operand. Always contain a 16-bit segment
// descriptor.
-def SEGMENT_REG : RegisterClass<"X86", [i16], 16, [CS, DS, SS, ES, FS, GS]>;
+def SEGMENT_REG : RegisterClass<"X86", [i16], 16, (add CS, DS, SS, ES, FS, GS)>;
// Debug registers.
-def DEBUG_REG : RegisterClass<"X86", [i32], 32,
- [DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7]>;
+def DEBUG_REG : RegisterClass<"X86", [i32], 32, (sequence "DR%u", 0, 7)>;
// Control registers.
-def CONTROL_REG : RegisterClass<"X86", [i64], 64,
- [CR0, CR1, CR2, CR3, CR4, CR5, CR6, CR7, CR8,
- CR9, CR10, CR11, CR12, CR13, CR14, CR15]>;
+def CONTROL_REG : RegisterClass<"X86", [i64], 64, (sequence "CR%u", 0, 15)>;
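// For reference, (sequence "DR%u", 0, 7) is shorthand for the explicit list
// it replaces, (add DR0, DR1, DR2, DR3, DR4, DR5, DR6, DR7); likewise
// (sequence "CR%u", 0, 15) expands to CR0 through CR15 in order.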
// GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD, GR64_ABCD - Subclasses of
// GR8, GR16, GR32, and GR64 which contain just the "a", "b", "c", and "d"
@@ -365,99 +334,69 @@ def CONTROL_REG : RegisterClass<"X86", [i64], 64,
// that support 8-bit subreg operations. On x86-64, GR16_ABCD, GR32_ABCD,
// and GR64_ABCD are classes for registers that support 8-bit h-register
// operations.
-def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, [AL, CL, DL, BL]>;
-def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, [AH, CH, DH, BH]>;
-def GR16_ABCD : RegisterClass<"X86", [i16], 16, [AX, CX, DX, BX]> {
+def GR8_ABCD_L : RegisterClass<"X86", [i8], 8, (add AL, CL, DL, BL)>;
+def GR8_ABCD_H : RegisterClass<"X86", [i8], 8, (add AH, CH, DH, BH)>;
+def GR16_ABCD : RegisterClass<"X86", [i16], 16, (add AX, CX, DX, BX)> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit), (GR8_ABCD_H sub_8bit_hi)];
}
-def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
+def GR32_ABCD : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX, EBX)> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit),
(GR8_ABCD_H sub_8bit_hi),
(GR16_ABCD sub_16bit)];
}
-def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
+def GR64_ABCD : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RBX)> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit),
(GR8_ABCD_H sub_8bit_hi),
(GR16_ABCD sub_16bit),
(GR32_ABCD sub_32bit)];
}
-def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
+def GR32_TC : RegisterClass<"X86", [i32], 32, (add EAX, ECX, EDX)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
-def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
- R8, R9, R11, RIP]> {
+def GR64_TC : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX, RSI, RDI,
+ R8, R9, R11, RIP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32_TC sub_32bit)];
}
-def GR64_TCW64 : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX,
- R8, R9, R11]>;
+def GR64_TCW64 : RegisterClass<"X86", [i64], 64, (add RAX, RCX, RDX,
+ R8, R9, R11)>;
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,
- [AL, CL, DL, AH, CH, DH, BL, BH]> {
- let MethodProtos = [{
- iterator allocation_order_begin(const MachineFunction &MF) const;
- iterator allocation_order_end(const MachineFunction &MF) const;
- }];
- let MethodBodies = [{
- // In 64-bit mode, it's not safe to blindly allocate H registers.
- static const unsigned X86_GR8_NOREX_AO_64[] = {
- X86::AL, X86::CL, X86::DL, X86::BL
- };
-
- GR8_NOREXClass::iterator
- GR8_NOREXClass::allocation_order_begin(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return X86_GR8_NOREX_AO_64;
- else
- return begin();
- }
-
- GR8_NOREXClass::iterator
- GR8_NOREXClass::allocation_order_end(const MachineFunction &MF) const {
- const TargetMachine &TM = MF.getTarget();
- const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>();
- if (Subtarget.is64Bit())
- return array_endof(X86_GR8_NOREX_AO_64);
- else
- return end();
- }
+ (add AL, CL, DL, AH, CH, DH, BL, BH)> {
+ let AltOrders = [(sub GR8_NOREX, AH, BH, CH, DH)];
+ let AltOrderSelect = [{
+ return MF.getTarget().getSubtarget<X86Subtarget>().is64Bit();
}];
}
// GR16_NOREX - GR16 registers which do not require a REX prefix.
def GR16_NOREX : RegisterClass<"X86", [i16], 16,
- [AX, CX, DX, SI, DI, BX, BP, SP]> {
+ (add AX, CX, DX, SI, DI, BX, BP, SP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi)];
}
// GR32_NOREX - GR32 registers which do not require a REX prefix.
def GR32_NOREX : RegisterClass<"X86", [i32], 32,
- [EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP]> {
+ (add EAX, ECX, EDX, ESI, EDI, EBX, EBP, ESP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit)];
}
// GR64_NOREX - GR64 registers which do not require a REX prefix.
def GR64_NOREX : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP]> {
+ (add RAX, RCX, RDX, RSI, RDI, RBX, RBP, RSP, RIP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit),
(GR32_NOREX sub_32bit)];
}
// GR32_NOSP - GR32 registers except ESP.
-def GR32_NOSP : RegisterClass<"X86", [i32], 32,
- [EAX, ECX, EDX, ESI, EDI, EBX, EBP,
- R8D, R9D, R10D, R11D, R14D, R15D, R12D, R13D]> {
+def GR32_NOSP : RegisterClass<"X86", [i32], 32, (sub GR32, ESP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi), (GR16 sub_16bit)];
}
// GR64_NOSP - GR64 registers except RSP (and RIP).
-def GR64_NOSP : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
- RBX, R14, R15, R12, R13, RBP]> {
+def GR64_NOSP : RegisterClass<"X86", [i64], 64, (sub GR64, RSP, RIP)> {
let SubRegClasses = [(GR8 sub_8bit, sub_8bit_hi),
(GR16 sub_16bit),
(GR32_NOSP sub_32bit)];
@@ -466,36 +405,30 @@ def GR64_NOSP : RegisterClass<"X86", [i64], 64,
// GR32_NOREX_NOSP - GR32 registers which do not require a REX prefix except
// ESP.
def GR32_NOREX_NOSP : RegisterClass<"X86", [i32], 32,
- [EAX, ECX, EDX, ESI, EDI, EBX, EBP]> {
+ (and GR32_NOREX, GR32_NOSP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit)];
}
// GR64_NOREX_NOSP - GR64_NOREX registers except RSP.
def GR64_NOREX_NOSP : RegisterClass<"X86", [i64], 64,
- [RAX, RCX, RDX, RSI, RDI, RBX, RBP]> {
+ (and GR64_NOREX, GR64_NOSP)> {
let SubRegClasses = [(GR8_NOREX sub_8bit, sub_8bit_hi),
(GR16_NOREX sub_16bit),
(GR32_NOREX_NOSP sub_32bit)];
}
// A class to support the 'A' assembler constraint: EAX then EDX.
-def GR32_AD : RegisterClass<"X86", [i32], 32, [EAX, EDX]> {
+def GR32_AD : RegisterClass<"X86", [i32], 32, (add EAX, EDX)> {
let SubRegClasses = [(GR8_ABCD_L sub_8bit),
(GR8_ABCD_H sub_8bit_hi),
(GR16_ABCD sub_16bit)];
}
// Scalar SSE2 floating point registers.
-def FR32 : RegisterClass<"X86", [f32], 32,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11,
- XMM12, XMM13, XMM14, XMM15]>;
+def FR32 : RegisterClass<"X86", [f32], 32, (sequence "XMM%u", 0, 15)>;
-def FR64 : RegisterClass<"X86", [f64], 64,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11,
- XMM12, XMM13, XMM14, XMM15]>;
+def FR64 : RegisterClass<"X86", [f64], 64, (add FR32)>;
// FIXME: This sets up the floating point register files as though they are f64
@@ -504,37 +437,31 @@ def FR64 : RegisterClass<"X86", [f64], 64,
// faster on common hardware. In reality, this should be controlled by a
// command line option or something.
-def RFP32 : RegisterClass<"X86",[f32], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP64 : RegisterClass<"X86",[f64], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
-def RFP80 : RegisterClass<"X86",[f80], 32, [FP0, FP1, FP2, FP3, FP4, FP5, FP6]>;
+def RFP32 : RegisterClass<"X86",[f32], 32, (sequence "FP%u", 0, 6)>;
+def RFP64 : RegisterClass<"X86",[f64], 32, (add RFP32)>;
+def RFP80 : RegisterClass<"X86",[f80], 32, (add RFP32)>;
// Floating point stack registers (these are not allocatable by the
// register allocator - the floating point stackifier is responsible
// for transforming FPn allocations to STn registers)
-def RST : RegisterClass<"X86", [f80, f64, f32], 32,
- [ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7]> {
+def RST : RegisterClass<"X86", [f80, f64, f32], 32, (sequence "ST%u", 0, 7)> {
let isAllocatable = 0;
}
// Generic vector registers: VR64 and VR128.
-def VR64: RegisterClass<"X86", [x86mmx], 64,
- [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7]>;
-def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
- [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
- XMM8, XMM9, XMM10, XMM11,
- XMM12, XMM13, XMM14, XMM15]> {
+def VR64: RegisterClass<"X86", [x86mmx], 64, (sequence "MM%u", 0, 7)>;
+def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ 128, (add FR32)> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd)];
}
def VR256 : RegisterClass<"X86", [v32i8, v8i32, v4i64, v8f32, v4f64], 256,
- [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
- YMM8, YMM9, YMM10, YMM11,
- YMM12, YMM13, YMM14, YMM15]> {
+ (sequence "YMM%u", 0, 15)> {
let SubRegClasses = [(FR32 sub_ss), (FR64 sub_sd), (VR128 sub_xmm)];
}
// Status flags registers.
-def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
+def CCR : RegisterClass<"X86", [i32], 32, (add EFLAGS)> {
let CopyCost = -1; // Don't allow copying of status registers.
let isAllocatable = 0;
}
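The register-class bodies in this file now use TableGen set operators rather
than spelled-out register lists. A short sketch of the four operators used
above (hypothetical class names, illustration only):

  def ByName : RegisterClass<"X86", [i32], 32, (sequence "R%uD", 8, 11)>;    // R8D..R11D
  def Minus  : RegisterClass<"X86", [i32], 32, (sub GR32, ESP, EBP)>;        // set difference
  def Common : RegisterClass<"X86", [i32], 32, (and GR32_NOREX, GR32_NOSP)>; // intersection
  def Copy   : RegisterClass<"X86", [f64], 64, (add FR32)>;                  // reuse FR32's members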
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 481e821..5e6c659 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -7,21 +7,24 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the X86 specific subclass of TargetSubtarget.
+// This file implements the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "subtarget"
#include "X86Subtarget.h"
#include "X86InstrInfo.h"
-#include "X86GenSubtarget.inc"
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallVector.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "X86GenSubtargetInfo.inc"
+
using namespace llvm;
#if defined(_MSC_VER)
@@ -154,7 +157,7 @@ const char *X86Subtarget::getBZeroEntry() const {
/// IsLegalToCallImmediateAddr - Return true if the subtarget allows calls
/// to immediate address.
bool X86Subtarget::IsLegalToCallImmediateAddr(const TargetMachine &TM) const {
- if (Is64Bit)
+ if (In64BitMode)
return false;
return isTargetELF() || TM.getRelocationModel() == Reloc::Static;
}
@@ -170,73 +173,6 @@ unsigned X86Subtarget::getSpecialAddressLatency() const {
return 200;
}
-/// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
-/// specified arguments. If we can't run cpuid on the host, return true.
-static bool GetCpuIDAndInfo(unsigned value, unsigned *rEAX,
- unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
- #if defined(__GNUC__)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- asm ("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- int registers[4];
- __cpuid(registers, value);
- *rEAX = registers[0];
- *rEBX = registers[1];
- *rECX = registers[2];
- *rEDX = registers[3];
- return false;
- #endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- #if defined(__GNUC__)
- asm ("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a" (*rEAX),
- "=S" (*rEBX),
- "=c" (*rECX),
- "=d" (*rEDX)
- : "a" (value));
- return false;
- #elif defined(_MSC_VER)
- __asm {
- mov eax,value
- cpuid
- mov esi,rEAX
- mov dword ptr [esi],eax
- mov esi,rEBX
- mov dword ptr [esi],ebx
- mov esi,rECX
- mov dword ptr [esi],ecx
- mov esi,rEDX
- mov dword ptr [esi],edx
- }
- return false;
- #endif
-#endif
- return true;
-}
-
-static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
- Family = (EAX >> 8) & 0xf; // Bits 8 - 11
- Model = (EAX >> 4) & 0xf; // Bits 4 - 7
- if (Family == 6 || Family == 0xf) {
- if (Family == 0xf)
- // Examine extended family ID if family ID is F.
- Family += (EAX >> 20) & 0xff; // Bits 20 - 27
- // Examine extended model ID if family ID is 6 or F.
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
- }
-}
-
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
union {
@@ -244,50 +180,66 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
char c[12];
} text;
- if (GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
+ if (X86_MC::GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1))
return;
- GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
+ X86_MC::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX);
- if ((EDX >> 15) & 1) HasCMov = true;
- if ((EDX >> 23) & 1) X86SSELevel = MMX;
- if ((EDX >> 25) & 1) X86SSELevel = SSE1;
- if ((EDX >> 26) & 1) X86SSELevel = SSE2;
- if (ECX & 0x1) X86SSELevel = SSE3;
- if ((ECX >> 9) & 1) X86SSELevel = SSSE3;
- if ((ECX >> 19) & 1) X86SSELevel = SSE41;
- if ((ECX >> 20) & 1) X86SSELevel = SSE42;
+  // Braces matter here: each ToggleFeature must fire only when its CPUID bit
+  // is actually set.
+  if ((EDX >> 15) & 1) { HasCMov = true;       ToggleFeature(X86::FeatureCMOV);  }
+  if ((EDX >> 23) & 1) { X86SSELevel = MMX;    ToggleFeature(X86::FeatureMMX);   }
+  if ((EDX >> 25) & 1) { X86SSELevel = SSE1;   ToggleFeature(X86::FeatureSSE1);  }
+  if ((EDX >> 26) & 1) { X86SSELevel = SSE2;   ToggleFeature(X86::FeatureSSE2);  }
+  if (ECX & 0x1)       { X86SSELevel = SSE3;   ToggleFeature(X86::FeatureSSE3);  }
+  if ((ECX >> 9) & 1)  { X86SSELevel = SSSE3;  ToggleFeature(X86::FeatureSSSE3); }
+  if ((ECX >> 19) & 1) { X86SSELevel = SSE41;  ToggleFeature(X86::FeatureSSE41); }
+  if ((ECX >> 20) & 1) { X86SSELevel = SSE42;  ToggleFeature(X86::FeatureSSE42); }
// FIXME: AVX codegen support is not ready.
- //if ((ECX >> 28) & 1) { HasAVX = true; X86SSELevel = NoMMXSSE; }
+  //if ((ECX >> 28) & 1) { HasAVX = true; ToggleFeature(X86::FeatureAVX); }
bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
- HasCLMUL = IsIntel && ((ECX >> 1) & 0x1);
- HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
- HasPOPCNT = IsIntel && ((ECX >> 23) & 0x1);
- HasAES = IsIntel && ((ECX >> 25) & 0x1);
+  if (IsIntel && ((ECX >> 1) & 0x1))  { HasCLMUL = true;  ToggleFeature(X86::FeatureCLMUL);  }
+  if (IsIntel && ((ECX >> 12) & 0x1)) { HasFMA3 = true;   ToggleFeature(X86::FeatureFMA3);   }
+  if (IsIntel && ((ECX >> 23) & 0x1)) { HasPOPCNT = true; ToggleFeature(X86::FeaturePOPCNT); }
+  if (IsIntel && ((ECX >> 25) & 0x1)) { HasAES = true;    ToggleFeature(X86::FeatureAES);    }
if (IsIntel || IsAMD) {
// Determine if bit test memory instructions are slow.
unsigned Family = 0;
unsigned Model = 0;
- DetectFamilyModel(EAX, Family, Model);
- IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+ X86_MC::DetectFamilyModel(EAX, Family, Model);
+ if (IsAMD || (Family == 6 && Model >= 13)) {
+ IsBTMemSlow = true;
+ ToggleFeature(X86::FeatureSlowBTMem);
+ }
// If it's Nehalem, unaligned memory access is fast.
- if (Family == 15 && Model == 26)
+ if (Family == 15 && Model == 26) {
IsUAMemFast = true;
+ ToggleFeature(X86::FeatureFastUAMem);
+ }
- GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- HasX86_64 = (EDX >> 29) & 0x1;
- HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
- HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
+ X86_MC::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ if ((EDX >> 29) & 0x1) {
+ HasX86_64 = true;
+ ToggleFeature(X86::Feature64Bit);
+ }
+ if (IsAMD && ((ECX >> 6) & 0x1)) {
+ HasSSE4A = true;
+ ToggleFeature(X86::FeatureSSE4A);
+ }
+ if (IsAMD && ((ECX >> 16) & 0x1)) {
+ HasFMA4 = true;
+ ToggleFeature(X86::FeatureFMA4);
+ }
}
}
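// For reference, the bit tests above follow the CPUID leaf-1 layout. A
// standalone sketch, assuming a GCC-style compiler on an x86 host:
//
//   #include <cpuid.h>
//   static bool HostHasSSSE3() {
//     unsigned a, b, c, d;
//     if (!__get_cpuid(1, &a, &b, &c, &d))
//       return false;          // cpuid unavailable on this host
//     return (c >> 9) & 1;     // leaf 1, ECX bit 9 == SSSE3
//   }
//
// X86_MC::GetCpuIDAndInfo wraps the same query portably across compilers.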
-X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
- bool is64Bit)
- : PICStyle(PICStyles::None)
+X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit)
+ : X86GenSubtargetInfo(TT, CPU, FS)
+ , PICStyle(PICStyles::None)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
, HasCMov(false)
@@ -306,73 +258,66 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
// FIXME: this is a known good value for Yonah. How about others?
, MaxInlineSizeThreshold(128)
, TargetTriple(TT)
- , Is64Bit(is64Bit) {
-
- // default to hard float ABI
- if (FloatABIType == FloatABI::Default)
- FloatABIType = FloatABI::Hard;
-
+ , In64BitMode(is64Bit) {
// Determine default and user specified characteristics
- if (!FS.empty()) {
+ if (!FS.empty() || !CPU.empty()) {
+ std::string CPUName = CPU;
+ if (CPUName.empty()) {
+#if defined (__x86_64__) || defined(__i386__)
+ CPUName = sys::getHostCPUName();
+#else
+ CPUName = "generic";
+#endif
+ }
+
+ // Make sure 64-bit features are available in 64-bit mode. (But make sure
+ // SSE2 can be turned off explicitly.)
+ std::string FullFS = FS;
+ if (In64BitMode) {
+ if (!FullFS.empty())
+ FullFS = "+64bit,+sse2," + FullFS;
+ else
+ FullFS = "+64bit,+sse2";
+ }
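    // Worked example (assuming later entries in a feature string override
    // earlier ones): FS = "-sse2" in 64-bit mode yields
    // FullFS = "+64bit,+sse2,-sse2", so 64-bit stays enabled while SSE2 is
    // turned back off, the escape hatch described in the comment above.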
+
// If feature string is not empty, parse features string.
- std::string CPU = sys::getHostCPUName();
- ParseSubtargetFeatures(FS, CPU);
- // All X86-64 CPUs also have SSE2, however user might request no SSE via
- // -mattr, so don't force SSELevel here.
- if (HasAVX)
- X86SSELevel = NoMMXSSE;
+ ParseSubtargetFeatures(CPUName, FullFS);
} else {
// Otherwise, use CPUID to auto-detect feature set.
AutoDetectSubtargetFeatures();
- // Make sure SSE2 is enabled; it is available on all X86-64 CPUs.
- if (Is64Bit && !HasAVX && X86SSELevel < SSE2)
- X86SSELevel = SSE2;
- }
- // If requesting codegen for X86-64, make sure that 64-bit features
- // are enabled.
- if (Is64Bit) {
- HasX86_64 = true;
+ // Make sure 64-bit features are available in 64-bit mode.
+ if (In64BitMode) {
+ HasX86_64 = true; ToggleFeature(X86::Feature64Bit);
+ HasCMov = true; ToggleFeature(X86::FeatureCMOV);
- // All 64-bit cpus have cmov support.
- HasCMov = true;
+ if (!HasAVX && X86SSELevel < SSE2) {
+ X86SSELevel = SSE2;
+ ToggleFeature(X86::FeatureSSE1);
+ ToggleFeature(X86::FeatureSSE2);
+ }
+ }
}
+
+  // It's important to keep the MCSubtargetInfo feature bits in sync with the
+  // target data structures that are shared with the MC code emitter, etc.
+ if (In64BitMode)
+ ToggleFeature(X86::Mode64Bit);
+
+ if (HasAVX)
+ X86SSELevel = NoMMXSSE;
DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel
<< ", 3DNowLevel " << X863DNowLevel
<< ", 64bit " << HasX86_64 << "\n");
- assert((!Is64Bit || HasX86_64) &&
+ assert((!In64BitMode || HasX86_64) &&
"64-bit code requested on a subtarget that doesn't support it!");
// Stack alignment is 16 bytes on Darwin, FreeBSD, Linux and Solaris (both
// 32 and 64 bit) and for all 64-bit targets.
- if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
- isTargetSolaris() || Is64Bit)
+ if (StackAlignOverride)
+ stackAlignment = StackAlignOverride;
+ else if (isTargetDarwin() || isTargetFreeBSD() || isTargetLinux() ||
+ isTargetSolaris() || In64BitMode)
stackAlignment = 16;
-
- if (StackAlignment)
- stackAlignment = StackAlignment;
-}
-
-/// IsCalleePop - Determines whether the callee is required to pop its
-/// own arguments. Callee pop is necessary to support tail calls.
-bool X86Subtarget::IsCalleePop(bool IsVarArg,
- CallingConv::ID CallingConv) const {
- if (IsVarArg)
- return false;
-
- switch (CallingConv) {
- default:
- return false;
- case CallingConv::X86_StdCall:
- return !is64Bit();
- case CallingConv::X86_FastCall:
- return !is64Bit();
- case CallingConv::X86_ThisCall:
- return !is64Bit();
- case CallingConv::Fast:
- return GuaranteedTailCallOpt;
- case CallingConv::GHC:
- return GuaranteedTailCallOpt;
- }
}
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 286a798..6d22027 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the X86 specific subclass of TargetSubtarget.
+// This file declares the X86 specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
@@ -15,12 +15,16 @@
#define X86SUBTARGET_H
#include "llvm/ADT/Triple.h"
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/CallingConv.h"
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "X86GenSubtargetInfo.inc"
+
namespace llvm {
class GlobalValue;
+class StringRef;
class TargetMachine;
/// PICStyles - The X86 backend supports a number of different styles of PIC.
@@ -35,7 +39,7 @@ enum Style {
};
}
-class X86Subtarget : public TargetSubtarget {
+class X86Subtarget : public X86GenSubtargetInfo {
protected:
enum X86SSEEnum {
NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42
@@ -108,16 +112,17 @@ protected:
Triple TargetTriple;
private:
- /// Is64Bit - True if the processor supports 64-bit instructions and
- /// pointer size is 64 bit.
- bool Is64Bit;
+ /// In64BitMode - True if compiling for 64-bit, false for 32-bit.
+ bool In64BitMode;
public:
/// This constructor initializes the data members to match those
/// of the specified triple.
///
- X86Subtarget(const std::string &TT, const std::string &FS, bool is64Bit);
+ X86Subtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS,
+ unsigned StackAlignOverride, bool is64Bit);
/// getStackAlignment - Returns the minimum alignment known to hold for the
/// stack frame on entry to the function, which must be maintained by every
@@ -130,14 +135,13 @@ public:
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// AutoDetectSubtargetFeatures - Auto-detect CPU features using CPUID
/// instruction.
void AutoDetectSubtargetFeatures();
- bool is64Bit() const { return Is64Bit; }
+ bool is64Bit() const { return In64BitMode; }
PICStyles::Style getPICStyle() const { return PICStyle; }
void setPICStyle(PICStyles::Style Style) { PICStyle = Style; }
@@ -195,7 +199,7 @@ public:
}
bool isTargetWin64() const {
- return Is64Bit && (isTargetMingw() || isTargetWindows());
+ return In64BitMode && (isTargetMingw() || isTargetWindows());
}
bool isTargetEnvMacho() const {
@@ -203,7 +207,7 @@ public:
}
bool isTargetWin32() const {
- return !Is64Bit && (isTargetMingw() || isTargetWindows());
+ return !In64BitMode && (isTargetMingw() || isTargetWindows());
}
bool isPICStyleSet() const { return PICStyle != PICStyles::None; }
@@ -248,9 +252,6 @@ public:
/// indicating the number of scheduling cycles of backscheduling that
/// should be attempted.
unsigned getSpecialAddressLatency() const;
-
- /// IsCalleePop - Test whether a function should pop its own arguments.
- bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 7483329..9cab0e0 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "X86MCAsmInfo.h"
#include "X86TargetMachine.h"
#include "X86.h"
#include "llvm/PassManager.h"
@@ -24,22 +23,6 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
- Triple TheTriple(TT);
-
- if (TheTriple.isOSDarwin() || TheTriple.getEnvironment() == Triple::MachO) {
- if (TheTriple.getArch() == Triple::x86_64)
- return new X86_64MCAsmInfoDarwin(TheTriple);
- else
- return new X86MCAsmInfoDarwin(TheTriple);
- }
-
- if (TheTriple.isOSWindows())
- return new X86MCAsmInfoCOFF(TheTriple);
-
- return new X86ELFMCAsmInfo(TheTriple);
-}
-
static MCStreamer *createMCStreamer(const Target &T, const std::string &TT,
MCContext &Ctx, TargetAsmBackend &TAB,
raw_ostream &_OS,
@@ -62,15 +45,11 @@ extern "C" void LLVMInitializeX86Target() {
RegisterTargetMachine<X86_32TargetMachine> X(TheX86_32Target);
RegisterTargetMachine<X86_64TargetMachine> Y(TheX86_64Target);
- // Register the target asm info.
- RegisterAsmInfoFn A(TheX86_32Target, createMCAsmInfo);
- RegisterAsmInfoFn B(TheX86_64Target, createMCAsmInfo);
-
// Register the code emitter.
TargetRegistry::RegisterCodeEmitter(TheX86_32Target,
- createX86_32MCCodeEmitter);
+ createX86MCCodeEmitter);
TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
- createX86_64MCCodeEmitter);
+ createX86MCCodeEmitter);
// Register the asm backend.
TargetRegistry::RegisterAsmBackend(TheX86_32Target,
@@ -87,8 +66,9 @@ extern "C" void LLVMInitializeX86Target() {
X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : X86TargetMachine(T, TT, FS, false),
+ : X86TargetMachine(T, TT, CPU, FS, false),
DataLayout(getSubtargetImpl()->isTargetDarwin() ?
"e-p:32:32-f64:32:64-i64:32:64-f80:128:128-f128:128:128-n8:16:32" :
(getSubtargetImpl()->isTargetCygMing() ||
@@ -103,8 +83,9 @@ X86_32TargetMachine::X86_32TargetMachine(const Target &T, const std::string &TT,
X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : X86TargetMachine(T, TT, FS, true),
+ : X86TargetMachine(T, TT, CPU, FS, true),
DataLayout("e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-f128:128:128-n8:16:32:64"),
InstrInfo(*this),
TSInfo(*this),
@@ -115,9 +96,10 @@ X86_64TargetMachine::X86_64TargetMachine(const Target &T, const std::string &TT,
/// X86TargetMachine ctor - Create an X86 target.
///
X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS, bool is64Bit)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS, is64Bit),
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS, StackAlignmentOverride, is64Bit),
FrameLowering(*this, Subtarget),
ELFWriterInfo(is64Bit, true) {
DefRelocModel = getRelocationModel();
@@ -182,6 +164,10 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
// Finally, if we have "none" as our PIC style, force to static mode.
if (Subtarget.getPICStyle() == PICStyles::None)
setRelocationModel(Reloc::Static);
+
+ // default to hard float ABI
+ if (FloatABIType == FloatABI::Default)
+ FloatABIType = FloatABI::Hard;
}
//===----------------------------------------------------------------------===//
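For context, callers now thread a CPU string through when constructing a
target machine. A rough usage sketch (registry accessors as of this refactor;
error handling omitted):

  std::string Err;
  const Target *T =
      TargetRegistry::lookupTarget("x86_64-unknown-linux-gnu", Err);
  TargetMachine *TM =
      T->createTargetMachine("x86_64-unknown-linux-gnu", /*CPU=*/"corei7",
                             /*Features=*/"+sse4.2");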
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index 5973922..885334a 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -43,7 +43,8 @@ private:
public:
X86TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS, bool is64Bit);
+ const std::string &CPU, const std::string &FS,
+ bool is64Bit);
virtual const X86InstrInfo *getInstrInfo() const {
llvm_unreachable("getInstrInfo not implemented");
@@ -87,7 +88,7 @@ class X86_32TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_32TargetMachine(const Target &T, const std::string &M,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
@@ -113,7 +114,7 @@ class X86_64TargetMachine : public X86TargetMachine {
X86JITInfo JITInfo;
public:
X86_64TargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const X86TargetLowering *getTargetLowering() const {
return &TLInfo;
diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt
index 9093de6..a1d73c6 100644
--- a/lib/Target/XCore/CMakeLists.txt
+++ b/lib/Target/XCore/CMakeLists.txt
@@ -1,14 +1,11 @@
set(LLVM_TARGET_DEFINITIONS XCore.td)
-tablegen(XCoreGenRegisterInfo.h.inc -gen-register-desc-header)
-tablegen(XCoreGenRegisterNames.inc -gen-register-enums)
-tablegen(XCoreGenRegisterInfo.inc -gen-register-desc)
-tablegen(XCoreGenInstrNames.inc -gen-instr-enums)
-tablegen(XCoreGenInstrInfo.inc -gen-instr-desc)
+tablegen(XCoreGenRegisterInfo.inc -gen-register-info)
+tablegen(XCoreGenInstrInfo.inc -gen-instr-info)
tablegen(XCoreGenAsmWriter.inc -gen-asm-writer)
tablegen(XCoreGenDAGISel.inc -gen-dag-isel)
tablegen(XCoreGenCallingConv.inc -gen-callingconv)
-tablegen(XCoreGenSubtarget.inc -gen-subtarget)
+tablegen(XCoreGenSubtargetInfo.inc -gen-subtarget)
add_llvm_target(XCoreCodeGen
XCoreAsmPrinter.cpp
@@ -16,7 +13,6 @@ add_llvm_target(XCoreCodeGen
XCoreInstrInfo.cpp
XCoreISelDAGToDAG.cpp
XCoreISelLowering.cpp
- XCoreMCAsmInfo.cpp
XCoreRegisterInfo.cpp
XCoreSubtarget.cpp
XCoreTargetMachine.cpp
@@ -25,3 +21,4 @@ add_llvm_target(XCoreCodeGen
)
add_subdirectory(TargetInfo)
+add_subdirectory(MCTargetDesc)
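The renamed tablegen products are consumed section-by-section: each consumer
defines one guard macro and then includes the same generated file, as the
XCore sources below show for XCoreGenRegisterInfo.inc:

  //   GET_REGINFO_ENUM        -> register-number enum (MCTargetDesc header)
  //   GET_REGINFO_HEADER      -> generated class declaration (XCoreRegisterInfo.h)
  //   GET_REGINFO_MC_DESC     -> MC register tables (XCoreMCTargetDesc.cpp)
  //   GET_REGINFO_TARGET_DESC -> codegen-side tables (XCoreRegisterInfo.cpp)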
diff --git a/lib/Target/XCore/MCTargetDesc/CMakeLists.txt b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
new file mode 100644
index 0000000..c3b3dc9
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/CMakeLists.txt
@@ -0,0 +1,7 @@
+add_llvm_library(LLVMXCoreDesc
+ XCoreMCTargetDesc.cpp
+ XCoreMCAsmInfo.cpp
+ )
+
+# Hack: we need to include 'main' target directory to grab private headers
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..)
diff --git a/lib/Target/XCore/MCTargetDesc/Makefile b/lib/Target/XCore/MCTargetDesc/Makefile
new file mode 100644
index 0000000..de61543
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/Makefile
@@ -0,0 +1,16 @@
+##===- lib/Target/XCore/MCTargetDesc/Makefile --------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMXCoreDesc
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
index 42ab1b3..42ab1b3 100644
--- a/lib/Target/XCore/XCoreMCAsmInfo.cpp
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp
diff --git a/lib/Target/XCore/XCoreMCAsmInfo.h b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
index 8403922..8403922 100644
--- a/lib/Target/XCore/XCoreMCAsmInfo.h
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
new file mode 100644
index 0000000..939d97c
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -0,0 +1,56 @@
+//===-- XCoreMCTargetDesc.cpp - XCore Target Descriptions -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "XCoreMCTargetDesc.h"
+#include "XCoreMCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "XCoreGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "XCoreGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createXCoreMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitXCoreMCInstrInfo(X);
+ return X;
+}
+
+extern "C" void LLVMInitializeXCoreMCInstrInfo() {
+ TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+}
+
+static MCSubtargetInfo *createXCoreMCSubtargetInfo(StringRef TT, StringRef CPU,
+ StringRef FS) {
+ MCSubtargetInfo *X = new MCSubtargetInfo();
+ InitXCoreMCSubtargetInfo(X, TT, CPU, FS);
+ return X;
+}
+
+extern "C" void LLVMInitializeXCoreMCSubtargetInfo() {
+ TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+ createXCoreMCSubtargetInfo);
+}
+
+extern "C" void LLVMInitializeXCoreMCAsmInfo() {
+ RegisterMCAsmInfo<XCoreMCAsmInfo> X(TheXCoreTarget);
+}
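A rough sketch of how a client picks up the MC objects registered above
(simplified; assumes the Target accessors introduced alongside this split):

  LLVMInitializeXCoreMCInstrInfo();
  LLVMInitializeXCoreMCSubtargetInfo();
  const MCInstrInfo *MII = TheXCoreTarget.createMCInstrInfo();
  const MCSubtargetInfo *STI =
      TheXCoreTarget.createMCSubtargetInfo(/*Triple=*/"xcore", /*CPU=*/"",
                                           /*Features=*/"");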
diff --git a/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
new file mode 100644
index 0000000..3cfc3764
--- /dev/null
+++ b/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -0,0 +1,40 @@
+//===-- XCoreMCTargetDesc.h - XCore Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides XCore specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef XCOREMCTARGETDESC_H
+#define XCOREMCTARGETDESC_H
+
+namespace llvm {
+class MCSubtargetInfo;
+class Target;
+class StringRef;
+
+extern Target TheXCoreTarget;
+
+} // End llvm namespace
+
+// Defines symbolic names for XCore registers. This defines a mapping from
+// register name to register number.
+//
+#define GET_REGINFO_ENUM
+#include "XCoreGenRegisterInfo.inc"
+
+// Defines symbolic names for the XCore instructions.
+//
+#define GET_INSTRINFO_ENUM
+#include "XCoreGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_ENUM
+#include "XCoreGenSubtargetInfo.inc"
+
+#endif
diff --git a/lib/Target/XCore/Makefile b/lib/Target/XCore/Makefile
index 6c1ef88..b823c4e 100644
--- a/lib/Target/XCore/Makefile
+++ b/lib/Target/XCore/Makefile
@@ -12,13 +12,12 @@ LIBRARYNAME = LLVMXCoreCodeGen
TARGET = XCore
# Make sure that tblgen is run, first thing.
-BUILT_SOURCES = XCoreGenRegisterInfo.h.inc XCoreGenRegisterNames.inc \
- XCoreGenRegisterInfo.inc XCoreGenInstrNames.inc \
- XCoreGenInstrInfo.inc XCoreGenAsmWriter.inc \
+BUILT_SOURCES = XCoreGenRegisterInfo.inc XCoreGenInstrInfo.inc \
+ XCoreGenAsmWriter.inc \
XCoreGenDAGISel.inc XCoreGenCallingConv.inc \
- XCoreGenSubtarget.inc
+ XCoreGenSubtargetInfo.inc
-DIRS = TargetInfo
+DIRS = TargetInfo MCTargetDesc
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/XCore/XCore.h b/lib/Target/XCore/XCore.h
index 8937fbe..b8fb0ca 100644
--- a/lib/Target/XCore/XCore.h
+++ b/lib/Target/XCore/XCore.h
@@ -15,6 +15,7 @@
#ifndef TARGET_XCORE_H
#define TARGET_XCORE_H
+#include "MCTargetDesc/XCoreMCTargetDesc.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
@@ -25,17 +26,6 @@ namespace llvm {
FunctionPass *createXCoreISelDag(XCoreTargetMachine &TM);
- extern Target TheXCoreTarget;
-
} // end namespace llvm;
-// Defines symbolic names for XCore registers. This defines a mapping from
-// register name to register number.
-//
-#include "XCoreGenRegisterNames.inc"
-
-// Defines symbolic names for the XCore instructions.
-//
-#include "XCoreGenInstrNames.inc"
-
#endif
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index 8f06dd3..1a43714 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -16,7 +16,6 @@
#include "XCore.h"
#include "XCoreInstrInfo.h"
#include "XCoreSubtarget.h"
-#include "XCoreMCAsmInfo.h"
#include "XCoreTargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -27,6 +26,7 @@
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
@@ -114,7 +114,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
MCSymbol *GVSym = Mang->getSymbol(GV);
- Constant *C = GV->getInitializer();
+ const Constant *C = GV->getInitializer();
unsigned Align = (unsigned)TD->getPreferredTypeAlignmentShift(C->getType());
// Mark the start of the global
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 8cabbbf..6d040e0 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1591,21 +1591,18 @@ XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
// XCore Inline Assembly Support
//===----------------------------------------------------------------------===//
-std::vector<unsigned> XCoreTargetLowering::
-getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const
-{
- if (Constraint.size() != 1)
- return std::vector<unsigned>();
-
- switch (Constraint[0]) {
+std::pair<unsigned, const TargetRegisterClass*>
+XCoreTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
default : break;
case 'r':
- return make_vector<unsigned>(XCore::R0, XCore::R1, XCore::R2,
- XCore::R3, XCore::R4, XCore::R5,
- XCore::R6, XCore::R7, XCore::R8,
- XCore::R9, XCore::R10, XCore::R11, 0);
- break;
+ return std::make_pair(0U, XCore::GRRegsRegisterClass);
+ }
}
- return std::vector<unsigned>();
+ // Use the default implementation in TargetLowering to convert the register
+ // constraint into a member of a register class.
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
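For context, this hook is reached during inline-asm lowering. A small C
example whose "r" constraints would now resolve to the GRRegs class on XCore
(illustrative only; "add" is XCore's three-operand add):

  int add_asm(int a, int b) {
    int r;
    __asm__("add %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
    return r;
  }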
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index a8d67d4..9c803be 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -148,9 +148,9 @@ namespace llvm {
SDValue LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
// Inline asm support
- std::vector<unsigned>
- getRegClassForInlineAsmConstraint(const std::string &Constraint,
- EVT VT) const;
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
// Expand specifics
SDValue TryExpandADDWithMul(SDNode *Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 9cb6a7d..f90481f 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -18,11 +18,14 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
-#include "XCoreGenInstrInfo.inc"
+#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#define GET_INSTRINFO_CTOR
+#include "XCoreGenInstrInfo.inc"
+
namespace llvm {
namespace XCore {
@@ -38,7 +41,7 @@ namespace XCore {
using namespace llvm;
XCoreInstrInfo::XCoreInstrInfo()
- : TargetInstrInfoImpl(XCoreInsts, array_lengthof(XCoreInsts)),
+ : XCoreGenInstrInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
RI(*this) {
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 977fe8d..840b1e1 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -17,9 +17,12 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "XCoreRegisterInfo.h"
+#define GET_INSTRINFO_HEADER
+#include "XCoreGenInstrInfo.inc"
+
namespace llvm {
-class XCoreInstrInfo : public TargetInstrInfoImpl {
+class XCoreInstrInfo : public XCoreGenInstrInfo {
const XCoreRegisterInfo RI;
public:
XCoreInstrInfo();
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 46c9e57..357a4a0 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -33,11 +33,13 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#define GET_REGINFO_TARGET_DESC
+#include "XCoreGenRegisterInfo.inc"
+
using namespace llvm;
XCoreRegisterInfo::XCoreRegisterInfo(const TargetInstrInfo &tii)
- : XCoreGenRegisterInfo(XCore::ADJCALLSTACKDOWN, XCore::ADJCALLSTACKUP),
- TII(tii) {
+ : XCoreGenRegisterInfo(), TII(tii) {
}
// helper functions
@@ -193,7 +195,16 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
#endif
Offset += StackSize;
-
+
+ unsigned FrameReg = getFrameRegister(MF);
+
+ // Special handling of DBG_VALUE instructions.
+ if (MI.isDebugValue()) {
+ MI.getOperand(i).ChangeToRegister(FrameReg, false /*isDef*/);
+ MI.getOperand(i+1).ChangeToImmediate(Offset);
+ return;
+ }
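    // Illustration (operand layout assumed): a frame-index DBG_VALUE has the
    // shape "DBG_VALUE <fi#N>, <offset>, <variable>", so a machine instruction
    // like "DBG_VALUE <fi#2>, 0, !x" becomes "DBG_VALUE %R10, <Offset>, !x"
    // once the frame register and folded offset are substituted above.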
+
// fold constant into offset.
Offset += MI.getOperand(i + 1).getImm();
MI.getOperand(i + 1).ChangeToImmediate(0);
@@ -205,7 +216,7 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset/=4;
bool FP = TFI->hasFP(MF);
-
+
unsigned Reg = MI.getOperand(0).getReg();
bool isKill = MI.getOpcode() == XCore::STWFI && MI.getOperand(0).isKill();
@@ -216,7 +227,6 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (FP) {
bool isUs = isImmUs(Offset);
- unsigned FramePtr = XCore::R10;
if (!isUs) {
if (!RS)
@@ -228,18 +238,18 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
switch (MI.getOpcode()) {
case XCore::LDWFI:
BuildMI(MBB, II, dl, TII.get(XCore::LDW_3r), Reg)
- .addReg(FramePtr)
+ .addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
break;
case XCore::STWFI:
BuildMI(MBB, II, dl, TII.get(XCore::STW_3r))
.addReg(Reg, getKillRegState(isKill))
- .addReg(FramePtr)
+ .addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
break;
case XCore::LDAWFI:
BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l3r), Reg)
- .addReg(FramePtr)
+ .addReg(FrameReg)
.addReg(ScratchReg, RegState::Kill);
break;
default:
@@ -249,18 +259,18 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
switch (MI.getOpcode()) {
case XCore::LDWFI:
BuildMI(MBB, II, dl, TII.get(XCore::LDW_2rus), Reg)
- .addReg(FramePtr)
+ .addReg(FrameReg)
.addImm(Offset);
break;
case XCore::STWFI:
BuildMI(MBB, II, dl, TII.get(XCore::STW_2rus))
.addReg(Reg, getKillRegState(isKill))
- .addReg(FramePtr)
+ .addReg(FrameReg)
.addImm(Offset);
break;
case XCore::LDAWFI:
BuildMI(MBB, II, dl, TII.get(XCore::LDAWF_l2rus), Reg)
- .addReg(FramePtr)
+ .addReg(FrameReg)
.addImm(Offset);
break;
default:
@@ -328,6 +338,3 @@ unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
unsigned XCoreRegisterInfo::getRARegister() const {
return XCore::LR;
}
-
-#include "XCoreGenRegisterInfo.inc"
-
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index 7a9bc9f..801d9eb 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -15,7 +15,9 @@
#define XCOREREGISTERINFO_H
#include "llvm/Target/TargetRegisterInfo.h"
-#include "XCoreGenRegisterInfo.h.inc"
+
+#define GET_REGINFO_HEADER
+#include "XCoreGenRegisterInfo.inc"
namespace llvm {
diff --git a/lib/Target/XCore/XCoreRegisterInfo.td b/lib/Target/XCore/XCoreRegisterInfo.td
index 0951097..c354230 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.td
+++ b/lib/Target/XCore/XCoreRegisterInfo.td
@@ -44,13 +44,13 @@ def LR : Ri<15, "lr">, DwarfRegNum<[15]>;
//
def GRRegs : RegisterClass<"XCore", [i32], 32,
// Return values and arguments
- [R0, R1, R2, R3,
+ (add R0, R1, R2, R3,
// Not preserved across procedure calls
R11,
// Callee save
- R4, R5, R6, R7, R8, R9, R10]>;
+ R4, R5, R6, R7, R8, R9, R10)>;
// Reserved
-def RRegs : RegisterClass<"XCore", [i32], 32, [CP, DP, SP, LR]> {
+def RRegs : RegisterClass<"XCore", [i32], 32, (add CP, DP, SP, LR)> {
let isAllocatable = 0;
}
diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp
index 78a6fa5..ad069bf 100644
--- a/lib/Target/XCore/XCoreSubtarget.cpp
+++ b/lib/Target/XCore/XCoreSubtarget.cpp
@@ -7,14 +7,22 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements the XCore specific subclass of TargetSubtarget.
+// This file implements the XCore specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "XCoreSubtarget.h"
#include "XCore.h"
+#include "llvm/Target/TargetRegistry.h"
+
+#define GET_SUBTARGETINFO_TARGET_DESC
+#define GET_SUBTARGETINFO_CTOR
+#include "XCoreGenSubtargetInfo.inc"
+
using namespace llvm;
-XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &FS)
+XCoreSubtarget::XCoreSubtarget(const std::string &TT,
+ const std::string &CPU, const std::string &FS)
+ : XCoreGenSubtargetInfo(TT, CPU, FS)
{
}
diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h
index f8be3ec..7b29fa2 100644
--- a/lib/Target/XCore/XCoreSubtarget.h
+++ b/lib/Target/XCore/XCoreSubtarget.h
@@ -7,32 +7,35 @@
//
//===----------------------------------------------------------------------===//
//
-// This file declares the XCore specific subclass of TargetSubtarget.
+// This file declares the XCore specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef XCORESUBTARGET_H
#define XCORESUBTARGET_H
-#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Target/TargetMachine.h"
-
#include <string>
+#define GET_SUBTARGETINFO_HEADER
+#include "XCoreGenSubtargetInfo.inc"
+
namespace llvm {
+class StringRef;
-class XCoreSubtarget : public TargetSubtarget {
+class XCoreSubtarget : public XCoreGenSubtargetInfo {
public:
/// This constructor initializes the data members to match those
/// of the specified triple.
///
- XCoreSubtarget(const std::string &TT, const std::string &FS);
+ XCoreSubtarget(const std::string &TT, const std::string &CPU,
+ const std::string &FS);
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
- std::string ParseSubtargetFeatures(const std::string &FS,
- const std::string &CPU);
+ void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
};
} // End llvm namespace
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 30da2c8..342966a 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "XCoreMCAsmInfo.h"
#include "XCoreTargetMachine.h"
#include "XCore.h"
#include "llvm/Module.h"
@@ -21,9 +20,10 @@ using namespace llvm;
/// XCoreTargetMachine ctor - Create an ILP32 architecture model
///
XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
+ const std::string &CPU,
const std::string &FS)
- : LLVMTargetMachine(T, TT),
- Subtarget(TT, FS),
+ : LLVMTargetMachine(T, TT, CPU, FS),
+ Subtarget(TT, CPU, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
"i16:16:32-i32:32:32-i64:32:32-n32"),
InstrInfo(),
@@ -41,5 +41,4 @@ bool XCoreTargetMachine::addInstSelector(PassManagerBase &PM,
// Force static initialization.
extern "C" void LLVMInitializeXCoreTarget() {
RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
- RegisterAsmInfo<XCoreMCAsmInfo> Y(TheXCoreTarget);
}
diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h
index 24daadc..6235ac3 100644
--- a/lib/Target/XCore/XCoreTargetMachine.h
+++ b/lib/Target/XCore/XCoreTargetMachine.h
@@ -33,7 +33,7 @@ class XCoreTargetMachine : public LLVMTargetMachine {
XCoreSelectionDAGInfo TSInfo;
public:
XCoreTargetMachine(const Target &T, const std::string &TT,
- const std::string &FS);
+ const std::string &CPU, const std::string &FS);
virtual const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; }
virtual const XCoreFrameLowering *getFrameLowering() const {
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 54a7f67..fa007cf 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -493,7 +493,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
// Start by computing a new prototype for the function, which is the same as
// the old function, but has modified arguments.
const FunctionType *FTy = F->getFunctionType();
- std::vector<const Type*> Params;
+ std::vector<Type*> Params;
typedef std::set<IndicesVector> ScalarizeTable;
@@ -733,12 +733,12 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args.begin(), Args.end(), "", Call);
+ Args, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
AttributesVec.end()));
} else {
- New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ New = CallInst::Create(NF, Args, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(AttrListPtr::get(AttributesVec.begin(),
AttributesVec.end()));
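The shorter call sites here (and in DeadArgumentElimination below) reflect the
API change from iterator pairs to ArrayRef. A minimal sketch of the new-style
construction (NF and OldCall stand in for the surrounding code's variables):

  SmallVector<Value*, 8> Args;   // the collected actual arguments
  // An ArrayRef<Value*> is formed implicitly from the SmallVector:
  CallInst *NewCall = CallInst::Create(NF, Args, "", /*InsertBefore=*/OldCall);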
diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt
index 179b150..3de7bfc 100644
--- a/lib/Transforms/IPO/CMakeLists.txt
+++ b/lib/Transforms/IPO/CMakeLists.txt
@@ -2,7 +2,6 @@ add_llvm_library(LLVMipo
ArgumentPromotion.cpp
ConstantMerge.cpp
DeadArgumentElimination.cpp
- DeadTypeElimination.cpp
ExtractGV.cpp
FunctionAttrs.cpp
GlobalDCE.cpp
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index d4eaf0c..1517765 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -208,7 +208,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
// the old function, but doesn't have isVarArg set.
const FunctionType *FTy = Fn.getFunctionType();
- std::vector<const Type*> Params(FTy->param_begin(), FTy->param_end());
+ std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
FunctionType *NFTy = FunctionType::get(FTy->getReturnType(),
Params, false);
unsigned NumArgs = Params.size();
@@ -244,11 +244,11 @@ bool DAE::DeleteDeadVarargs(Function &Fn) {
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args.begin(), Args.end(), "", Call);
+ Args, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(PAL);
} else {
- New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ New = CallInst::Create(NF, Args, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(PAL);
if (cast<CallInst>(Call)->isTailCall())
@@ -647,7 +647,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Start by computing a new prototype for the function, which is the same as
// the old function, but has fewer arguments and a different return type.
const FunctionType *FTy = F->getFunctionType();
- std::vector<const Type*> Params;
+ std::vector<Type*> Params;
// Set up to build a new list of parameter attributes.
SmallVector<AttributeWithIndex, 8> AttributesVec;
@@ -659,13 +659,13 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Find out the new return value.
- const Type *RetTy = FTy->getReturnType();
+ Type *RetTy = FTy->getReturnType();
const Type *NRetTy = NULL;
unsigned RetCount = NumRetVals(F);
// -1 means unused, other numbers are the new index
SmallVector<int, 5> NewRetIdxs(RetCount, -1);
- std::vector<const Type*> RetTypes;
+ std::vector<Type*> RetTypes;
if (RetTy->isVoidTy()) {
NRetTy = RetTy;
} else {
@@ -822,11 +822,11 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
Instruction *New;
if (InvokeInst *II = dyn_cast<InvokeInst>(Call)) {
New = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),
- Args.begin(), Args.end(), "", Call);
+ Args, "", Call);
cast<InvokeInst>(New)->setCallingConv(CS.getCallingConv());
cast<InvokeInst>(New)->setAttributes(NewCallPAL);
} else {
- New = CallInst::Create(NF, Args.begin(), Args.end(), "", Call);
+ New = CallInst::Create(NF, Args, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(NewCallPAL);
if (cast<CallInst>(Call)->isTailCall())
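The std::vector<const Type*> to std::vector<Type*> changes in this file track
FunctionType::get, which now takes non-const type pointers. The pattern, taken
from the hunk above:

  std::vector<Type*> Params(FTy->param_begin(), FTy->param_end());
  FunctionType *NFTy =
      FunctionType::get(FTy->getReturnType(), Params, /*isVarArg=*/false);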
diff --git a/lib/Transforms/IPO/DeadTypeElimination.cpp b/lib/Transforms/IPO/DeadTypeElimination.cpp
deleted file mode 100644
index d3d4963..0000000
--- a/lib/Transforms/IPO/DeadTypeElimination.cpp
+++ /dev/null
@@ -1,112 +0,0 @@
-//===- DeadTypeElimination.cpp - Eliminate unused types for symbol table --===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass is used to clean up the output of GCC. It eliminates names for types
-// that are unused in the entire translation unit, using the FindUsedTypes pass.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "deadtypeelim"
-#include "llvm/Transforms/IPO.h"
-#include "llvm/Analysis/FindUsedTypes.h"
-#include "llvm/Module.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/Statistic.h"
-using namespace llvm;
-
-STATISTIC(NumKilled, "Number of unused typenames removed from symtab");
-
-namespace {
- struct DTE : public ModulePass {
- static char ID; // Pass identification, replacement for typeid
- DTE() : ModulePass(ID) {
- initializeDTEPass(*PassRegistry::getPassRegistry());
- }
-
- // doPassInitialization - For this pass, it removes global symbol table
- // entries for primitive types. These are never used for linking in GCC and
- // they make the output uglier to look at, so we nuke them.
- //
- // Also, initialize instance variables.
- //
- bool runOnModule(Module &M);
-
- // getAnalysisUsage - This function needs FindUsedTypes to do its job...
- //
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<FindUsedTypes>();
- }
- };
-}
-
-char DTE::ID = 0;
-INITIALIZE_PASS_BEGIN(DTE, "deadtypeelim", "Dead Type Elimination",
- false, false)
-INITIALIZE_PASS_DEPENDENCY(FindUsedTypes)
-INITIALIZE_PASS_END(DTE, "deadtypeelim", "Dead Type Elimination", false, false)
-
-ModulePass *llvm::createDeadTypeEliminationPass() {
- return new DTE();
-}
-
-
-// ShouldNukeSymtabEntry - Return true if this module level symbol table entry
-// should be eliminated.
-//
-static inline bool ShouldNukeSymtabEntry(const Type *Ty){
- // Nuke all names for primitive types!
- if (Ty->isPrimitiveType() || Ty->isIntegerTy())
- return true;
-
- // Nuke all pointers to primitive types as well...
- if (const PointerType *PT = dyn_cast<PointerType>(Ty))
- if (PT->getElementType()->isPrimitiveType() ||
- PT->getElementType()->isIntegerTy())
- return true;
-
- return false;
-}
-
-// run - For this pass, it removes global symbol table entries for primitive
-// types. These are never used for linking in GCC and they make the output
-// uglier to look at, so we nuke them. Also eliminate types that are never used
-// in the entire program as indicated by FindUsedTypes.
-//
-bool DTE::runOnModule(Module &M) {
- bool Changed = false;
-
- TypeSymbolTable &ST = M.getTypeSymbolTable();
- const SetVector<const Type*> &T = getAnalysis<FindUsedTypes>().getTypes();
- std::set<const Type*> UsedTypes(T.begin(), T.end());
-
- // Check the symbol table for superfluous type entries...
- //
- // Grab the 'type' plane of the module symbol...
- TypeSymbolTable::iterator TI = ST.begin();
- TypeSymbolTable::iterator TE = ST.end();
- while ( TI != TE ) {
- // If this entry should be unconditionally removed, or if we detect that
- // the type is not used, remove it.
- const Type *RHS = TI->second;
- if (ShouldNukeSymtabEntry(RHS) || !UsedTypes.count(RHS)) {
- ST.remove(TI++);
- ++NumKilled;
- Changed = true;
- } else {
- ++TI;
- // We only need to leave one name for each type.
- UsedTypes.erase(RHS);
- }
- }
-
- return Changed;
-}
-
-// vim: sw=2
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index cdf7b76..4ac721d 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -1999,9 +1999,13 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
const std::vector<Function*> &Ctors) {
// If we made a change, reassemble the initializer list.
- std::vector<Constant*> CSVals;
- CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
- CSVals.push_back(0);
+ Constant *CSVals[2];
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()), 65535);
+ CSVals[1] = 0;
+
+ const StructType *StructTy =
+ cast<StructType>(
+ cast<ArrayType>(GCL->getType()->getElementType())->getElementType());
// Create the new init list.
std::vector<Constant*> CAList;
@@ -2016,12 +2020,10 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
0x7fffffff);
}
- CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
+ CAList.push_back(ConstantStruct::get(StructTy, CSVals));
}
// Create the array initializer.
- const Type *StructTy =
- cast<ArrayType>(GCL->getType()->getElementType())->getElementType();
Constant *CA = ConstantArray::get(ArrayType::get(StructTy,
CAList.size()), CAList);
@@ -2218,42 +2220,40 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
// Return the modified struct.
- return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
- STy->isPacked());
- } else {
- ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
- const SequentialType *InitTy = cast<SequentialType>(Init->getType());
-
- uint64_t NumElts;
- if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
- NumElts = ATy->getNumElements();
- else
- NumElts = cast<VectorType>(InitTy)->getNumElements();
-
+ return ConstantStruct::get(STy, Elts);
+ }
+
+ ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
+ const SequentialType *InitTy = cast<SequentialType>(Init->getType());
- // Break up the array into elements.
- if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
- Elts.push_back(cast<Constant>(*i));
- } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
- for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
- Elts.push_back(cast<Constant>(*i));
- } else if (isa<ConstantAggregateZero>(Init)) {
- Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
- } else {
- assert(isa<UndefValue>(Init) && "This code is out of sync with "
- " ConstantFoldLoadThroughGEPConstantExpr");
- Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
- }
+ uint64_t NumElts;
+ if (const ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
+ NumElts = ATy->getNumElements();
+ else
+ NumElts = cast<VectorType>(InitTy)->getNumElements();
+
+ // Break up the array into elements.
+ if (ConstantArray *CA = dyn_cast<ConstantArray>(Init)) {
+ for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (ConstantVector *CV = dyn_cast<ConstantVector>(Init)) {
+ for (User::op_iterator i = CV->op_begin(), e = CV->op_end(); i != e; ++i)
+ Elts.push_back(cast<Constant>(*i));
+ } else if (isa<ConstantAggregateZero>(Init)) {
+ Elts.assign(NumElts, Constant::getNullValue(InitTy->getElementType()));
+ } else {
+ assert(isa<UndefValue>(Init) && "This code is out of sync with "
+ " ConstantFoldLoadThroughGEPConstantExpr");
+ Elts.assign(NumElts, UndefValue::get(InitTy->getElementType()));
+ }
- assert(CI->getZExtValue() < NumElts);
- Elts[CI->getZExtValue()] =
- EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
+ assert(CI->getZExtValue() < NumElts);
+ Elts[CI->getZExtValue()] =
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
- if (Init->getType()->isArrayTy())
- return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
- return ConstantVector::get(Elts);
- }
+ if (Init->getType()->isArrayTy())
+ return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
+ return ConstantVector::get(Elts);
}
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
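
The InstallGlobalCtors change illustrates the second recurring migration: ConstantStruct::get now takes the destination StructType explicitly instead of inferring an anonymous struct from a context plus a packed flag. A hedged sketch of the new shape; makeCtorEntry and its parameters are hypothetical:

  #include "llvm/Constants.h"
  #include "llvm/DerivedTypes.h"
  using namespace llvm;

  static Constant *makeCtorEntry(StructType *EntryTy, Constant *Priority,
                                 Constant *Fn) {
    // Old style (removed): ConstantStruct::get(Ctx, Vals, 2, /*packed*/false)
    // derived a fresh struct type from the operands.
    Constant *Vals[] = { Priority, Fn };
    // New style: the result's type is exactly EntryTy, which matters now that
    // named struct types are identified rather than uniqued by layout.
    return ConstantStruct::get(EntryTy, Vals);
  }
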
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 21dcb51..31ce95f 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -25,7 +25,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeConstantMergePass(Registry);
initializeDAEPass(Registry);
initializeDAHPass(Registry);
- initializeDTEPass(Registry);
initializeFunctionAttrsPass(Registry);
initializeGlobalDCEPass(Registry);
initializeGlobalOptPass(Registry);
@@ -63,10 +62,6 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadArgEliminationPass());
}
-void LLVMAddDeadTypeEliminationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createDeadTypeEliminationPass());
-}
-
void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createFunctionAttrsPass());
}
diff --git a/lib/Transforms/IPO/LowerSetJmp.cpp b/lib/Transforms/IPO/LowerSetJmp.cpp
index 52ecf17..659476b 100644
--- a/lib/Transforms/IPO/LowerSetJmp.cpp
+++ b/lib/Transforms/IPO/LowerSetJmp.cpp
@@ -267,7 +267,7 @@ void LowerSetJmp::TransformLongJmpCall(CallInst* Inst)
CastInst* CI =
new BitCastInst(Inst->getArgOperand(0), SBPTy, "LJBuf", Inst);
Value *Args[] = { CI, Inst->getArgOperand(1) };
- CallInst::Create(ThrowLongJmp, Args, Args + 2, "", Inst);
+ CallInst::Create(ThrowLongJmp, Args, "", Inst);
SwitchValuePair& SVP = SwitchValMap[Inst->getParent()->getParent()];
@@ -386,7 +386,7 @@ void LowerSetJmp::TransformSetJmpCall(CallInst* Inst)
GetSetJmpMap(Func), BufPtr,
ConstantInt::get(Type::getInt32Ty(Inst->getContext()), SetJmpIDMap[Func]++)
};
- CallInst::Create(AddSJToMap, Args, Args + 3, "", Inst);
+ CallInst::Create(AddSJToMap, Args, "", Inst);
// We are guaranteed that there are no values live across basic blocks
// (because we are "not in SSA form" yet), but there can still be values live
@@ -482,7 +482,7 @@ void LowerSetJmp::visitCallInst(CallInst& CI)
std::vector<Value*> Params(CS.arg_begin(), CS.arg_end());
InvokeInst* II =
InvokeInst::Create(CI.getCalledValue(), NewBB, PrelimBBMap[Func],
- Params.begin(), Params.end(), CI.getName(), Term);
+ Params, CI.getName(), Term);
II->setCallingConv(CI.getCallingConv());
II->setAttributes(CI.getAttributes());
diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp
index f741443..7796d05 100644
--- a/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/lib/Transforms/IPO/MergeFunctions.cpp
@@ -218,7 +218,6 @@ bool FunctionComparator::isEquivalentType(const Type *Ty1,
llvm_unreachable("Unknown type!");
// Fall through in Release mode.
case Type::IntegerTyID:
- case Type::OpaqueTyID:
case Type::VectorTyID:
// Ty1 == Ty2 would have returned true earlier.
return false;
@@ -733,7 +732,7 @@ void MergeFunctions::writeThunk(Function *F, Function *G) {
++i;
}
- CallInst *CI = Builder.CreateCall(F, Args.begin(), Args.end());
+ CallInst *CI = Builder.CreateCall(F, Args);
CI->setTailCall();
CI->setCallingConv(F->getCallingConv());
if (NewG->getReturnType()->isVoidTy()) {
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp
index 2f3baeb..b7e63dc 100644
--- a/lib/Transforms/IPO/PruneEH.cpp
+++ b/lib/Transforms/IPO/PruneEH.cpp
@@ -175,8 +175,7 @@ bool PruneEH::SimplifyFunction(Function *F) {
if (II->doesNotThrow()) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
// Insert a call instruction before the invoke.
- CallInst *Call = CallInst::Create(II->getCalledValue(),
- Args.begin(), Args.end(), "", II);
+ CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II);
Call->takeName(II);
Call->setCallingConv(II->getCallingConv());
Call->setAttributes(II->getAttributes());
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index a690765..0fbaff1 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -28,8 +28,8 @@
#include "llvm/Pass.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/ValueSymbolTable.h"
-#include "llvm/TypeSymbolTable.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
using namespace llvm;
@@ -143,8 +143,7 @@ static void RemoveDeadConstant(Constant *C) {
assert(C->use_empty() && "Constant is not dead!");
SmallPtrSet<Constant*, 4> Operands;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- if (isa<DerivedType>(C->getOperand(i)->getType()) &&
- OnlyUsedBy(C->getOperand(i), C))
+ if (OnlyUsedBy(C->getOperand(i), C))
Operands.insert(cast<Constant>(C->getOperand(i)));
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) {
if (!GV->hasLocalLinkage()) return; // Don't delete non static globals.
@@ -174,13 +173,19 @@ static void StripSymtab(ValueSymbolTable &ST, bool PreserveDbgInfo) {
}
}
-// Strip the symbol table of its names.
-static void StripTypeSymtab(TypeSymbolTable &ST, bool PreserveDbgInfo) {
- for (TypeSymbolTable::iterator TI = ST.begin(), E = ST.end(); TI != E; ) {
- if (PreserveDbgInfo && StringRef(TI->first).startswith("llvm.dbg"))
- ++TI;
- else
- ST.remove(TI++);
+// Strip any named types of their names.
+static void StripTypeNames(Module &M, bool PreserveDbgInfo) {
+ std::vector<StructType*> StructTypes;
+ M.findUsedStructTypes(StructTypes);
+
+ for (unsigned i = 0, e = StructTypes.size(); i != e; ++i) {
+ StructType *STy = StructTypes[i];
+ if (STy->isAnonymous() || STy->getName().empty()) continue;
+
+ if (PreserveDbgInfo && STy->getName().startswith("llvm.dbg"))
+ continue;
+
+ STy->setName("");
}
}
@@ -221,7 +226,7 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
}
// Remove all names from types.
- StripTypeSymtab(M.getTypeSymbolTable(), PreserveDbgInfo);
+ StripTypeNames(M, PreserveDbgInfo);
return true;
}
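
With no TypeSymbolTable left, struct names live on the StructType itself, so stripping reduces to visiting the structs a module uses, as the rewritten StripTypeNames above does. A compact sketch of that idiom under the post-patch Module API (stripAllTypeNames is a hypothetical name):

  #include "llvm/Module.h"
  #include "llvm/DerivedTypes.h"
  #include <vector>
  using namespace llvm;

  static void stripAllTypeNames(Module &M) {
    std::vector<StructType*> Tys;
    M.findUsedStructTypes(Tys);   // every struct type reachable from M
    for (unsigned i = 0, e = Tys.size(); i != e; ++i)
      if (!Tys[i]->isAnonymous())
        Tys[i]->setName("");      // an empty name removes the name entirely
  }
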
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a08446e..64ea36f 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1400,7 +1400,7 @@ static bool CollectBSwapParts(Value *V, int OverallLeftShift, uint32_t ByteMask,
/// MatchBSwap - Given an OR instruction, check to see if this is a bswap idiom.
/// If so, insert the new bswap intrinsic and return it.
Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
- const IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
+ IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
if (!ITy || ITy->getBitWidth() % 16 ||
// ByteMask only allows up to 32-byte values.
ITy->getBitWidth() > 32*8)
@@ -1424,9 +1424,8 @@ Instruction *InstCombiner::MatchBSwap(BinaryOperator &I) {
for (unsigned i = 1, e = ByteValues.size(); i != e; ++i)
if (ByteValues[i] != V)
return 0;
- const Type *Tys[] = { ITy };
Module *M = I.getParent()->getParent()->getParent();
- Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+ Function *F = Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
return CallInst::Create(F, V);
}
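
Intrinsic::getDeclaration gets the same treatment: the (Type**, count) pair becomes ArrayRef<Type*>, and because ArrayRef has an implicit single-element constructor, a lone overloaded type can be passed bare, as the MatchBSwap change does. Sketch with a hypothetical helper:

  #include "llvm/Module.h"
  #include "llvm/Intrinsics.h"
  using namespace llvm;

  static Function *getBSwapDecl(Module *M, Type *ITy) {
    // Old style (removed): const Type *Tys[] = { ITy };
    //                      Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
    // New style: ITy converts implicitly to a one-element ArrayRef<Type*>.
    return Intrinsic::getDeclaration(M, Intrinsic::bswap, ITy);
  }
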
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index ef67701..537f2b3 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -217,10 +217,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (GVSrc->isConstant()) {
Module *M = CI.getParent()->getParent()->getParent();
Intrinsic::ID MemCpyID = Intrinsic::memcpy;
- const Type *Tys[3] = { CI.getArgOperand(0)->getType(),
- CI.getArgOperand(1)->getType(),
- CI.getArgOperand(2)->getType() };
- CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys, 3));
+ Type *Tys[3] = { CI.getArgOperand(0)->getType(),
+ CI.getArgOperand(1)->getType(),
+ CI.getArgOperand(2)->getType() };
+ CI.setCalledFunction(Intrinsic::getDeclaration(M, MemCpyID, Tys));
Changed = true;
}
}
@@ -355,7 +355,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::cttz: {
// If all bits below the first known one are known zero,
// this value is constant.
- const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
@@ -372,7 +374,9 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ctlz: {
// If all bits above the first known one are known zero,
// this value is constant.
- const IntegerType *IT = cast<IntegerType>(II->getArgOperand(0)->getType());
+ const IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
+ // FIXME: Try to simplify vectors of integers.
+ if (!IT) break;
uint32_t BitWidth = IT->getBitWidth();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
@@ -412,7 +416,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(LHS->getType()),
ConstantInt::getTrue(II->getContext())
};
- Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ const StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Add, 0);
}
@@ -425,7 +430,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(LHS->getType()),
ConstantInt::getFalse(II->getContext())
};
- Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ const StructType *ST = cast<StructType>(II->getType());
+ Constant *Struct = ConstantStruct::get(ST, V);
return InsertValueInst::Create(Struct, Add, 0);
}
}
@@ -452,7 +458,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
- Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
@@ -472,7 +479,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
- Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
@@ -503,7 +511,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(LHS->getType()),
Builder->getFalse()
};
- Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ Constant *Struct = ConstantStruct::get(cast<StructType>(II->getType()),V);
return InsertValueInst::Create(Struct, Mul, 0);
}
} // FALL THROUGH
@@ -532,7 +540,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
UndefValue::get(II->getArgOperand(0)->getType()),
ConstantInt::getFalse(II->getContext())
};
- Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+ Constant *Struct =
+ ConstantStruct::get(cast<StructType>(II->getType()), V);
return InsertValueInst::Create(Struct, II->getArgOperand(0), 0);
}
}
@@ -1109,13 +1118,13 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
Instruction *NC;
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
NC = Builder->CreateInvoke(Callee, II->getNormalDest(),
- II->getUnwindDest(), Args.begin(), Args.end());
+ II->getUnwindDest(), Args);
NC->takeName(II);
cast<InvokeInst>(NC)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NC)->setAttributes(NewCallerPAL);
} else {
CallInst *CI = cast<CallInst>(Caller);
- NC = Builder->CreateCall(Callee, Args.begin(), Args.end());
+ NC = Builder->CreateCall(Callee, Args);
NC->takeName(CI);
if (CI->isTailCall())
cast<CallInst>(NC)->setTailCall();
@@ -1178,7 +1187,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
const AttrListPtr &NestAttrs = NestF->getAttributes();
if (!NestAttrs.isEmpty()) {
unsigned NestIdx = 1;
- const Type *NestTy = 0;
+ Type *NestTy = 0;
Attributes NestAttr = Attribute::None;
// Look for a parameter marked with the 'nest' attribute.
@@ -1240,7 +1249,7 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
// Handle this by synthesizing a new function type, equal to FTy
// with the chain parameter inserted.
- std::vector<const Type*> NewTypes;
+ std::vector<Type*> NewTypes;
NewTypes.reserve(FTy->getNumParams()+1);
// Insert the chain's type into the list of parameter types, which may
@@ -1280,11 +1289,11 @@ Instruction *InstCombiner::transformCallThroughTrampoline(CallSite CS) {
if (InvokeInst *II = dyn_cast<InvokeInst>(Caller)) {
NewCaller = InvokeInst::Create(NewCallee,
II->getNormalDest(), II->getUnwindDest(),
- NewArgs.begin(), NewArgs.end());
+ NewArgs);
cast<InvokeInst>(NewCaller)->setCallingConv(II->getCallingConv());
cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
} else {
- NewCaller = CallInst::Create(NewCallee, NewArgs.begin(), NewArgs.end());
+ NewCaller = CallInst::Create(NewCallee, NewArgs);
if (cast<CallInst>(Caller)->isTailCall())
cast<CallInst>(NewCaller)->setTailCall();
cast<CallInst>(NewCaller)->
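
Each of the with.overflow folds above now reads the {iN, i1} result type straight off the intrinsic call rather than building an anonymous pair. A condensed sketch of the shared pattern; foldKnownOverflow and its arguments are illustrative:

  #include "llvm/Constants.h"
  #include "llvm/Instructions.h"
  #include "llvm/IntrinsicInst.h"
  using namespace llvm;

  static Instruction *foldKnownOverflow(IntrinsicInst *II, Value *Result,
                                        bool Overflowed) {
    // Reuse the intrinsic's own return type for the constant aggregate.
    const StructType *ST = cast<StructType>(II->getType());
    Constant *V[] = {
      UndefValue::get(Result->getType()),
      Overflowed ? ConstantInt::getTrue(II->getContext())
                 : ConstantInt::getFalse(II->getContext())
    };
    Constant *Struct = ConstantStruct::get(ST, V);
    // Field 0 becomes the computed value; field 1 keeps the known flag.
    return InsertValueInst::Create(Struct, Result, 0);
  }
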
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 199902a..82c734e 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -30,6 +30,14 @@ static Value *DecomposeSimpleLinearExpr(Value *Val, unsigned &Scale,
}
if (BinaryOperator *I = dyn_cast<BinaryOperator>(Val)) {
+ // Cannot look past anything that might overflow.
+ OverflowingBinaryOperator *OBI = dyn_cast<OverflowingBinaryOperator>(Val);
+ if (OBI && !OBI->hasNoUnsignedWrap()) {
+ Scale = 1;
+ Offset = 0;
+ return Val;
+ }
+
if (ConstantInt *RHS = dyn_cast<ConstantInt>(I->getOperand(1))) {
if (I->getOpcode() == Instruction::Shl) {
// This is a value scaled by '1 << the shift amt'.
@@ -1208,7 +1216,8 @@ Instruction *InstCombiner::visitFPTrunc(FPTruncInst &CI) {
CallInst *Call = dyn_cast<CallInst>(CI.getOperand(0));
if (Call && Call->getCalledFunction() &&
Call->getCalledFunction()->getName() == "sqrt" &&
- Call->getNumArgOperands() == 1) {
+ Call->getNumArgOperands() == 1 &&
+ Call->hasOneUse()) {
CastInst *Arg = dyn_cast<CastInst>(Call->getArgOperand(0));
if (Arg && Arg->getOpcode() == Instruction::FPExt &&
CI.getType()->isFloatTy() &&
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index c7ed098..c78760b 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -42,13 +42,12 @@ static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
static bool HasAddOverflow(ConstantInt *Result,
ConstantInt *In1, ConstantInt *In2,
bool IsSigned) {
- if (IsSigned)
- if (In2->getValue().isNegative())
- return Result->getValue().sgt(In1->getValue());
- else
- return Result->getValue().slt(In1->getValue());
- else
+ if (!IsSigned)
return Result->getValue().ult(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().sgt(In1->getValue());
+ return Result->getValue().slt(In1->getValue());
}
/// AddWithOverflow - Compute Result = In1+In2, returning true if the result
@@ -77,13 +76,13 @@ static bool AddWithOverflow(Constant *&Result, Constant *In1,
static bool HasSubOverflow(ConstantInt *Result,
ConstantInt *In1, ConstantInt *In2,
bool IsSigned) {
- if (IsSigned)
- if (In2->getValue().isNegative())
- return Result->getValue().slt(In1->getValue());
- else
- return Result->getValue().sgt(In1->getValue());
- else
+ if (!IsSigned)
return Result->getValue().ugt(In1->getValue());
+
+ if (In2->isNegative())
+ return Result->getValue().slt(In1->getValue());
+
+ return Result->getValue().sgt(In1->getValue());
}
/// SubWithOverflow - Compute Result = In1-In2, returning true if the result
@@ -128,8 +127,7 @@ static bool isSignBitCheck(ICmpInst::Predicate pred, ConstantInt *RHS,
case ICmpInst::ICMP_UGT:
// True if LHS u> RHS and RHS == high-bit-mask - 1
TrueIfSigned = true;
- return RHS->getValue() ==
- APInt::getSignedMaxValue(RHS->getType()->getPrimitiveSizeInBits());
+ return RHS->isMaxValue(true);
case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
@@ -278,8 +276,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// If this is indexing an array of structures, get the structure element.
if (!LaterIndices.empty())
- Elt = ConstantExpr::getExtractValue(Elt, LaterIndices.data(),
- LaterIndices.size());
+ Elt = ConstantExpr::getExtractValue(Elt, LaterIndices);
// If the element is masked, handle it.
if (AndCst) Elt = ConstantExpr::getAnd(Elt, AndCst);
@@ -828,7 +825,7 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
}
}
- } else if (DivRHS->getValue().isNegative()) { // Divisor is < 0.
+ } else if (DivRHS->isNegative()) { // Divisor is < 0.
if (DivI->isExact())
RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
if (CmpRHSV == 0) { // (X / neg) op 0
@@ -1028,7 +1025,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// If the sign bit of the XorCST is not set, there is no change to
// the operation, just stop using the Xor.
- if (!XorCST->getValue().isNegative()) {
+ if (!XorCST->isNegative()) {
ICI.setOperand(0, CompareVal);
Worklist.Add(LHSI);
return &ICI;
@@ -1061,7 +1058,7 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
}
// (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
- if (!ICI.isEquality() && XorCST->getValue().isMaxSignedValue()) {
+ if (!ICI.isEquality() && XorCST->isMaxValue(true)) {
const APInt &NotSignBit = XorCST->getValue();
ICmpInst::Predicate Pred = ICI.isSigned()
? ICI.getUnsignedPredicate()
@@ -1087,22 +1084,33 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
// have its sign bit set or if it is an equality comparison.
// Extending a relational comparison when we're checking the sign
// bit would not work.
- if (Cast->hasOneUse() &&
- (ICI.isEquality() ||
- (AndCST->getValue().isNonNegative() && RHSV.isNonNegative()))) {
- uint32_t BitWidth =
- cast<IntegerType>(Cast->getOperand(0)->getType())->getBitWidth();
- APInt NewCST = AndCST->getValue().zext(BitWidth);
- APInt NewCI = RHSV.zext(BitWidth);
- Value *NewAnd =
+ if (ICI.isEquality() ||
+ (!AndCST->isNegative() && RHSV.isNonNegative())) {
+ Value *NewAnd =
Builder->CreateAnd(Cast->getOperand(0),
- ConstantInt::get(ICI.getContext(), NewCST),
- LHSI->getName());
+ ConstantExpr::getZExt(AndCST, Cast->getSrcTy()));
+ NewAnd->takeName(LHSI);
return new ICmpInst(ICI.getPredicate(), NewAnd,
- ConstantInt::get(ICI.getContext(), NewCI));
+ ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
}
}
-
+
+ // If the LHS is an AND of a zext, and we have an equality compare, we can
+ // shrink the and/compare to the smaller type, eliminating the cast.
+ if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
+ const IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
+ // Make sure we don't compare the upper bits; SimplifyDemandedBits
+ // should fold the icmp to true/false in that case.
+ if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
+ Value *NewAnd =
+ Builder->CreateAnd(Cast->getOperand(0),
+ ConstantExpr::getTrunc(AndCST, Ty));
+ NewAnd->takeName(LHSI);
+ return new ICmpInst(ICI.getPredicate(), NewAnd,
+ ConstantExpr::getTrunc(RHS, Ty));
+ }
+ }
+
// If this is: (X >> C1) & C2 != C3 (where any shift and any compare
// could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
// happens a LOT in code produced by the C front-end, for bitfield
@@ -1396,18 +1404,27 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
case Instruction::Xor:
// For the xor case, we can xor two constants together, eliminating
// the explicit xor.
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1)))
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
ConstantExpr::getXor(RHS, BOC));
-
- // FALLTHROUGH
+ } else if (RHSV == 0) {
+ // Replace ((xor A, B) != 0) with (A != B)
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
+ BO->getOperand(1));
+ }
+ break;
case Instruction::Sub:
- // Replace (([sub|xor] A, B) != 0) with (A != B)
- if (RHSV == 0)
+ // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
+ if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
+ if (BO->hasOneUse())
+ return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
+ ConstantExpr::getSub(BOp0C, RHS));
+ } else if (RHSV == 0) {
+ // Replace ((sub A, B) != 0) with (A != B)
return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
BO->getOperand(1));
+ }
break;
-
case Instruction::Or:
// If bits are being or'd in that are not present in the constant we
// are comparing against, then the comparison could never succeed!
@@ -1434,7 +1451,11 @@ Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
ICmpInst::ICMP_NE, LHSI,
Constant::getNullValue(RHS->getType()));
-
+
+ // Don't perform the following transforms if the AND has multiple uses
+ if (!BO->hasOneUse())
+ break;
+
// Replace (and X, (1 << size(X)-1) != 0) with x s< 0
if (BOC->getValue().isSignBit()) {
Value *X = BO->getOperand(0);
@@ -1659,9 +1680,9 @@ static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
// result and the overflow bit.
Module *M = I.getParent()->getParent()->getParent();
- const Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+ Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
Value *F = Intrinsic::getDeclaration(M, Intrinsic::sadd_with_overflow,
- &NewType, 1);
+ NewType);
InstCombiner::BuilderTy *Builder = IC.Builder;
@@ -1701,8 +1722,8 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV,
Builder->SetInsertPoint(OrigAdd);
Module *M = I.getParent()->getParent()->getParent();
- const Type *Ty = LHS->getType();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, &Ty,1);
+ Type *Ty = LHS->getType();
+ Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
CallInst *Call = Builder->CreateCall2(F, LHS, RHS, "uadd");
Value *Add = Builder->CreateExtractValue(Call, 0);
@@ -2364,7 +2385,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
BO1->getOperand(0));
}
- if (CI->getValue().isMaxSignedValue()) {
+ if (CI->isMaxValue(true)) {
ICmpInst::Predicate Pred = I.isSigned()
? I.getUnsignedPredicate()
: I.getSignedPredicate();
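
Several of the compare folds above stop digging into the APInt and use predicate helpers on ConstantInt directly; both helpers already appear in this patch's replacement lines. The equivalences, as a trivial sketch:

  #include "llvm/Constants.h"
  using namespace llvm;

  static bool signBitSet(ConstantInt *CI)  { // == CI->getValue().isNegative()
    return CI->isNegative();
  }
  static bool isSignedMax(ConstantInt *CI) { // == getValue().isMaxSignedValue()
    return CI->isMaxValue(/*isSigned=*/true);
  }
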
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 2d29403..630a6fe 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -691,14 +691,14 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
bool hasNegative = false;
for (unsigned i = 0; !hasNegative && i != VWidth; ++i)
if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i)))
- if (RHS->getValue().isNegative())
+ if (RHS->isNegative())
hasNegative = true;
if (hasNegative) {
std::vector<Constant *> Elts(VWidth);
for (unsigned i = 0; i != VWidth; ++i) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV->getOperand(i))) {
- if (RHS->getValue().isNegative())
+ if (RHS->isNegative())
Elts[i] = cast<ConstantInt>(ConstantExpr::getNeg(RHS));
else
Elts[i] = RHS;
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index aeb3c3e..5733c20 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -796,7 +796,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// So at this point we know we have (Y -> OtherAddOp):
// select C, (add X, Y), (sub X, Z)
Value *NegVal; // Compute -Z
- if (SI.getType()->isFloatingPointTy()) {
+ if (SI.getType()->isFPOrFPVectorTy()) {
NegVal = Builder->CreateFNeg(SubOp->getOperand(1));
} else {
NegVal = Builder->CreateNeg(SubOp->getOperand(1));
@@ -810,7 +810,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Builder->CreateSelect(CondVal, NewTrueOp,
NewFalseOp, SI.getName() + ".p");
- if (SI.getType()->isFloatingPointTy())
+ if (SI.getType()->isFPOrFPVectorTy())
return BinaryOperator::CreateFAdd(SubOp->getOperand(0), NewSel);
else
return BinaryOperator::CreateAdd(SubOp->getOperand(0), NewSel);
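
The select fold above previously bailed on vector selects: isFloatingPointTy() is false for <4 x float>, while isFPOrFPVectorTy() also accepts vectors whose element type is floating point. A quick sketch of the distinction (illustrative only):

  #include "llvm/DerivedTypes.h"
  #include "llvm/LLVMContext.h"
  using namespace llvm;

  static void fpPredicates(LLVMContext &C) {
    Type *F   = Type::getFloatTy(C);
    Type *V4F = VectorType::get(F, 4);
    bool A = F->isFloatingPointTy();    // true: scalar float
    bool B = V4F->isFloatingPointTy();  // false: the vector itself is not FP
    bool D = V4F->isFPOrFPVectorTy();   // true: the element type is FP
    (void)A; (void)B; (void)D;
  }
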
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 92c10f5..ab98ef9 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -785,6 +785,14 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// getelementptr instructions into a single instruction.
//
if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+
+ // If this GEP has only 0 indices, it is the same pointer as
+ // Src. If Src is not a trivial GEP too, don't combine
+ // the indices.
+ if (GEP.hasAllZeroIndices() && !Src->hasAllZeroIndices() &&
+ !Src->hasOneUse())
+ return 0;
+
// Note that if our source is a gep chain itself that we wait for that
// chain to be resolved before we perform this transformation. This
// avoids us creating a TON of code in some cases.
@@ -1191,7 +1199,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
if (EV.getNumIndices() > 1)
// Extract the remaining indices out of the constant indexed by the
// first index
- return ExtractValueInst::Create(V, EV.idx_begin() + 1, EV.idx_end());
+ return ExtractValueInst::Create(V, EV.getIndices().slice(1));
else
return ReplaceInstUsesWith(EV, V);
}
@@ -1214,7 +1222,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// with
// %E = extractvalue { i32, { i32 } } %A, 0
return ExtractValueInst::Create(IV->getAggregateOperand(),
- EV.idx_begin(), EV.idx_end());
+ EV.getIndices());
}
if (exti == exte && insi == inse)
// Both iterators are at the end: Index lists are identical. Replace
@@ -1232,9 +1240,9 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// by switching the order of the insert and extract (though the
// insertvalue should be left in, since it may have other uses).
Value *NewEV = Builder->CreateExtractValue(IV->getAggregateOperand(),
- EV.idx_begin(), EV.idx_end());
+ EV.getIndices());
return InsertValueInst::Create(NewEV, IV->getInsertedValueOperand(),
- insi, inse);
+ ArrayRef<unsigned>(insi, inse));
}
if (insi == inse)
// The insert list is a prefix of the extract list
@@ -1246,7 +1254,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
// with
// %E extractvalue { i32 } { i32 42 }, 0
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
- exti, exte);
+ ArrayRef<unsigned>(exti, exte));
}
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
// We're extracting from an intrinsic, see if we're the only user, which
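
extractvalue handling moves off index iterators too: getIndices() exposes the index list as an ArrayRef<unsigned>, and slice(1) drops the leading index without copying, which is what the first hunk above relies on. Sketch with a hypothetical helper:

  #include "llvm/Instructions.h"
  #include "llvm/ADT/ArrayRef.h"
  using namespace llvm;

  static Instruction *reExtractTail(Value *Agg, ExtractValueInst &EV) {
    // Agg is assumed to be the aggregate with EV's first index already
    // applied; re-extract using the remaining indices.
    return ExtractValueInst::Create(Agg, EV.getIndices().slice(1));
  }
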
diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index b902213..3f2c412 100644
--- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -561,25 +561,24 @@ GlobalVariable *GCOVProfiler::buildEdgeLookupTable(
Edge += Successors;
}
+ ArrayRef<Constant*> V(&EdgeTable[0], Succs.size() * Preds.size());
GlobalVariable *EdgeTableGV =
new GlobalVariable(
*M, EdgeTableTy, true, GlobalValue::InternalLinkage,
- ConstantArray::get(EdgeTableTy,
- &EdgeTable[0], Succs.size() * Preds.size()),
+ ConstantArray::get(EdgeTableTy, V),
"__llvm_gcda_edge_table");
EdgeTableGV->setUnnamedAddr(true);
return EdgeTableGV;
}
Constant *GCOVProfiler::getStartFileFunc() {
- const Type *Args[] = { Type::getInt8PtrTy(*Ctx) };
const FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx),
- Args, false);
+ Type::getInt8PtrTy(*Ctx), false);
return M->getOrInsertFunction("llvm_gcda_start_file", FTy);
}
Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
- const Type *Args[] = {
+ Type *Args[] = {
Type::getInt32PtrTy(*Ctx), // uint32_t *predecessor
Type::getInt64PtrTy(*Ctx)->getPointerTo(), // uint64_t **state_table_row
};
@@ -589,7 +588,7 @@ Constant *GCOVProfiler::getIncrementIndirectCounterFunc() {
}
Constant *GCOVProfiler::getEmitFunctionFunc() {
- const Type *Args[2] = {
+ Type *Args[2] = {
Type::getInt32Ty(*Ctx), // uint32_t ident
Type::getInt8PtrTy(*Ctx), // const char *function_name
};
@@ -599,7 +598,7 @@ Constant *GCOVProfiler::getEmitFunctionFunc() {
}
Constant *GCOVProfiler::getEmitArcsFunc() {
- const Type *Args[] = {
+ Type *Args[] = {
Type::getInt32Ty(*Ctx), // uint32_t num_counters
Type::getInt64PtrTy(*Ctx), // uint64_t *counters
};
diff --git a/lib/Transforms/Instrumentation/PathProfiling.cpp b/lib/Transforms/Instrumentation/PathProfiling.cpp
index 182a43d..7541663 100644
--- a/lib/Transforms/Instrumentation/PathProfiling.cpp
+++ b/lib/Transforms/Instrumentation/PathProfiling.cpp
@@ -376,7 +376,7 @@ namespace llvm {
public:
static const StructType *get(LLVMContext& C) {
return( StructType::get(
- C, TypeBuilder<types::i<32>, xcompile>::get(C), // type
+ TypeBuilder<types::i<32>, xcompile>::get(C), // type
TypeBuilder<types::i<32>, xcompile>::get(C), // array size
TypeBuilder<types::i<8>*, xcompile>::get(C), // array/hash ptr
NULL));
@@ -1062,7 +1062,7 @@ void PathProfiler::insertCounterIncrement(Value* incValue,
CallInst::Create(
increment ? llvmIncrementHashFunction : llvmDecrementHashFunction,
- args.begin(), args.end(), "", insertPoint);
+ args, "", insertPoint);
}
}
diff --git a/lib/Transforms/Instrumentation/ProfilingUtils.cpp b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
index 7435bc3..445a5b6 100644
--- a/lib/Transforms/Instrumentation/ProfilingUtils.cpp
+++ b/lib/Transforms/Instrumentation/ProfilingUtils.cpp
@@ -62,8 +62,7 @@ void llvm::InsertProfilingInitCall(Function *MainFn, const char *FnName,
}
Args[3] = ConstantInt::get(Type::getInt32Ty(Context), NumElements);
- CallInst *InitCall = CallInst::Create(InitFn, Args.begin(), Args.end(),
- "newargc", InsertPos);
+ CallInst *InitCall = CallInst::Create(InitFn, Args, "newargc", InsertPos);
// If argc or argv are not available in main, just pass null values in.
Function::arg_iterator AI;
@@ -134,7 +133,7 @@ void llvm::IncrementCounterInBlock(BasicBlock *BB, unsigned CounterNum,
void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
// llvm.global_dtors is an array of type { i32, void ()* }. Prepare those
// types.
- const Type *GlobalDtorElems[2] = {
+ Type *GlobalDtorElems[2] = {
Type::getInt32Ty(Mod->getContext()),
FunctionType::get(Type::getVoidTy(Mod->getContext()), false)->getPointerTo()
};
@@ -164,7 +163,8 @@ void llvm::InsertProfilingShutdownCall(Function *Callee, Module *Mod) {
GlobalVariable *GlobalDtors = new GlobalVariable(
*Mod, ArrayType::get(GlobalDtorElemTy, 1), false,
GlobalValue::AppendingLinkage, NULL, "llvm.global_dtors");
- dtors.push_back(ConstantStruct::get(Mod->getContext(), Elem, 2, false));
+
+ dtors.push_back(ConstantStruct::get(GlobalDtorElemTy, Elem));
GlobalDtors->setInitializer(ConstantArray::get(
cast<ArrayType>(GlobalDtors->getType()->getElementType()), dtors));
}
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index fcf914f..c223da6 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_library(LLVMScalarOpts
LoopUnswitch.cpp
LowerAtomic.cpp
MemCpyOptimizer.cpp
+ ObjCARC.cpp
Reassociate.cpp
Reg2Mem.cpp
SCCP.cpp
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 53e4640..cb9b5be 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -437,12 +437,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
MemDepResult InstDep = MD->getDependency(Inst);
- // Ignore non-local store liveness.
+ // Ignore any store where we can't find a local dependence.
// FIXME: cross-block DSE would be fun. :)
- if (InstDep.isNonLocal() ||
- // Ignore self dependence, which happens in the entry block of the
- // function.
- InstDep.getInst() == Inst)
+ if (InstDep.isNonLocal() || InstDep.isUnknown())
continue;
// If we're storing the same value back to a pointer that we just
@@ -478,7 +475,7 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
if (Loc.Ptr == 0)
continue;
- while (!InstDep.isNonLocal()) {
+ while (!InstDep.isNonLocal() && !InstDep.isUnknown()) {
// Get the memory clobbered by the instruction we depend on. MemDep will
// skip any instructions that 'Loc' clearly doesn't interact with. If we
// end up depending on a may- or must-aliased load, then we can't optimize
@@ -542,24 +539,26 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
/// HandleFree - Handle frees of entire structures whose dependency is a store
/// to a field of that structure.
bool DSE::HandleFree(CallInst *F) {
+ bool MadeChange = false;
+
MemDepResult Dep = MD->getDependency(F);
- do {
- if (Dep.isNonLocal()) return false;
-
+
+ while (!Dep.isNonLocal() && !Dep.isUnknown()) {
Instruction *Dependency = Dep.getInst();
if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency))
- return false;
+ return MadeChange;
Value *DepPointer =
GetUnderlyingObject(getStoredPointerOperand(Dependency));
// Check for aliasing.
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
- return false;
+ return MadeChange;
// DCE instructions only used to calculate that store
DeleteDeadInstruction(Dependency, *MD);
++NumFastStores;
+ MadeChange = true;
// Inst's old Dependency is now deleted. Compute the next dependency,
// which may also be dead, as in
@@ -567,9 +566,9 @@ bool DSE::HandleFree(CallInst *F) {
// s[1] = 0; // This has just been deleted.
// free(s);
Dep = MD->getDependency(F);
- } while (!Dep.isNonLocal());
+ }
- return true;
+ return MadeChange;
}
/// handleEndBlock - Remove dead stores to stack-allocated locations in the
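
MemDepResult gains an explicit unknown state in this patch, distinct from a non-local answer, and DSE now checks both before trusting getInst(). The three-way dispatch, as a small sketch (classifyDep is a hypothetical name):

  #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  using namespace llvm;

  static void classifyDep(MemoryDependenceAnalysis *MD, Instruction *Inst) {
    MemDepResult Dep = MD->getDependency(Inst);
    if (Dep.isNonLocal()) {
      // Answer lives outside this block; use the non-local query instead.
    } else if (Dep.isUnknown()) {
      // MemDep could not analyze the dependence; treat conservatively.
    } else {
      // A concrete local answer: a defining or clobbering instruction.
      Instruction *DepInst = Dep.getInst();
      (void)DepInst;
    }
  }
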
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 2515fd1..87b7317 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -91,6 +91,7 @@ namespace {
uint32_t nextValueNumber;
Expression create_expression(Instruction* I);
+ Expression create_extractvalue_expression(ExtractValueInst* EI);
uint32_t lookup_or_add_call(CallInst* C);
public:
ValueTable() : nextValueNumber(1) { }
@@ -141,7 +142,6 @@ template <> struct DenseMapInfo<Expression> {
// ValueTable Internal Functions
//===----------------------------------------------------------------------===//
-
Expression ValueTable::create_expression(Instruction *I) {
Expression e;
e.type = I->getType();
@@ -150,12 +150,8 @@ Expression ValueTable::create_expression(Instruction *I) {
OI != OE; ++OI)
e.varargs.push_back(lookup_or_add(*OI));
- if (CmpInst *C = dyn_cast<CmpInst>(I))
+ if (CmpInst *C = dyn_cast<CmpInst>(I)) {
e.opcode = (C->getOpcode() << 8) | C->getPredicate();
- else if (ExtractValueInst *E = dyn_cast<ExtractValueInst>(I)) {
- for (ExtractValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
- II != IE; ++II)
- e.varargs.push_back(*II);
} else if (InsertValueInst *E = dyn_cast<InsertValueInst>(I)) {
for (InsertValueInst::idx_iterator II = E->idx_begin(), IE = E->idx_end();
II != IE; ++II)
@@ -165,6 +161,58 @@ Expression ValueTable::create_expression(Instruction *I) {
return e;
}
+Expression ValueTable::create_extractvalue_expression(ExtractValueInst *EI) {
+ assert(EI != 0 && "Not an ExtractValueInst?");
+ Expression e;
+ e.type = EI->getType();
+ e.opcode = 0;
+
+ IntrinsicInst *I = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+ if (I != 0 && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
+ // EI might be an extract from one of our recognised intrinsics. If it
+ // is, we'll synthesize a semantically equivalent expression instead of
+ // an extract value expression.
+ switch (I->getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ e.opcode = Instruction::Add;
+ break;
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ e.opcode = Instruction::Sub;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ e.opcode = Instruction::Mul;
+ break;
+ default:
+ break;
+ }
+
+ if (e.opcode != 0) {
+ // Intrinsic recognised. Grab its args to finish building the expression.
+ assert(I->getNumArgOperands() == 2 &&
+ "Expect two args for recognised intrinsics.");
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(0)));
+ e.varargs.push_back(lookup_or_add(I->getArgOperand(1)));
+ return e;
+ }
+ }
+
+ // Not a recognised intrinsic. Fall back to producing an extract value
+ // expression.
+ e.opcode = EI->getOpcode();
+ for (Instruction::op_iterator OI = EI->op_begin(), OE = EI->op_end();
+ OI != OE; ++OI)
+ e.varargs.push_back(lookup_or_add(*OI));
+
+ for (ExtractValueInst::idx_iterator II = EI->idx_begin(), IE = EI->idx_end();
+ II != IE; ++II)
+ e.varargs.push_back(*II);
+
+ return e;
+}
+
//===----------------------------------------------------------------------===//
// ValueTable External Functions
//===----------------------------------------------------------------------===//
@@ -227,21 +275,19 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
// Non-local case.
const MemoryDependenceAnalysis::NonLocalDepInfo &deps =
MD->getNonLocalCallDependency(CallSite(C));
- // FIXME: call/call dependencies for readonly calls should return def, not
- // clobber! Move the checking logic to MemDep!
+ // FIXME: Move the checking logic to MemDep!
CallInst* cdep = 0;
// Check to see if we have a single dominating call instruction that is
// identical to C.
for (unsigned i = 0, e = deps.size(); i != e; ++i) {
const NonLocalDepEntry *I = &deps[i];
- // Ignore non-local dependencies.
if (I->getResult().isNonLocal())
continue;
- // We don't handle non-depedencies. If we already have a call, reject
+ // We don't handle non-definitions. If we already have a call, reject
// instruction dependencies.
- if (I->getResult().isClobber() || cdep != 0) {
+ if (!I->getResult().isDef() || cdep != 0) {
cdep = 0;
break;
}
@@ -338,11 +384,13 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
case Instruction::ExtractElement:
case Instruction::InsertElement:
case Instruction::ShuffleVector:
- case Instruction::ExtractValue:
case Instruction::InsertValue:
case Instruction::GetElementPtr:
exp = create_expression(I);
break;
+ case Instruction::ExtractValue:
+ exp = create_extractvalue_expression(cast<ExtractValueInst>(I));
+ break;
default:
valueNumbering[V] = nextValueNumber;
return nextValueNumber++;
@@ -1192,8 +1240,10 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
// escaping uses to any values that are operands to these PHIs.
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i) {
PHINode *P = NewPHIs[i];
- for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii)
- AA->addEscapingUse(P->getOperandUse(2*ii));
+ for (unsigned ii = 0, ee = P->getNumIncomingValues(); ii != ee; ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ AA->addEscapingUse(P->getOperandUse(jj));
+ }
}
}
@@ -1224,12 +1274,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].getResult().isClobber() &&
- Deps[0].getResult().getInst()->getParent() == LI->getParent()) {
+ if (Deps.size() == 1 && Deps[0].getResult().isUnknown()) {
DEBUG(
dbgs() << "GVN: non-local load ";
WriteAsOperand(dbgs(), LI);
- dbgs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
+ dbgs() << " has unknown dependencies\n";
);
return false;
}
@@ -1245,6 +1294,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
BasicBlock *DepBB = Deps[i].getBB();
MemDepResult DepInfo = Deps[i].getResult();
+ if (DepInfo.isUnknown()) {
+ UnavailableBlocks.push_back(DepBB);
+ continue;
+ }
+
if (DepInfo.isClobber()) {
// The address being loaded in this non-local block may not be the same as
// the pointer operand of the load if PHI translation occurs. Make sure
@@ -1305,6 +1359,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
continue;
}
+ assert(DepInfo.isDef() && "Expecting def here");
+
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
@@ -1691,10 +1747,22 @@ bool GVN::processLoad(LoadInst *L) {
return false;
}
+ if (Dep.isUnknown()) {
+ DEBUG(
+ // fast print dep, using operator<< on instruction is too slow.
+ dbgs() << "GVN: load ";
+ WriteAsOperand(dbgs(), L);
+ dbgs() << " has unknown dependence\n";
+ );
+ return false;
+ }
+
// If it is defined in another block, try harder.
if (Dep.isNonLocal())
return processNonLocalLoad(L);
+ assert(Dep.isDef() && "Expecting def here");
+
Instruction *DepInst = Dep.getInst();
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInst)) {
Value *StoredVal = DepSI->getValueOperand();
@@ -2133,8 +2201,11 @@ bool GVN::performPRE(Function &F) {
// Because we have added a PHI-use of the pointer value, it has now
// "escaped" from alias analysis' perspective. We need to inform
// AA of this.
- for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee; ++ii)
- VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(2*ii));
+ for (unsigned ii = 0, ee = Phi->getNumIncomingValues(); ii != ee;
+ ++ii) {
+ unsigned jj = PHINode::getOperandNumForIncomingValue(ii);
+ VN.getAliasAnalysis()->addEscapingUse(Phi->getOperandUse(jj));
+ }
if (MD)
MD->invalidateCachedPointerInfo(Phi);
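
Both GVN fixes above replace the hard-coded 2*ii operand index with the official mapping, which survives the PHI operand-layout change (incoming blocks are no longer interleaved into the operand list). Sketch of the corrected loop; notifyEscapes is a hypothetical name:

  #include "llvm/Instructions.h"
  #include "llvm/Analysis/AliasAnalysis.h"
  using namespace llvm;

  static void notifyEscapes(AliasAnalysis *AA, PHINode *P) {
    for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i) {
      // Map incoming-value index to operand index; never assume 2*i.
      unsigned Op = PHINode::getOperandNumForIncomingValue(i);
      AA->addEscapingUse(P->getOperandUse(Op));
    }
  }
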
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 04ee7c8..dee3d38 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -52,30 +52,32 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
-STATISTIC(NumRemoved , "Number of aux indvars removed");
-STATISTIC(NumWidened , "Number of indvars widened");
-STATISTIC(NumInserted, "Number of canonical indvars added");
-STATISTIC(NumReplaced, "Number of exit values replaced");
-STATISTIC(NumLFTR , "Number of loop exit tests replaced");
-STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
-STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
-STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
-
-// DisableIVRewrite mode currently affects IVUsers, so is defined in libAnalysis
-// and referenced here.
-namespace llvm {
- extern bool DisableIVRewrite;
-}
+STATISTIC(NumRemoved , "Number of aux indvars removed");
+STATISTIC(NumWidened , "Number of indvars widened");
+STATISTIC(NumInserted , "Number of canonical indvars added");
+STATISTIC(NumReplaced , "Number of exit values replaced");
+STATISTIC(NumLFTR , "Number of loop exit tests replaced");
+STATISTIC(NumElimIdentity, "Number of IV identities eliminated");
+STATISTIC(NumElimExt , "Number of IV sign/zero extends eliminated");
+STATISTIC(NumElimRem , "Number of IV remainder operations eliminated");
+STATISTIC(NumElimCmp , "Number of IV comparisons eliminated");
+STATISTIC(NumElimIV , "Number of congruent IVs eliminated");
+
+static cl::opt<bool> DisableIVRewrite(
+ "disable-iv-rewrite", cl::Hidden,
+ cl::desc("Disable canonical induction variable rewriting"));
namespace {
class IndVarSimplify : public LoopPass {
@@ -84,12 +86,14 @@ namespace {
ScalarEvolution *SE;
DominatorTree *DT;
TargetData *TD;
+
SmallVector<WeakVH, 16> DeadInsts;
bool Changed;
public:
static char ID; // Pass identification, replacement for typeid
- IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0) {
+ IndVarSimplify() : LoopPass(ID), IU(0), LI(0), SE(0), DT(0), TD(0),
+ Changed(false) {
initializeIndVarSimplifyPass(*PassRegistry::getPassRegistry());
}
@@ -101,36 +105,46 @@ namespace {
AU.addRequired<ScalarEvolution>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
- AU.addRequired<IVUsers>();
+ if (!DisableIVRewrite)
+ AU.addRequired<IVUsers>();
AU.addPreserved<ScalarEvolution>();
AU.addPreservedID(LoopSimplifyID);
AU.addPreservedID(LCSSAID);
- AU.addPreserved<IVUsers>();
+ if (!DisableIVRewrite)
+ AU.addPreserved<IVUsers>();
AU.setPreservesCFG();
}
private:
+ virtual void releaseMemory() {
+ DeadInsts.clear();
+ }
+
bool isValidRewrite(Value *FromVal, Value *ToVal);
+ void HandleFloatingPointIV(Loop *L, PHINode *PH);
+ void RewriteNonIntegerIVs(Loop *L);
+
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
+
void SimplifyIVUsers(SCEVExpander &Rewriter);
+ void SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter);
+
+ bool EliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void EliminateIVRemainder(BinaryOperator *Rem,
Value *IVOperand,
- bool IsSigned,
- PHINode *IVPhi);
- void RewriteNonIntegerIVs(Loop *L);
+ bool IsSigned);
+
+ void SimplifyCongruentIVs(Loop *L);
+
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
ICmpInst *LinearFunctionTestReplace(Loop *L, const SCEV *BackedgeTakenCount,
PHINode *IndVar,
SCEVExpander &Rewriter);
- void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
-
- void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
-
void SinkUnusedInvariants(Loop *L);
-
- void HandleFloatingPointIV(Loop *L, PHINode *PH);
};
}
@@ -197,156 +211,262 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
return true;
}
-/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
-/// count expression can be safely and cheaply expanded into an instruction
-/// sequence that can be used by LinearFunctionTestReplace.
-static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
- BackedgeTakenCount->isZero())
- return false;
+//===----------------------------------------------------------------------===//
+// RewriteNonIntegerIVs and helpers. Prefer integer IVs.
+//===----------------------------------------------------------------------===//
- if (!L->getExitingBlock())
+/// ConvertToSInt - Convert APF to an integer, if possible.
+static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
+ bool isExact = false;
+ if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
return false;
-
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
+ // See if we can convert this to an int64_t
+ uint64_t UIntVal;
+ if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
+ &isExact) != APFloat::opOK || !isExact)
return false;
-
- // Special case: If the backedge-taken count is a UDiv, it's very likely a
- // UDiv that ScalarEvolution produced in order to compute a precise
- // expression, rather than a UDiv from the user's code. If we can't find a
- // UDiv in the code with some simple searching, assume the former and forego
- // rewriting the loop.
- if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
- ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!OrigCond) return false;
- const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
- R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
- if (R != BackedgeTakenCount) {
- const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
- L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
- if (L != BackedgeTakenCount)
- return false;
- }
- }
+ IntVal = UIntVal;
return true;
}
-/// getBackedgeIVType - Get the widest type used by the loop test after peeking
-/// through Truncs.
+/// HandleFloatingPointIV - If the loop has floating induction variable
+/// then insert corresponding integer induction variable if possible.
+/// For example,
+/// for(double i = 0; i < 10000; ++i)
+/// bar(i)
+/// is converted into
+/// for(int i = 0; i < 10000; ++i)
+/// bar((double)i);
///
-/// TODO: Unnecessary once LinearFunctionTestReplace is removed.
-static const Type *getBackedgeIVType(Loop *L) {
- if (!L->getExitingBlock())
- return 0;
+void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
+ unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
+ unsigned BackEdge = IncomingEdge^1;
- // Can't rewrite non-branch yet.
- BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
- if (!BI)
- return 0;
+ // Check incoming value.
+ ConstantFP *InitValueVal =
+ dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
- ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cond)
- return 0;
+ int64_t InitValue;
+ if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
+ return;
- const Type *Ty = 0;
- for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
- OI != OE; ++OI) {
- assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
- TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
- if (!Trunc)
- continue;
+ // Check IV increment. Reject this PN if the increment operation is not
+ // an add or the increment value cannot be represented by an integer.
+ BinaryOperator *Incr =
+ dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
+ if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
- return Trunc->getSrcTy();
+ // If this is not an add of the PHI with a constantfp, or if the constant fp
+ // is not an integer, bail out.
+ ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
+ int64_t IncValue;
+ if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
+ !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
+ return;
+
+ // Check Incr uses. One user is PN and the other user is an exit condition
+ // used by the conditional terminator.
+ Value::use_iterator IncrUse = Incr->use_begin();
+ Instruction *U1 = cast<Instruction>(*IncrUse++);
+ if (IncrUse == Incr->use_end()) return;
+ Instruction *U2 = cast<Instruction>(*IncrUse++);
+ if (IncrUse != Incr->use_end()) return;
+
+ // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
+ // only used by a branch, we can't transform it.
+ FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
+ if (!Compare)
+ Compare = dyn_cast<FCmpInst>(U2);
+ if (Compare == 0 || !Compare->hasOneUse() ||
+ !isa<BranchInst>(Compare->use_back()))
+ return;
+
+ BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
+
+ // We need to verify that the branch actually controls the iteration count
+ // of the loop. If not, the new IV can overflow and no one will notice.
+ // The branch block must be in the loop and one of the successors must be out
+ // of the loop.
+ assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
+ if (!L->contains(TheBr->getParent()) ||
+ (L->contains(TheBr->getSuccessor(0)) &&
+ L->contains(TheBr->getSuccessor(1))))
+ return;
+
+
+ // If it isn't a comparison with an integer-as-fp (the exit value), we can't
+ // transform it.
+ ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
+ int64_t ExitValue;
+ if (ExitValueVal == 0 ||
+ !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
+ return;
+
+ // Find new predicate for integer comparison.
+ CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
+ switch (Compare->getPredicate()) {
+ default: return; // Unknown comparison.
+ case CmpInst::FCMP_OEQ:
+ case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
+ case CmpInst::FCMP_ONE:
+ case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
+ case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
+ case CmpInst::FCMP_OGE:
+ case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
+ case CmpInst::FCMP_OLT:
+ case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
+ case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
}
- return Ty;
-}
-/// LinearFunctionTestReplace - This method rewrites the exit condition of the
-/// loop to be a canonical != comparison against the incremented loop induction
-/// variable. This pass is able to rewrite the exit tests of any loop where the
-/// SCEV analysis can determine a loop-invariant trip count of the loop, which
-/// is actually a much broader range than just linear tests.
-ICmpInst *IndVarSimplify::
-LinearFunctionTestReplace(Loop *L,
- const SCEV *BackedgeTakenCount,
- PHINode *IndVar,
- SCEVExpander &Rewriter) {
- assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
- BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ // We convert the floating point induction variable to a signed i32 value if
+ // we can. This is only safe if the comparison cannot overflow in a way that
+ // the equivalent integer operations would fail to catch. Check for this now.
+ // TODO: We could use i64 if it is native and the range requires it.
- // If the exiting block is not the same as the backedge block, we must compare
- // against the preincremented value, otherwise we prefer to compare against
- // the post-incremented value.
- Value *CmpIndVar;
- const SCEV *RHS = BackedgeTakenCount;
- if (L->getExitingBlock() == L->getLoopLatch()) {
- // Add one to the "backedge-taken" count to get the trip count.
- // If this addition may overflow, we have to be more pessimistic and
- // cast the induction variable before doing the add.
- const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
- const SCEV *N =
- SE->getAddExpr(BackedgeTakenCount,
- SE->getConstant(BackedgeTakenCount->getType(), 1));
- if ((isa<SCEVConstant>(N) && !N->isZero()) ||
- SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
- // No overflow. Cast the sum.
- RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
- } else {
- // Potential overflow. Cast before doing the add.
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- RHS = SE->getAddExpr(RHS,
- SE->getConstant(IndVar->getType(), 1));
+ // The start/stride/exit values must all fit in signed i32.
+ if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
+ return;
+
+ // If not actually striding (add x, 0.0), avoid touching the code.
+ if (IncValue == 0)
+ return;
+
+ // Positive and negative strides have different safety conditions.
+ if (IncValue > 0) {
+ // If we have a positive stride, we require the init to be less than the
+ // exit value and the comparison to be an equality or less-than test.
+ if (InitValue >= ExitValue ||
+ NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
+ return;
+
+ uint32_t Range = uint32_t(ExitValue-InitValue);
+ if (NewPred == CmpInst::ICMP_SLE) {
+ // Normalize SLE -> SLT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
}
- // The BackedgeTaken expression contains the number of times that the
- // backedge branches to the loop header. This is one less than the
- // number of times the loop executes, so use the incremented indvar.
- CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ unsigned Leftover = Range % uint32_t(IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
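+
+ // Worked example (illustrative numbers): with InitValue = 0, ExitValue = 10
+ // and IncValue = 3, Range = 10 and Leftover = 1; the integer IV would step
+ // 0, 3, 6, 9, 12, ... and never land exactly on 10.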
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
+ return;
+
} else {
- // We have to use the preincremented value...
- RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
- IndVar->getType());
- CmpIndVar = IndVar;
+ // If we have a negative stride, we require the init to be greater than the
+ // exit value and the comparison to be an equality or greater-than test.
+ if (InitValue <= ExitValue ||
+ NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
+ return;
+
+ uint32_t Range = uint32_t(InitValue-ExitValue);
+ if (NewPred == CmpInst::ICMP_SGE) {
+ // Normalize SGE -> SGT, check for infinite loop.
+ if (++Range == 0) return; // Range overflows.
+ }
+
+ unsigned Leftover = Range % uint32_t(-IncValue);
+
+ // If this is an equality comparison, we require that the strided value
+ // exactly land on the exit value, otherwise the IV condition will wrap
+ // around and do things the fp IV wouldn't.
+ if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
+ Leftover != 0)
+ return;
+
+ // If the stride would wrap around the i32 before exiting, we can't
+ // transform the IV.
+ if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
+ return;
}
- // Expand the code for the iteration count.
- assert(SE->isLoopInvariant(RHS, L) &&
- "Computed iteration count is not loop invariant!");
- Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+ const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
- // Insert a new icmp_ne or icmp_eq instruction before the branch.
- ICmpInst::Predicate Opcode;
- if (L->contains(BI->getSuccessor(0)))
- Opcode = ICmpInst::ICMP_NE;
- else
- Opcode = ICmpInst::ICMP_EQ;
+ // Insert new integer induction variable.
+ PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
+ NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
+ PN->getIncomingBlock(IncomingEdge));
- DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
- << " LHS:" << *CmpIndVar << '\n'
- << " op:\t"
- << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
- << " RHS:\t" << *RHS << "\n");
+ Value *NewAdd =
+ BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
+ Incr->getName()+".int", Incr);
+ NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
- ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+ ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
+ ConstantInt::get(Int32Ty, ExitValue),
+ Compare->getName());
- Value *OrigCond = BI->getCondition();
- // It's tempting to use replaceAllUsesWith here to fully replace the old
- // comparison, but that's not immediately safe, since users of the old
- // comparison may not be dominated by the new comparison. Instead, just
- // update the branch to use the new comparison; in the common case this
- // will make old comparison dead.
- BI->setCondition(Cond);
- DeadInsts.push_back(OrigCond);
+ // In the following deletions, PN may become dead and may be deleted.
+ // Use a WeakVH to observe whether this happens.
+ WeakVH WeakPH = PN;
- ++NumLFTR;
- Changed = true;
- return Cond;
+ // Delete the old floating point exit comparison. The branch starts using the
+ // new comparison.
+ NewCompare->takeName(Compare);
+ Compare->replaceAllUsesWith(NewCompare);
+ RecursivelyDeleteTriviallyDeadInstructions(Compare);
+
+ // Delete the old floating point increment.
+ Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
+ RecursivelyDeleteTriviallyDeadInstructions(Incr);
+
+ // If the FP induction variable still has uses, this is because something else
+ // in the loop uses its value. In order to canonicalize the induction
+ // variable, we chose to eliminate the IV and rewrite it in terms of an
+ // int->fp cast.
+ //
+ // We give preference to sitofp over uitofp because it is faster on most
+ // platforms.
+ if (WeakPH) {
+ Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
+ PN->getParent()->getFirstNonPHI());
+ PN->replaceAllUsesWith(Conv);
+ RecursivelyDeleteTriviallyDeadInstructions(PN);
+ }
+
+ // Add a new IVUsers entry for the newly-created integer PHI.
+ if (IU)
+ IU->AddUsersIfInteresting(NewPHI);
}
+void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
+ // First step. Check to see if there are any floating-point recurrences.
+ // If there are, change them into integer recurrences, permitting analysis by
+ // the SCEV routines.
+ //
+ BasicBlock *Header = L->getHeader();
+
+ SmallVector<WeakVH, 8> PHIs;
+ for (BasicBlock::iterator I = Header->begin();
+ PHINode *PN = dyn_cast<PHINode>(I); ++I)
+ PHIs.push_back(PN);
+
+ for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
+ if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
+ HandleFloatingPointIV(L, PN);
+
+ // If the loop previously had a floating-point IV, ScalarEvolution
+ // may not have been able to compute a trip count. Now that we've done some
+ // re-writing, the trip count may be computable.
+ if (Changed)
+ SE->forgetLoop(L);
+}
+
+//===----------------------------------------------------------------------===//
+// RewriteLoopExitValues - Optimize IV users outside the loop.
+// As a side effect, reduces the amount of IV processing within the loop.
+//===----------------------------------------------------------------------===//
+
/// RewriteLoopExitValues - Check to see if this loop has a computable
/// loop-invariant execution count. If so, this means that we can compute the
/// final value of any expressions that are recurrent in the loop, and
@@ -460,29 +580,168 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
Rewriter.clearInsertPoint();
}
-void IndVarSimplify::RewriteNonIntegerIVs(Loop *L) {
- // First step. Check to see if there are any floating-point recurrences.
- // If there are, change them into integer recurrences, permitting analysis by
- // the SCEV routines.
+//===----------------------------------------------------------------------===//
+// Rewrite IV users based on a canonical IV.
+// To be replaced by -disable-iv-rewrite.
+//===----------------------------------------------------------------------===//
+
+/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
+/// loop. IVUsers is treated as a worklist. Each successive simplification may
+/// push more users which may themselves be candidates for simplification.
+///
+/// This is the old approach to IV simplification to be replaced by
+/// SimplifyIVUsersNoRewrite.
+///
+void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
+ // Each round of simplification involves a round of eliminating operations
+ // followed by a round of widening IVs. A single IVUsers worklist is used
+ // across all rounds. The inner loop advances the user. If widening exposes
+ // more uses, then another pass through the outer loop is triggered.
+ for (IVUsers::iterator I = IU->begin(); I != IU->end(); ++I) {
+ Instruction *UseInst = I->getUser();
+ Value *IVOperand = I->getOperandValToReplace();
+
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ continue;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ continue;
+ }
+ }
+ }
+}
+
+// FIXME: It is an extremely bad idea to indvar substitute anything more
+// complex than affine induction variables. Doing so will put expensive
+// polynomial evaluations inside of the loop, and the str reduction pass
+// currently can only reduce affine polynomials. For now just disable
+// indvar subst on anything more complex than an affine addrec, unless
+// it can be expanded to a trivial value.
+static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
+ // Loop-invariant values are safe.
+ if (SE->isLoopInvariant(S, L)) return true;
+
+ // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
+ // to transform them into efficient code.
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ return AR->isAffine();
+
+ // An add is safe if all its operands are safe.
+ if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
+ for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
+ E = Commutative->op_end(); I != E; ++I)
+ if (!isSafe(*I, L, SE)) return false;
+ return true;
+ }
+
+ // A cast is safe if its operand is.
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return isSafe(C->getOperand(), L, SE);
+
+ // A udiv is safe if its operands are.
+ if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
+ return isSafe(UD->getLHS(), L, SE) &&
+ isSafe(UD->getRHS(), L, SE);
+
+ // SCEVUnknown is always safe.
+ if (isa<SCEVUnknown>(S))
+ return true;
+
+ // Nothing else is safe.
+ return false;
+}
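+
+// For instance (illustrative): the quadratic recurrence {0,+,1,+,1}, which
+// evaluates to i*(i+1)/2 on iteration i, is an addrec but not affine, so
+// isSafe rejects it and RewriteIVExpressions leaves that user's code alone.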
+
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
+ // Rewrite all induction variable expressions in terms of the canonical
+ // induction variable.
//
- BasicBlock *Header = L->getHeader();
+ // If there were induction variables of other sizes or offsets, manually
+ // add the offsets to the primary induction variable and cast, avoiding
+ // the need for the code evaluation methods to insert induction variables
+ // of different sizes.
+ for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
+ Value *Op = UI->getOperandValToReplace();
+ const Type *UseTy = Op->getType();
+ Instruction *User = UI->getUser();
- SmallVector<WeakVH, 8> PHIs;
- for (BasicBlock::iterator I = Header->begin();
- PHINode *PN = dyn_cast<PHINode>(I); ++I)
- PHIs.push_back(PN);
+ // Compute the final addrec to expand into code.
+ const SCEV *AR = IU->getReplacementExpr(*UI);
- for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
- if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
- HandleFloatingPointIV(L, PN);
+ // Evaluate the expression out of the loop, if possible.
+ if (!L->contains(UI->getUser())) {
+ const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
+ if (SE->isLoopInvariant(ExitVal, L))
+ AR = ExitVal;
+ }
- // If the loop previously had floating-point IV, ScalarEvolution
- // may not have been able to compute a trip count. Now that we've done some
- // re-writing, the trip count may be computable.
- if (Changed)
- SE->forgetLoop(L);
+ // FIXME: It is an extremely bad idea to indvar substitute anything more
+ // complex than affine induction variables. Doing so will put expensive
+ // polynomial evaluations inside of the loop, and the str reduction pass
+ // currently can only reduce affine polynomials. For now just disable
+ // indvar subst on anything more complex than an affine addrec, unless
+ // it can be expanded to a trivial value.
+ if (!isSafe(AR, L, SE))
+ continue;
+
+ // Determine the insertion point for this user. By default, insert
+ // immediately before the user. The SCEVExpander class will automatically
+ // hoist loop invariants out of the loop. For PHI nodes, there may be
+ // multiple uses, so compute the nearest common dominator for the
+ // incoming blocks.
+ Instruction *InsertPt = User;
+ if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+ for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+ if (PHI->getIncomingValue(i) == Op) {
+ if (InsertPt == User)
+ InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+ else
+ InsertPt =
+ DT->findNearestCommonDominator(InsertPt->getParent(),
+ PHI->getIncomingBlock(i))
+ ->getTerminator();
+ }
+
+ // Now expand it into actual Instructions and patch it into place.
+ Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
+
+ DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
+ << " into = " << *NewVal << "\n");
+
+ if (!isValidRewrite(Op, NewVal)) {
+ DeadInsts.push_back(NewVal);
+ continue;
+ }
+ // Inform ScalarEvolution that this value is changing. The change doesn't
+ // affect its value, but it does potentially affect which use lists the
+ // value will be on after the replacement, which affects ScalarEvolution's
+ // ability to walk use lists and drop dangling pointers when a value is
+ // deleted.
+ SE->forgetValue(User);
+
+ // Patch the new value into place.
+ if (Op->hasName())
+ NewVal->takeName(Op);
+ if (Instruction *NewValI = dyn_cast<Instruction>(NewVal))
+ NewValI->setDebugLoc(User->getDebugLoc());
+ User->replaceUsesOfWith(Op, NewVal);
+ UI->setOperandValToReplace(NewVal);
+
+ ++NumRemoved;
+ Changed = true;
+
+ // The old value may be dead now.
+ DeadInsts.push_back(Op);
+ }
}
+//===----------------------------------------------------------------------===//
+// IV Widening - Extend the width of an IV to cover its widest uses.
+//===----------------------------------------------------------------------===//
+
namespace {
// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
@@ -493,33 +752,30 @@ namespace {
WideIVInfo() : WidestNativeType(0), IsSigned(false) {}
};
- typedef std::map<PHINode *, WideIVInfo> WideIVMap;
}
/// CollectExtend - Update information about the induction variable that is
/// extended by this sign or zero extend operation. This is used to determine
/// the final width of the IV before actually widening it.
-static void CollectExtend(CastInst *Cast, PHINode *Phi, bool IsSigned,
- WideIVMap &IVMap, ScalarEvolution *SE,
- const TargetData *TD) {
+static void CollectExtend(CastInst *Cast, bool IsSigned, WideIVInfo &WI,
+ ScalarEvolution *SE, const TargetData *TD) {
const Type *Ty = Cast->getType();
uint64_t Width = SE->getTypeSizeInBits(Ty);
if (TD && !TD->isLegalInteger(Width))
return;
- WideIVInfo &IVInfo = IVMap[Phi];
- if (!IVInfo.WidestNativeType) {
- IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
- IVInfo.IsSigned = IsSigned;
+ if (!WI.WidestNativeType) {
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ WI.IsSigned = IsSigned;
return;
}
// We extend the IV to satisfy the sign of its first user, arbitrarily.
- if (IVInfo.IsSigned != IsSigned)
+ if (WI.IsSigned != IsSigned)
return;
- if (Width > SE->getTypeSizeInBits(IVInfo.WidestNativeType))
- IVInfo.WidestNativeType = SE->getEffectiveSCEVType(Ty);
+ if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
+ WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}
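
// For example (illustrative): if an i32 IV is first seen feeding a zext to
// i64, WI records {i64, unsigned}; a later sext user is then ignored because
// the sign differs, and a second zext to i64 leaves the record unchanged.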
namespace {
@@ -529,43 +785,45 @@ namespace {
/// inserting truncs whenever we stop propagating the type.
///
class WidenIV {
+ // Parameters
PHINode *OrigPhi;
const Type *WideType;
bool IsSigned;
- IVUsers *IU;
- LoopInfo *LI;
- Loop *L;
+ // Context
+ LoopInfo *LI;
+ Loop *L;
ScalarEvolution *SE;
- DominatorTree *DT;
- SmallVectorImpl<WeakVH> &DeadInsts;
+ DominatorTree *DT;
+ // Result
PHINode *WidePhi;
Instruction *WideInc;
const SCEV *WideIncExpr;
+ SmallVectorImpl<WeakVH> &DeadInsts;
- SmallPtrSet<Instruction*,16> Processed;
+ SmallPtrSet<Instruction*,16> Widened;
+ SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
public:
- WidenIV(PHINode *PN, const WideIVInfo &IVInfo, IVUsers *IUsers,
- LoopInfo *LInfo, ScalarEvolution *SEv, DominatorTree *DTree,
+ WidenIV(PHINode *PN, const WideIVInfo &WI, LoopInfo *LInfo,
+ ScalarEvolution *SEv, DominatorTree *DTree,
SmallVectorImpl<WeakVH> &DI) :
OrigPhi(PN),
- WideType(IVInfo.WidestNativeType),
- IsSigned(IVInfo.IsSigned),
- IU(IUsers),
+ WideType(WI.WidestNativeType),
+ IsSigned(WI.IsSigned),
LI(LInfo),
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
- DeadInsts(DI),
WidePhi(0),
WideInc(0),
- WideIncExpr(0) {
+ WideIncExpr(0),
+ DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
}
- bool CreateWideIV(SCEVExpander &Rewriter);
+ PHINode *CreateWideIV(SCEVExpander &Rewriter);
protected:
Instruction *CloneIVUser(Instruction *NarrowUse,
@@ -574,58 +832,13 @@ protected:
const SCEVAddRecExpr *GetWideRecurrence(Instruction *NarrowUse);
- Instruction *WidenIVUse(Instruction *NarrowUse,
- Instruction *NarrowDef,
+ Instruction *WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
Instruction *WideDef);
+
+ void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};
} // anonymous namespace
-/// SimplifyIVUsers - Iteratively perform simplification on IVUsers within this
-/// loop. IVUsers is treated as a worklist. Each successive simplification may
-/// push more users which may themselves be candidates for simplification.
-///
-void IndVarSimplify::SimplifyIVUsers(SCEVExpander &Rewriter) {
- WideIVMap IVMap;
-
- // Each round of simplification involves a round of eliminating operations
- // followed by a round of widening IVs. A single IVUsers worklist is used
- // across all rounds. The inner loop advances the user. If widening exposes
- // more uses, then another pass through the outer loop is triggered.
- for (IVUsers::iterator I = IU->begin(), E = IU->end(); I != E;) {
- for(; I != E; ++I) {
- Instruction *UseInst = I->getUser();
- Value *IVOperand = I->getOperandValToReplace();
-
- if (DisableIVRewrite) {
- if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
- bool IsSigned = Cast->getOpcode() == Instruction::SExt;
- if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
- CollectExtend(Cast, I->getPhi(), IsSigned, IVMap, SE, TD);
- continue;
- }
- }
- }
- if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
- EliminateIVComparison(ICmp, IVOperand);
- continue;
- }
- if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
- bool IsSigned = Rem->getOpcode() == Instruction::SRem;
- if (IsSigned || Rem->getOpcode() == Instruction::URem) {
- EliminateIVRemainder(Rem, IVOperand, IsSigned, I->getPhi());
- continue;
- }
- }
- }
- for (WideIVMap::const_iterator I = IVMap.begin(), E = IVMap.end();
- I != E; ++I) {
- WidenIV Widener(I->first, I->second, IU, LI, SE, DT, DeadInsts);
- if (Widener.CreateWideIV(Rewriter))
- Changed = true;
- }
- }
-}
-
static Value *getExtend( Value *NarrowOper, const Type *WideType,
bool IsSigned, IRBuilder<> &Builder) {
return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
@@ -671,34 +884,16 @@ Instruction *WidenIV::CloneIVUser(Instruction *NarrowUse,
LHS, RHS,
NarrowBO->getName());
Builder.Insert(WideBO);
- if (NarrowBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
- if (NarrowBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
-
+ if (const OverflowingBinaryOperator *OBO =
+ dyn_cast<OverflowingBinaryOperator>(NarrowBO)) {
+ if (OBO->hasNoUnsignedWrap()) WideBO->setHasNoUnsignedWrap();
+ if (OBO->hasNoSignedWrap()) WideBO->setHasNoSignedWrap();
+ }
return WideBO;
}
llvm_unreachable(0);
}
-// GetWideRecurrence - Is this instruction potentially interesting from IVUsers'
-// perspective after widening it's type? In other words, can the extend be
-// safely hoisted out of the loop with SCEV reducing the value to a recurrence
-// on the same loop. If so, return the sign or zero extended
-// recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
- if (!SE->isSCEVable(NarrowUse->getType()))
- return 0;
-
- const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
- const SCEV *WideExpr = IsSigned ?
- SE->getSignExtendExpr(NarrowExpr, WideType) :
- SE->getZeroExtendExpr(NarrowExpr, WideType);
- const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
- if (!AddRec || AddRec->getLoop() != L)
- return 0;
-
- return AddRec;
-}
-
/// HoistStep - Attempt to hoist an IV increment above a potential use.
///
/// To successfully hoist, two criteria must be met:
@@ -733,18 +928,41 @@ static bool HoistStep(Instruction *IncV, Instruction *InsertPos,
return true;
}
+// GetWideRecurrence - Is this instruction potentially interesting from
+// IVUsers' perspective after widening its type? In other words, can the
+// extend be safely hoisted out of the loop with SCEV reducing the value to a
+// recurrence on the same loop? If so, return the sign or zero extended
+// recurrence. Otherwise return NULL.
+const SCEVAddRecExpr *WidenIV::GetWideRecurrence(Instruction *NarrowUse) {
+ if (!SE->isSCEVable(NarrowUse->getType()))
+ return 0;
+
+ const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType())
+ >= SE->getTypeSizeInBits(WideType)) {
+ // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
+ // index. So don't follow this use.
+ return 0;
+ }
+
+ const SCEV *WideExpr = IsSigned ?
+ SE->getSignExtendExpr(NarrowExpr, WideType) :
+ SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
+ if (!AddRec || AddRec->getLoop() != L)
+ return 0;
+
+ return AddRec;
+}
+
/// WidenIVUse - Determine whether an individual user of the narrow IV can be
/// widened. If so, return the wide clone of the user.
-Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
- Instruction *NarrowDef,
+Instruction *WidenIV::WidenIVUse(Use &NarrowDefUse, Instruction *NarrowDef,
Instruction *WideDef) {
- // To be consistent with IVUsers, stop traversing the def-use chain at
- // inner-loop phis or post-loop phis.
- if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
- return 0;
+ Instruction *NarrowUse = cast<Instruction>(NarrowDefUse.getUser());
- // Handle data flow merges and bizarre phi cycles.
- if (!Processed.insert(NarrowUse))
+ // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
+ if (isa<PHINode>(NarrowUse) && LI->getLoopFor(NarrowUse->getParent()) != L)
return 0;
// Our raison d'etre! Eliminate sign and zero extension.
@@ -755,7 +973,7 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
unsigned IVWidth = SE->getTypeSizeInBits(WideType);
if (CastWidth < IVWidth) {
// The cast isn't as wide as the IV, so insert a Trunc.
- IRBuilder<> Builder(NarrowUse);
+ IRBuilder<> Builder(NarrowDefUse);
NewDef = Builder.CreateTrunc(WideDef, NarrowUse->getType());
}
else {
@@ -775,23 +993,32 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
NarrowUse->replaceAllUsesWith(NewDef);
DeadInsts.push_back(NarrowUse);
}
- // Now that the extend is gone, expose it's uses to IVUsers for potential
- // further simplification within SimplifyIVUsers.
- IU->AddUsersIfInteresting(WideDef, WidePhi);
+ // Now that the extend is gone, we want to expose its uses for potential
+ // further simplification. We don't need to directly inform SimplifyIVUsers
+ // of the new users, because their parent IV will be processed later as a
+ // new loop phi. If we preserved IVUsers analysis, we would also want to
+ // push the uses of WideDef here.
// No further widening is needed. The deceased [sz]ext had done it for us.
return 0;
}
+
+ // Does this user itself evaluate to a recurrence after widening?
const SCEVAddRecExpr *WideAddRec = GetWideRecurrence(NarrowUse);
if (!WideAddRec) {
// This user does not evaluate to a recurrence after widening, so don't
// follow it. Instead insert a Trunc to kill off the original use,
// eventually isolating the original narrow IV so it can be removed.
- IRBuilder<> Builder(NarrowUse);
+ IRBuilder<> Builder(NarrowDefUse);
Value *Trunc = Builder.CreateTrunc(WideDef, NarrowDef->getType());
NarrowUse->replaceUsesOfWith(NarrowDef, Trunc);
return 0;
}
+ // We assume that block terminators are not SCEVable. We wouldn't want to
+ // insert a Trunc after a terminator if there happens to be a critical edge.
+ assert(NarrowUse != NarrowUse->getParent()->getTerminator() &&
+ "SCEV is not expected to evaluate a block terminator");
+
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = 0;
@@ -803,11 +1030,11 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
if (!WideUse)
return 0;
}
- // GetWideRecurrence ensured that the narrow expression could be extended
- // outside the loop without overflow. This suggests that the wide use
+ // Evaluation of WideAddRec ensured that the narrow expression could be
+ // extended outside the loop without overflow. This suggests that the wide use
// evaluates to the same expression as the extended narrow use, but doesn't
// absolutely guarantee it. Hence the following failsafe check. In rare cases
- // where it fails, we simple throw away the newly created wide use.
+ // where it fails, we simply throw away the newly created wide use.
if (WideAddRec != SE->getSCEV(WideUse)) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
<< ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
@@ -819,21 +1046,36 @@ Instruction *WidenIV::WidenIVUse(Instruction *NarrowUse,
return WideUse;
}
+/// pushNarrowIVUsers - Add eligible users of NarrowDef to NarrowIVUsers.
+///
+void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
+ for (Value::use_iterator UI = NarrowDef->use_begin(),
+ UE = NarrowDef->use_end(); UI != UE; ++UI) {
+ Use &U = UI.getUse();
+
+ // Handle data flow merges and bizarre phi cycles.
+ if (!Widened.insert(cast<Instruction>(U.getUser())))
+ continue;
+
+ NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideDef));
+ }
+}
+
/// CreateWideIV - Process a single induction variable. First use the
/// SCEVExpander to create a wide induction variable that evaluates to the same
/// recurrence as the original narrow IV. Then use a worklist to forward
-/// traverse the narrow IV's def-use chain. After WidenIVUse as processed all
+/// traverse the narrow IV's def-use chain. After WidenIVUse has processed all
/// interesting IV users, the narrow IV will be isolated for removal by
/// DeleteDeadPHIs.
///
/// It would be simpler to delete uses as they are processed, but we must avoid
/// invalidating SCEV expressions.
///
-bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
+PHINode *WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Is this phi an induction variable?
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
if (!AddRec)
- return false;
+ return NULL;
// Widen the induction variable expression.
const SCEV *WideIVExpr = IsSigned ?
@@ -846,9 +1088,9 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
// Can the IV be extended outside the loop without overflow?
AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
if (!AddRec || AddRec->getLoop() != L)
- return false;
+ return NULL;
- // An AddRec must have loop-invariant operands. Since this AddRec it
+ // An AddRec must have loop-invariant operands. Since this AddRec is
// materialized by a loop header phi, the expression cannot have any post-loop
// operands, so they must dominate the loop header.
assert(SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
@@ -876,39 +1118,37 @@ bool WidenIV::CreateWideIV(SCEVExpander &Rewriter) {
++NumWidened;
// Traverse the def-use chain using a worklist starting at the original IV.
- assert(Processed.empty() && "expect initial state" );
+ assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state" );
+
+ Widened.insert(OrigPhi);
+ pushNarrowIVUsers(OrigPhi, WidePhi);
- // Each worklist entry has a Narrow def-use link and Wide def.
- SmallVector<std::pair<Use *, Instruction *>, 8> NarrowIVUsers;
- for (Value::use_iterator UI = OrigPhi->use_begin(),
- UE = OrigPhi->use_end(); UI != UE; ++UI) {
- NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WidePhi));
- }
while (!NarrowIVUsers.empty()) {
- Use *NarrowDefUse;
+ Use *UsePtr;
Instruction *WideDef;
- tie(NarrowDefUse, WideDef) = NarrowIVUsers.pop_back_val();
+ tie(UsePtr, WideDef) = NarrowIVUsers.pop_back_val();
+ Use &NarrowDefUse = *UsePtr;
// Process a def-use edge. This may replace the use, so don't hold a
// use_iterator across it.
- Instruction *NarrowDef = cast<Instruction>(NarrowDefUse->get());
- Instruction *NarrowUse = cast<Instruction>(NarrowDefUse->getUser());
- Instruction *WideUse = WidenIVUse(NarrowUse, NarrowDef, WideDef);
+ Instruction *NarrowDef = cast<Instruction>(NarrowDefUse.get());
+ Instruction *WideUse = WidenIVUse(NarrowDefUse, NarrowDef, WideDef);
// Follow all def-use edges from the previous narrow use.
- if (WideUse) {
- for (Value::use_iterator UI = NarrowUse->use_begin(),
- UE = NarrowUse->use_end(); UI != UE; ++UI) {
- NarrowIVUsers.push_back(std::make_pair(&UI.getUse(), WideUse));
- }
- }
+ if (WideUse)
+ pushNarrowIVUsers(cast<Instruction>(NarrowDefUse.getUser()), WideUse);
+
// WidenIVUse may have removed the def-use edge.
if (NarrowDef->use_empty())
DeadInsts.push_back(NarrowDef);
}
- return true;
+ return WidePhi;
}
+//===----------------------------------------------------------------------===//
+// Simplification of IV users based on SCEV evaluation.
+//===----------------------------------------------------------------------===//
+
void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
unsigned IVOperIdx = 0;
ICmpInst::Predicate Pred = ICmp->getPredicate();
@@ -945,8 +1185,7 @@ void IndVarSimplify::EliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {
void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
Value *IVOperand,
- bool IsSigned,
- PHINode *IVPhi) {
+ bool IsSigned) {
// We're only interested in the case where we know something about
// the numerator.
if (IVOperand != Rem->getOperand(0))
@@ -989,15 +1228,465 @@ void IndVarSimplify::EliminateIVRemainder(BinaryOperator *Rem,
}
// Inform IVUsers about the new users.
- if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
- IU->AddUsersIfInteresting(I, IVPhi);
-
+ if (IU) {
+ if (Instruction *I = dyn_cast<Instruction>(Rem->getOperand(0)))
+ IU->AddUsersIfInteresting(I);
+ }
DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');
++NumElimRem;
Changed = true;
DeadInsts.push_back(Rem);
}
+/// EliminateIVUser - Eliminate an operation that consumes a simple IV and has
+/// no observable side-effect given the range of IV values.
+bool IndVarSimplify::EliminateIVUser(Instruction *UseInst,
+ Instruction *IVOperand) {
+ if (ICmpInst *ICmp = dyn_cast<ICmpInst>(UseInst)) {
+ EliminateIVComparison(ICmp, IVOperand);
+ return true;
+ }
+ if (BinaryOperator *Rem = dyn_cast<BinaryOperator>(UseInst)) {
+ bool IsSigned = Rem->getOpcode() == Instruction::SRem;
+ if (IsSigned || Rem->getOpcode() == Instruction::URem) {
+ EliminateIVRemainder(Rem, IVOperand, IsSigned);
+ return true;
+ }
+ }
+
+ // Eliminate any operation that SCEV can prove is an identity function.
+ if (!SE->isSCEVable(UseInst->getType()) ||
+ (UseInst->getType() != IVOperand->getType()) ||
+ (SE->getSCEV(UseInst) != SE->getSCEV(IVOperand)))
+ return false;
+
+ DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');
+
+ UseInst->replaceAllUsesWith(IVOperand);
+ ++NumElimIdentity;
+ Changed = true;
+ DeadInsts.push_back(UseInst);
+ return true;
+}
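+
+// For example (illustrative): given an IV %iv, SCEV proves that
+// "%t = add i32 %iv, 0" has the same SCEV as %iv itself, so EliminateIVUser
+// replaces all uses of %t with %iv and queues the add for deletion.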
+
+/// pushIVUsers - Add all uses of Def to the current IV's worklist.
+///
+static void pushIVUsers(
+ Instruction *Def,
+ SmallPtrSet<Instruction*,16> &Simplified,
+ SmallVectorImpl< std::pair<Instruction*,Instruction*> > &SimpleIVUsers) {
+
+ for (Value::use_iterator UI = Def->use_begin(), E = Def->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Avoid infinite or exponential worklist processing.
+ // Also ensure unique worklist users.
+ // If Def is a LoopPhi, it may not be in the Simplified set, so check for
+ // self edges first.
+ if (User != Def && Simplified.insert(User))
+ SimpleIVUsers.push_back(std::make_pair(User, Def));
+ }
+}
+
+/// isSimpleIVUser - Return true if this instruction generates a simple SCEV
+/// expression in terms of that IV.
+///
+/// This is similar to IVUsers' isInteresting() but processes each instruction
+/// non-recursively when the operand is already known to be a simpleIVUser.
+///
+static bool isSimpleIVUser(Instruction *I, const Loop *L, ScalarEvolution *SE) {
+ if (!SE->isSCEVable(I->getType()))
+ return false;
+
+ // Get the symbolic expression for this instruction.
+ const SCEV *S = SE->getSCEV(I);
+
+ // We assume that terminators are not SCEVable.
+ assert((!S || I != I->getParent()->getTerminator()) &&
+ "can't fold terminators");
+
+ // Only consider recurrences on the current loop.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
+ if (AR && AR->getLoop() == L)
+ return true;
+
+ return false;
+}
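+
+// For example (illustrative): "%iv.next = add nsw i32 %iv, 4" evaluates to an
+// addrec such as {4,+,4} on the current loop and is pushed as a simple IV
+// user; a load of a loop-varying address evaluates to a SCEVUnknown and is
+// not.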
+
+/// SimplifyIVUsersNoRewrite - Iteratively perform simplification on a worklist
+/// of IV users. Each successive simplification may push more users which may
+/// themselves be candidates for simplification.
+///
+/// The "NoRewrite" algorithm does not require IVUsers analysis. Instead, it
+/// simplifies instructions in-place during analysis. Rather than rewriting
+/// induction variables bottom-up from their users, it transforms a chain of
+/// IVUsers top-down, updating the IR only when it encounters a clear
+/// optimization opportunity. A SCEVExpander "Rewriter" instance is still
+/// needed, but only used to generate a new IV (phi) of wider type for sign/zero
+/// extend elimination.
+///
+/// Once DisableIVRewrite is default, LSR will be the only client of IVUsers.
+///
+void IndVarSimplify::SimplifyIVUsersNoRewrite(Loop *L, SCEVExpander &Rewriter) {
+ std::map<PHINode *, WideIVInfo> WideIVMap;
+
+ SmallVector<PHINode*, 8> LoopPhis;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ LoopPhis.push_back(cast<PHINode>(I));
+ }
+ // Each round of simplification iterates through the SimplifyIVUsers worklist
+ // for all current phis, then determines whether any IVs can be
+ // widened. Widening adds new phis to LoopPhis, inducing another round of
+ // simplification on the wide IVs.
+ while (!LoopPhis.empty()) {
+ // Evaluate as many IV expressions as possible before widening any IVs. This
+ // forces SCEV to set no-wrap flags before evaluating sign/zero
+ // extension. The first time SCEV attempts to normalize sign/zero extension,
+ // the result becomes final. So for the most predictable results, we delay
+ // evaluation of sign/zero extends until needed, and avoid running
+ // other SCEV-based analyses prior to SimplifyIVUsersNoRewrite.
+ do {
+ PHINode *CurrIV = LoopPhis.pop_back_val();
+
+ // Information about sign/zero extensions of CurrIV.
+ WideIVInfo WI;
+
+ // Instructions processed by SimplifyIVUsers for CurrIV.
+ SmallPtrSet<Instruction*,16> Simplified;
+
+ // Use-def pairs of IV users waiting to be processed for CurrIV.
+ SmallVector<std::pair<Instruction*, Instruction*>, 8> SimpleIVUsers;
+
+ // Push users of the current LoopPhi. In rare cases, pushIVUsers may be
+ // called multiple times for the same LoopPhi. This is the proper thing to
+ // do for loop header phis that use each other.
+ pushIVUsers(CurrIV, Simplified, SimpleIVUsers);
+
+ while (!SimpleIVUsers.empty()) {
+ Instruction *UseInst, *Operand;
+ tie(UseInst, Operand) = SimpleIVUsers.pop_back_val();
+ // Bypass back edges to avoid extra work.
+ if (UseInst == CurrIV) continue;
+
+ if (EliminateIVUser(UseInst, Operand)) {
+ pushIVUsers(Operand, Simplified, SimpleIVUsers);
+ continue;
+ }
+ if (CastInst *Cast = dyn_cast<CastInst>(UseInst)) {
+ bool IsSigned = Cast->getOpcode() == Instruction::SExt;
+ if (IsSigned || Cast->getOpcode() == Instruction::ZExt) {
+ CollectExtend(Cast, IsSigned, WI, SE, TD);
+ }
+ continue;
+ }
+ if (isSimpleIVUser(UseInst, L, SE)) {
+ pushIVUsers(UseInst, Simplified, SimpleIVUsers);
+ }
+ }
+ if (WI.WidestNativeType) {
+ WideIVMap[CurrIV] = WI;
+ }
+ } while(!LoopPhis.empty());
+
+ for (std::map<PHINode *, WideIVInfo>::const_iterator I = WideIVMap.begin(),
+ E = WideIVMap.end(); I != E; ++I) {
+ WidenIV Widener(I->first, I->second, LI, SE, DT, DeadInsts);
+ if (PHINode *WidePhi = Widener.CreateWideIV(Rewriter)) {
+ Changed = true;
+ LoopPhis.push_back(WidePhi);
+ }
+ }
+ WideIVMap.clear();
+ }
+}
+
+/// SimplifyCongruentIVs - Check for congruent phis in this loop header and
+/// replace them with their chosen representative.
+///
+void IndVarSimplify::SimplifyCongruentIVs(Loop *L) {
+ DenseMap<const SCEV *, PHINode *> ExprToIVMap;
+ for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
+ PHINode *Phi = cast<PHINode>(I);
+ if (!SE->isSCEVable(Phi->getType()))
+ continue;
+
+ const SCEV *S = SE->getSCEV(Phi);
+ DenseMap<const SCEV *, PHINode *>::const_iterator Pos;
+ bool Inserted;
+ tie(Pos, Inserted) = ExprToIVMap.insert(std::make_pair(S, Phi));
+ if (Inserted)
+ continue;
+ PHINode *OrigPhi = Pos->second;
+ // Replacing the congruent phi is sufficient because acyclic redundancy
+ // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
+ // that a phi is congruent, it's almost certain to be the head of an IV
+ // user cycle that is isomorphic with the original phi. So it's worth
+ // eagerly cleaning up the common case of a single IV increment.
+ if (BasicBlock *LatchBlock = L->getLoopLatch()) {
+ Instruction *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ Instruction *IsomorphicInc =
+ cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
+ if (OrigInc != IsomorphicInc &&
+ SE->getSCEV(OrigInc) == SE->getSCEV(IsomorphicInc) &&
+ HoistStep(OrigInc, IsomorphicInc, DT)) {
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv.inc: "
+ << *IsomorphicInc << '\n');
+ IsomorphicInc->replaceAllUsesWith(OrigInc);
+ DeadInsts.push_back(IsomorphicInc);
+ }
+ }
+ DEBUG(dbgs() << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
+ ++NumElimIV;
+ Phi->replaceAllUsesWith(OrigPhi);
+ DeadInsts.push_back(Phi);
+ }
+}
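+
+// A typical case (illustrative): after other transforms, the header may carry
+// two phis %i and %j that both evaluate to {0,+,1} on this loop. The second
+// phi and its single increment are folded into the first, leaving one IV.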
+
+//===----------------------------------------------------------------------===//
+// LinearFunctionTestReplace and its kin. Rewrite the loop exit condition.
+//===----------------------------------------------------------------------===//
+
+/// canExpandBackedgeTakenCount - Return true if this loop's backedge taken
+/// count expression can be safely and cheaply expanded into an instruction
+/// sequence that can be used by LinearFunctionTestReplace.
+static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) {
+ const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount) ||
+ BackedgeTakenCount->isZero())
+ return false;
+
+ if (!L->getExitingBlock())
+ return false;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return false;
+
+ // Special case: If the backedge-taken count is a UDiv, it's very likely a
+ // UDiv that ScalarEvolution produced in order to compute a precise
+ // expression, rather than a UDiv from the user's code. If we can't find a
+ // UDiv in the code with some simple searching, assume the former and forego
+ // rewriting the loop.
+ if (isa<SCEVUDivExpr>(BackedgeTakenCount)) {
+ ICmpInst *OrigCond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!OrigCond) return false;
+ const SCEV *R = SE->getSCEV(OrigCond->getOperand(1));
+ R = SE->getMinusSCEV(R, SE->getConstant(R->getType(), 1));
+ if (R != BackedgeTakenCount) {
+ const SCEV *L = SE->getSCEV(OrigCond->getOperand(0));
+ L = SE->getMinusSCEV(L, SE->getConstant(L->getType(), 1));
+ if (L != BackedgeTakenCount)
+ return false;
+ }
+ }
+ return true;
+}
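+
+// For instance (illustrative): for "for (i = 0; i < n; i += 2)" SCEV derives
+// a backedge-taken count containing a UDiv by 2 even though the source has no
+// division; expanding it would insert a udiv in the preheader, so we forego
+// LFTR unless the compare operands already yield that count.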
+
+/// getBackedgeIVType - Get the widest type used by the loop test after peeking
+/// through Truncs.
+///
+/// TODO: Unnecessary if LFTR does not force a canonical IV.
+static const Type *getBackedgeIVType(Loop *L) {
+ if (!L->getExitingBlock())
+ return 0;
+
+ // Can't rewrite non-branch yet.
+ BranchInst *BI = dyn_cast<BranchInst>(L->getExitingBlock()->getTerminator());
+ if (!BI)
+ return 0;
+
+ ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!Cond)
+ return 0;
+
+ const Type *Ty = 0;
+ for(User::op_iterator OI = Cond->op_begin(), OE = Cond->op_end();
+ OI != OE; ++OI) {
+ assert((!Ty || Ty == (*OI)->getType()) && "bad icmp operand types");
+ TruncInst *Trunc = dyn_cast<TruncInst>(*OI);
+ if (!Trunc)
+ continue;
+
+ return Trunc->getSrcTy();
+ }
+ return Ty;
+}
+
+/// LinearFunctionTestReplace - This method rewrites the exit condition of the
+/// loop to be a canonical != comparison against the incremented loop induction
+/// variable. This pass is able to rewrite the exit tests of any loop where the
+/// SCEV analysis can determine a loop-invariant trip count of the loop, which
+/// is actually a much broader range than just linear tests.
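+///
+/// For example (a sketch): a latch exiting on "icmp slt i32 %i, %n" becomes
+/// an exit on "icmp ne i32 %i.next, %tripcnt", where %tripcnt is the expanded
+/// trip count; the canonical != form is easier for later analyses to match.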
+ICmpInst *IndVarSimplify::
+LinearFunctionTestReplace(Loop *L,
+ const SCEV *BackedgeTakenCount,
+ PHINode *IndVar,
+ SCEVExpander &Rewriter) {
+ assert(canExpandBackedgeTakenCount(L, SE) && "precondition");
+ BranchInst *BI = cast<BranchInst>(L->getExitingBlock()->getTerminator());
+
+ // If the exiting block is not the same as the backedge block, we must compare
+ // against the preincremented value, otherwise we prefer to compare against
+ // the post-incremented value.
+ Value *CmpIndVar;
+ const SCEV *RHS = BackedgeTakenCount;
+ if (L->getExitingBlock() == L->getLoopLatch()) {
+ // Add one to the "backedge-taken" count to get the trip count.
+ // If this addition may overflow, we have to be more pessimistic and
+ // cast the induction variable before doing the add.
+ const SCEV *Zero = SE->getConstant(BackedgeTakenCount->getType(), 0);
+ const SCEV *N =
+ SE->getAddExpr(BackedgeTakenCount,
+ SE->getConstant(BackedgeTakenCount->getType(), 1));
+ if ((isa<SCEVConstant>(N) && !N->isZero()) ||
+ SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) {
+ // No overflow. Cast the sum.
+ RHS = SE->getTruncateOrZeroExtend(N, IndVar->getType());
+ } else {
+ // Potential overflow. Cast before doing the add.
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ RHS = SE->getAddExpr(RHS,
+ SE->getConstant(IndVar->getType(), 1));
+ }
+
+ // The BackedgeTaken expression contains the number of times that the
+ // backedge branches to the loop header. This is one less than the
+ // number of times the loop executes, so use the incremented indvar.
+ CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock());
+ } else {
+ // We have to use the preincremented value...
+ RHS = SE->getTruncateOrZeroExtend(BackedgeTakenCount,
+ IndVar->getType());
+ CmpIndVar = IndVar;
+ }
+
+ // Expand the code for the iteration count.
+ assert(SE->isLoopInvariant(RHS, L) &&
+ "Computed iteration count is not loop invariant!");
+ Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
+
+ // Insert a new icmp_ne or icmp_eq instruction before the branch.
+ ICmpInst::Predicate Opcode;
+ if (L->contains(BI->getSuccessor(0)))
+ Opcode = ICmpInst::ICMP_NE;
+ else
+ Opcode = ICmpInst::ICMP_EQ;
+
+ DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
+ << " LHS:" << *CmpIndVar << '\n'
+ << " op:\t"
+ << (Opcode == ICmpInst::ICMP_NE ? "!=" : "==") << "\n"
+ << " RHS:\t" << *RHS << "\n");
+
+ ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
+ Cond->setDebugLoc(BI->getDebugLoc());
+ Value *OrigCond = BI->getCondition();
+ // It's tempting to use replaceAllUsesWith here to fully replace the old
+ // comparison, but that's not immediately safe, since users of the old
+ // comparison may not be dominated by the new comparison. Instead, just
+ // update the branch to use the new comparison; in the common case this
+ // will make the old comparison dead.
+ BI->setCondition(Cond);
+ DeadInsts.push_back(OrigCond);
+
+ ++NumLFTR;
+ Changed = true;
+ return Cond;
+}
+
+//===----------------------------------------------------------------------===//
+// SinkUnusedInvariants. A late subpass to cleanup loop preheaders.
+//===----------------------------------------------------------------------===//
+
+/// If there's a single exit block, sink any loop-invariant values that
+/// were defined in the preheader but not used inside the loop into the
+/// exit block to reduce register pressure in the loop.
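+///
+/// For example (a sketch): an address computation defined in the preheader
+/// but used only after the loop is sunk into the single exit block, so it no
+/// longer ties up a register across the loop body.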
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
+ BasicBlock *ExitBlock = L->getExitBlock();
+ if (!ExitBlock) return;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
+ BasicBlock::iterator I = Preheader->getTerminator();
+ while (I != Preheader->begin()) {
+ --I;
+ // New instructions were inserted at the end of the preheader.
+ if (isa<PHINode>(I))
+ break;
+
+ // Don't move instructions which might have side effects, since the side
+ // effects need to complete before instructions inside the loop. Also don't
+ // move instructions which might read memory, since the loop may modify
+ // memory. Note that it's okay if the instruction might have undefined
+ // behavior: LoopSimplify guarantees that the preheader dominates the exit
+ // block.
+ if (I->mayHaveSideEffects() || I->mayReadFromMemory())
+ continue;
+
+ // Skip debug info intrinsics.
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
+ // Don't sink static AllocaInsts out of the entry block, which would
+ // turn them into dynamic allocas!
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
+ if (AI->isStaticAlloca())
+ continue;
+
+ // Determine if there is a use in or before the loop (direct or
+ // otherwise).
+ bool UsedInLoop = false;
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE; ++UI) {
+ User *U = *UI;
+ BasicBlock *UseBB = cast<Instruction>(U)->getParent();
+ if (PHINode *P = dyn_cast<PHINode>(U)) {
+ unsigned i =
+ PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
+ UseBB = P->getIncomingBlock(i);
+ }
+ if (UseBB == Preheader || L->contains(UseBB)) {
+ UsedInLoop = true;
+ break;
+ }
+ }
+
+ // If there is, the def must remain in the preheader.
+ if (UsedInLoop)
+ continue;
+
+ // Otherwise, sink it to the exit block.
+ Instruction *ToMove = I;
+ bool Done = false;
+
+ if (I != Preheader->begin()) {
+ // Skip debug info intrinsics.
+ do {
+ --I;
+ } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
+
+ if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
+ Done = true;
+ } else {
+ Done = true;
+ }
+
+ ToMove->moveBefore(InsertPt);
+ if (Done) break;
+ InsertPt = ToMove;
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// IndVarSimplify driver. Manage several subpasses of IV simplification.
+//===----------------------------------------------------------------------===//
+
bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -1010,7 +1699,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!L->isLoopSimplifyForm())
return false;
- IU = &getAnalysis<IVUsers>();
+ if (!DisableIVRewrite)
+ IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
SE = &getAnalysis<ScalarEvolution>();
DT = &getAnalysis<DominatorTree>();
@@ -1026,9 +1716,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
// Create a rewriter object which we'll use to transform the code with.
- SCEVExpander Rewriter(*SE);
- if (DisableIVRewrite)
+ SCEVExpander Rewriter(*SE, "indvars");
+
+ // Eliminate redundant IV users.
+ //
+ // Simplification works best when run before other consumers of SCEV. We
+ // attempt to avoid evaluating SCEVs for sign/zero extend operations until
+ // other expressions involving loop IVs have been evaluated. This helps SCEV
+ // set no-wrap flags before normalizing sign/zero extension.
+ if (DisableIVRewrite) {
Rewriter.disableCanonicalMode();
+ SimplifyIVUsersNoRewrite(L, Rewriter);
+ }
// Check to see if this loop has a computable loop-invariant execution count.
// If so, this means that we can compute the final value of any expressions
@@ -1040,7 +1739,12 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
RewriteLoopExitValues(L, Rewriter);
// Eliminate redundant IV users.
- SimplifyIVUsers(Rewriter);
+ if (!DisableIVRewrite)
+ SimplifyIVUsers(Rewriter);
+
+ // Eliminate redundant IV cycles.
+ if (DisableIVRewrite)
+ SimplifyCongruentIVs(L);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
@@ -1119,8 +1823,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
"canonical IV disrupted BackedgeTaken expansion");
assert(NeedCannIV &&
"LinearFunctionTestReplace requires a canonical induction variable");
- NewICmp = LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar,
- Rewriter);
+ // Check preconditions for proper SCEVExpander operation. SCEV does not
+ // express SCEVExpander's dependencies, such as LoopSimplify. Instead any
+ // pass that uses the SCEVExpander must enforce these dependencies itself.
+ // This does not work well for loop passes because SCEVExpander makes
+ // assumptions about all loops, while
+ // LoopPassManager only forces the current loop to be simplified.
+ //
+ // FIXME: SCEV expansion has no way to bail out, so the caller must
+ // explicitly check any assumptions made by SCEV. Brittle.
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(BackedgeTakenCount);
+ if (!AR || AR->getLoop()->getLoopPreheader())
+ NewICmp =
+ LinearFunctionTestReplace(L, BackedgeTakenCount, IndVar, Rewriter);
}
// Rewrite IV-derived expressions.
if (!DisableIVRewrite)
@@ -1146,9 +1860,8 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// For completeness, inform IVUsers of the IV use in the newly-created
// loop exit test instruction.
- if (NewICmp)
- IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)),
- IndVar);
+ if (NewICmp && IU)
+ IU->AddUsersIfInteresting(cast<Instruction>(NewICmp->getOperand(0)));
// Clean up dead instructions.
Changed |= DeleteDeadPHIs(L->getHeader());
@@ -1156,428 +1869,3 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
assert(L->isLCSSAForm(*DT) && "Indvars did not leave the loop in lcssa form!");
return Changed;
}
-
-// FIXME: It is an extremely bad idea to indvar substitute anything more
-// complex than affine induction variables. Doing so will put expensive
-// polynomial evaluations inside of the loop, and the str reduction pass
-// currently can only reduce affine polynomials. For now just disable
-// indvar subst on anything more complex than an affine addrec, unless
-// it can be expanded to a trivial value.
-static bool isSafe(const SCEV *S, const Loop *L, ScalarEvolution *SE) {
- // Loop-invariant values are safe.
- if (SE->isLoopInvariant(S, L)) return true;
-
- // Affine addrecs are safe. Non-affine are not, because LSR doesn't know how
- // to transform them into efficient code.
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- return AR->isAffine();
-
- // An add is safe it all its operands are safe.
- if (const SCEVCommutativeExpr *Commutative = dyn_cast<SCEVCommutativeExpr>(S)) {
- for (SCEVCommutativeExpr::op_iterator I = Commutative->op_begin(),
- E = Commutative->op_end(); I != E; ++I)
- if (!isSafe(*I, L, SE)) return false;
- return true;
- }
-
- // A cast is safe if its operand is.
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
- return isSafe(C->getOperand(), L, SE);
-
- // A udiv is safe if its operands are.
- if (const SCEVUDivExpr *UD = dyn_cast<SCEVUDivExpr>(S))
- return isSafe(UD->getLHS(), L, SE) &&
- isSafe(UD->getRHS(), L, SE);
-
- // SCEVUnknown is always safe.
- if (isa<SCEVUnknown>(S))
- return true;
-
- // Nothing else is safe.
- return false;
-}
-
-void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
- // Rewrite all induction variable expressions in terms of the canonical
- // induction variable.
- //
- // If there were induction variables of other sizes or offsets, manually
- // add the offsets to the primary induction variable and cast, avoiding
- // the need for the code evaluation methods to insert induction variables
- // of different sizes.
- for (IVUsers::iterator UI = IU->begin(), E = IU->end(); UI != E; ++UI) {
- Value *Op = UI->getOperandValToReplace();
- const Type *UseTy = Op->getType();
- Instruction *User = UI->getUser();
-
- // Compute the final addrec to expand into code.
- const SCEV *AR = IU->getReplacementExpr(*UI);
-
- // Evaluate the expression out of the loop, if possible.
- if (!L->contains(UI->getUser())) {
- const SCEV *ExitVal = SE->getSCEVAtScope(AR, L->getParentLoop());
- if (SE->isLoopInvariant(ExitVal, L))
- AR = ExitVal;
- }
-
- // FIXME: It is an extremely bad idea to indvar substitute anything more
- // complex than affine induction variables. Doing so will put expensive
- // polynomial evaluations inside of the loop, and the str reduction pass
- // currently can only reduce affine polynomials. For now just disable
- // indvar subst on anything more complex than an affine addrec, unless
- // it can be expanded to a trivial value.
- if (!isSafe(AR, L, SE))
- continue;
-
- // Determine the insertion point for this user. By default, insert
- // immediately before the user. The SCEVExpander class will automatically
- // hoist loop invariants out of the loop. For PHI nodes, there may be
- // multiple uses, so compute the nearest common dominator for the
- // incoming blocks.
- Instruction *InsertPt = User;
- if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
- for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
- if (PHI->getIncomingValue(i) == Op) {
- if (InsertPt == User)
- InsertPt = PHI->getIncomingBlock(i)->getTerminator();
- else
- InsertPt =
- DT->findNearestCommonDominator(InsertPt->getParent(),
- PHI->getIncomingBlock(i))
- ->getTerminator();
- }
-
- // Now expand it into actual Instructions and patch it into place.
- Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
-
- DEBUG(dbgs() << "INDVARS: Rewrote IV '" << *AR << "' " << *Op << '\n'
- << " into = " << *NewVal << "\n");
-
- if (!isValidRewrite(Op, NewVal)) {
- DeadInsts.push_back(NewVal);
- continue;
- }
- // Inform ScalarEvolution that this value is changing. The change doesn't
- // affect its value, but it does potentially affect which use lists the
- // value will be on after the replacement, which affects ScalarEvolution's
- // ability to walk use lists and drop dangling pointers when a value is
- // deleted.
- SE->forgetValue(User);
-
- // Patch the new value into place.
- if (Op->hasName())
- NewVal->takeName(Op);
- User->replaceUsesOfWith(Op, NewVal);
- UI->setOperandValToReplace(NewVal);
-
- ++NumRemoved;
- Changed = true;
-
- // The old value may be dead now.
- DeadInsts.push_back(Op);
- }
-}
-
-/// If there's a single exit block, sink any loop-invariant values that
-/// were defined in the preheader but not used inside the loop into the
-/// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
- BasicBlock *ExitBlock = L->getExitBlock();
- if (!ExitBlock) return;
-
- BasicBlock *Preheader = L->getLoopPreheader();
- if (!Preheader) return;
-
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
- BasicBlock::iterator I = Preheader->getTerminator();
- while (I != Preheader->begin()) {
- --I;
- // New instructions were inserted at the end of the preheader.
- if (isa<PHINode>(I))
- break;
-
- // Don't move instructions which might have side effects, since the side
- // effects need to complete before instructions inside the loop. Also don't
- // move instructions which might read memory, since the loop may modify
- // memory. Note that it's okay if the instruction might have undefined
- // behavior: LoopSimplify guarantees that the preheader dominates the exit
- // block.
- if (I->mayHaveSideEffects() || I->mayReadFromMemory())
- continue;
-
- // Skip debug info intrinsics.
- if (isa<DbgInfoIntrinsic>(I))
- continue;
-
- // Don't sink static AllocaInsts out of the entry block, which would
- // turn them into dynamic allocas!
- if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
- if (AI->isStaticAlloca())
- continue;
-
- // Determine if there is a use in or before the loop (direct or
- // otherwise).
- bool UsedInLoop = false;
- for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
- UI != UE; ++UI) {
- User *U = *UI;
- BasicBlock *UseBB = cast<Instruction>(U)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(U)) {
- unsigned i =
- PHINode::getIncomingValueNumForOperand(UI.getOperandNo());
- UseBB = P->getIncomingBlock(i);
- }
- if (UseBB == Preheader || L->contains(UseBB)) {
- UsedInLoop = true;
- break;
- }
- }
-
- // If there is, the def must remain in the preheader.
- if (UsedInLoop)
- continue;
-
- // Otherwise, sink it to the exit block.
- Instruction *ToMove = I;
- bool Done = false;
-
- if (I != Preheader->begin()) {
- // Skip debug info intrinsics.
- do {
- --I;
- } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());
-
- if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
- Done = true;
- } else {
- Done = true;
- }
-
- ToMove->moveBefore(InsertPt);
- if (Done) break;
- InsertPt = ToMove;
- }
-}
-
-/// ConvertToSInt - Convert APF to an integer, if possible.
-static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
- bool isExact = false;
- if (&APF.getSemantics() == &APFloat::PPCDoubleDouble)
- return false;
- // See if we can convert this to an int64_t
- uint64_t UIntVal;
- if (APF.convertToInteger(&UIntVal, 64, true, APFloat::rmTowardZero,
- &isExact) != APFloat::opOK || !isExact)
- return false;
- IntVal = UIntVal;
- return true;
-}
-
-/// HandleFloatingPointIV - If the loop has floating induction variable
-/// then insert corresponding integer induction variable if possible.
-/// For example,
-/// for(double i = 0; i < 10000; ++i)
-/// bar(i)
-/// is converted into
-/// for(int i = 0; i < 10000; ++i)
-/// bar((double)i);
-///
-void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PN) {
- unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
- unsigned BackEdge = IncomingEdge^1;
-
- // Check incoming value.
- ConstantFP *InitValueVal =
- dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));
-
- int64_t InitValue;
- if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
- return;
-
- // Check IV increment. Reject this PN if increment operation is not
- // an add or increment value can not be represented by an integer.
- BinaryOperator *Incr =
- dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
- if (Incr == 0 || Incr->getOpcode() != Instruction::FAdd) return;
-
- // If this is not an add of the PHI with a constantfp, or if the constant fp
- // is not an integer, bail out.
- ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
- int64_t IncValue;
- if (IncValueVal == 0 || Incr->getOperand(0) != PN ||
- !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
- return;
-
- // Check Incr uses. One user is PN and the other user is an exit condition
- // used by the conditional terminator.
- Value::use_iterator IncrUse = Incr->use_begin();
- Instruction *U1 = cast<Instruction>(*IncrUse++);
- if (IncrUse == Incr->use_end()) return;
- Instruction *U2 = cast<Instruction>(*IncrUse++);
- if (IncrUse != Incr->use_end()) return;
-
- // Find exit condition, which is an fcmp. If it doesn't exist, or if it isn't
- // only used by a branch, we can't transform it.
- FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
- if (!Compare)
- Compare = dyn_cast<FCmpInst>(U2);
- if (Compare == 0 || !Compare->hasOneUse() ||
- !isa<BranchInst>(Compare->use_back()))
- return;
-
- BranchInst *TheBr = cast<BranchInst>(Compare->use_back());
-
- // We need to verify that the branch actually controls the iteration count
- // of the loop. If not, the new IV can overflow and no one will notice.
- // The branch block must be in the loop and one of the successors must be out
- // of the loop.
- assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
- if (!L->contains(TheBr->getParent()) ||
- (L->contains(TheBr->getSuccessor(0)) &&
- L->contains(TheBr->getSuccessor(1))))
- return;
-
-
- // If it isn't a comparison with an integer-as-fp (the exit value), we can't
- // transform it.
- ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
- int64_t ExitValue;
- if (ExitValueVal == 0 ||
- !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
- return;
-
- // Find new predicate for integer comparison.
- CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
- switch (Compare->getPredicate()) {
- default: return; // Unknown comparison.
- case CmpInst::FCMP_OEQ:
- case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
- case CmpInst::FCMP_ONE:
- case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
- case CmpInst::FCMP_OGT:
- case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
- case CmpInst::FCMP_OGE:
- case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
- case CmpInst::FCMP_OLT:
- case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
- case CmpInst::FCMP_OLE:
- case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
- }
-
- // We convert the floating point induction variable to a signed i32 value if
- // we can. This is only safe if the comparison will not overflow in a way
- // that won't be trapped by the integer equivalent operations. Check for this
- // now.
- // TODO: We could use i64 if it is native and the range requires it.
-
- // The start/stride/exit values must all fit in signed i32.
- if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
- return;
-
- // If not actually striding (add x, 0.0), avoid touching the code.
- if (IncValue == 0)
- return;
-
- // Positive and negative strides have different safety conditions.
- if (IncValue > 0) {
- // If we have a positive stride, we require the init to be less than the
- // exit value and an equality or less than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SGT || NewPred == CmpInst::ICMP_SGE)
- return;
-
- uint32_t Range = uint32_t(ExitValue-InitValue);
- if (NewPred == CmpInst::ICMP_SLE) {
- // Normalize SLE -> SLT, check for infinite loop.
- if (++Range == 0) return; // Range overflows.
- }
-
- unsigned Leftover = Range % uint32_t(IncValue);
-
- // If this is an equality comparison, we require that the strided value
- // exactly land on the exit value, otherwise the IV condition will wrap
- // around and do things the fp IV wouldn't.
- if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
- Leftover != 0)
- return;
-
- // If the stride would wrap around the i32 before exiting, we can't
- // transform the IV.
- if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
- return;
-
- } else {
- // If we have a negative stride, we require the init to be greater than the
- // exit value and an equality or greater than comparison.
- if (InitValue >= ExitValue ||
- NewPred == CmpInst::ICMP_SLT || NewPred == CmpInst::ICMP_SLE)
- return;
-
- uint32_t Range = uint32_t(InitValue-ExitValue);
- if (NewPred == CmpInst::ICMP_SGE) {
- // Normalize SGE -> SGT, check for infinite loop.
- if (++Range == 0) return; // Range overflows.
- }
-
- unsigned Leftover = Range % uint32_t(-IncValue);
-
- // If this is an equality comparison, we require that the strided value
- // exactly land on the exit value, otherwise the IV condition will wrap
- // around and do things the fp IV wouldn't.
- if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
- Leftover != 0)
- return;
-
- // If the stride would wrap around the i32 before exiting, we can't
- // transform the IV.
- if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
- return;
- }
-
- const IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());
-
- // Insert new integer induction variable.
- PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
- NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
- PN->getIncomingBlock(IncomingEdge));
-
- Value *NewAdd =
- BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
- Incr->getName()+".int", Incr);
- NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));
-
- ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
- ConstantInt::get(Int32Ty, ExitValue),
- Compare->getName());
-
- // In the following deletions, PN may become dead and may be deleted.
- // Use a WeakVH to observe whether this happens.
- WeakVH WeakPH = PN;
-
- // Delete the old floating point exit comparison. The branch starts using the
- // new comparison.
- NewCompare->takeName(Compare);
- Compare->replaceAllUsesWith(NewCompare);
- RecursivelyDeleteTriviallyDeadInstructions(Compare);
-
- // Delete the old floating point increment.
- Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
- RecursivelyDeleteTriviallyDeadInstructions(Incr);
-
- // If the FP induction variable still has uses, this is because something else
- // in the loop uses its value. In order to canonicalize the induction
- // variable, we chose to eliminate the IV and rewrite it in terms of an
- // int->fp cast.
- //
- // We give preference to sitofp over uitofp because it is faster on most
- // platforms.
- if (WeakPH) {
- Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
- PN->getParent()->getFirstNonPHI());
- PN->replaceAllUsesWith(Conv);
- RecursivelyDeleteTriviallyDeadInstructions(PN);
- }
-
- // Add a new IVUsers entry for the newly-created integer PHI.
- IU->AddUsersIfInteresting(NewPHI, NewPHI);
-}
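
Editorial note: for readers skimming the removed HandleFloatingPointIV above, here is a minimal source-level sketch of the transform it performed, with the equality-exit restriction worked through (names hypothetical; the pass operated on IR, not C):

  // Before: floating-point IV with integral start, step, and exit values.
  for (double i = 0.0; i < 10000.0; i += 1.0)
    bar(i);
  // After: i32 IV; remaining FP uses of the old IV go through sitofp.
  for (int32_t i = 0; i < 10000; i += 1)
    bar((double)i);
  // For equality exits the stride must land exactly on the exit value:
  // starting at 0 with step 2 and exit "i != 7", Leftover == 7 % 2 == 1,
  // so the transform is rejected; the integer IV would wrap around where
  // the FP IV never compared equal.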
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index cf18ff0..b500d5b 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -600,8 +600,10 @@ static unsigned GetBestDestForJumpOnUndef(BasicBlock *BB) {
for (unsigned i = 1, e = BBTerm->getNumSuccessors(); i != e; ++i) {
TestBB = BBTerm->getSuccessor(i);
unsigned NumPreds = std::distance(pred_begin(TestBB), pred_end(TestBB));
- if (NumPreds < MinNumPreds)
+ if (NumPreds < MinNumPreds) {
MinSucc = i;
+ MinNumPreds = NumPreds;
+ }
}
return MinSucc;
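
Editorial note: the JumpThreading fix above repairs a classic "argmin that never updates its running minimum" bug; before it, every successor was compared against the first successor's predecessor count rather than the best seen so far. A standalone sketch of the corrected pattern (hypothetical names, not LLVM API):

  #include <cstddef>

  // Index of the smallest element: the best index and the best value must
  // be updated together, which is exactly what the added line restores.
  static size_t argmin(const unsigned *Vals, size_t N) {
    size_t BestIdx = 0;
    unsigned BestVal = Vals[0];
    for (size_t i = 1; i != N; ++i)
      if (Vals[i] < BestVal) {
        BestIdx = i;
        BestVal = Vals[i]; // Omitting this line reintroduces the bug.
      }
    return BestIdx;
  }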
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 13bd022..66add6c 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -178,7 +178,7 @@ INITIALIZE_PASS_END(LICM, "licm", "Loop Invariant Code Motion", false, false)
Pass *llvm::createLICMPass() { return new LICM(); }
/// Hoist expressions out of the specified loop. Note, alias info for inner
-/// loop is not preserved so it is not a good idea to run LICM multiple
+/// loop is not preserved so it is not a good idea to run LICM multiple
/// times on one loop.
///
bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
@@ -199,13 +199,13 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// What if InnerLoop was modified by other passes ?
CurAST->add(*InnerAST);
-
+
// Once we've incorporated the inner loop's AST into ours, we don't need the
// subloop's anymore.
delete InnerAST;
LoopToAliasSetMap.erase(InnerL);
}
-
+
CurLoop = L;
// Get the preheader block to move instructions into...
@@ -245,7 +245,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
I != E; ++I)
PromoteAliasSet(*I);
}
-
+
// Clear out loops state information for the next iteration
CurLoop = 0;
Preheader = 0;
@@ -283,7 +283,7 @@ void LICM::SinkRegion(DomTreeNode *N) {
for (BasicBlock::iterator II = BB->end(); II != BB->begin(); ) {
Instruction &I = *--II;
-
+
    // If the instruction is dead, there is no point in sinking it just
    // because it is unused in the loop; instead, just delete it.
if (isInstructionTriviallyDead(&I)) {
@@ -336,7 +336,7 @@ void LICM::HoistRegion(DomTreeNode *N) {
I.eraseFromParent();
continue;
}
-
+
// Try hoisting the instruction out to the preheader. We can only do this
// if all of the operands of the instruction are loop invariant and if it
// is safe to hoist the instruction.
@@ -364,7 +364,7 @@ bool LICM::canSinkOrHoistInst(Instruction &I) {
// in the same alias set as something that ends up being modified.
if (AA->pointsToConstantMemory(LI->getOperand(0)))
return true;
-
+
// Don't hoist loads which have may-aliased stores in loop.
uint64_t Size = 0;
if (LI->getType()->isSized())
@@ -470,7 +470,7 @@ void LICM::sink(Instruction &I) {
}
return;
}
-
+
if (ExitBlocks.empty()) {
// The instruction is actually dead if there ARE NO exit blocks.
CurAST->deleteValue(&I);
@@ -482,30 +482,30 @@ void LICM::sink(Instruction &I) {
I.eraseFromParent();
return;
}
-
+
// Otherwise, if we have multiple exits, use the SSAUpdater to do all of the
// hard work of inserting PHI nodes as necessary.
SmallVector<PHINode*, 8> NewPHIs;
SSAUpdater SSA(&NewPHIs);
-
+
if (!I.use_empty())
SSA.Initialize(I.getType(), I.getName());
-
+
// Insert a copy of the instruction in each exit block of the loop that is
// dominated by the instruction. Each exit block is known to only be in the
// ExitBlocks list once.
BasicBlock *InstOrigBB = I.getParent();
unsigned NumInserted = 0;
-
+
for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {
BasicBlock *ExitBlock = ExitBlocks[i];
-
+
if (!DT->dominates(InstOrigBB, ExitBlock))
continue;
-
+
// Insert the code after the last PHI node.
BasicBlock::iterator InsertPt = ExitBlock->getFirstNonPHI();
-
+
// If this is the first exit block processed, just move the original
// instruction, otherwise clone the original instruction and insert
// the copy.
@@ -519,12 +519,12 @@ void LICM::sink(Instruction &I) {
New->setName(I.getName()+".le");
ExitBlock->getInstList().insert(InsertPt, New);
}
-
+
// Now that we have inserted the instruction, inform SSAUpdater.
if (!I.use_empty())
SSA.AddAvailableValue(ExitBlock, New);
}
-
+
// If the instruction doesn't dominate any exit blocks, it must be dead.
if (NumInserted == 0) {
CurAST->deleteValue(&I);
@@ -533,7 +533,7 @@ void LICM::sink(Instruction &I) {
I.eraseFromParent();
return;
}
-
+
// Next, rewrite uses of the instruction, inserting PHI nodes as needed.
for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; ) {
// Grab the use before incrementing the iterator.
@@ -542,12 +542,12 @@ void LICM::sink(Instruction &I) {
++UI;
SSA.RewriteUseAfterInsertions(U);
}
-
+
// Update CurAST for NewPHIs if I had pointer type.
if (I.getType()->isPointerTy())
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
CurAST->copyValue(&I, NewPHIs[i]);
-
+
// Finally, remove the instruction from CurAST. It is no longer in the loop.
CurAST->deleteValue(&I);
}
@@ -606,15 +606,17 @@ namespace {
SmallVectorImpl<BasicBlock*> &LoopExitBlocks;
AliasSetTracker &AST;
DebugLoc DL;
+ int Alignment;
public:
LoopPromoter(Value *SP,
const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
SmallPtrSet<Value*, 4> &PMA,
SmallVectorImpl<BasicBlock*> &LEB, AliasSetTracker &ast,
- DebugLoc dl)
- : LoadAndStorePromoter(Insts, S, 0, 0), SomePtr(SP),
- PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl) {}
-
+ DebugLoc dl, int alignment)
+ : LoadAndStorePromoter(Insts, S), SomePtr(SP),
+ PointerMustAliases(PMA), LoopExitBlocks(LEB), AST(ast), DL(dl),
+ Alignment(alignment) {}
+
virtual bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &) const {
Value *Ptr;
@@ -624,7 +626,7 @@ namespace {
Ptr = cast<StoreInst>(I)->getPointerOperand();
return PointerMustAliases.count(Ptr);
}
-
+
virtual void doExtraRewritesBeforeFinalDeletion() const {
      // Insert stores in the loop exit blocks. Each exit block gets a
// store of the live-out values that feed them. Since we've already told
@@ -635,6 +637,7 @@ namespace {
Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);
Instruction *InsertPos = ExitBlock->getFirstNonPHI();
StoreInst *NewSI = new StoreInst(LiveInValue, SomePtr, InsertPos);
+ NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
}
}
@@ -661,7 +664,7 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
if (AS.isForwardingAliasSet() || !AS.isMod() || !AS.isMustAlias() ||
AS.isVolatile() || !CurLoop->isLoopInvariant(AS.begin()->getValue()))
return;
-
+
assert(!AS.empty() &&
"Must alias set should have at least one pointer element in it!");
Value *SomePtr = AS.begin()->getValue();
@@ -676,60 +679,78 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
// tmp = *P; for () { if (c) tmp +=1; } *P = tmp;
//
// is not safe, because *P may only be valid to access if 'c' is true.
- //
+ //
// It is safe to promote P if all uses are direct load/stores and if at
// least one is guaranteed to be executed.
bool GuaranteedToExecute = false;
-
+
SmallVector<Instruction*, 64> LoopUses;
SmallPtrSet<Value*, 4> PointerMustAliases;
+ // We start with an alignment of one and try to find instructions that allow
+ // us to prove better alignment.
+ unsigned Alignment = 1;
+
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
Value *ASIV = ASI->getValue();
PointerMustAliases.insert(ASIV);
-
+
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
if (SomePtr->getType() != ASIV->getType())
return;
-
+
for (Value::use_iterator UI = ASIV->use_begin(), UE = ASIV->use_end();
UI != UE; ++UI) {
// Ignore instructions that are outside the loop.
Instruction *Use = dyn_cast<Instruction>(*UI);
if (!Use || !CurLoop->contains(Use))
continue;
-
+
      // If there is a non-load/store instruction in the loop, we can't promote
// it.
- if (isa<LoadInst>(Use))
+ unsigned InstAlignment;
+ if (LoadInst *load = dyn_cast<LoadInst>(Use)) {
assert(!cast<LoadInst>(Use)->isVolatile() && "AST broken");
- else if (isa<StoreInst>(Use)) {
+ InstAlignment = load->getAlignment();
+ } else if (StoreInst *store = dyn_cast<StoreInst>(Use)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (Use->getOperand(1) != ASIV)
continue;
+ InstAlignment = store->getAlignment();
assert(!cast<StoreInst>(Use)->isVolatile() && "AST broken");
} else
return; // Not a load or store.
-
+
+ // If the alignment of this instruction allows us to specify a more
+ // restrictive (and performant) alignment and if we are sure this
+ // instruction will be executed, update the alignment.
+ // Larger is better, with the exception of 0 being the best alignment.
+ if ((InstAlignment > Alignment || InstAlignment == 0)
+ && (Alignment != 0))
+ if (isSafeToExecuteUnconditionally(*Use)) {
+ GuaranteedToExecute = true;
+ Alignment = InstAlignment;
+ }
+
if (!GuaranteedToExecute)
GuaranteedToExecute = isSafeToExecuteUnconditionally(*Use);
-
+
LoopUses.push_back(Use);
}
}
-
+
// If there isn't a guaranteed-to-execute instruction, we can't promote.
if (!GuaranteedToExecute)
return;
-
+
// Otherwise, this is safe to promote, lets do it!
- DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
+ DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " <<*SomePtr<<'\n');
Changed = true;
++NumPromoted;
@@ -741,18 +762,19 @@ void LICM::PromoteAliasSet(AliasSet &AS) {
SmallVector<BasicBlock*, 8> ExitBlocks;
CurLoop->getUniqueExitBlocks(ExitBlocks);
-
+
// We use the SSAUpdater interface to insert phi nodes as required.
SmallVector<PHINode*, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
- *CurAST, DL);
-
+ *CurAST, DL, Alignment);
+
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad =
new LoadInst(SomePtr, SomePtr->getName()+".promoted",
Preheader->getTerminator());
+ PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
SSA.AddAvailableValue(Preheader, PreheaderLoad);
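
Editorial note: the LICM changes above thread an alignment through promotion, and the merging rule is easy to misread in diff form. A minimal standalone sketch of it follows (hypothetical helper; in the pass, adopting an alignment also sets GuaranteedToExecute):

  // Merge one access's alignment into the running promotion alignment.
  // 0 means "natural/ABI alignment" and is treated as the strongest claim;
  // otherwise larger is stronger. Only an access that is guaranteed to
  // execute may strengthen the claim.
  static unsigned mergePromotionAlignment(unsigned Current, unsigned InstAlign,
                                          bool GuaranteedToExec) {
    if (!GuaranteedToExec)
      return Current;   // A maybe-skipped access proves nothing.
    if (Current == 0)
      return Current;   // Already the best possible claim.
    if (InstAlign == 0 || InstAlign > Current)
      return InstAlign; // Adopt the stronger alignment.
    return Current;
  }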
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 753a558..f7f3298 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -190,7 +190,9 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
BasicBlock* exitingBlock = exitingBlocks[0];
BasicBlock::iterator BI = exitBlock->begin();
while (PHINode* P = dyn_cast<PHINode>(BI)) {
- P->replaceUsesOfWith(exitingBlock, preheader);
+ int j = P->getBasicBlockIndex(exitingBlock);
+ assert(j >= 0 && "Can't find exiting block in exit block's phi node!");
+ P->setIncomingBlock(j, preheader);
for (unsigned i = 1; i < exitingBlocks.size(); ++i)
P->removeIncomingValue(exitingBlocks[i]);
++BI;
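
Editorial note: the LoopDeletion fix replaces a blanket replaceUsesOfWith, which rewrites every operand mentioning the old block, with a targeted update of exactly one incoming-block slot. A minimal sketch of the idiom (real PHINode API of this vintage, hypothetical wrapper function):

  // Redirect the phi entry that came from OldBB so it now comes from NewBB.
  // getBasicBlockIndex returns -1 when OldBB is not an incoming block,
  // which the pass treats as a hard error via the assert.
  static void redirectPHIEntry(PHINode *P, BasicBlock *OldBB,
                               BasicBlock *NewBB) {
    int Idx = P->getBasicBlockIndex(OldBB);
    assert(Idx >= 0 && "OldBB is not an incoming block of this phi!");
    P->setIncomingBlock(Idx, NewBB);
  }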
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index dbf6eec..a0e41d9 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -167,12 +167,17 @@ static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE) {
static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (isInstructionTriviallyDead(I))
- deleteDeadInstruction(I, SE);
+ deleteDeadInstruction(I, SE);
}
bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
CurLoop = L;
+ // Disable loop idiom recognition if the function's name is a common idiom.
+ StringRef Name = L->getHeader()->getParent()->getName();
+ if (Name == "memset" || Name == "memcpy")
+ return false;
+
// The trip count of the loop must be analyzable.
SE = &getAnalysis<ScalarEvolution>();
if (!SE->hasLoopInvariantBackedgeTakenCount(L))
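
Editorial note: the new name check guards against self-recursion. If the loop under inspection is the implementation of memset or memcpy itself, rewriting its loop into a libcall yields a function that calls itself forever. A hedged C-level illustration (hypothetical freestanding implementation):

  #include <cstddef>

  // A byte-loop memset. Without the guard, the idiom recognizer would turn
  // this loop into a call to memset, i.e. into infinite recursion.
  void *memset(void *Dst, int Val, size_t Len) {
    unsigned char *P = static_cast<unsigned char *>(Dst);
    for (size_t i = 0; i != Len; ++i)
      P[i] = static_cast<unsigned char>(Val);
    return Dst;
  }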
@@ -467,8 +472,8 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE);
-
+ SCEVExpander Expander(*SE, "loop-idiom");
+
// Okay, we have a strided store "p[i]" of a splattable value. We can turn
// this into a memset in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -488,7 +493,7 @@ processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
deleteIfDeadInstruction(BasePtr, *SE);
return false;
}
-
+
// Okay, everything looks good, insert the memset.
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
@@ -556,8 +561,8 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
// header. This allows us to insert code for it in the preheader.
BasicBlock *Preheader = CurLoop->getLoopPreheader();
IRBuilder<> Builder(Preheader->getTerminator());
- SCEVExpander Expander(*SE);
-
+ SCEVExpander Expander(*SE, "loop-idiom");
+
// Okay, we have a strided store "p[i]" of a loaded value. We can turn
// this into a memcpy in the loop preheader now if we want. However, this
// would be unsafe to do if there is anything else in the loop that may read
@@ -568,7 +573,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
Expander.expandCodeFor(StoreEv->getStart(),
Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
Preheader->getTerminator());
-
+
if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
CurLoop, BECount, StoreSize,
getAnalysis<AliasAnalysis>(), SI)) {
@@ -593,9 +598,9 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
deleteIfDeadInstruction(StoreBasePtr, *SE);
return false;
}
-
+
// Okay, everything is safe, we can transform this!
-
+
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
// pointer size if it isn't already.
@@ -619,7 +624,7 @@ processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
DEBUG(dbgs() << " Formed memcpy: " << *NewCall << "\n"
<< " from load ptr=" << *LoadEv << " at: " << *LI << "\n"
<< " from store ptr=" << *StoreEv << " at: " << *SI << "\n");
-
+
   // Okay, the memcpy has been formed. Zap the original store and anything that
// feeds into it.
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 47dced3..9fd0958 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -220,7 +220,7 @@ bool LoopRotate::rotateLoop(Loop *L) {
// For PHI nodes, the value available in OldPreHeader is just the
// incoming value from OldPreHeader.
for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
- ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
+ ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
// For the rest of the instructions, either hoist to the OrigPreheader if
// possible or create a clone in the OldPreHeader if not.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 73ebd61..509d026 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1804,8 +1804,7 @@ LSRInstance::OptimizeLoopTermCond() {
ExitingBlock->getInstList().insert(TermBr, Cond);
// Clone the IVUse, as the old use still exists!
- CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace(),
- CondUse->getPhi());
+ CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
TermBr->replaceUsesOfWith(OldCond, Cond);
}
}
@@ -2768,7 +2767,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
// value to the immediate would produce a value closer to zero than the
// immediate itself, then the formula isn't worthwhile.
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
- if (C->getValue()->getValue().isNegative() !=
+ if (C->getValue()->isNegative() !=
(NewF.AM.BaseOffs < 0) &&
(C->getValue()->getValue().abs() * APInt(BitWidth, F.AM.Scale))
.ule(abs64(NewF.AM.BaseOffs)))
@@ -3699,7 +3698,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
- SCEVExpander Rewriter(SE);
+ SCEVExpander Rewriter(SE, "lsr");
Rewriter.disableCanonicalMode();
Rewriter.setIVIncInsertPos(L, IVIncInsertPos);
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index e05f29c..840c4b6 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -1021,6 +1021,10 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
while (PHINode *PN = dyn_cast<PHINode>(Succ->begin()))
ReplaceUsesOfWith(PN, PN->getIncomingValue(0), Worklist, L, LPM);
+ // If Succ has any successors with PHI nodes, update them to have
+ // entries coming from Pred instead of Succ.
+ Succ->replaceAllUsesWith(Pred);
+
// Move all of the successor contents from Succ to Pred.
Pred->getInstList().splice(BI, Succ->getInstList(), Succ->begin(),
Succ->end());
@@ -1028,10 +1032,6 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
BI->eraseFromParent();
RemoveFromWorklist(BI, Worklist);
- // If Succ has any successors with PHI nodes, update them to have
- // entries coming from Pred instead of Succ.
- Succ->replaceAllUsesWith(Pred);
-
// Remove Succ from the loop tree.
LI->removeBlock(Succ);
LPM->deleteSimpleAnalysisValue(Succ, L);
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index be5aa2e..7ed3db6 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -487,7 +487,8 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// happen to be using a load-store pair to implement it, rather than
// a memcpy.
if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) {
- if (!LI->isVolatile() && LI->hasOneUse()) {
+ if (!LI->isVolatile() && LI->hasOneUse() &&
+ LI->getParent() == SI->getParent()) {
MemDepResult ldep = MD->getDependency(LI);
CallInst *C = 0;
if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst()))
@@ -496,17 +497,14 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
if (C) {
// Check that nothing touches the dest of the "copy" between
// the call and the store.
- MemDepResult sdep = MD->getDependency(SI);
- if (!sdep.isNonLocal()) {
- bool FoundCall = false;
- for (BasicBlock::iterator I = SI, E = sdep.getInst(); I != E; --I) {
- if (&*I == C) {
- FoundCall = true;
- break;
- }
- }
- if (!FoundCall)
+ AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
+ AliasAnalysis::Location StoreLoc = AA.getLocation(SI);
+ for (BasicBlock::iterator I = --BasicBlock::iterator(SI),
+ E = C; I != E; --I) {
+ if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) {
C = 0;
+ break;
+ }
}
}
@@ -842,11 +840,11 @@ bool MemCpyOpt::processMemMove(MemMoveInst *M) {
// If not, then we know we can transform this.
Module *Mod = M->getParent()->getParent()->getParent();
- const Type *ArgTys[3] = { M->getRawDest()->getType(),
- M->getRawSource()->getType(),
- M->getLength()->getType() };
+ Type *ArgTys[3] = { M->getRawDest()->getType(),
+ M->getRawSource()->getType(),
+ M->getLength()->getType() };
M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy,
- ArgTys, 3));
+ ArgTys));
// MemDep may have over conservative information about this instruction, just
// conservatively flush it from the cache.
diff --git a/lib/Transforms/Scalar/ObjCARC.cpp b/lib/Transforms/Scalar/ObjCARC.cpp
new file mode 100644
index 0000000..ee132d3
--- /dev/null
+++ b/lib/Transforms/Scalar/ObjCARC.cpp
@@ -0,0 +1,3595 @@
+//===- ObjCARC.cpp - ObjC ARC Optimization --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines ObjC ARC optimizations. ARC stands for
+// Automatic Reference Counting and is a system for managing reference counts
+// for objects in Objective C.
+//
+// The optimizations performed include elimination of redundant, partially
+// redundant, and inconsequential reference count operations, elimination of
+// redundant weak pointer operations, pattern-matching and replacement of
+// low-level operations into higher-level operations, and numerous minor
+// simplifications.
+//
+// This file also defines a simple ARC-aware AliasAnalysis.
+//
+// WARNING: This file knows about certain library functions. It recognizes them
+// by name, and hardwires knowledge of their semantics.
+//
+// WARNING: This file knows about how certain Objective-C library functions are
+// used. Naive LLVM IR transformations which would otherwise be
+// behavior-preserving may break these assumptions.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "objc-arc"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+// A handy option to enable/disable all optimizations in this file.
+static cl::opt<bool> EnableARCOpts("enable-objc-arc-opts", cl::init(true));
+
+//===----------------------------------------------------------------------===//
+// Misc. Utilities
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// MapVector - An associative container with fast insertion-order
+ /// (deterministic) iteration over its elements. Plus the special
+ /// blot operation.
+ template<class KeyT, class ValueT>
+ class MapVector {
+ /// Map - Map keys to indices in Vector.
+ typedef DenseMap<KeyT, size_t> MapTy;
+ MapTy Map;
+
+ /// Vector - Keys and values.
+ typedef std::vector<std::pair<KeyT, ValueT> > VectorTy;
+ VectorTy Vector;
+
+ public:
+ typedef typename VectorTy::iterator iterator;
+ typedef typename VectorTy::const_iterator const_iterator;
+ iterator begin() { return Vector.begin(); }
+ iterator end() { return Vector.end(); }
+ const_iterator begin() const { return Vector.begin(); }
+ const_iterator end() const { return Vector.end(); }
+
+#ifdef XDEBUG
+ ~MapVector() {
+ assert(Vector.size() >= Map.size()); // May differ due to blotting.
+ for (typename MapTy::const_iterator I = Map.begin(), E = Map.end();
+ I != E; ++I) {
+ assert(I->second < Vector.size());
+ assert(Vector[I->second].first == I->first);
+ }
+ for (typename VectorTy::const_iterator I = Vector.begin(),
+ E = Vector.end(); I != E; ++I)
+ assert(!I->first ||
+ (Map.count(I->first) &&
+ Map[I->first] == size_t(I - Vector.begin())));
+ }
+#endif
+
+ ValueT &operator[](KeyT Arg) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(Arg, size_t(0)));
+ if (Pair.second) {
+ Pair.first->second = Vector.size();
+ Vector.push_back(std::make_pair(Arg, ValueT()));
+ return Vector.back().second;
+ }
+ return Vector[Pair.first->second].second;
+ }
+
+ std::pair<iterator, bool>
+ insert(const std::pair<KeyT, ValueT> &InsertPair) {
+ std::pair<typename MapTy::iterator, bool> Pair =
+ Map.insert(std::make_pair(InsertPair.first, size_t(0)));
+ if (Pair.second) {
+ Pair.first->second = Vector.size();
+ Vector.push_back(InsertPair);
+ return std::make_pair(llvm::prior(Vector.end()), true);
+ }
+ return std::make_pair(Vector.begin() + Pair.first->second, false);
+ }
+
+ const_iterator find(KeyT Key) const {
+ typename MapTy::const_iterator It = Map.find(Key);
+ if (It == Map.end()) return Vector.end();
+ return Vector.begin() + It->second;
+ }
+
+ /// blot - This is similar to erase, but instead of removing the element
+ /// from the vector, it just zeros out the key in the vector. This leaves
+ /// iterators intact, but clients must be prepared for zeroed-out keys when
+ /// iterating.
+ void blot(KeyT Key) {
+ typename MapTy::iterator It = Map.find(Key);
+ if (It == Map.end()) return;
+ Vector[It->second].first = KeyT();
+ Map.erase(It);
+ }
+
+ void clear() {
+ Map.clear();
+ Vector.clear();
+ }
+ };
+}
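
Editorial note: a minimal usage sketch for the blot operation defined above (Value, Ptr1, and Ptr2 are hypothetical stand-ins). Blotting zeroes the key in place rather than erasing the element, so vector size and iterators stay stable and iteration simply skips null keys:

  MapVector<Value *, unsigned> Counts;
  Counts[Ptr1] = 1;
  Counts[Ptr2] = 2;
  Counts.blot(Ptr1); // Key zeroed in place; nothing is erased.
  for (MapVector<Value *, unsigned>::iterator I = Counts.begin(),
       E = Counts.end(); I != E; ++I) {
    if (!I->first) continue; // Skip blotted entries.
    // ... use I->first and I->second ...
  }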
+
+//===----------------------------------------------------------------------===//
+// ARC Utilities.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// InstructionClass - A simple classification for instructions.
+ enum InstructionClass {
+ IC_Retain, ///< objc_retain
+ IC_RetainRV, ///< objc_retainAutoreleasedReturnValue
+ IC_RetainBlock, ///< objc_retainBlock
+ IC_Release, ///< objc_release
+ IC_Autorelease, ///< objc_autorelease
+ IC_AutoreleaseRV, ///< objc_autoreleaseReturnValue
+ IC_AutoreleasepoolPush, ///< objc_autoreleasePoolPush
+ IC_AutoreleasepoolPop, ///< objc_autoreleasePoolPop
+ IC_NoopCast, ///< objc_retainedObject, etc.
+ IC_FusedRetainAutorelease, ///< objc_retainAutorelease
+ IC_FusedRetainAutoreleaseRV, ///< objc_retainAutoreleaseReturnValue
+ IC_LoadWeakRetained, ///< objc_loadWeakRetained (primitive)
+ IC_StoreWeak, ///< objc_storeWeak (primitive)
+ IC_InitWeak, ///< objc_initWeak (derived)
+ IC_LoadWeak, ///< objc_loadWeak (derived)
+ IC_MoveWeak, ///< objc_moveWeak (derived)
+ IC_CopyWeak, ///< objc_copyWeak (derived)
+ IC_DestroyWeak, ///< objc_destroyWeak (derived)
+ IC_CallOrUser, ///< could call objc_release and/or "use" pointers
+ IC_Call, ///< could call objc_release
+ IC_User, ///< could "use" a pointer
+ IC_None ///< anything else
+ };
+}
+
+/// IsPotentialUse - Test whether the given value is possibly a
+/// reference-counted pointer.
+static bool IsPotentialUse(const Value *Op) {
+ // Pointers to static or stack storage are not reference-counted pointers.
+ if (isa<Constant>(Op) || isa<AllocaInst>(Op))
+ return false;
+ // Special arguments are not reference-counted.
+ if (const Argument *Arg = dyn_cast<Argument>(Op))
+ if (Arg->hasByValAttr() ||
+ Arg->hasNestAttr() ||
+ Arg->hasStructRetAttr())
+ return false;
+ // Only consider values with pointer types, and not function pointers.
+ const PointerType *Ty = dyn_cast<PointerType>(Op->getType());
+ if (!Ty || isa<FunctionType>(Ty->getElementType()))
+ return false;
+ // Conservatively assume anything else is a potential use.
+ return true;
+}
+
+/// GetCallSiteClass - Helper for GetInstructionClass. Determines what kind
+/// of construct CS is.
+static InstructionClass GetCallSiteClass(ImmutableCallSite CS) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I)
+ if (IsPotentialUse(*I))
+ return CS.onlyReadsMemory() ? IC_User : IC_CallOrUser;
+
+ return CS.onlyReadsMemory() ? IC_None : IC_Call;
+}
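
Editorial note: the classification GetCallSiteClass implements, spelled out as a table (no new behavior, just the two-by-two case split from the code above):

  // onlyReadsMemory?   any potential-use argument?   resulting class
  //       yes                    yes                 IC_User
  //       yes                    no                  IC_None
  //       no                     yes                 IC_CallOrUser
  //       no                     no                  IC_Call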
+
+/// GetFunctionClass - Determine if F is one of the special known Functions.
+/// If it isn't, return IC_CallOrUser.
+static InstructionClass GetFunctionClass(const Function *F) {
+ Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end();
+
+ // No arguments.
+ if (AI == AE)
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_autoreleasePoolPush", IC_AutoreleasepoolPush)
+ .Default(IC_CallOrUser);
+
+ // One argument.
+ const Argument *A0 = AI++;
+ if (AI == AE)
+ // Argument is a pointer.
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
+ const Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_retain", IC_Retain)
+ .Case("objc_retainAutoreleasedReturnValue", IC_RetainRV)
+ .Case("objc_retainBlock", IC_RetainBlock)
+ .Case("objc_release", IC_Release)
+ .Case("objc_autorelease", IC_Autorelease)
+ .Case("objc_autoreleaseReturnValue", IC_AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", IC_AutoreleasepoolPop)
+ .Case("objc_retainedObject", IC_NoopCast)
+ .Case("objc_unretainedObject", IC_NoopCast)
+ .Case("objc_unretainedPointer", IC_NoopCast)
+ .Case("objc_retain_autorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", IC_FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",IC_FusedRetainAutoreleaseRV)
+ .Default(IC_CallOrUser);
+
+ // Argument is i8**
+ if (const PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_loadWeakRetained", IC_LoadWeakRetained)
+ .Case("objc_loadWeak", IC_LoadWeak)
+ .Case("objc_destroyWeak", IC_DestroyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Two arguments, first is i8**.
+ const Argument *A1 = AI++;
+ if (AI == AE)
+ if (const PointerType *PTy = dyn_cast<PointerType>(A0->getType()))
+ if (const PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType()))
+ if (Pte->getElementType()->isIntegerTy(8))
+ if (const PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) {
+ const Type *ETy1 = PTy1->getElementType();
+ // Second argument is i8*
+ if (ETy1->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_storeWeak", IC_StoreWeak)
+ .Case("objc_initWeak", IC_InitWeak)
+ .Default(IC_CallOrUser);
+ // Second argument is i8**.
+ if (const PointerType *Pte1 = dyn_cast<PointerType>(ETy1))
+ if (Pte1->getElementType()->isIntegerTy(8))
+ return StringSwitch<InstructionClass>(F->getName())
+ .Case("objc_moveWeak", IC_MoveWeak)
+ .Case("objc_copyWeak", IC_CopyWeak)
+ .Default(IC_CallOrUser);
+ }
+
+ // Anything else.
+ return IC_CallOrUser;
+}
+
+/// GetInstructionClass - Determine what kind of construct V is.
+static InstructionClass GetInstructionClass(const Value *V) {
+ if (const Instruction *I = dyn_cast<Instruction>(V)) {
+    // Any instruction other than bitcast and gep with a pointer operand has a
+ // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer
+ // to a subsequent use, rather than using it themselves, in this sense.
+ // As a short cut, several other opcodes are known to have no pointer
+ // operands of interest. And ret is never followed by a release, so it's
+ // not interesting to examine.
+ switch (I->getOpcode()) {
+ case Instruction::Call: {
+ const CallInst *CI = cast<CallInst>(I);
+ // Check for calls to special functions.
+ if (const Function *F = CI->getCalledFunction()) {
+ InstructionClass Class = GetFunctionClass(F);
+ if (Class != IC_CallOrUser)
+ return Class;
+
+ // None of the intrinsic functions do objc_release. For intrinsics, the
+ // only question is whether or not they may be users.
+ switch (F->getIntrinsicID()) {
+ case 0: break;
+ case Intrinsic::bswap: case Intrinsic::ctpop:
+ case Intrinsic::ctlz: case Intrinsic::cttz:
+ case Intrinsic::returnaddress: case Intrinsic::frameaddress:
+ case Intrinsic::stacksave: case Intrinsic::stackrestore:
+ case Intrinsic::vastart: case Intrinsic::vacopy: case Intrinsic::vaend:
+ // Don't let dbg info affect our results.
+ case Intrinsic::dbg_declare: case Intrinsic::dbg_value:
+ // Short cut: Some intrinsics obviously don't use ObjC pointers.
+ return IC_None;
+ default:
+ for (Function::const_arg_iterator AI = F->arg_begin(),
+ AE = F->arg_end(); AI != AE; ++AI)
+ if (IsPotentialUse(AI))
+ return IC_User;
+ return IC_None;
+ }
+ }
+ return GetCallSiteClass(CI);
+ }
+ case Instruction::Invoke:
+ return GetCallSiteClass(cast<InvokeInst>(I));
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select: case Instruction::PHI:
+ case Instruction::Ret: case Instruction::Br:
+ case Instruction::Switch: case Instruction::IndirectBr:
+ case Instruction::Alloca: case Instruction::VAArg:
+ case Instruction::Add: case Instruction::FAdd:
+ case Instruction::Sub: case Instruction::FSub:
+ case Instruction::Mul: case Instruction::FMul:
+ case Instruction::SDiv: case Instruction::UDiv: case Instruction::FDiv:
+ case Instruction::SRem: case Instruction::URem: case Instruction::FRem:
+ case Instruction::Shl: case Instruction::LShr: case Instruction::AShr:
+ case Instruction::And: case Instruction::Or: case Instruction::Xor:
+ case Instruction::SExt: case Instruction::ZExt: case Instruction::Trunc:
+ case Instruction::IntToPtr: case Instruction::FCmp:
+ case Instruction::FPTrunc: case Instruction::FPExt:
+ case Instruction::FPToUI: case Instruction::FPToSI:
+ case Instruction::UIToFP: case Instruction::SIToFP:
+ case Instruction::InsertElement: case Instruction::ExtractElement:
+ case Instruction::ShuffleVector:
+ case Instruction::ExtractValue:
+ break;
+ case Instruction::ICmp:
+ // Comparing a pointer with null, or any other constant, isn't an
+ // interesting use, because we don't care what the pointer points to, or
+ // about the values of any other dynamic reference-counted pointers.
+ if (IsPotentialUse(I->getOperand(1)))
+ return IC_User;
+ break;
+ default:
+ // For anything else, check all the operands.
+ for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end();
+ OI != OE; ++OI)
+ if (IsPotentialUse(*OI))
+ return IC_User;
+ }
+ }
+
+ // Otherwise, it's totally inert for ARC purposes.
+ return IC_None;
+}
+
+/// GetBasicInstructionClass - Determine what kind of construct V is. This is
+/// similar to GetInstructionClass except that it only detects objc runtime
+/// calls. This allows it to be faster.
+static InstructionClass GetBasicInstructionClass(const Value *V) {
+ if (const CallInst *CI = dyn_cast<CallInst>(V)) {
+ if (const Function *F = CI->getCalledFunction())
+ return GetFunctionClass(F);
+ // Otherwise, be conservative.
+ return IC_CallOrUser;
+ }
+
+ // Otherwise, be conservative.
+ return IC_User;
+}
+
+/// IsRetain - Test if the given class is objc_retain or
+/// equivalent.
+static bool IsRetain(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV;
+}
+
+/// IsAutorelease - Test if the given class is objc_autorelease or
+/// equivalent.
+static bool IsAutorelease(InstructionClass Class) {
+ return Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsForwarding - Test if the given class represents instructions which return
+/// their argument verbatim.
+static bool IsForwarding(InstructionClass Class) {
+ // objc_retainBlock technically doesn't always return its argument
+ // verbatim, but it doesn't matter for our purposes here.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_NoopCast;
+}
+
+/// IsNoopOnNull - Test if the given class represents instructions which do
+/// nothing if passed a null pointer.
+static bool IsNoopOnNull(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_RetainBlock;
+}
+
+/// IsAlwaysTail - Test if the given class represents instructions which are
+/// always safe to mark with the "tail" keyword.
+static bool IsAlwaysTail(InstructionClass Class) {
+ // IC_RetainBlock may be given a stack argument.
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV;
+}
+
+/// IsNoThrow - Test if the given class represents instructions which are always
+/// safe to mark with the nounwind attribute.
+static bool IsNoThrow(InstructionClass Class) {
+ return Class == IC_Retain ||
+ Class == IC_RetainRV ||
+ Class == IC_RetainBlock ||
+ Class == IC_Release ||
+ Class == IC_Autorelease ||
+ Class == IC_AutoreleaseRV ||
+ Class == IC_AutoreleasepoolPush ||
+ Class == IC_AutoreleasepoolPop;
+}
+
+/// EraseInstruction - Erase the given instruction. ObjC calls return their
+/// argument verbatim, so if it's such a call and the return value has users,
+/// replace them with the argument value.
+static void EraseInstruction(Instruction *CI) {
+ Value *OldArg = cast<CallInst>(CI)->getArgOperand(0);
+
+ bool Unused = CI->use_empty();
+
+ if (!Unused) {
+ // Replace the return value with the argument.
+ assert(IsForwarding(GetBasicInstructionClass(CI)) &&
+ "Can't delete non-forwarding instruction with users!");
+ CI->replaceAllUsesWith(OldArg);
+ }
+
+ CI->eraseFromParent();
+
+ if (Unused)
+ RecursivelyDeleteTriviallyDeadInstructions(OldArg);
+}
+
+/// GetUnderlyingObjCPtr - This is a wrapper around getUnderlyingObject which
+/// also knows how to look through objc_retain and objc_autorelease calls, which
+/// we know to return their argument verbatim.
+static const Value *GetUnderlyingObjCPtr(const Value *V) {
+ for (;;) {
+ V = GetUnderlyingObject(V);
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+
+ return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static const Value *StripPointerCastsAndObjCCalls(const Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// StripPointerCastsAndObjCCalls - This is a wrapper around
+/// Value::stripPointerCasts which also knows how to look through objc_retain
+/// and objc_autorelease calls, which we know to return their argument verbatim.
+static Value *StripPointerCastsAndObjCCalls(Value *V) {
+ for (;;) {
+ V = V->stripPointerCasts();
+ if (!IsForwarding(GetBasicInstructionClass(V)))
+ break;
+ V = cast<CallInst>(V)->getArgOperand(0);
+ }
+ return V;
+}
+
+/// GetObjCArg - Assuming the given instruction is one of the special calls such
+/// as objc_retain or objc_release, return the argument value, stripped of no-op
+/// casts and forwarding calls.
+static Value *GetObjCArg(Value *Inst) {
+ return StripPointerCastsAndObjCCalls(cast<CallInst>(Inst)->getArgOperand(0));
+}
+
+/// IsObjCIdentifiedObject - This is similar to AliasAnalysis'
+/// isObjCIdentifiedObject, except that it uses special knowledge of
+/// ObjC conventions.
+static bool IsObjCIdentifiedObject(const Value *V) {
+ // Assume that call results and arguments have their own "provenance".
+ // Constants (including GlobalVariables) and Allocas are never
+ // reference-counted.
+ if (isa<CallInst>(V) || isa<InvokeInst>(V) ||
+ isa<Argument>(V) || isa<Constant>(V) ||
+ isa<AllocaInst>(V))
+ return true;
+
+ if (const LoadInst *LI = dyn_cast<LoadInst>(V)) {
+ const Value *Pointer =
+ StripPointerCastsAndObjCCalls(LI->getPointerOperand());
+ if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Pointer)) {
+ StringRef Name = GV->getName();
+ // These special variables are known to hold values which are not
+ // reference-counted pointers.
+ if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_REFERENCES_") ||
+ Name.startswith("\01L_OBJC_CLASSLIST_SUP_REFS_$_") ||
+ Name.startswith("\01L_OBJC_METH_VAR_NAME_") ||
+ Name.startswith("\01l_objc_msgSend_fixup_"))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// FindSingleUseIdentifiedObject - This is similar to
+/// StripPointerCastsAndObjCCalls but it stops as soon as it finds a value
+/// with multiple uses.
+static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+ if (Arg->hasOneUse()) {
+ if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
+ return FindSingleUseIdentifiedObject(BC->getOperand(0));
+ if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Arg))
+ if (GEP->hasAllZeroIndices())
+ return FindSingleUseIdentifiedObject(GEP->getPointerOperand());
+ if (IsForwarding(GetBasicInstructionClass(Arg)))
+ return FindSingleUseIdentifiedObject(
+ cast<CallInst>(Arg)->getArgOperand(0));
+ if (!IsObjCIdentifiedObject(Arg))
+ return 0;
+ return Arg;
+ }
+
+ // If we found an identifiable object but it has multiple uses, but they
+ // are trivial uses, we can still consider this to be a single-use
+ // value.
+ if (IsObjCIdentifiedObject(Arg)) {
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ++UI) {
+ const User *U = *UI;
+ if (!U->use_empty() || StripPointerCastsAndObjCCalls(U) != Arg)
+ return 0;
+ }
+
+ return Arg;
+ }
+
+ return 0;
+}
+
+/// ModuleHasARC - Test if the given module looks interesting to run ARC
+/// optimization on.
+static bool ModuleHasARC(const Module &M) {
+ return
+ M.getNamedValue("objc_retain") ||
+ M.getNamedValue("objc_release") ||
+ M.getNamedValue("objc_autorelease") ||
+ M.getNamedValue("objc_retainAutoreleasedReturnValue") ||
+ M.getNamedValue("objc_retainBlock") ||
+ M.getNamedValue("objc_autoreleaseReturnValue") ||
+ M.getNamedValue("objc_autoreleasePoolPush") ||
+ M.getNamedValue("objc_loadWeakRetained") ||
+ M.getNamedValue("objc_loadWeak") ||
+ M.getNamedValue("objc_destroyWeak") ||
+ M.getNamedValue("objc_storeWeak") ||
+ M.getNamedValue("objc_initWeak") ||
+ M.getNamedValue("objc_moveWeak") ||
+ M.getNamedValue("objc_copyWeak") ||
+ M.getNamedValue("objc_retainedObject") ||
+ M.getNamedValue("objc_unretainedObject") ||
+ M.getNamedValue("objc_unretainedPointer");
+}
+
+//===----------------------------------------------------------------------===//
+// ARC AliasAnalysis.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Pass.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/Passes.h"
+
+namespace {
+ /// ObjCARCAliasAnalysis - This is a simple alias analysis
+ /// implementation that uses knowledge of ARC constructs to answer queries.
+ ///
+ /// TODO: This class could be generalized to know about other ObjC-specific
+  /// tricks, such as knowing that ivars in the non-fragile ABI are non-aliasing
+ /// even though their offsets are dynamic.
+ class ObjCARCAliasAnalysis : public ImmutablePass,
+ public AliasAnalysis {
+ public:
+ static char ID; // Class identification, replacement for typeinfo
+ ObjCARCAliasAnalysis() : ImmutablePass(ID) {
+ initializeObjCARCAliasAnalysisPass(*PassRegistry::getPassRegistry());
+ }
+
+ private:
+ virtual void initializePass() {
+ InitializeAliasAnalysis(this);
+ }
+
+ /// getAdjustedAnalysisPointer - This method is used when a pass implements
+ /// an analysis interface through multiple inheritance. If needed, it
+ /// should override this to adjust the this pointer as needed for the
+ /// specified pass info.
+ virtual void *getAdjustedAnalysisPointer(const void *PI) {
+ if (PI == &AliasAnalysis::ID)
+ return (AliasAnalysis*)this;
+ return this;
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual AliasResult alias(const Location &LocA, const Location &LocB);
+ virtual bool pointsToConstantMemory(const Location &Loc, bool OrLocal);
+ virtual ModRefBehavior getModRefBehavior(ImmutableCallSite CS);
+ virtual ModRefBehavior getModRefBehavior(const Function *F);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS,
+ const Location &Loc);
+ virtual ModRefResult getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2);
+ };
+} // End of anonymous namespace
+
+// Register this pass...
+char ObjCARCAliasAnalysis::ID = 0;
+INITIALIZE_AG_PASS(ObjCARCAliasAnalysis, AliasAnalysis, "objc-arc-aa",
+ "ObjC-ARC-Based Alias Analysis", false, true, false)
+
+ImmutablePass *llvm::createObjCARCAliasAnalysisPass() {
+ return new ObjCARCAliasAnalysis();
+}
+
+void
+ObjCARCAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AliasAnalysis::getAnalysisUsage(AU);
+}
+
+AliasAnalysis::AliasResult
+ObjCARCAliasAnalysis::alias(const Location &LocA, const Location &LocB) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::alias(LocA, LocB);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making a
+ // precise alias query.
+ const Value *SA = StripPointerCastsAndObjCCalls(LocA.Ptr);
+ const Value *SB = StripPointerCastsAndObjCCalls(LocB.Ptr);
+ AliasResult Result =
+ AliasAnalysis::alias(Location(SA, LocA.Size, LocA.TBAATag),
+ Location(SB, LocB.Size, LocB.TBAATag));
+ if (Result != MayAlias)
+ return Result;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *UA = GetUnderlyingObjCPtr(SA);
+ const Value *UB = GetUnderlyingObjCPtr(SB);
+ if (UA != SA || UB != SB) {
+ Result = AliasAnalysis::alias(Location(UA), Location(UB));
+ // We can't use MustAlias or PartialAlias results here because
+ // GetUnderlyingObjCPtr may return a pointer that is offset from the
+ // original value.
+ if (Result == NoAlias)
+ return NoAlias;
+ }
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return MayAlias;
+}
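+
+// An illustrative query (hypothetical IR): given
+//
+//   %y = call i8* @objc_retain(i8* %x)
+//
+// StripPointerCastsAndObjCCalls maps %y back to %x, so alias(%y, %x) can be
+// answered by the chained analysis as alias(%x, %x), i.e. MustAlias.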
+
+bool
+ObjCARCAliasAnalysis::pointsToConstantMemory(const Location &Loc,
+ bool OrLocal) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal);
+
+ // First, strip off no-ops, including ObjC-specific no-ops, and try making
+ // a precise alias query.
+ const Value *S = StripPointerCastsAndObjCCalls(Loc.Ptr);
+ if (AliasAnalysis::pointsToConstantMemory(Location(S, Loc.Size, Loc.TBAATag),
+ OrLocal))
+ return true;
+
+ // If that failed, climb to the underlying object, including climbing through
+ // ObjC-specific no-ops, and try making an imprecise alias query.
+ const Value *U = GetUnderlyingObjCPtr(S);
+ if (U != S)
+ return AliasAnalysis::pointsToConstantMemory(Location(U), OrLocal);
+
+ // If that failed, fail. We don't need to chain here, since that's covered
+ // by the earlier precise query.
+ return false;
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
+ // We have nothing to do. Just chain to the next AliasAnalysis.
+ return AliasAnalysis::getModRefBehavior(CS);
+}
+
+AliasAnalysis::ModRefBehavior
+ObjCARCAliasAnalysis::getModRefBehavior(const Function *F) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefBehavior(F);
+
+ switch (GetFunctionClass(F)) {
+ case IC_NoopCast:
+ return DoesNotAccessMemory;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefBehavior(F);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS, const Location &Loc) {
+ if (!EnableARCOpts)
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+
+ switch (GetBasicInstructionClass(CS.getInstruction())) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_RetainBlock:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_NoopCast:
+ case IC_AutoreleasepoolPush:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These functions don't access any memory visible to the compiler.
+ return NoModRef;
+ default:
+ break;
+ }
+
+ return AliasAnalysis::getModRefInfo(CS, Loc);
+}
+
+AliasAnalysis::ModRefResult
+ObjCARCAliasAnalysis::getModRefInfo(ImmutableCallSite CS1,
+ ImmutableCallSite CS2) {
+ // TODO: Theoretically we could check for dependencies between objc_* calls
+ // and OnlyAccessesArgumentPointees calls or other well-behaved calls.
+ return AliasAnalysis::getModRefInfo(CS1, CS2);
+}
+
+//===----------------------------------------------------------------------===//
+// ARC expansion.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/InstIterator.h"
+#include "llvm/Transforms/Scalar.h"
+
+namespace {
+ /// ObjCARCExpand - Early ARC transformations.
+ class ObjCARCExpand : public FunctionPass {
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ public:
+ static char ID;
+ ObjCARCExpand() : FunctionPass(ID) {
+ initializeObjCARCExpandPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCExpand::ID = 0;
+INITIALIZE_PASS(ObjCARCExpand,
+ "objc-arc-expand", "ObjC ARC expansion", false, false)
+
+Pass *llvm::createObjCARCExpandPass() {
+ return new ObjCARCExpand();
+}
+
+void ObjCARCExpand::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+}
+
+bool ObjCARCExpand::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ return false;
+}
+
+bool ObjCARCExpand::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ bool Changed = false;
+
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) {
+ Instruction *Inst = &*I;
+
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_Retain:
+ case IC_RetainRV:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ // These calls return their argument verbatim, as a low-level
+ // optimization. However, this makes high-level optimizations
+ // harder. Undo any uses of this optimization that the front-end
+ // emitted here. We'll redo them in a later pass.
+ Changed = true;
+ Inst->replaceAllUsesWith(cast<CallInst>(Inst)->getArgOperand(0));
+ break;
+ default:
+ break;
+ }
+ }
+
+ return Changed;
+}
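+
+// A sketch of the expansion on hypothetical IR: given
+//
+//   %1 = call i8* @objc_retain(i8* %0)
+//   call void @use(i8* %1)
+//
+// the use of %1 is rewritten to use %0 directly, so later passes see the
+// same SSA value regardless of how the front-end threaded it through the
+// retain.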
+
+//===----------------------------------------------------------------------===//
+// ARC optimization.
+//===----------------------------------------------------------------------===//
+
+// TODO: On code like this:
+//
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+// stuff_that_cannot_release()
+// objc_retain(%x)
+// stuff_that_cannot_release()
+// objc_autorelease(%x)
+//
+// The second retain and autorelease can be deleted.
+
+// TODO: It should be possible to delete
+// objc_autoreleasePoolPush and objc_autoreleasePoolPop
+// pairs if nothing is actually autoreleased between them. Also, autorelease
+// calls followed by objc_autoreleasePoolPop calls (perhaps in ObjC++ code
+// after inlining) can be turned into plain release calls.
+
+// TODO: Critical-edge splitting. If the optimal insertion point is
+// a critical edge, the current algorithm has to fail, because it doesn't
+// know how to split edges. It should be possible to make the optimizer
+// think in terms of edges, rather than blocks, and then split critical
+// edges on demand.
+
+// TODO: OptimizeSequences could be generalized to be interprocedural.
+
+// TODO: Recognize that a bunch of other objc runtime calls have
+// non-escaping arguments and non-releasing arguments, and may be
+// non-autoreleasing.
+
+// TODO: Sink autorelease calls as far as possible. Unfortunately we
+// usually can't sink them past other calls, which would be the main
+// case where it would be useful.
+
+// TODO: The pointer returned from objc_loadWeakRetained is retained.
+
+#include "llvm/GlobalAlias.h"
+#include "llvm/Constants.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Statistic.h"
+
+STATISTIC(NumNoops, "Number of no-op objc calls eliminated");
+STATISTIC(NumPartialNoops, "Number of partially no-op objc calls eliminated");
+STATISTIC(NumAutoreleases, "Number of autoreleases converted to releases");
+STATISTIC(NumRets, "Number of return value forwarding "
+ "retain+autoreleases eliminated");
+STATISTIC(NumRRs, "Number of retain+release paths eliminated");
+STATISTIC(NumPeeps, "Number of calls peephole-optimized");
+
+namespace {
+ /// ProvenanceAnalysis - This is similar to BasicAliasAnalysis, and it
+ /// uses many of the same techniques, except it uses special ObjC-specific
+ /// reasoning about pointer relationships.
+ class ProvenanceAnalysis {
+ AliasAnalysis *AA;
+
+ typedef std::pair<const Value *, const Value *> ValuePairTy;
+ typedef DenseMap<ValuePairTy, bool> CachedResultsTy;
+ CachedResultsTy CachedResults;
+
+ bool relatedCheck(const Value *A, const Value *B);
+ bool relatedSelect(const SelectInst *A, const Value *B);
+ bool relatedPHI(const PHINode *A, const Value *B);
+
+ // Do not implement.
+ void operator=(const ProvenanceAnalysis &);
+ ProvenanceAnalysis(const ProvenanceAnalysis &);
+
+ public:
+ ProvenanceAnalysis() {}
+
+ void setAA(AliasAnalysis *aa) { AA = aa; }
+
+ AliasAnalysis *getAA() const { return AA; }
+
+ bool related(const Value *A, const Value *B);
+
+ void clear() {
+ CachedResults.clear();
+ }
+ };
+}
+
+bool ProvenanceAnalysis::relatedSelect(const SelectInst *A, const Value *B) {
+ // If the values are Selects with the same condition, we can do a more precise
+ // check: just check for relations between the values on corresponding arms.
+ if (const SelectInst *SB = dyn_cast<SelectInst>(B))
+ if (A->getCondition() == SB->getCondition()) {
+ if (related(A->getTrueValue(), SB->getTrueValue()))
+ return true;
+ if (related(A->getFalseValue(), SB->getFalseValue()))
+ return true;
+ return false;
+ }
+
+ // Check both arms of the Select node individually.
+ if (related(A->getTrueValue(), B))
+ return true;
+ if (related(A->getFalseValue(), B))
+ return true;
+
+ // The arms both checked out.
+ return false;
+}
+
+bool ProvenanceAnalysis::relatedPHI(const PHINode *A, const Value *B) {
+ // If the values are PHIs in the same block, we can do a more precise as well
+ // as efficient check: just check for relations between the values on
+ // corresponding edges.
+ if (const PHINode *PNB = dyn_cast<PHINode>(B))
+ if (PNB->getParent() == A->getParent()) {
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i)
+ if (related(A->getIncomingValue(i),
+ PNB->getIncomingValueForBlock(A->getIncomingBlock(i))))
+ return true;
+ return false;
+ }
+
+ // Check each unique source of the PHI node against B.
+ SmallPtrSet<const Value *, 4> UniqueSrc;
+ for (unsigned i = 0, e = A->getNumIncomingValues(); i != e; ++i) {
+ const Value *PV1 = A->getIncomingValue(i);
+ if (UniqueSrc.insert(PV1) && related(PV1, B))
+ return true;
+ }
+
+ // All of the sources checked out.
+ return false;
+}
+
+/// isStoredObjCPointer - Test if the value of P, or any value covered by its
+/// provenance, is ever stored within the function (not counting callees).
+static bool isStoredObjCPointer(const Value *P) {
+ SmallPtrSet<const Value *, 8> Visited;
+ SmallVector<const Value *, 8> Worklist;
+ Worklist.push_back(P);
+ Visited.insert(P);
+ do {
+ P = Worklist.pop_back_val();
+ for (Value::const_use_iterator UI = P->use_begin(), UE = P->use_end();
+ UI != UE; ++UI) {
+ const User *Ur = *UI;
+ if (isa<StoreInst>(Ur)) {
+ if (UI.getOperandNo() == 0)
+ // The pointer is stored.
+ return true;
+ // The pointer is stored through.
+ continue;
+ }
+ if (isa<CallInst>(Ur))
+ // The pointer is passed as an argument, ignore this.
+ continue;
+ if (isa<PtrToIntInst>(P))
+ // Assume the worst.
+ return true;
+ if (Visited.insert(Ur))
+ Worklist.push_back(Ur);
+ }
+ } while (!Worklist.empty());
+
+ // Everything checked out.
+ return false;
+}
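+
+// For example (illustrative): if every use in the walk above is a call
+// argument or a store *through* the pointer, the function returns false;
+// the first store of the pointer value itself (operand 0 of a StoreInst)
+// returns true, as does any use of a ptrtoint in its provenance.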
+
+bool ProvenanceAnalysis::relatedCheck(const Value *A, const Value *B) {
+ // Skip past provenance pass-throughs.
+ A = GetUnderlyingObjCPtr(A);
+ B = GetUnderlyingObjCPtr(B);
+
+ // Quick check.
+ if (A == B)
+ return true;
+
+ // Ask regular AliasAnalysis, for a first approximation.
+ switch (AA->alias(A, B)) {
+ case AliasAnalysis::NoAlias:
+ return false;
+ case AliasAnalysis::MustAlias:
+ case AliasAnalysis::PartialAlias:
+ return true;
+ case AliasAnalysis::MayAlias:
+ break;
+ }
+
+ bool AIsIdentified = IsObjCIdentifiedObject(A);
+ bool BIsIdentified = IsObjCIdentifiedObject(B);
+
+ // An ObjC-Identified object can't alias a load if it is never locally stored.
+ if (AIsIdentified) {
+ if (BIsIdentified) {
+ // If both pointers have provenance, they can be directly compared.
+ if (A != B)
+ return false;
+ } else {
+ if (isa<LoadInst>(B))
+ return isStoredObjCPointer(A);
+ }
+ } else {
+ if (BIsIdentified && isa<LoadInst>(A))
+ return isStoredObjCPointer(B);
+ }
+
+ // Special handling for PHI and Select.
+ if (const PHINode *PN = dyn_cast<PHINode>(A))
+ return relatedPHI(PN, B);
+ if (const PHINode *PN = dyn_cast<PHINode>(B))
+ return relatedPHI(PN, A);
+ if (const SelectInst *S = dyn_cast<SelectInst>(A))
+ return relatedSelect(S, B);
+ if (const SelectInst *S = dyn_cast<SelectInst>(B))
+ return relatedSelect(S, A);
+
+ // Conservative.
+ return true;
+}
+
+bool ProvenanceAnalysis::related(const Value *A, const Value *B) {
+ // Begin by inserting a conservative value into the map. If the insertion
+ // fails, we have the answer already. If it succeeds, leave it there until we
+ // compute the real answer to guard against recursive queries.
+ if (A > B) std::swap(A, B);
+ std::pair<CachedResultsTy::iterator, bool> Pair =
+ CachedResults.insert(std::make_pair(ValuePairTy(A, B), true));
+ if (!Pair.second)
+ return Pair.first->second;
+
+ bool Result = relatedCheck(A, B);
+ CachedResults[ValuePairTy(A, B)] = Result;
+ return Result;
+}
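+
+// The conservative placeholder matters for cyclic IR (illustrative): two
+// PHIs that feed each other would otherwise send related() -> relatedPHI()
+// -> related() into infinite recursion; the pre-inserted "true" entry
+// terminates the recursion with a safe answer instead.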
+
+namespace {
+ // Sequence - A sequence of states that a pointer may go through in which an
+ // objc_retain and objc_release are actually needed.
+ enum Sequence {
+ S_None,
+ S_Retain, ///< objc_retain(x)
+ S_CanRelease, ///< foo(x) -- x could possibly see a ref count decrement
+ S_Use, ///< any use of x
+ S_Stop, ///< like S_Release, but code motion is stopped
+ S_Release, ///< objc_release(x)
+ S_MovableRelease ///< objc_release(x), !clang.imprecise_release
+ };
+}
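+
+// As an illustration (not from the original source), a bottom-up walk over
+//
+//   objc_retain(%x)
+//   call void @use(i8* %x)
+//   objc_release(%x)
+//
+// moves %x through S_Release (at the release) and then S_Use (at the use)
+// before the retain completes the candidate pair.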
+
+static Sequence MergeSeqs(Sequence A, Sequence B, bool TopDown) {
+ // The easy cases.
+ if (A == B)
+ return A;
+ if (A == S_None || B == S_None)
+ return S_None;
+
+ // Note that we can't merge S_CanRelease and S_Use.
+ if (A > B) std::swap(A, B);
+ if (TopDown) {
+ // Choose the side which is further along in the sequence.
+ if (A == S_Retain && (B == S_CanRelease || B == S_Use))
+ return B;
+ } else {
+ // Choose the side which is further along in the sequence.
+ if ((A == S_Use || A == S_CanRelease) &&
+ (B == S_Release || B == S_Stop || B == S_MovableRelease))
+ return A;
+ // If both sides are releases, choose the more conservative one.
+ if (A == S_Stop && (B == S_Release || B == S_MovableRelease))
+ return A;
+ if (A == S_Release && B == S_MovableRelease)
+ return A;
+ }
+
+ return S_None;
+}
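+
+// A worked example (not from the original source): in a top-down merge,
+// MergeSeqs(S_Retain, S_Use, /*TopDown=*/true) returns S_Use, the state
+// further along in the sequence, while merging S_CanRelease with S_Use in
+// either direction returns S_None, since those two states can't be
+// reconciled.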
+
+namespace {
+ /// RRInfo - Unidirectional information about either a
+ /// retain-decrement-use-release sequence or a release-use-decrement-retain
+ /// reverse sequence.
+ struct RRInfo {
+ /// KnownIncremented - After an objc_retain, the reference count of the
+ /// referenced object is known to be positive. Similarly, before an
+ /// objc_release, the reference count of the referenced object is known to
+ /// be positive. If there are retain-release pairs in code regions where the
+ /// retain count is known to be positive, they can be eliminated, regardless
+ /// of any side effects between them.
+ bool KnownIncremented;
+
+ /// IsRetainBlock - True if the Calls are objc_retainBlock calls (as
+ /// opposed to objc_retain calls).
+ bool IsRetainBlock;
+
+ /// IsTailCallRelease - True if the objc_release calls are all marked
+ /// with the "tail" keyword.
+ bool IsTailCallRelease;
+
+ /// ReleaseMetadata - If the Calls are objc_release calls and they all have
+ /// a clang.imprecise_release tag, this is the metadata tag.
+ MDNode *ReleaseMetadata;
+
+ /// Calls - For a top-down sequence, the set of objc_retains or
+ /// objc_retainBlocks. For bottom-up, the set of objc_releases.
+ SmallPtrSet<Instruction *, 2> Calls;
+
+ /// ReverseInsertPts - The set of optimal insert positions for
+ /// moving calls in the opposite sequence.
+ SmallPtrSet<Instruction *, 2> ReverseInsertPts;
+
+ RRInfo() :
+ KnownIncremented(false), IsRetainBlock(false), IsTailCallRelease(false),
+ ReleaseMetadata(0) {}
+
+ void clear();
+ };
+}
+
+void RRInfo::clear() {
+ KnownIncremented = false;
+ IsRetainBlock = false;
+ IsTailCallRelease = false;
+ ReleaseMetadata = 0;
+ Calls.clear();
+ ReverseInsertPts.clear();
+}
+
+namespace {
+ /// PtrState - This class summarizes several per-pointer runtime properties
+ /// which are propagated through the flow graph.
+ class PtrState {
+ /// RefCount - The known minimum number of reference count increments.
+ unsigned RefCount;
+
+ /// Seq - The current position in the sequence.
+ Sequence Seq;
+
+ public:
+ /// RRI - Unidirectional information about the current sequence.
+ /// TODO: Encapsulate this better.
+ RRInfo RRI;
+
+ PtrState() : RefCount(0), Seq(S_None) {}
+
+ void IncrementRefCount() {
+ if (RefCount != UINT_MAX) ++RefCount;
+ }
+
+ void DecrementRefCount() {
+ if (RefCount != 0) --RefCount;
+ }
+
+ void ClearRefCount() {
+ RefCount = 0;
+ }
+
+ bool IsKnownIncremented() const {
+ return RefCount > 0;
+ }
+
+ void SetSeq(Sequence NewSeq) {
+ Seq = NewSeq;
+ }
+
+ void SetSeqToRelease(MDNode *M) {
+ if (Seq == S_None || Seq == S_Use) {
+ Seq = M ? S_MovableRelease : S_Release;
+ RRI.ReleaseMetadata = M;
+ } else if (Seq != S_MovableRelease || RRI.ReleaseMetadata != M) {
+ Seq = S_Release;
+ RRI.ReleaseMetadata = 0;
+ }
+ }
+
+ Sequence GetSeq() const {
+ return Seq;
+ }
+
+ void ClearSequenceProgress() {
+ Seq = S_None;
+ RRI.clear();
+ }
+
+ void Merge(const PtrState &Other, bool TopDown);
+ };
+}
+
+void
+PtrState::Merge(const PtrState &Other, bool TopDown) {
+ Seq = MergeSeqs(Seq, Other.Seq, TopDown);
+ RefCount = std::min(RefCount, Other.RefCount);
+
+ // We can't merge a plain objc_retain with an objc_retainBlock.
+ if (RRI.IsRetainBlock != Other.RRI.IsRetainBlock)
+ Seq = S_None;
+
+ if (Seq == S_None) {
+ RRI.clear();
+ } else {
+ // Conservatively merge the ReleaseMetadata information.
+ if (RRI.ReleaseMetadata != Other.RRI.ReleaseMetadata)
+ RRI.ReleaseMetadata = 0;
+
+ RRI.KnownIncremented = RRI.KnownIncremented && Other.RRI.KnownIncremented;
+ RRI.IsTailCallRelease = RRI.IsTailCallRelease && Other.RRI.IsTailCallRelease;
+ RRI.Calls.insert(Other.RRI.Calls.begin(), Other.RRI.Calls.end());
+ RRI.ReverseInsertPts.insert(Other.RRI.ReverseInsertPts.begin(),
+ Other.RRI.ReverseInsertPts.end());
+ }
+}
+
+namespace {
+ /// BBState - Per-BasicBlock state.
+ class BBState {
+ /// TopDownPathCount - The number of unique control paths from the entry
+ /// which can reach this block.
+ unsigned TopDownPathCount;
+
+ /// BottomUpPathCount - The number of unique control paths to exits
+ /// from this block.
+ unsigned BottomUpPathCount;
+
+ /// MapTy - A type for PerPtrTopDown and PerPtrBottomUp.
+ typedef MapVector<const Value *, PtrState> MapTy;
+
+ /// PerPtrTopDown - The top-down traversal uses this to record information
+ /// known about a pointer at the bottom of each block.
+ MapTy PerPtrTopDown;
+
+ /// PerPtrBottomUp - The bottom-up traversal uses this to record information
+ /// known about a pointer at the top of each block.
+ MapTy PerPtrBottomUp;
+
+ public:
+ BBState() : TopDownPathCount(0), BottomUpPathCount(0) {}
+
+ typedef MapTy::iterator ptr_iterator;
+ typedef MapTy::const_iterator ptr_const_iterator;
+
+ ptr_iterator top_down_ptr_begin() { return PerPtrTopDown.begin(); }
+ ptr_iterator top_down_ptr_end() { return PerPtrTopDown.end(); }
+ ptr_const_iterator top_down_ptr_begin() const {
+ return PerPtrTopDown.begin();
+ }
+ ptr_const_iterator top_down_ptr_end() const {
+ return PerPtrTopDown.end();
+ }
+
+ ptr_iterator bottom_up_ptr_begin() { return PerPtrBottomUp.begin(); }
+ ptr_iterator bottom_up_ptr_end() { return PerPtrBottomUp.end(); }
+ ptr_const_iterator bottom_up_ptr_begin() const {
+ return PerPtrBottomUp.begin();
+ }
+ ptr_const_iterator bottom_up_ptr_end() const {
+ return PerPtrBottomUp.end();
+ }
+
+ /// SetAsEntry - Mark this block as being an entry block, which has one
+ /// path from the entry by definition.
+ void SetAsEntry() { TopDownPathCount = 1; }
+
+ /// SetAsExit - Mark this block as being an exit block, which has one
+ /// path to an exit by definition.
+ void SetAsExit() { BottomUpPathCount = 1; }
+
+ PtrState &getPtrTopDownState(const Value *Arg) {
+ return PerPtrTopDown[Arg];
+ }
+
+ PtrState &getPtrBottomUpState(const Value *Arg) {
+ return PerPtrBottomUp[Arg];
+ }
+
+ void clearBottomUpPointers() {
+ PerPtrBottomUp.clear();
+ }
+
+ void clearTopDownPointers() {
+ PerPtrTopDown.clear();
+ }
+
+ void InitFromPred(const BBState &Other);
+ void InitFromSucc(const BBState &Other);
+ void MergePred(const BBState &Other);
+ void MergeSucc(const BBState &Other);
+
+ /// GetAllPathCount - Return the number of possible unique paths from an
+ /// entry to an exit which pass through this block. This is only valid
+ /// after both the top-down and bottom-up traversals are complete.
+ unsigned GetAllPathCount() const {
+ return TopDownPathCount * BottomUpPathCount;
+ }
+ };
+}
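+
+// For intuition (illustrative): in a diamond CFG entry -> {A, B} -> exit,
+// the merge block ends up with TopDownPathCount == 2 and
+// BottomUpPathCount == 1, so GetAllPathCount returns 2, the number of
+// distinct entry-to-exit paths passing through it.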
+
+void BBState::InitFromPred(const BBState &Other) {
+ PerPtrTopDown = Other.PerPtrTopDown;
+ TopDownPathCount = Other.TopDownPathCount;
+}
+
+void BBState::InitFromSucc(const BBState &Other) {
+ PerPtrBottomUp = Other.PerPtrBottomUp;
+ BottomUpPathCount = Other.BottomUpPathCount;
+}
+
+/// MergePred - The top-down traversal uses this to merge information about
+/// predecessors to form the initial state for a new block.
+void BBState::MergePred(const BBState &Other) {
+ // Other.TopDownPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ TopDownPathCount += Other.TopDownPathCount;
+
+ // For each entry in the other set, if our set has an entry with the same key,
+ // merge the entries. Otherwise, copy the entry and merge it with an empty
+ // entry.
+ for (ptr_const_iterator MI = Other.top_down_ptr_begin(),
+ ME = Other.top_down_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrTopDown.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/true);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry with the
+ // same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = top_down_ptr_begin(),
+ ME = top_down_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrTopDown.find(MI->first) == Other.PerPtrTopDown.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/true);
+}
+
+/// MergeSucc - The bottom-up traversal uses this to merge information about
+/// successors to form the initial state for a new block.
+void BBState::MergeSucc(const BBState &Other) {
+ // Other.BottomUpPathCount can be 0, in which case it is either dead or a
+ // loop backedge. Loop backedges are special.
+ BottomUpPathCount += Other.BottomUpPathCount;
+
+ // For each entry in the other set, if our set has an entry with the
+ // same key, merge the entries. Otherwise, copy the entry and merge
+ // it with an empty entry.
+ for (ptr_const_iterator MI = Other.bottom_up_ptr_begin(),
+ ME = Other.bottom_up_ptr_end(); MI != ME; ++MI) {
+ std::pair<ptr_iterator, bool> Pair = PerPtrBottomUp.insert(*MI);
+ Pair.first->second.Merge(Pair.second ? PtrState() : MI->second,
+ /*TopDown=*/false);
+ }
+
+ // For each entry in our set, if the other set doesn't have an entry
+ // with the same key, force it to merge with an empty entry.
+ for (ptr_iterator MI = bottom_up_ptr_begin(),
+ ME = bottom_up_ptr_end(); MI != ME; ++MI)
+ if (Other.PerPtrBottomUp.find(MI->first) == Other.PerPtrBottomUp.end())
+ MI->second.Merge(PtrState(), /*TopDown=*/false);
+}
+
+namespace {
+ /// ObjCARCOpt - The main ARC optimization pass.
+ class ObjCARCOpt : public FunctionPass {
+ bool Changed;
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// RetainFunc, ReleaseFunc, etc. - Declarations for objc_retain,
+ /// objc_retainBlock, objc_retainAutoreleasedReturnValue, and objc_release.
+ Function *RetainFunc, *RetainBlockFunc, *RetainRVFunc, *ReleaseFunc;
+
+ /// RetainRVCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *RetainRVCallee, *AutoreleaseRVCallee, *ReleaseCallee,
+ *RetainCallee, *AutoreleaseCallee;
+
+ /// UsedInThisFunction - Flags which determine whether each of the
+ /// interesting runtime functions is in fact used in the current function.
+ unsigned UsedInThisFunction;
+
+ /// ImpreciseReleaseMDKind - The Metadata Kind for clang.imprecise_release
+ /// metadata.
+ unsigned ImpreciseReleaseMDKind;
+
+ Constant *getRetainRVCallee(Module *M);
+ Constant *getAutoreleaseRVCallee(Module *M);
+ Constant *getReleaseCallee(Module *M);
+ Constant *getRetainCallee(Module *M);
+ Constant *getAutoreleaseCallee(Module *M);
+
+ void OptimizeRetainCall(Function &F, Instruction *Retain);
+ bool OptimizeRetainRVCall(Function &F, Instruction *RetainRV);
+ void OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV);
+ void OptimizeIndividualCalls(Function &F);
+
+ void CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const;
+ bool VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains);
+ bool VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases);
+ bool Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void MoveCalls(Value *Arg, RRInfo &RetainsToMove, RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts);
+
+ bool PerformCodePlacement(DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases);
+
+ void OptimizeWeakCalls(Function &F);
+
+ bool OptimizeSequences(Function &F);
+
+ void OptimizeReturns(Function &F);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+ virtual void releaseMemory();
+
+ public:
+ static char ID;
+ ObjCARCOpt() : FunctionPass(ID) {
+ initializeObjCARCOptPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCOpt::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+INITIALIZE_PASS_DEPENDENCY(ObjCARCAliasAnalysis)
+INITIALIZE_PASS_END(ObjCARCOpt,
+ "objc-arc", "ObjC ARC optimization", false, false)
+
+Pass *llvm::createObjCARCOptPass() {
+ return new ObjCARCOpt();
+}
+
+void ObjCARCOpt::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<ObjCARCAliasAnalysis>();
+ AU.addRequired<AliasAnalysis>();
+ // ARC optimization doesn't currently split critical edges.
+ AU.setPreservesCFG();
+}
+
+Constant *ObjCARCOpt::getRetainRVCallee(Module *M) {
+ if (!RetainRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleasedReturnValue", FTy,
+ Attributes);
+ }
+ return RetainRVCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseRVCallee(Module *M) {
+ if (!AutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+ AutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_autoreleaseReturnValue", FTy,
+ Attributes);
+ }
+ return AutoreleaseRVCallee;
+}
+
+Constant *ObjCARCOpt::getReleaseCallee(Module *M) {
+ if (!ReleaseCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+ ReleaseCallee =
+ M->getOrInsertFunction(
+ "objc_release",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return ReleaseCallee;
+}
+
+Constant *ObjCARCOpt::getRetainCallee(Module *M) {
+ if (!RetainCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainCallee =
+ M->getOrInsertFunction(
+ "objc_retain",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return RetainCallee;
+}
+
+Constant *ObjCARCOpt::getAutoreleaseCallee(Module *M) {
+ if (!AutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ std::vector<Type *> Params;
+ Params.push_back(PointerType::getUnqual(Type::getInt8Ty(C)));
+ AttrListPtr Attributes;
+ Attributes = Attributes.addAttr(~0u, Attribute::NoUnwind);
+ AutoreleaseCallee =
+ M->getOrInsertFunction(
+ "objc_autorelease",
+ FunctionType::get(Params[0], Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return AutoreleaseCallee;
+}
+
+/// CanAlterRefCount - Test whether the given instruction can result in a
+/// reference count modification (positive or negative) for the pointer's
+/// object.
+static bool
+CanAlterRefCount(const Instruction *Inst, const Value *Ptr,
+ ProvenanceAnalysis &PA, InstructionClass Class) {
+ switch (Class) {
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_User:
+ // These operations never directly modify a reference count.
+ return false;
+ default: break;
+ }
+
+ ImmutableCallSite CS = static_cast<const Value *>(Inst);
+ assert(CS && "Only calls can alter reference counts!");
+
+ // See if AliasAnalysis can help us with the call.
+ AliasAnalysis::ModRefBehavior MRB = PA.getAA()->getModRefBehavior(CS);
+ if (AliasAnalysis::onlyReadsMemory(MRB))
+ return false;
+ if (AliasAnalysis::onlyAccessesArgPointees(MRB)) {
+ for (ImmutableCallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+ I != E; ++I) {
+ const Value *Op = *I;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ }
+
+ // Assume the worst.
+ return true;
+}
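+
+// For example (illustrative): a call which the chained AliasAnalysis proves
+// read-only (onlyReadsMemory) can never retain or release anything, so it
+// is rejected above without ever consulting ProvenanceAnalysis.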
+
+/// CanUse - Test whether the given instruction can "use" the given pointer's
+/// object in a way that requires the reference count to be positive.
+static bool
+CanUse(const Instruction *Inst, const Value *Ptr, ProvenanceAnalysis &PA,
+ InstructionClass Class) {
+ // IC_Call operations (as opposed to IC_CallOrUser) never "use" objc pointers.
+ if (Class == IC_Call)
+ return false;
+
+ // Consider various instructions which may have pointer arguments which are
+ // not "uses".
+ if (const ICmpInst *ICI = dyn_cast<ICmpInst>(Inst)) {
+ // Comparing a pointer with null, or any other constant, isn't really a use,
+ // because we don't care what the pointer points to, or about the values
+ // of any other dynamic reference-counted pointers.
+ if (!IsPotentialUse(ICI->getOperand(1)))
+ return false;
+ } else if (ImmutableCallSite CS = static_cast<const Value *>(Inst)) {
+ // For calls, just check the arguments (and not the callee operand).
+ for (ImmutableCallSite::arg_iterator OI = CS.arg_begin(),
+ OE = CS.arg_end(); OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+ } else if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ // Special-case stores, because we don't care about the stored value, just
+ // the store address.
+ const Value *Op = GetUnderlyingObjCPtr(SI->getPointerOperand());
+ // If we can't tell what the underlying object was, assume there is a
+ // dependence.
+ return IsPotentialUse(Op) && PA.related(Op, Ptr);
+ }
+
+ // Check each operand for a match.
+ for (User::const_op_iterator OI = Inst->op_begin(), OE = Inst->op_end();
+ OI != OE; ++OI) {
+ const Value *Op = *OI;
+ if (IsPotentialUse(Op) && PA.related(Ptr, Op))
+ return true;
+ }
+ return false;
+}
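+
+// For example (hypothetical IR): "%c = icmp eq i8* %x, null" is not a use
+// of %x's object in this sense, because the constant operand fails
+// IsPotentialUse; but "call void @f(i8* %x)" is a use whenever %x may be
+// related to the tracked pointer.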
+
+/// CanInterruptRV - Test whether the given instruction can autorelease
+/// any pointer or cause an autoreleasepool pop.
+static bool
+CanInterruptRV(InstructionClass Class) {
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_CallOrUser:
+ case IC_Call:
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ return true;
+ default:
+ return false;
+ }
+}
+
+namespace {
+ /// DependenceKind - There are several kinds of dependence-like concepts in
+ /// use here.
+ enum DependenceKind {
+ NeedsPositiveRetainCount,
+ CanChangeRetainCount,
+ RetainAutoreleaseDep, ///< Blocks objc_retainAutorelease.
+ RetainAutoreleaseRVDep, ///< Blocks objc_retainAutoreleaseReturnValue.
+ RetainRVDep ///< Blocks objc_retainAutoreleasedReturnValue.
+ };
+}
+
+/// Depends - Test if there can be dependencies on Inst through Arg. This
+/// function only tests dependencies relevant for removing pairs of calls.
+static bool
+Depends(DependenceKind Flavor, Instruction *Inst, const Value *Arg,
+ ProvenanceAnalysis &PA) {
+ // If we've reached the definition of Arg, stop.
+ if (Inst == Arg)
+ return true;
+
+ switch (Flavor) {
+ case NeedsPositiveRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanUse(Inst, Arg, PA, Class);
+ }
+ }
+
+ case CanChangeRetainCount: {
+ InstructionClass Class = GetInstructionClass(Inst);
+ switch (Class) {
+ case IC_AutoreleasepoolPop:
+ // Conservatively assume this can decrement any count.
+ return true;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ return false;
+ default:
+ return CanAlterRefCount(Inst, Arg, PA, Class);
+ }
+ }
+
+ case RetainAutoreleaseDep:
+ switch (GetBasicInstructionClass(Inst)) {
+ case IC_AutoreleasepoolPop:
+ // Don't merge an objc_autorelease with an objc_retain inside a different
+ // autoreleasepool scope.
+ return true;
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Nothing else matters for objc_retainAutorelease formation.
+ return false;
+ }
+ break;
+
+ case RetainAutoreleaseRVDep: {
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_RetainRV:
+ // Check for a retain of the same pointer for merging.
+ return GetObjCArg(Inst) == Arg;
+ default:
+ // Anything that can autorelease interrupts
+ // retainAutoreleaseReturnValue formation.
+ return CanInterruptRV(Class);
+ }
+ break;
+ }
+
+ case RetainRVDep:
+ return CanInterruptRV(GetBasicInstructionClass(Inst));
+ }
+
+ llvm_unreachable("Invalid dependence flavor");
+ return true;
+}
+
+/// FindDependencies - Walk up the CFG from StartPos (which is in StartBB) and
+/// find local and non-local dependencies on Arg.
+/// TODO: Cache results?
+static void
+FindDependencies(DependenceKind Flavor,
+ const Value *Arg,
+ BasicBlock *StartBB, Instruction *StartInst,
+ SmallPtrSet<Instruction *, 4> &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4> &Visited,
+ ProvenanceAnalysis &PA) {
+ BasicBlock::iterator StartPos = StartInst;
+
+ SmallVector<std::pair<BasicBlock *, BasicBlock::iterator>, 4> Worklist;
+ Worklist.push_back(std::make_pair(StartBB, StartPos));
+ do {
+ std::pair<BasicBlock *, BasicBlock::iterator> Pair =
+ Worklist.pop_back_val();
+ BasicBlock *LocalStartBB = Pair.first;
+ BasicBlock::iterator LocalStartPos = Pair.second;
+ BasicBlock::iterator StartBBBegin = LocalStartBB->begin();
+ for (;;) {
+ if (LocalStartPos == StartBBBegin) {
+ pred_iterator PI(LocalStartBB), PE(LocalStartBB, false);
+ if (PI == PE)
+ // If we've reached the function entry, produce a null dependence.
+ DependingInstructions.insert(0);
+ else
+ // Add the predecessors to the worklist.
+ do {
+ BasicBlock *PredBB = *PI;
+ if (Visited.insert(PredBB))
+ Worklist.push_back(std::make_pair(PredBB, PredBB->end()));
+ } while (++PI != PE);
+ break;
+ }
+
+ Instruction *Inst = --LocalStartPos;
+ if (Depends(Flavor, Inst, Arg, PA)) {
+ DependingInstructions.insert(Inst);
+ break;
+ }
+ }
+ } while (!Worklist.empty());
+
+ // Determine whether the original StartBB post-dominates all of the blocks we
+ // visited. If not, insert a sentinel indicating that most optimizations are
+ // not safe.
+ for (SmallPtrSet<const BasicBlock *, 4>::const_iterator I = Visited.begin(),
+ E = Visited.end(); I != E; ++I) {
+ const BasicBlock *BB = *I;
+ if (BB == StartBB)
+ continue;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI) {
+ const BasicBlock *Succ = *SI;
+ if (Succ != StartBB && !Visited.count(Succ)) {
+ DependingInstructions.insert(reinterpret_cast<Instruction *>(-1));
+ return;
+ }
+ }
+ }
+}
+
+static bool isNullOrUndef(const Value *V) {
+ return isa<ConstantPointerNull>(V) || isa<UndefValue>(V);
+}
+
+static bool isNoopInstruction(const Instruction *I) {
+ return isa<BitCastInst>(I) ||
+ (isa<GetElementPtrInst>(I) &&
+ cast<GetElementPtrInst>(I)->hasAllZeroIndices());
+}
+
+/// OptimizeRetainCall - Turn objc_retain into
+/// objc_retainAutoreleasedReturnValue if the operand is a return value.
+void
+ObjCARCOpt::OptimizeRetainCall(Function &F, Instruction *Retain) {
+ CallSite CS(GetObjCArg(Retain));
+ Instruction *Call = CS.getInstruction();
+ if (!Call) return;
+ if (Call->getParent() != Retain->getParent()) return;
+
+ // Check that the call is next to the retain.
+ BasicBlock::iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I != Retain)
+ return;
+
+ // Turn it into an objc_retainAutoreleasedReturnValue.
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(Retain)->setCalledFunction(getRetainRVCallee(F.getParent()));
+}
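+
+// Illustrative before/after (hypothetical IR):
+//
+//   %x = call i8* @producer()
+//   %y = call i8* @objc_retain(i8* %x)
+// =>
+//   %x = call i8* @producer()
+//   %y = call i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+//
+// Only the callee is rewritten; the call otherwise stays in place.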
+
+/// OptimizeRetainRVCall - Turn objc_retainAutoreleasedReturnValue into
+/// objc_retain if the operand is not a return value. Or, if it can be
+/// paired with an objc_autoreleaseReturnValue, delete the pair and
+/// return true.
+bool
+ObjCARCOpt::OptimizeRetainRVCall(Function &F, Instruction *RetainRV) {
+ // Check for the argument being from an immediately preceding call.
+ Value *Arg = GetObjCArg(RetainRV);
+ CallSite CS(Arg);
+ if (Instruction *Call = CS.getInstruction())
+ if (Call->getParent() == RetainRV->getParent()) {
+ BasicBlock::iterator I = Call;
+ ++I;
+ while (isNoopInstruction(I)) ++I;
+ if (&*I == RetainRV)
+ return false;
+ }
+
+ // Check for being preceded by an objc_autoreleaseReturnValue on the same
+ // pointer. In this case, we can delete the pair.
+ BasicBlock::iterator I = RetainRV, Begin = RetainRV->getParent()->begin();
+ if (I != Begin) {
+ do --I; while (I != Begin && isNoopInstruction(I));
+ if (GetBasicInstructionClass(I) == IC_AutoreleaseRV &&
+ GetObjCArg(I) == Arg) {
+ Changed = true;
+ ++NumPeeps;
+ EraseInstruction(I);
+ EraseInstruction(RetainRV);
+ return true;
+ }
+ }
+
+ // Turn it into a plain objc_retain.
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(RetainRV)->setCalledFunction(getRetainCallee(F.getParent()));
+ return false;
+}
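+
+// Illustrative pair deletion (hypothetical IR): a sequence such as
+//
+//   %y = call i8* @objc_autoreleaseReturnValue(i8* %x)
+//   %z = call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+//
+// cancels out and both calls are erased, while a retainRV whose operand is
+// not a return value is instead demoted to a plain objc_retain.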
+
+/// OptimizeAutoreleaseRVCall - Turn objc_autoreleaseReturnValue into
+/// objc_autorelease if the result is not used as a return value.
+void
+ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F, Instruction *AutoreleaseRV) {
+ // Check for a return of the pointer value.
+ const Value *Ptr = GetObjCArg(AutoreleaseRV);
+ for (Value::const_use_iterator UI = Ptr->use_begin(), UE = Ptr->use_end();
+ UI != UE; ++UI) {
+ const User *I = *UI;
+ if (isa<ReturnInst>(I) || GetBasicInstructionClass(I) == IC_RetainRV)
+ return;
+ }
+
+ Changed = true;
+ ++NumPeeps;
+ cast<CallInst>(AutoreleaseRV)->
+ setCalledFunction(getAutoreleaseCallee(F.getParent()));
+}
+
+/// OptimizeIndividualCalls - Visit each call, one at a time, and make
+/// simplifications without doing any additional analysis.
+void ObjCARCOpt::OptimizeIndividualCalls(Function &F) {
+ // Reset all the flags in preparation for recomputing them.
+ UsedInThisFunction = 0;
+
+ // Visit all objc_* calls in F.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+
+ switch (Class) {
+ default: break;
+
+ // Delete no-op casts. These function calls have special semantics, but
+ // the semantics are entirely implemented via lowering in the front-end,
+ // so by the time they reach the optimizer, they are just no-op calls
+ // which return their argument.
+ //
+ // There are gray areas here, as the ability to cast reference-counted
+ // pointers to raw void* and back allows code to break ARC assumptions;
+ // however, these are currently considered to be unimportant.
+ case IC_NoopCast:
+ Changed = true;
+ ++NumNoops;
+ EraseInstruction(Inst);
+ continue;
+
+ // If the pointer-to-weak-pointer is null, it's undefined behavior.
+ case IC_StoreWeak:
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained:
+ case IC_InitWeak:
+ case IC_DestroyWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(0))) {
+ const Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_CopyWeak:
+ case IC_MoveWeak: {
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(0)) ||
+ isNullOrUndef(CI->getArgOperand(1))) {
+ const Type *Ty = CI->getArgOperand(0)->getType();
+ new StoreInst(UndefValue::get(cast<PointerType>(Ty)->getElementType()),
+ Constant::getNullValue(Ty),
+ CI);
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ CI->eraseFromParent();
+ continue;
+ }
+ break;
+ }
+ case IC_Retain:
+ OptimizeRetainCall(F, Inst);
+ break;
+ case IC_RetainRV:
+ if (OptimizeRetainRVCall(F, Inst))
+ continue;
+ break;
+ case IC_AutoreleaseRV:
+ OptimizeAutoreleaseRVCall(F, Inst);
+ break;
+ }
+
+ // objc_autorelease(x) -> objc_release(x) if x is otherwise unused.
+ if (IsAutorelease(Class) && Inst->use_empty()) {
+ CallInst *Call = cast<CallInst>(Inst);
+ const Value *Arg = Call->getArgOperand(0);
+ Arg = FindSingleUseIdentifiedObject(Arg);
+ if (Arg) {
+ Changed = true;
+ ++NumAutoreleases;
+
+ // Create the declaration lazily.
+ LLVMContext &C = Inst->getContext();
+ CallInst *NewCall =
+ CallInst::Create(getReleaseCallee(F.getParent()),
+ Call->getArgOperand(0), "", Call);
+ NewCall->setMetadata(ImpreciseReleaseMDKind,
+ MDNode::get(C, ArrayRef<Value *>()));
+ EraseInstruction(Call);
+ Inst = NewCall;
+ Class = IC_Release;
+ }
+ }
+
+ // For functions which can never be passed stack arguments, add
+ // a tail keyword.
+ if (IsAlwaysTail(Class)) {
+ Changed = true;
+ cast<CallInst>(Inst)->setTailCall();
+ }
+
+ // Set nounwind as needed.
+ if (IsNoThrow(Class)) {
+ Changed = true;
+ cast<CallInst>(Inst)->setDoesNotThrow();
+ }
+
+ if (!IsNoopOnNull(Class)) {
+ UsedInThisFunction |= 1 << Class;
+ continue;
+ }
+
+ const Value *Arg = GetObjCArg(Inst);
+
+ // ARC calls with null are no-ops. Delete them.
+ if (isNullOrUndef(Arg)) {
+ Changed = true;
+ ++NumNoops;
+ EraseInstruction(Inst);
+ continue;
+ }
+
+ // Keep track of which of retain, release, autorelease, and retain_block
+ // are actually present in this function.
+ UsedInThisFunction |= 1 << Class;
+
+ // If Arg is a PHI, and one or more incoming values to the
+ // PHI are null, and the call is control-equivalent to the PHI, and there
+ // are no relevant side effects between the PHI and the call, the call
+ // could be pushed up to just those paths with non-null incoming values.
+ // For now, don't bother splitting critical edges for this.
+ SmallVector<std::pair<Instruction *, const Value *>, 4> Worklist;
+ Worklist.push_back(std::make_pair(Inst, Arg));
+ do {
+ std::pair<Instruction *, const Value *> Pair = Worklist.pop_back_val();
+ Inst = Pair.first;
+ Arg = Pair.second;
+
+ const PHINode *PN = dyn_cast<PHINode>(Arg);
+ if (!PN) continue;
+
+ // Determine if the PHI has any null operands, or any incoming
+ // critical edges.
+ bool HasNull = false;
+ bool HasCriticalEdges = false;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (isNullOrUndef(Incoming))
+ HasNull = true;
+ else if (cast<TerminatorInst>(PN->getIncomingBlock(i)->back())
+ .getNumSuccessors() != 1) {
+ HasCriticalEdges = true;
+ break;
+ }
+ }
+ // If we have null operands and no critical edges, optimize.
+ if (!HasCriticalEdges && HasNull) {
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+
+ // Check that there is nothing that cares about the reference
+ // count between the call and the phi.
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ Inst->getParent(), Inst,
+ DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() == 1 &&
+ *DependingInstructions.begin() == PN) {
+ Changed = true;
+ ++NumPartialNoops;
+ // Clone the call into each predecessor that has a non-null value.
+ CallInst *CInst = cast<CallInst>(Inst);
+ const Type *ParamTy = CInst->getArgOperand(0)->getType();
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *Incoming =
+ StripPointerCastsAndObjCCalls(PN->getIncomingValue(i));
+ if (!isNullOrUndef(Incoming)) {
+ CallInst *Clone = cast<CallInst>(CInst->clone());
+ Value *Op = PN->getIncomingValue(i);
+ Instruction *InsertPos = &PN->getIncomingBlock(i)->back();
+ if (Op->getType() != ParamTy)
+ Op = new BitCastInst(Op, ParamTy, "", InsertPos);
+ Clone->setArgOperand(0, Op);
+ Clone->insertBefore(InsertPos);
+ Worklist.push_back(std::make_pair(Clone, Incoming));
+ }
+ }
+ // Erase the original call.
+ EraseInstruction(CInst);
+ continue;
+ }
+ }
+ } while (!Worklist.empty());
+ }
+}
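+
+// A sketch of the PHI transformation above (hypothetical IR):
+//
+//   %p = phi i8* [ null, %bb1 ], [ %x, %bb2 ]
+//   call void @objc_release(i8* %p)
+//
+// becomes a release of %x cloned into %bb2 only, since the call is a no-op
+// along the null edge; this fires only when the dependence check shows
+// nothing between the phi and the call cares about the reference count.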
+
+/// CheckForCFGHazards - Check for critical edges, loop boundaries, irreducible
+/// control flow, or other CFG structures where moving code across the edge
+/// would result in it being executed more often than intended.
+void
+ObjCARCOpt::CheckForCFGHazards(const BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ BBState &MyStates) const {
+ // If any top-down local-use or possible-dec has a succ which is earlier in
+ // the sequence, forget it.
+ for (BBState::ptr_const_iterator I = MyStates.top_down_ptr_begin(),
+ E = MyStates.top_down_ptr_end(); I != E; ++I)
+ switch (I->second.GetSeq()) {
+ default: break;
+ case S_Use: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+ switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ case S_None:
+ case S_CanRelease:
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ SomeSuccHasSame = false;
+ break;
+ case S_Use:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ break;
+ }
+ case S_CanRelease: {
+ const Value *Arg = I->first;
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ bool SomeSuccHasSame = false;
+ bool AllSuccsHaveSame = true;
+ for (succ_const_iterator SI(TI), SE(TI, false); SI != SE; ++SI)
+ switch (BBStates[*SI].getPtrBottomUpState(Arg).GetSeq()) {
+ case S_None:
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ SomeSuccHasSame = false;
+ break;
+ case S_CanRelease:
+ SomeSuccHasSame = true;
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ AllSuccsHaveSame = false;
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ // If the state at the other end of any of the successor edges
+ // matches the current state, require all edges to match. This
+ // guards against loops in the middle of a sequence.
+ if (SomeSuccHasSame && !AllSuccsHaveSame)
+ MyStates.getPtrTopDownState(Arg).ClearSequenceProgress();
+ }
+ }
+}
+
+bool
+ObjCARCOpt::VisitBottomUp(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each successor to compute the initial state
+ // for the current block.
+ const TerminatorInst *TI = cast<TerminatorInst>(&BB->back());
+ succ_const_iterator SI(TI), SE(TI, false);
+ if (SI == SE)
+ MyStates.SetAsExit();
+ else
+ do {
+ const BasicBlock *Succ = *SI++;
+ if (Succ == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Succ);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromSucc(I->second);
+ while (SI != SE) {
+ Succ = *SI++;
+ if (Succ != BB) {
+ I = BBStates.find(Succ);
+ if (I != BBStates.end())
+ MyStates.MergeSucc(I->second);
+ }
+ }
+ break;
+ } while (SI != SE);
+
+ // Visit all the instructions, bottom-up.
+ for (BasicBlock::iterator I = BB->end(), E = BB->begin(); I != E; --I) {
+ Instruction *Inst = llvm::prior(I);
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+
+ // Check for a situation where we see two releases in a row on the same
+ // pointer. If so, make a note, and we'll circle back to revisit it after
+ // we've hopefully eliminated the second release, which may allow us to
+ // eliminate the first release too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Release || S.GetSeq() == S_MovableRelease)
+ NestingDetected = true;
+
+ S.SetSeqToRelease(Inst->getMetadata(ImpreciseReleaseMDKind));
+ S.RRI.clear();
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ S.RRI.Calls.insert(Inst);
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrBottomUpState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Use:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_CanRelease:
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ Retains[Inst] = S.RRI;
+ }
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearBottomUpPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.bottom_up_ptr_begin(),
+ ME = MyStates.bottom_up_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible retains and releases.
+ if (CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // Check for a retain (we're going bottom-up here).
+ S.DecrementRefCount();
+
+ // Check for a release.
+ if (!IsRetain(Class) && Class != IC_RetainBlock)
+ switch (Seq) {
+ case S_Use:
+ S.SetSeq(S_CanRelease);
+ continue;
+ case S_CanRelease:
+ case S_Release:
+ case S_MovableRelease:
+ case S_Stop:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_Release:
+ case S_MovableRelease:
+ if (CanUse(Inst, Ptr, PA, Class)) {
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+ S.SetSeq(S_Use);
+ } else if (Seq == S_Release &&
+ (Class == IC_User || Class == IC_CallOrUser)) {
+ // Non-movable releases depend on any possible objc pointer use.
+ S.SetSeq(S_Stop);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+ }
+ break;
+ case S_Stop:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_CanRelease:
+ case S_Use:
+ case S_None:
+ break;
+ case S_Retain:
+ llvm_unreachable("bottom-up pointer in retain state!");
+ }
+ }
+ }
+
+ return NestingDetected;
+}
+
+bool
+ObjCARCOpt::VisitTopDown(BasicBlock *BB,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool NestingDetected = false;
+ BBState &MyStates = BBStates[BB];
+
+ // Merge the states from each predecessor to compute the initial state
+ // for the current block.
+ const_pred_iterator PI(BB), PE(BB, false);
+ if (PI == PE)
+ MyStates.SetAsEntry();
+ else
+ do {
+ const BasicBlock *Pred = *PI++;
+ if (Pred == BB)
+ continue;
+ DenseMap<const BasicBlock *, BBState>::iterator I = BBStates.find(Pred);
+ if (I == BBStates.end())
+ continue;
+ MyStates.InitFromPred(I->second);
+ while (PI != PE) {
+ Pred = *PI++;
+ if (Pred != BB) {
+ I = BBStates.find(Pred);
+ if (I != BBStates.end())
+ MyStates.MergePred(I->second);
+ }
+ }
+ break;
+ } while (PI != PE);
+
+ // Visit all the instructions, top-down.
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
+ Instruction *Inst = I;
+ InstructionClass Class = GetInstructionClass(Inst);
+ const Value *Arg = 0;
+
+ switch (Class) {
+ case IC_RetainBlock:
+ case IC_Retain:
+ case IC_RetainRV: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+
+ // Don't do retain+release tracking for IC_RetainRV, because it's
+ // better to let it remain as the first instruction after a call.
+ if (Class != IC_RetainRV) {
+ // Check for a situation where we see two retains in a row on the same
+ // pointer. If so, make a note, and we'll circle back to revisit it after
+ // we've hopefully eliminated the second retain, which may allow us to
+ // eliminate the first retain too.
+ // Theoretically we could implement removal of nested retain+release
+ // pairs by making PtrState hold a stack of states, but this is
+ // simple and avoids adding overhead for the non-nested case.
+ if (S.GetSeq() == S_Retain)
+ NestingDetected = true;
+
+ S.SetSeq(S_Retain);
+ S.RRI.clear();
+ S.RRI.IsRetainBlock = Class == IC_RetainBlock;
+ S.RRI.KnownIncremented = S.IsKnownIncremented();
+ S.RRI.Calls.insert(Inst);
+ }
+
+ S.IncrementRefCount();
+ break;
+ }
+ case IC_Release: {
+ Arg = GetObjCArg(Inst);
+
+ PtrState &S = MyStates.getPtrTopDownState(Arg);
+ S.DecrementRefCount();
+
+ switch (S.GetSeq()) {
+ case S_Retain:
+ case S_CanRelease:
+ S.RRI.ReverseInsertPts.clear();
+ // FALL THROUGH
+ case S_Use:
+ S.RRI.ReleaseMetadata = Inst->getMetadata(ImpreciseReleaseMDKind);
+ S.RRI.IsTailCallRelease = cast<CallInst>(Inst)->isTailCall();
+ Releases[Inst] = S.RRI;
+ S.ClearSequenceProgress();
+ break;
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ break;
+ }
+ case IC_AutoreleasepoolPop:
+ // Conservatively, clear MyStates for all known pointers.
+ MyStates.clearTopDownPointers();
+ continue;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ // These are irrelevant.
+ continue;
+ default:
+ break;
+ }
+
+ // Consider any other possible effects of this instruction on each
+ // pointer being tracked.
+ for (BBState::ptr_iterator MI = MyStates.top_down_ptr_begin(),
+ ME = MyStates.top_down_ptr_end(); MI != ME; ++MI) {
+ const Value *Ptr = MI->first;
+ if (Ptr == Arg)
+ continue; // Handled above.
+ PtrState &S = MI->second;
+ Sequence Seq = S.GetSeq();
+
+ // Check for possible releases.
+ if (!IsRetain(Class) && Class != IC_RetainBlock &&
+ CanAlterRefCount(Inst, Ptr, PA, Class)) {
+ // This call may decrement the reference count.
+ S.DecrementRefCount();
+
+ // Check for a transition in the sequence.
+ switch (Seq) {
+ case S_Retain:
+ S.SetSeq(S_CanRelease);
+ S.RRI.ReverseInsertPts.clear();
+ S.RRI.ReverseInsertPts.insert(Inst);
+
+ // One call can't cause a transition from S_Retain to S_CanRelease
+ // and S_CanRelease to S_Use. If we've made the first transition,
+ // we're done.
+ continue;
+ case S_Use:
+ case S_CanRelease:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+
+ // Check for possible direct uses.
+ switch (Seq) {
+ case S_CanRelease:
+ if (CanUse(Inst, Ptr, PA, Class))
+ S.SetSeq(S_Use);
+ break;
+ case S_Use:
+ case S_Retain:
+ case S_None:
+ break;
+ case S_Stop:
+ case S_Release:
+ case S_MovableRelease:
+ llvm_unreachable("top-down pointer in release state!");
+ }
+ }
+ }
+
+ CheckForCFGHazards(BB, BBStates, MyStates);
+ return NestingDetected;
+}
+
+// Visit - Visit the function both top-down and bottom-up.
+bool
+ObjCARCOpt::Visit(Function &F,
+ DenseMap<const BasicBlock *, BBState> &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+ // Use postorder for bottom-up, and reverse-postorder for top-down, because we
+ // magically know that loops will be well behaved, i.e. they won't repeatedly
+ // call retain on a single pointer without doing a release.
+ bool BottomUpNestingDetected = false;
+ SmallVector<BasicBlock *, 8> PostOrder;
+ for (po_iterator<Function *> I = po_begin(&F), E = po_end(&F); I != E; ++I) {
+ BasicBlock *BB = *I;
+ PostOrder.push_back(BB);
+
+ BottomUpNestingDetected |= VisitBottomUp(BB, BBStates, Retains);
+ }
+
+ // Iterate through the post-order in reverse order, achieving a
+ // reverse-postorder traversal. We don't use the ReversePostOrderTraversal
+ // class here because it works by computing its own full postorder iteration,
+ // recording the sequence, and playing it back in reverse. Since we're already
+ // doing a full iteration above, we can just record the sequence manually and
+ // avoid the cost of having ReversePostOrderTraversal compute it.
+ bool TopDownNestingDetected = false;
+ for (SmallVectorImpl<BasicBlock *>::const_reverse_iterator
+ RI = PostOrder.rbegin(), RE = PostOrder.rend(); RI != RE; ++RI)
+ TopDownNestingDetected |= VisitTopDown(*RI, BBStates, Releases);
+
+ return TopDownNestingDetected && BottomUpNestingDetected;
+}
+
+/// MoveCalls - Move the calls in RetainsToMove and ReleasesToMove.
+void ObjCARCOpt::MoveCalls(Value *Arg,
+ RRInfo &RetainsToMove,
+ RRInfo &ReleasesToMove,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases,
+ SmallVectorImpl<Instruction *> &DeadInsts) {
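+ // Any of the retain declarations works here; they all take the same i8*
+ // parameter type, which is all that's needed below.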
+ const Type *ArgTy = Arg->getType();
+ const Type *ParamTy =
+ (RetainRVFunc ? RetainRVFunc :
+ RetainFunc ? RetainFunc :
+ RetainBlockFunc)->arg_begin()->getType();
+
+ // Insert the new retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = ReleasesToMove.ReverseInsertPts.begin(),
+ PE = ReleasesToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *InsertPt = *PI;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call =
+ CallInst::Create(RetainsToMove.IsRetainBlock ?
+ RetainBlockFunc : RetainFunc,
+ MyArg, "", InsertPt);
+ Call->setDoesNotThrow();
+ if (!RetainsToMove.IsRetainBlock)
+ Call->setTailCall();
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ PI = RetainsToMove.ReverseInsertPts.begin(),
+ PE = RetainsToMove.ReverseInsertPts.end(); PI != PE; ++PI) {
+ Instruction *LastUse = *PI;
+ Instruction *InsertPts[] = { 0, 0, 0 };
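+ // InsertPts is null-terminated: up to two insertion points (for an
+ // invoke's normal and unwind destinations) followed by a trailing null
+ // that stops the loop below.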
+ if (InvokeInst *II = dyn_cast<InvokeInst>(LastUse)) {
+ // We can't insert code immediately after an invoke instruction, so
+ // insert code at the beginning of both successor blocks instead.
+ // The invoke's return value isn't available in the unwind block,
+ // but our releases will never depend on it, because they must be
+ // paired with retains from before the invoke.
+ InsertPts[0] = II->getNormalDest()->getFirstNonPHI();
+ InsertPts[1] = II->getUnwindDest()->getFirstNonPHI();
+ } else {
+ // Insert code immediately after the last use.
+ InsertPts[0] = llvm::next(BasicBlock::iterator(LastUse));
+ }
+
+ for (Instruction **I = InsertPts; *I; ++I) {
+ Instruction *InsertPt = *I;
+ Value *MyArg = ArgTy == ParamTy ? Arg :
+ new BitCastInst(Arg, ParamTy, "", InsertPt);
+ CallInst *Call = CallInst::Create(ReleaseFunc, MyArg, "", InsertPt);
+ // Attach a clang.imprecise_release metadata tag, if appropriate.
+ if (MDNode *M = ReleasesToMove.ReleaseMetadata)
+ Call->setMetadata(ImpreciseReleaseMDKind, M);
+ Call->setDoesNotThrow();
+ if (ReleasesToMove.IsTailCallRelease)
+ Call->setTailCall();
+ }
+ }
+
+ // Delete the original retain and release calls.
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = RetainsToMove.Calls.begin(),
+ AE = RetainsToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRetain = *AI;
+ Retains.blot(OrigRetain);
+ DeadInsts.push_back(OrigRetain);
+ }
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ AI = ReleasesToMove.Calls.begin(),
+ AE = ReleasesToMove.Calls.end(); AI != AE; ++AI) {
+ Instruction *OrigRelease = *AI;
+ Releases.erase(OrigRelease);
+ DeadInsts.push_back(OrigRelease);
+ }
+}
+
+bool
+ObjCARCOpt::PerformCodePlacement(DenseMap<const BasicBlock *, BBState>
+ &BBStates,
+ MapVector<Value *, RRInfo> &Retains,
+ DenseMap<Value *, RRInfo> &Releases) {
+ bool AnyPairsCompletelyEliminated = false;
+ RRInfo RetainsToMove;
+ RRInfo ReleasesToMove;
+ SmallVector<Instruction *, 4> NewRetains;
+ SmallVector<Instruction *, 4> NewReleases;
+ SmallVector<Instruction *, 8> DeadInsts;
+
+ for (MapVector<Value *, RRInfo>::const_iterator I = Retains.begin(),
+ E = Retains.end(); I != E; ) {
+ Value *V = (I++)->first;
+ if (!V) continue; // blotted
+
+ Instruction *Retain = cast<Instruction>(V);
+ Value *Arg = GetObjCArg(Retain);
+
+ // If the object being released is in static or stack storage, we know it's
+ // not being managed by ObjC reference counting, so we can delete pairs
+ // regardless of what possible decrements or uses lie between them.
+ bool KnownSafe = isa<Constant>(Arg) || isa<AllocaInst>(Arg);
+
+ // If a pair happens in a region where it is known that the reference count
+ // is already incremented, we can similarly ignore possible decrements.
+ bool KnownIncrementedTD = true, KnownIncrementedBU = true;
+
+ // Connect the dots between the top-down-collected RetainsToMove and
+ // bottom-up-collected ReleasesToMove to form sets of related calls.
+ // This is an iterative process so that we connect multiple releases
+ // to multiple retains if needed.
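+ // The deltas track the retain/release balance, weighted by the number of
+ // CFG paths through each call's block; the counts track the weighted
+ // totals before and after the proposed code motion.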
+ unsigned OldDelta = 0;
+ unsigned NewDelta = 0;
+ unsigned OldCount = 0;
+ unsigned NewCount = 0;
+ bool FirstRelease = true;
+ bool FirstRetain = true;
+ NewRetains.push_back(Retain);
+ for (;;) {
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewRetains.begin(), NE = NewRetains.end(); NI != NE; ++NI) {
+ Instruction *NewRetain = *NI;
+ MapVector<Value *, RRInfo>::const_iterator It = Retains.find(NewRetain);
+ assert(It != Retains.end());
+ const RRInfo &NewRetainRRI = It->second;
+ KnownIncrementedTD &= NewRetainRRI.KnownIncremented;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewRetainRRI.Calls.begin(),
+ LE = NewRetainRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewRetainRelease = *LI;
+ DenseMap<Value *, RRInfo>::const_iterator Jt =
+ Releases.find(NewRetainRelease);
+ if (Jt == Releases.end())
+ goto next_retain;
+ const RRInfo &NewRetainReleaseRRI = Jt->second;
+ assert(NewRetainReleaseRRI.Calls.count(NewRetain));
+ if (ReleasesToMove.Calls.insert(NewRetainRelease)) {
+ OldDelta -=
+ BBStates[NewRetainRelease->getParent()].GetAllPathCount();
+
+ // Merge the ReleaseMetadata and IsTailCallRelease values.
+ if (FirstRelease) {
+ ReleasesToMove.ReleaseMetadata =
+ NewRetainReleaseRRI.ReleaseMetadata;
+ ReleasesToMove.IsTailCallRelease =
+ NewRetainReleaseRRI.IsTailCallRelease;
+ FirstRelease = false;
+ } else {
+ if (ReleasesToMove.ReleaseMetadata !=
+ NewRetainReleaseRRI.ReleaseMetadata)
+ ReleasesToMove.ReleaseMetadata = 0;
+ if (ReleasesToMove.IsTailCallRelease !=
+ NewRetainReleaseRRI.IsTailCallRelease)
+ ReleasesToMove.IsTailCallRelease = false;
+ }
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewRetainReleaseRRI.ReverseInsertPts.begin(),
+ RE = NewRetainReleaseRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (ReleasesToMove.ReverseInsertPts.insert(RIP))
+ NewDelta -= BBStates[RIP->getParent()].GetAllPathCount();
+ }
+ NewReleases.push_back(NewRetainRelease);
+ }
+ }
+ }
+ NewRetains.clear();
+ if (NewReleases.empty()) break;
+
+ // Back the other way.
+ for (SmallVectorImpl<Instruction *>::const_iterator
+ NI = NewReleases.begin(), NE = NewReleases.end(); NI != NE; ++NI) {
+ Instruction *NewRelease = *NI;
+ DenseMap<Value *, RRInfo>::const_iterator It =
+ Releases.find(NewRelease);
+ assert(It != Releases.end());
+ const RRInfo &NewReleaseRRI = It->second;
+ KnownIncrementedBU &= NewReleaseRRI.KnownIncremented;
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ LI = NewReleaseRRI.Calls.begin(),
+ LE = NewReleaseRRI.Calls.end(); LI != LE; ++LI) {
+ Instruction *NewReleaseRetain = *LI;
+ MapVector<Value *, RRInfo>::const_iterator Jt =
+ Retains.find(NewReleaseRetain);
+ if (Jt == Retains.end())
+ goto next_retain;
+ const RRInfo &NewReleaseRetainRRI = Jt->second;
+ assert(NewReleaseRetainRRI.Calls.count(NewRelease));
+ if (RetainsToMove.Calls.insert(NewReleaseRetain)) {
+ unsigned PathCount =
+ BBStates[NewReleaseRetain->getParent()].GetAllPathCount();
+ OldDelta += PathCount;
+ OldCount += PathCount;
+
+ // Merge the IsRetainBlock values.
+ if (FirstRetain) {
+ RetainsToMove.IsRetainBlock = NewReleaseRetainRRI.IsRetainBlock;
+ FirstRetain = false;
+ } else if (RetainsToMove.IsRetainBlock !=
+ NewReleaseRetainRRI.IsRetainBlock)
+ // It's not possible to merge the sequences if one uses
+ // objc_retain and the other uses objc_retainBlock.
+ goto next_retain;
+
+ // Collect the optimal insertion points.
+ if (!KnownSafe)
+ for (SmallPtrSet<Instruction *, 2>::const_iterator
+ RI = NewReleaseRetainRRI.ReverseInsertPts.begin(),
+ RE = NewReleaseRetainRRI.ReverseInsertPts.end();
+ RI != RE; ++RI) {
+ Instruction *RIP = *RI;
+ if (RetainsToMove.ReverseInsertPts.insert(RIP)) {
+ PathCount = BBStates[RIP->getParent()].GetAllPathCount();
+ NewDelta += PathCount;
+ NewCount += PathCount;
+ }
+ }
+ NewRetains.push_back(NewReleaseRetain);
+ }
+ }
+ }
+ NewReleases.clear();
+ if (NewRetains.empty()) break;
+ }
+
+ // If the pointer is known incremented, we can safely delete the pair
+ // regardless of what's between them.
+ if (KnownIncrementedTD || KnownIncrementedBU) {
+ RetainsToMove.ReverseInsertPts.clear();
+ ReleasesToMove.ReverseInsertPts.clear();
+ NewCount = 0;
+ }
+
+ // Determine whether the original retain and release calls are balanced
+ // across all paths through the program. If not, conservatively don't
+ // touch them.
+ // TODO: It's theoretically possible to do code motion in this case, as
+ // long as the existing imbalances are maintained.
+ if (OldDelta != 0)
+ goto next_retain;
+
+ // Determine whether the new insertion points we computed preserve the
+ // balance of retain and release calls through the program.
+ // TODO: If the fully aggressive solution isn't valid, try to find a
+ // less aggressive solution which is.
+ if (NewDelta != 0)
+ goto next_retain;
+
+ // Ok, everything checks out and we're all set. Let's move some code!
+ Changed = true;
+ AnyPairsCompletelyEliminated = NewCount == 0;
+ NumRRs += OldCount - NewCount;
+ MoveCalls(Arg, RetainsToMove, ReleasesToMove, Retains, Releases, DeadInsts);
+
+ next_retain:
+ NewReleases.clear();
+ NewRetains.clear();
+ RetainsToMove.clear();
+ ReleasesToMove.clear();
+ }
+
+ // Now that we're done moving everything, we can delete the newly dead
+ // instructions, as we no longer need them as insert points.
+ while (!DeadInsts.empty())
+ EraseInstruction(DeadInsts.pop_back_val());
+
+ return AnyPairsCompletelyEliminated;
+}
+
+/// OptimizeWeakCalls - Weak pointer optimizations.
+void ObjCARCOpt::OptimizeWeakCalls(Function &F) {
+ // First, do memdep-style RLE and S2L optimizations. We can't use memdep
+ // itself because it uses AliasAnalysis and we need to do provenance
+ // queries instead.
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_LoadWeak && Class != IC_LoadWeakRetained)
+ continue;
+
+ // Delete objc_loadWeak calls with no users.
+ if (Class == IC_LoadWeak && Inst->use_empty()) {
+ Inst->eraseFromParent();
+ continue;
+ }
+
+ // TODO: For now, just look for an earlier available version of this value
+ // within the same block. Theoretically, we could do memdep-style non-local
+ // analysis too, but that would require caching. A better approach would be
+ // to use the technique that EarlyCSE uses.
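+ // Scan backwards from this instruction to the start of the block, looking
+ // for an earlier weak call on the same pointer.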
+ inst_iterator Current = llvm::prior(I);
+ BasicBlock *CurrentBB = Current.getBasicBlockIterator();
+ for (BasicBlock::iterator B = CurrentBB->begin(),
+ J = Current.getInstructionIterator();
+ J != B; --J) {
+ Instruction *EarlierInst = &*llvm::prior(J);
+ InstructionClass EarlierClass = GetInstructionClass(EarlierInst);
+ switch (EarlierClass) {
+ case IC_LoadWeak:
+ case IC_LoadWeakRetained: {
+ // If this is loading from the same pointer, replace this load's value
+ // with that one.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall);
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_StoreWeak:
+ case IC_InitWeak: {
+ // If this is storing to the same pointer and has the same size etc.,
+ // replace this load's value with the stored value.
+ CallInst *Call = cast<CallInst>(Inst);
+ CallInst *EarlierCall = cast<CallInst>(EarlierInst);
+ Value *Arg = Call->getArgOperand(0);
+ Value *EarlierArg = EarlierCall->getArgOperand(0);
+ switch (PA.getAA()->alias(Arg, EarlierArg)) {
+ case AliasAnalysis::MustAlias:
+ Changed = true;
+ // If the load has a builtin retain, insert a plain retain for it.
+ if (Class == IC_LoadWeakRetained) {
+ CallInst *CI =
+ CallInst::Create(getRetainCallee(F.getParent()), EarlierCall,
+ "", Call);
+ CI->setTailCall();
+ }
+ // Zap the fully redundant load.
+ Call->replaceAllUsesWith(EarlierCall->getArgOperand(1));
+ Call->eraseFromParent();
+ goto clobbered;
+ case AliasAnalysis::MayAlias:
+ case AliasAnalysis::PartialAlias:
+ goto clobbered;
+ case AliasAnalysis::NoAlias:
+ break;
+ }
+ break;
+ }
+ case IC_MoveWeak:
+ case IC_CopyWeak:
+ // TODO: Grab the copied value.
+ goto clobbered;
+ case IC_AutoreleasepoolPush:
+ case IC_None:
+ case IC_User:
+ // Weak pointers are only modified through the weak entry points
+ // (and arbitrary calls, which could call the weak entry points).
+ break;
+ default:
+ // Anything else could modify the weak pointer.
+ goto clobbered;
+ }
+ }
+ clobbered:;
+ }
+
+ // Then, for each destroyWeak with an alloca operand, check to see if
+ // the alloca and all its users can be zapped.
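+ // If every user of the alloca is one of the weak entry points handled
+ // below, the weak pointer's value is never actually read, so the calls
+ // and the alloca itself are all dead.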
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ if (Class != IC_DestroyWeak)
+ continue;
+
+ CallInst *Call = cast<CallInst>(Inst);
+ Value *Arg = Call->getArgOperand(0);
+ if (AllocaInst *Alloca = dyn_cast<AllocaInst>(Arg)) {
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ++UI) {
+ Instruction *UserInst = cast<Instruction>(*UI);
+ switch (GetBasicInstructionClass(UserInst)) {
+ case IC_InitWeak:
+ case IC_StoreWeak:
+ case IC_DestroyWeak:
+ continue;
+ default:
+ goto done;
+ }
+ }
+ Changed = true;
+ for (Value::use_iterator UI = Alloca->use_begin(),
+ UE = Alloca->use_end(); UI != UE; ) {
+ CallInst *UserInst = cast<CallInst>(*UI++);
+ if (!UserInst->use_empty())
+ UserInst->replaceAllUsesWith(UserInst->getOperand(1));
+ UserInst->eraseFromParent();
+ }
+ Alloca->eraseFromParent();
+ done:;
+ }
+ }
+}
+
+/// OptimizeSequences - Identify program paths which execute sequences of
+/// retains and releases which can be eliminated.
+bool ObjCARCOpt::OptimizeSequences(Function &F) {
+ /// Releases, Retains - These are used to store the results of the main flow
+ /// analysis. These use Value* as the key instead of Instruction* so that the
+ /// map stays valid when we get around to rewriting code and calls get
+ /// replaced by arguments.
+ DenseMap<Value *, RRInfo> Releases;
+ MapVector<Value *, RRInfo> Retains;
+
+ /// BBStates - This is used during the traversal of the function to track the
+ /// states for each identified object at each block.
+ DenseMap<const BasicBlock *, BBState> BBStates;
+
+ // Analyze the CFG of the function, and all instructions.
+ bool NestingDetected = Visit(F, BBStates, Retains, Releases);
+
+ // Transform.
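+ // Request another pass only if pairs were eliminated and nesting was
+ // detected, since eliminating one pair of a nest may expose another.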
+ return PerformCodePlacement(BBStates, Retains, Releases) && NestingDetected;
+}
+
+/// OptimizeReturns - Look for this pattern:
+///
+/// %call = call i8* @something(...)
+/// %2 = call i8* @objc_retain(i8* %call)
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+///
+/// And delete the retain and autorelease.
+///
+/// Otherwise if it's just this:
+///
+/// %3 = call i8* @objc_autorelease(i8* %2)
+/// ret i8* %3
+///
+/// convert the autorelease to autoreleaseRV.
+void ObjCARCOpt::OptimizeReturns(Function &F) {
+ if (!F.getReturnType()->isPointerTy())
+ return;
+
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ BasicBlock *BB = FI;
+ ReturnInst *Ret = dyn_cast<ReturnInst>(&BB->back());
+ if (!Ret) continue;
+
+ const Value *Arg = StripPointerCastsAndObjCCalls(Ret->getOperand(0));
+ FindDependencies(NeedsPositiveRetainCount, Arg,
+ BB, Ret, DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Autorelease =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ if (!Autorelease)
+ goto next_block;
+ InstructionClass AutoreleaseClass =
+ GetBasicInstructionClass(Autorelease);
+ if (!IsAutorelease(AutoreleaseClass))
+ goto next_block;
+ if (GetObjCArg(Autorelease) != Arg)
+ goto next_block;
+
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Check that there is nothing that can affect the reference
+ // count between the autorelease and the retain.
+ FindDependencies(CanChangeRetainCount, Arg,
+ BB, Autorelease, DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Retain =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+ // Check that we found a retain with the same argument.
+ if (!Retain ||
+ !IsRetain(GetBasicInstructionClass(Retain)) ||
+ GetObjCArg(Retain) != Arg)
+ goto next_block;
+
+ DependingInstructions.clear();
+ Visited.clear();
+
+ // Convert the autorelease to an autoreleaseRV, since it's
+ // returning the value.
+ if (AutoreleaseClass == IC_Autorelease) {
+ Autorelease->setCalledFunction(getAutoreleaseRVCallee(F.getParent()));
+ AutoreleaseClass = IC_AutoreleaseRV;
+ }
+
+ // Check that there is nothing that can affect the reference
+ // count between the retain and the call.
+ FindDependencies(CanChangeRetainCount, Arg, BB, Retain,
+ DependingInstructions, Visited, PA);
+ if (DependingInstructions.size() != 1)
+ goto next_block;
+
+ {
+ CallInst *Call =
+ dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+
+ // Check that the pointer is the return value of the call.
+ if (!Call || Arg != Call)
+ goto next_block;
+
+ // Check that the call is a regular call.
+ InstructionClass Class = GetBasicInstructionClass(Call);
+ if (Class != IC_CallOrUser && Class != IC_Call)
+ goto next_block;
+
+ // If so, we can zap the retain and autorelease.
+ Changed = true;
+ ++NumRets;
+ EraseInstruction(Retain);
+ EraseInstruction(Autorelease);
+ }
+ }
+ }
+
+ next_block:
+ DependingInstructions.clear();
+ Visited.clear();
+ }
+}
+
+bool ObjCARCOpt::doInitialization(Module &M) {
+ if (!EnableARCOpts)
+ return false;
+
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // Identify the imprecise release metadata kind.
+ ImpreciseReleaseMDKind =
+ M.getContext().getMDKindID("clang.imprecise_release");
+
+ // Identify the declarations for objc_retain and friends.
+ RetainFunc = M.getFunction("objc_retain");
+ RetainBlockFunc = M.getFunction("objc_retainBlock");
+ RetainRVFunc = M.getFunction("objc_retainAutoreleasedReturnValue");
+ ReleaseFunc = M.getFunction("objc_release");
+
+ // Intuitively, objc_retain and the others would be nocapture; in practice
+ // they are not, because they return their argument value. And objc_release
+ // calls finalizers.
+
+ // These are initialized lazily.
+ RetainRVCallee = 0;
+ AutoreleaseRVCallee = 0;
+ ReleaseCallee = 0;
+ RetainCallee = 0;
+ AutoreleaseCallee = 0;
+
+ return false;
+}
+
+bool ObjCARCOpt::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // This pass performs several distinct transformations. As a compile-time aid
+ // when compiling code that isn't ObjC, skip these if the relevant ObjC
+ // library functions aren't declared.
+
+ // Preliminary optimizations. This also computes UsedInThisFunction.
+ OptimizeIndividualCalls(F);
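+
+ // UsedInThisFunction is a bitmask with one bit per instruction class, so
+ // each group of optimizations below can be skipped cheaply when the
+ // relevant entry points never appear in this function.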
+
+ // Optimizations for weak pointers.
+ if (UsedInThisFunction & ((1 << IC_LoadWeak) |
+ (1 << IC_LoadWeakRetained) |
+ (1 << IC_StoreWeak) |
+ (1 << IC_InitWeak) |
+ (1 << IC_CopyWeak) |
+ (1 << IC_MoveWeak) |
+ (1 << IC_DestroyWeak)))
+ OptimizeWeakCalls(F);
+
+ // Optimizations for retain+release pairs.
+ if (UsedInThisFunction & ((1 << IC_Retain) |
+ (1 << IC_RetainRV) |
+ (1 << IC_RetainBlock)))
+ if (UsedInThisFunction & (1 << IC_Release))
+ // Run OptimizeSequences until it either stops making changes or
+ // no retain+release pair nesting is detected.
+ while (OptimizeSequences(F)) {}
+
+ // Optimizations if objc_autorelease is used.
+ if (UsedInThisFunction &
+ ((1 << IC_Autorelease) | (1 << IC_AutoreleaseRV)))
+ OptimizeReturns(F);
+
+ return Changed;
+}
+
+void ObjCARCOpt::releaseMemory() {
+ PA.clear();
+}
+
+//===----------------------------------------------------------------------===//
+// ARC contraction.
+//===----------------------------------------------------------------------===//
+
+// TODO: ObjCARCContract could insert PHI nodes when uses aren't
+// dominated by single calls.
+
+#include "llvm/Operator.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Analysis/Dominators.h"
+
+STATISTIC(NumStoreStrongs, "Number of objc_storeStrong calls formed");
+
+namespace {
+ /// ObjCARCContract - Late ARC optimizations. These change the IR in a way
+ /// that makes it difficult to be analyzed by ObjCARCOpt, so it's run late.
+ class ObjCARCContract : public FunctionPass {
+ bool Changed;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+ ProvenanceAnalysis PA;
+
+ /// Run - A flag indicating whether this optimization pass should run.
+ bool Run;
+
+ /// StoreStrongCallee, etc. - Declarations for ObjC runtime
+ /// functions, for use in creating calls to them. These are initialized
+ /// lazily to avoid cluttering up the Module with unused declarations.
+ Constant *StoreStrongCallee,
+ *RetainAutoreleaseCallee, *RetainAutoreleaseRVCallee;
+
+ /// RetainRVMarker - The inline asm string to insert between calls and
+ /// RetainRV calls to make the optimization work on targets which need it.
+ const MDString *RetainRVMarker;
+
+ Constant *getStoreStrongCallee(Module *M);
+ Constant *getRetainAutoreleaseCallee(Module *M);
+ Constant *getRetainAutoreleaseRVCallee(Module *M);
+
+ bool ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited);
+
+ void ContractRelease(Instruction *Release,
+ inst_iterator &Iter);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+ virtual bool doInitialization(Module &M);
+ virtual bool runOnFunction(Function &F);
+
+ public:
+ static char ID;
+ ObjCARCContract() : FunctionPass(ID) {
+ initializeObjCARCContractPass(*PassRegistry::getPassRegistry());
+ }
+ };
+}
+
+char ObjCARCContract::ID = 0;
+INITIALIZE_PASS_BEGIN(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_DEPENDENCY(DominatorTree)
+INITIALIZE_PASS_END(ObjCARCContract,
+ "objc-arc-contract", "ObjC ARC contraction", false, false)
+
+Pass *llvm::createObjCARCContractPass() {
+ return new ObjCARCContract();
+}
+
+void ObjCARCContract::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<AliasAnalysis>();
+ AU.addRequired<DominatorTree>();
+ AU.setPreservesCFG();
+}
+
+Constant *ObjCARCContract::getStoreStrongCallee(Module *M) {
+ if (!StoreStrongCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ Type *I8XX = PointerType::getUnqual(I8X);
+ std::vector<Type *> Params;
+ Params.push_back(I8XX);
+ Params.push_back(I8X);
+
+ AttrListPtr Attributes;
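+ // Index ~0u addresses the function itself; index 1 is the first
+ // parameter, the i8** slot, which objc_storeStrong does not capture.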
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ Attributes.addAttr(1, Attribute::NoCapture);
+
+ StoreStrongCallee =
+ M->getOrInsertFunction(
+ "objc_storeStrong",
+ FunctionType::get(Type::getVoidTy(C), Params, /*isVarArg=*/false),
+ Attributes);
+ }
+ return StoreStrongCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseCallee(Module *M) {
+ if (!RetainAutoreleaseCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainAutoreleaseCallee =
+ M->getOrInsertFunction("objc_retainAutorelease", FTy, Attributes);
+ }
+ return RetainAutoreleaseCallee;
+}
+
+Constant *ObjCARCContract::getRetainAutoreleaseRVCallee(Module *M) {
+ if (!RetainAutoreleaseRVCallee) {
+ LLVMContext &C = M->getContext();
+ Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ std::vector<Type *> Params;
+ Params.push_back(I8X);
+ const FunctionType *FTy =
+ FunctionType::get(I8X, Params, /*isVarArg=*/false);
+ AttrListPtr Attributes;
+ Attributes.addAttr(~0u, Attribute::NoUnwind);
+ RetainAutoreleaseRVCallee =
+ M->getOrInsertFunction("objc_retainAutoreleaseReturnValue", FTy,
+ Attributes);
+ }
+ return RetainAutoreleaseRVCallee;
+}
+
+/// ContractAutorelease - Merge an autorelease with a retain into a fused
+/// call.
+bool
+ObjCARCContract::ContractAutorelease(Function &F, Instruction *Autorelease,
+ InstructionClass Class,
+ SmallPtrSet<Instruction *, 4>
+ &DependingInstructions,
+ SmallPtrSet<const BasicBlock *, 4>
+ &Visited) {
+ const Value *Arg = GetObjCArg(Autorelease);
+
+ // Check that there are no instructions between the retain and the
+ // autorelease (such as an autoreleasePoolPop) which may change the count.
+ CallInst *Retain = 0;
+ if (Class == IC_AutoreleaseRV)
+ FindDependencies(RetainAutoreleaseRVDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+ else
+ FindDependencies(RetainAutoreleaseDep, Arg,
+ Autorelease->getParent(), Autorelease,
+ DependingInstructions, Visited, PA);
+
+ Visited.clear();
+ if (DependingInstructions.size() != 1) {
+ DependingInstructions.clear();
+ return false;
+ }
+
+ Retain = dyn_cast_or_null<CallInst>(*DependingInstructions.begin());
+ DependingInstructions.clear();
+
+ if (!Retain ||
+ GetBasicInstructionClass(Retain) != IC_Retain ||
+ GetObjCArg(Retain) != Arg)
+ return false;
+
+ Changed = true;
+ ++NumPeeps;
+
+ if (Class == IC_AutoreleaseRV)
+ Retain->setCalledFunction(getRetainAutoreleaseRVCallee(F.getParent()));
+ else
+ Retain->setCalledFunction(getRetainAutoreleaseCallee(F.getParent()));
+
+ EraseInstruction(Autorelease);
+ return true;
+}
+
+/// ContractRelease - Attempt to merge an objc_release with a store, load, and
+/// objc_retain to form an objc_storeStrong. This can be a little tricky because
+/// the instructions don't always appear in order, and there may be unrelated
+/// intervening instructions.
+void ObjCARCContract::ContractRelease(Instruction *Release,
+ inst_iterator &Iter) {
+ LoadInst *Load = dyn_cast<LoadInst>(GetObjCArg(Release));
+ if (!Load || Load->isVolatile()) return;
+
+ // For now, require everything to be in one basic block.
+ BasicBlock *BB = Release->getParent();
+ if (Load->getParent() != BB) return;
+
+ // Walk down to find the store.
+ BasicBlock::iterator I = Load, End = BB->end();
+ ++I;
+ AliasAnalysis::Location Loc = AA->getLocation(Load);
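+ // Skip over the release itself, any retains, and instructions that don't
+ // modify the loaded-from location.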
+ while (I != End &&
+ (&*I == Release ||
+ IsRetain(GetBasicInstructionClass(I)) ||
+ !(AA->getModRefInfo(I, Loc) & AliasAnalysis::Mod)))
+ ++I;
+ StoreInst *Store = dyn_cast<StoreInst>(I);
+ if (!Store || Store->isVolatile()) return;
+ if (Store->getPointerOperand() != Loc.Ptr) return;
+
+ Value *New = StripPointerCastsAndObjCCalls(Store->getValueOperand());
+
+ // Walk up to find the retain.
+ I = Store;
+ BasicBlock::iterator Begin = BB->begin();
+ while (I != Begin && GetBasicInstructionClass(I) != IC_Retain)
+ --I;
+ Instruction *Retain = I;
+ if (GetBasicInstructionClass(Retain) != IC_Retain) return;
+ if (GetObjCArg(Retain) != New) return;
+
+ Changed = true;
+ ++NumStoreStrongs;
+
+ LLVMContext &C = Release->getContext();
+ const Type *I8X = PointerType::getUnqual(Type::getInt8Ty(C));
+ const Type *I8XX = PointerType::getUnqual(I8X);
+
+ Value *Args[] = { Load->getPointerOperand(), New };
+ if (Args[0]->getType() != I8XX)
+ Args[0] = new BitCastInst(Args[0], I8XX, "", Store);
+ if (Args[1]->getType() != I8X)
+ Args[1] = new BitCastInst(Args[1], I8X, "", Store);
+ CallInst *StoreStrong =
+ CallInst::Create(getStoreStrongCallee(BB->getParent()->getParent()),
+ Args, "", Store);
+ StoreStrong->setDoesNotThrow();
+ StoreStrong->setDebugLoc(Store->getDebugLoc());
+
+ if (&*Iter == Store) ++Iter;
+ Store->eraseFromParent();
+ Release->eraseFromParent();
+ EraseInstruction(Retain);
+ if (Load->use_empty())
+ Load->eraseFromParent();
+}
+
+bool ObjCARCContract::doInitialization(Module &M) {
+ Run = ModuleHasARC(M);
+ if (!Run)
+ return false;
+
+ // These are initialized lazily.
+ StoreStrongCallee = 0;
+ RetainAutoreleaseCallee = 0;
+ RetainAutoreleaseRVCallee = 0;
+
+ // Initialize RetainRVMarker.
+ RetainRVMarker = 0;
+ if (NamedMDNode *NMD =
+ M.getNamedMetadata("clang.arc.retainAutoreleasedReturnValueMarker"))
+ if (NMD->getNumOperands() == 1) {
+ const MDNode *N = NMD->getOperand(0);
+ if (N->getNumOperands() == 1)
+ if (const MDString *S = dyn_cast<MDString>(N->getOperand(0)))
+ RetainRVMarker = S;
+ }
+
+ return false;
+}
+
+bool ObjCARCContract::runOnFunction(Function &F) {
+ if (!EnableARCOpts)
+ return false;
+
+ // If nothing in the Module uses ARC, don't do anything.
+ if (!Run)
+ return false;
+
+ Changed = false;
+ AA = &getAnalysis<AliasAnalysis>();
+ DT = &getAnalysis<DominatorTree>();
+
+ PA.setAA(&getAnalysis<AliasAnalysis>());
+
+ // For ObjC library calls which return their argument, replace uses of the
+ // argument with uses of the call return value, if it dominates the use. This
+ // reduces register pressure.
+ SmallPtrSet<Instruction *, 4> DependingInstructions;
+ SmallPtrSet<const BasicBlock *, 4> Visited;
+ for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ) {
+ Instruction *Inst = &*I++;
+
+ // Only these library routines return their argument. In particular,
+ // objc_retainBlock does not necessarily return its argument.
+ InstructionClass Class = GetBasicInstructionClass(Inst);
+ switch (Class) {
+ case IC_Retain:
+ case IC_FusedRetainAutorelease:
+ case IC_FusedRetainAutoreleaseRV:
+ break;
+ case IC_Autorelease:
+ case IC_AutoreleaseRV:
+ if (ContractAutorelease(F, Inst, Class, DependingInstructions, Visited))
+ continue;
+ break;
+ case IC_RetainRV: {
+ // If we're compiling for a target which needs a special inline-asm
+ // marker to do the retainAutoreleasedReturnValue optimization,
+ // insert it now.
+ if (!RetainRVMarker)
+ break;
+ BasicBlock::iterator BBI = Inst;
+ --BBI;
+ while (isNoopInstruction(BBI)) --BBI;
+ if (&*BBI == GetObjCArg(Inst)) {
+ InlineAsm *IA =
+ InlineAsm::get(FunctionType::get(Type::getVoidTy(Inst->getContext()),
+ /*isVarArg=*/false),
+ RetainRVMarker->getString(),
+ /*Constraints=*/"", /*hasSideEffects=*/true);
+ CallInst::Create(IA, "", Inst);
+ }
+ break;
+ }
+ case IC_InitWeak: {
+ // objc_initWeak(p, null) => *p = null
+ CallInst *CI = cast<CallInst>(Inst);
+ if (isNullOrUndef(CI->getArgOperand(1))) {
+ Value *Null =
+ ConstantPointerNull::get(cast<PointerType>(CI->getType()));
+ Changed = true;
+ new StoreInst(Null, CI->getArgOperand(0), CI);
+ CI->replaceAllUsesWith(Null);
+ CI->eraseFromParent();
+ }
+ continue;
+ }
+ case IC_Release:
+ ContractRelease(Inst, I);
+ continue;
+ default:
+ continue;
+ }
+
+ // Don't use GetObjCArg because we don't want to look through bitcasts
+ // and such; to do the replacement, the argument must have type i8*.
+ const Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+ for (;;) {
+ // If we're compiling bugpointed code, don't get in trouble.
+ if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
+ break;
+ // Look through the uses of the pointer.
+ for (Value::const_use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
+ UI != UE; ) {
+ Use &U = UI.getUse();
+ unsigned OperandNo = UI.getOperandNo();
+ ++UI; // Increment UI now, because we may unlink its element.
+ if (Instruction *UserInst = dyn_cast<Instruction>(U.getUser()))
+ if (Inst != UserInst && DT->dominates(Inst, UserInst)) {
+ Changed = true;
+ Instruction *Replacement = Inst;
+ const Type *UseTy = U.get()->getType();
+ if (PHINode *PHI = dyn_cast<PHINode>(UserInst)) {
+ // For PHI nodes, insert the bitcast in the predecessor block.
+ unsigned ValNo =
+ PHINode::getIncomingValueNumForOperand(OperandNo);
+ BasicBlock *BB =
+ PHI->getIncomingBlock(ValNo);
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "",
+ &BB->back());
+ for (unsigned i = 0, e = PHI->getNumIncomingValues();
+ i != e; ++i)
+ if (PHI->getIncomingBlock(i) == BB) {
+ // Keep the UI iterator valid.
+ if (&PHI->getOperandUse(
+ PHINode::getOperandNumForIncomingValue(i)) ==
+ &UI.getUse())
+ ++UI;
+ PHI->setIncomingValue(i, Replacement);
+ }
+ } else {
+ if (Replacement->getType() != UseTy)
+ Replacement = new BitCastInst(Replacement, UseTy, "", UserInst);
+ U.set(Replacement);
+ }
+ }
+ }
+
+ // If Arg is a no-op casted pointer, strip one level of casts and
+ // iterate.
+ if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
+ Arg = BI->getOperand(0);
+ else if (isa<GEPOperator>(Arg) &&
+ cast<GEPOperator>(Arg)->hasAllZeroIndices())
+ Arg = cast<GEPOperator>(Arg)->getPointerOperand();
+ else if (isa<GlobalAlias>(Arg) &&
+ !cast<GlobalAlias>(Arg)->mayBeOverridden())
+ Arg = cast<GlobalAlias>(Arg)->getAliasee();
+ else
+ break;
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index c1dfe15..e6341ae 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -812,7 +812,7 @@ Value *Reassociate::OptimizeAdd(Instruction *I,
// because we can percolate the negate out. Watch for minint, which
// cannot be positivified.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Factor))
- if (CI->getValue().isNegative() && !CI->getValue().isMinSignedValue()) {
+ if (CI->isNegative() && !CI->isMinValue(true)) {
Factor = ConstantInt::get(CI->getContext(), -CI->getValue());
assert(!Duplicates.count(Factor) &&
"Shouldn't have two constant factors, missed a canonicalize");
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 32a0506..302c287 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -48,7 +48,12 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopUnswitchPass(Registry);
initializeLoopIdiomRecognizePass(Registry);
initializeLowerAtomicPass(Registry);
+ initializeLowerExpectIntrinsicPass(Registry);
initializeMemCpyOptPass(Registry);
+ initializeObjCARCAliasAnalysisPass(Registry);
+ initializeObjCARCExpandPass(Registry);
+ initializeObjCARCContractPass(Registry);
+ initializeObjCARCOptPass(Registry);
initializeReassociatePass(Registry);
initializeRegToMemPass(Registry);
initializeSCCPPass(Registry);
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 8938b28..7d6349c 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -30,6 +30,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
@@ -152,7 +153,8 @@ namespace {
void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts);
- static MemTransferInst *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
+ static MemTransferInst *isOnlyCopiedFromConstantGlobal(
+ AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
};
// SROA_DT - SROA that uses DominatorTree.
@@ -228,16 +230,30 @@ class ConvertToScalarInfo {
/// which means that mem2reg can't promote it.
bool IsNotTrivial;
+ /// ScalarKind - Tracks the kind of alloca being considered for promotion,
+ /// computed based on the uses of the alloca rather than the LLVM type system.
+ enum {
+ Unknown,
+
+ // Accesses via GEPs that are consistent with element access of a vector
+ // type. This will not be converted into a vector unless there is a later
+ // access using an actual vector type.
+ ImplicitVector,
+
+ // Accesses via vector operations and GEPs that are consistent with the
+ // layout of a vector type.
+ Vector,
+
+ // An integer bag-of-bits with bitwise operations for insertion and
+ // extraction. Any combination of types can be converted into this kind
+ // of scalar.
+ Integer
+ } ScalarKind;
+
/// VectorTy - This tracks the type that we should promote the vector to if
/// it is possible to turn it into a vector. This starts out null, and if it
/// isn't possible to turn into a vector type, it gets set to VoidTy.
- const Type *VectorTy;
-
- /// HadAVector - True if there is at least one vector access to the alloca.
- /// We don't want to turn random arrays into vectors and use vector element
- /// insert/extract, but if there are element accesses to something that is
- /// also declared as a vector, we do want to promote to a vector.
- bool HadAVector;
+ const VectorType *VectorTy;
/// HadNonMemTransferAccess - True if there is at least one access to the
/// alloca that is not a MemTransferInst. We don't want to turn structs into
@@ -246,14 +262,14 @@ class ConvertToScalarInfo {
public:
explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
- : AllocaSize(Size), TD(td), IsNotTrivial(false), VectorTy(0),
- HadAVector(false), HadNonMemTransferAccess(false) { }
+ : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
+ VectorTy(0), HadNonMemTransferAccess(false) { }
AllocaInst *TryConvert(AllocaInst *AI);
private:
bool CanConvertToScalar(Value *V, uint64_t Offset);
- void MergeInType(const Type *In, uint64_t Offset, bool IsLoadOrStore);
+ void MergeInTypeForLoadOrStore(const Type *In, uint64_t Offset);
bool MergeInVectorType(const VectorType *VInTy, uint64_t Offset);
void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
@@ -274,6 +290,16 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
return 0;
+ // If an alloca has only memset / memcpy uses, it may still have an Unknown
+ // ScalarKind. Treat it as an Integer below.
+ if (ScalarKind == Unknown)
+ ScalarKind = Integer;
+
+ // FIXME: It should be possible to promote the vector type up to the alloca's
+ // size.
+ if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
+ ScalarKind = Integer;
+
// If we were able to find a vector type that can handle this with
// insert/extract elements, and if there was at least one use that had
// a vector type, promote this to a vector. We don't want to promote
@@ -281,14 +307,15 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
const Type *NewTy;
- if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
+ if (ScalarKind == Vector) {
+ assert(VectorTy && "Missing type for vector scalar.");
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
NewTy = VectorTy; // Use the vector type.
} else {
unsigned BitWidth = AllocaSize * 8;
- if (!HadAVector && !HadNonMemTransferAccess &&
- !TD.fitsInLegalInteger(BitWidth))
+ if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
+ !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
return 0;
DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
@@ -300,8 +327,9 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
return NewAI;
}
-/// MergeInType - Add the 'In' type to the accumulated vector type (VectorTy)
-/// so far at the offset specified by Offset (which is specified in bytes).
+/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
+/// (VectorTy) so far at the offset specified by Offset (which is specified in
+/// bytes).
///
/// There are three cases we handle here:
/// 1) A union of vector types of the same size and potentially its elements.
@@ -316,11 +344,11 @@ AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
/// large) integer type with extract and insert operations where the loads
/// and stores would mutate the memory. We mark this by setting VectorTy
/// to VoidTy.
-void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
- bool IsLoadOrStore) {
+void ConvertToScalarInfo::MergeInTypeForLoadOrStore(const Type *In,
+ uint64_t Offset) {
// If we already decided to turn this into a blob of integer memory, there is
// nothing to be done.
- if (VectorTy && VectorTy->isVoidTy())
+ if (ScalarKind == Integer)
return;
// If this could be contributing to a vector, analyze it.
@@ -336,7 +364,7 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
// Full width accesses can be ignored, because they can always be turned
// into bitcasts.
unsigned EltSize = In->getPrimitiveSizeInBits()/8;
- if (IsLoadOrStore && EltSize == AllocaSize)
+ if (EltSize == AllocaSize)
return;
// If we're accessing something that could be an element of a vector, see
@@ -345,11 +373,12 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
(!VectorTy || Offset * 8 < VectorTy->getPrimitiveSizeInBits())) {
if (!VectorTy) {
+ ScalarKind = ImplicitVector;
VectorTy = VectorType::get(In, AllocaSize/EltSize);
return;
}
- unsigned CurrentEltSize = cast<VectorType>(VectorTy)->getElementType()
+ unsigned CurrentEltSize = VectorTy->getElementType()
->getPrimitiveSizeInBits()/8;
if (EltSize == CurrentEltSize)
return;
@@ -361,16 +390,13 @@ void ConvertToScalarInfo::MergeInType(const Type *In, uint64_t Offset,
// Otherwise, we have a case that we can't handle with an optimized vector
// form. We can still turn this into a large integer.
- VectorTy = Type::getVoidTy(In->getContext());
+ ScalarKind = Integer;
}
-/// MergeInVectorType - Handles the vector case of MergeInType, returning true
-/// if the type was successfully merged and false otherwise.
+/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
+/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
uint64_t Offset) {
- // Remember if we saw a vector type.
- HadAVector = true;
-
// TODO: Support nonzero offsets?
if (Offset != 0)
return false;
@@ -382,19 +408,22 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
// If this the first vector we see, remember the type so that we know the
// element size.
if (!VectorTy) {
+ ScalarKind = Vector;
VectorTy = VInTy;
return true;
}
- unsigned BitWidth = cast<VectorType>(VectorTy)->getBitWidth();
+ unsigned BitWidth = VectorTy->getBitWidth();
unsigned InBitWidth = VInTy->getBitWidth();
// Vectors of the same size can be converted using a simple bitcast.
- if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8))
+ if (InBitWidth == BitWidth && AllocaSize == (InBitWidth / 8)) {
+ ScalarKind = Vector;
return true;
+ }
- const Type *ElementTy = cast<VectorType>(VectorTy)->getElementType();
- const Type *InElementTy = cast<VectorType>(VInTy)->getElementType();
+ const Type *ElementTy = VectorTy->getElementType();
+ const Type *InElementTy = VInTy->getElementType();
// Do not allow mixed integer and floating-point accesses from vectors of
// different sizes.
@@ -429,6 +458,7 @@ bool ConvertToScalarInfo::MergeInVectorType(const VectorType *VInTy,
}
// Pick the largest of the two vector types.
+ ScalarKind = Vector;
if (InBitWidth > BitWidth)
VectorTy = VInTy;
@@ -456,7 +486,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (LI->getType()->isX86_MMXTy())
return false;
HadNonMemTransferAccess = true;
- MergeInType(LI->getType(), Offset, true);
+ MergeInTypeForLoadOrStore(LI->getType(), Offset);
continue;
}
@@ -467,7 +497,7 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
if (SI->getOperand(0)->getType()->isX86_MMXTy())
return false;
HadNonMemTransferAccess = true;
- MergeInType(SI->getOperand(0)->getType(), Offset, true);
+ MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
continue;
}
@@ -498,10 +528,22 @@ bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
// If this is a constant sized memset of a constant value (e.g. 0) we can
// handle it.
if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
- // Store of constant value and constant size.
- if (!isa<ConstantInt>(MSI->getValue()) ||
- !isa<ConstantInt>(MSI->getLength()))
+ // Store of constant value.
+ if (!isa<ConstantInt>(MSI->getValue()))
+ return false;
+
+ // Store of constant size.
+ ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
+ if (!Len)
return false;
+
+ // If the size differs from the alloca, we can only convert the alloca to
+ // an integer bag-of-bits.
+ // FIXME: This should handle all of the cases that are currently accepted
+ // as vector element insertions.
+ if (Len->getZExtValue() != AllocaSize || Offset != 0)
+ ScalarKind = Integer;
+
IsNotTrivial = true; // Can't be mem2reg'd.
HadNonMemTransferAccess = true;
continue;
@@ -1053,16 +1095,37 @@ bool SROA::runOnFunction(Function &F) {
namespace {
class AllocaPromoter : public LoadAndStorePromoter {
AllocaInst *AI;
+ DIBuilder *DIB;
+ SmallVector<DbgDeclareInst *, 4> DDIs;
+ SmallVector<DbgValueInst *, 4> DVIs;
public:
AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
- DbgDeclareInst *DD, DIBuilder *&DB)
- : LoadAndStorePromoter(Insts, S, DD, DB), AI(0) {}
+ DIBuilder *DB)
+ : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}
void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
// Remember which alloca we're promoting (for isInstInList).
this->AI = AI;
+ if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
+ for (Value::use_iterator UI = DebugNode->use_begin(),
+ E = DebugNode->use_end(); UI != E; ++UI)
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
+ DDIs.push_back(DDI);
+ else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
+ DVIs.push_back(DVI);
+
LoadAndStorePromoter::run(Insts);
AI->eraseFromParent();
+ for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ DDI->eraseFromParent();
+ }
+ for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ DVI->eraseFromParent();
+ }
}
virtual bool isInstInList(Instruction *I,
@@ -1071,6 +1134,45 @@ public:
return LI->getOperand(0) == AI;
return cast<StoreInst>(I)->getPointerOperand() == AI;
}
+
+ virtual void updateDebugInfo(Instruction *Inst) const {
+ for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
+ E = DDIs.end(); I != E; ++I) {
+ DbgDeclareInst *DDI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+ ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
+ }
+ for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
+ E = DVIs.end(); I != E; ++I) {
+ DbgValueInst *DVI = *I;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ Instruction *DbgVal = NULL;
+ // If an argument is zero extended then use argument directly. The ZExt
+ // may be zapped by an optimization pass in future.
+ Argument *ExtendedArg = NULL;
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
+ if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
+ ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
+ if (ExtendedArg)
+ DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
+ DIVariable(DVI->getVariable()),
+ SI);
+ else
+ DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
+ DIVariable(DVI->getVariable()),
+ SI);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+ Instruction *DbgVal =
+ DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
+ DIVariable(DVI->getVariable()), LI);
+ DbgVal->setDebugLoc(DVI->getDebugLoc());
+ }
+ }
+ }
};
} // end anon namespace
@@ -1262,7 +1364,7 @@ static bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
LoadInst *TrueLoad =
Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
LoadInst *FalseLoad =
- Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".t");
+ Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
// Transfer alignment and TBAA info if present.
TrueLoad->setAlignment(LI->getAlignment());
@@ -1340,10 +1442,9 @@ bool SROA::performPromotion(Function &F) {
DT = &getAnalysis<DominatorTree>();
BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function
-
+ DIBuilder DIB(*F.getParent());
bool Changed = false;
SmallVector<Instruction*, 64> Insts;
- DIBuilder *DIB = 0;
while (1) {
Allocas.clear();
@@ -1367,11 +1468,7 @@ bool SROA::performPromotion(Function &F) {
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
UI != E; ++UI)
Insts.push_back(cast<Instruction>(*UI));
-
- DbgDeclareInst *DDI = FindAllocaDbgDeclare(AI);
- if (DDI && !DIB)
- DIB = new DIBuilder(*AI->getParent()->getParent()->getParent());
- AllocaPromoter(Insts, SSA, DDI, DIB).run(AI, Insts);
+ AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
Insts.clear();
}
}
@@ -1379,10 +1476,6 @@ bool SROA::performPromotion(Function &F) {
Changed = true;
}
- // FIXME: Is there a better way to handle the lazy initialization of DIB
- // so that there doesn't need to be an explicit delete?
- delete DIB;
-
return Changed;
}
@@ -1403,8 +1496,8 @@ static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
-// which runs on all of the malloc/alloca instructions in the function, removing
-// them if they are only used by getelementptr instructions.
+// which runs on all of the alloca instructions in the function, removing them
+// if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
std::vector<AllocaInst*> WorkList;
@@ -1438,12 +1531,15 @@ bool SROA::performScalarRepl(Function &F) {
// the constant global instead. This is commonly produced by the CFE by
// constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
// is only subsequently read.
- if (MemTransferInst *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
+ SmallVector<Instruction *, 4> ToDelete;
+ if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
- DEBUG(dbgs() << " memcpy = " << *TheCopy << '\n');
- Constant *TheSrc = cast<Constant>(TheCopy->getSource());
+ DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
+ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
+ ToDelete[i]->eraseFromParent();
+ Constant *TheSrc = cast<Constant>(Copy->getSource());
AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
- TheCopy->eraseFromParent(); // Don't mutate the global.
+ Copy->eraseFromParent(); // Don't mutate the global.
AI->eraseFromParent();
++NumGlobals;
Changed = true;
@@ -2467,8 +2563,14 @@ static bool PointsToConstantGlobal(Value *V) {
/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
-static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
- bool isOffset) {
+static bool
+isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
+ bool isOffset,
+ SmallVector<Instruction *, 4> &LifetimeMarkers) {
+ // We track lifetime intrinsics as we encounter them. If we decide to go
+ // ahead and replace the value with the global, this lets the caller quickly
+ // eliminate the markers.
+
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
User *U = cast<Instruction>(*UI);
@@ -2480,7 +2582,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
// If uses of the bitcast are ok, we are ok.
- if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
+ if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
+ LifetimeMarkers))
return false;
continue;
}
@@ -2488,7 +2591,8 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
- isOffset || !GEP->hasAllZeroIndices()))
+ isOffset || !GEP->hasAllZeroIndices(),
+ LifetimeMarkers))
return false;
continue;
}
@@ -2514,6 +2618,16 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
continue;
}
+ // Lifetime intrinsics can be handled by the caller.
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+ II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ assert(II->use_empty() && "Lifetime markers have no result to use!");
+ LifetimeMarkers.push_back(II);
+ continue;
+ }
+ }
+
// If this isn't our memcpy/memmove, reject it as something we can't
// handle.
MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
@@ -2550,9 +2664,11 @@ static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global. If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
-MemTransferInst *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
+MemTransferInst *
+SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
+ SmallVector<Instruction*, 4> &ToDelete) {
MemTransferInst *TheCopy = 0;
- if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
+ if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
return TheCopy;
return 0;
}
diff --git a/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 7e9cc80..a66b3e3 100644
--- a/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -91,8 +91,7 @@ static void ChangeToUnreachable(Instruction *I, bool UseLLVMTrap) {
static void ChangeToCall(InvokeInst *II) {
BasicBlock *BB = II->getParent();
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
- CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args.begin(),
- Args.end(), "", II);
+ CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
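
This hunk is one instance of an API migration visible throughout the patch: CallInst::Create (and InvokeInst::Create) now take an ArrayRef<Value*> instead of a begin/end iterator pair. A minimal before/after sketch, assuming an InvokeInst *II being converted to a call:

    SmallVector<Value*, 8> Args(II->op_begin(), II->op_end() - 3);
    // Old: CallInst::Create(II->getCalledValue(), Args.begin(), Args.end(), "", II);
    // New: a SmallVector converts implicitly to ArrayRef<Value*>.
    CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II);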
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 6247b03..7c415e5 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -992,9 +992,9 @@ struct FFSOpt : public LibCallOptimization {
}
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
- const Type *ArgType = Op->getType();
+ Type *ArgType = Op->getType();
Value *F = Intrinsic::getDeclaration(Callee->getParent(),
- Intrinsic::cttz, &ArgType, 1);
+ Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, Op, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1), "tmp");
V = B.CreateIntCast(V, B.getInt32Ty(), false, "tmp");
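
Intrinsic::getDeclaration moved the same way, from a (const Type**, count) pair to ArrayRef<Type*>, with the const dropped from the overload types. The ffs(x) rewrite above then comes down to roughly the following builder calls; this is a sketch in which M, Op and B are assumed to be the module, the integer argument and an IRBuilder:

    // ffs(x) -> x != 0 ? (i32)llvm.cttz(x) + 1 : 0
    Type *ArgType = Op->getType();
    Value *F = Intrinsic::getDeclaration(M, Intrinsic::cttz, ArgType);
    Value *V = B.CreateCall(F, Op, "cttz");
    V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
    V = B.CreateIntCast(V, B.getInt32Ty(), /*isSigned=*/false);
    Value *NonZero = B.CreateICmpNE(Op, Constant::getNullValue(ArgType));
    Value *Ret = B.CreateSelect(NonZero, V, B.getInt32(0)); // replaces the ffs() call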
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 92464e8..b4f74f9 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -153,13 +153,13 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, Pass *P) {
// Delete the unconditional branch from the predecessor...
PredBB->getInstList().pop_back();
- // Move all definitions in the successor to the predecessor...
- PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
-
// Make all PHI nodes that referred to BB now refer to Pred as their
// source...
BB->replaceAllUsesWith(PredBB);
+ // Move all definitions in the successor to the predecessor...
+ PredBB->getInstList().splice(PredBB->end(), BB->getInstList());
+
// Inherit predecessor's name if it exists.
if (!PredBB->hasName())
PredBB->takeName(BB);
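
The reordering here (and the matching hunks in CloneFunction.cpp, InlineFunction.cpp, Local.cpp and LoopUnroll.cpp below) is deliberate: replaceAllUsesWith has to run while BB still holds its instructions, so that every PHI naming BB as an incoming block is retargeted before the splice empties BB. The safe sequence, condensed:

    PredBB->getInstList().pop_back();   // drop PredBB's unconditional branch
    BB->replaceAllUsesWith(PredBB);     // retarget PHIs that name BB first
    PredBB->getInstList().splice(PredBB->end(), BB->getInstList());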
diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp
index d6206a3..92ce500 100644
--- a/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -193,44 +193,22 @@ BasicBlock *llvm::SplitCriticalEdge(TerminatorInst *TI, unsigned SuccNum,
// If there are any PHI nodes in DestBB, we need to update them so that they
// merge incoming values from NewBB instead of from TIBB.
- if (PHINode *APHI = dyn_cast<PHINode>(DestBB->begin())) {
- // This conceptually does:
- // foreach (PHINode *PN in DestBB)
- // PN->setIncomingBlock(PN->getIncomingBlock(TIBB), NewBB);
- // but is optimized for two cases.
-
- if (APHI->getNumIncomingValues() <= 8) { // Small # preds case.
- unsigned BBIdx = 0;
- for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
- // We no longer enter through TIBB, now we come in through NewBB.
- // Revector exactly one entry in the PHI node that used to come from
- // TIBB to come from NewBB.
- PHINode *PN = cast<PHINode>(I);
-
- // Reuse the previous value of BBIdx if it lines up. In cases where we
- // have multiple phi nodes with *lots* of predecessors, this is a speed
- // win because we don't have to scan the PHI looking for TIBB. This
- // happens because the BB list of PHI nodes are usually in the same
- // order.
- if (PN->getIncomingBlock(BBIdx) != TIBB)
- BBIdx = PN->getBasicBlockIndex(TIBB);
- PN->setIncomingBlock(BBIdx, NewBB);
- }
- } else {
- // However, the foreach loop is slow for blocks with lots of predecessors
- // because PHINode::getIncomingBlock is O(n) in # preds. Instead, walk
- // the user list of TIBB to find the PHI nodes.
- SmallPtrSet<PHINode*, 16> UpdatedPHIs;
-
- for (Value::use_iterator UI = TIBB->use_begin(), E = TIBB->use_end();
- UI != E; ) {
- Value::use_iterator Use = UI++;
- if (PHINode *PN = dyn_cast<PHINode>(*Use)) {
- // Remove one entry from each PHI.
- if (PN->getParent() == DestBB && UpdatedPHIs.insert(PN))
- PN->setOperand(Use.getOperandNo(), NewBB);
- }
- }
+ {
+ unsigned BBIdx = 0;
+ for (BasicBlock::iterator I = DestBB->begin(); isa<PHINode>(I); ++I) {
+ // We no longer enter through TIBB, now we come in through NewBB.
+ // Revector exactly one entry in the PHI node that used to come from
+ // TIBB to come from NewBB.
+ PHINode *PN = cast<PHINode>(I);
+
+ // Reuse the previous value of BBIdx if it lines up. In cases where we
+ // have multiple phi nodes with *lots* of predecessors, this is a speed
+ // win because we don't have to scan the PHI looking for TIBB. This
+ // happens because the BB list of PHI nodes is usually in the same
+ // order.
+ if (PN->getIncomingBlock(BBIdx) != TIBB)
+ BBIdx = PN->getBasicBlockIndex(TIBB);
+ PN->setIncomingBlock(BBIdx, NewBB);
}
}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 5b76bb2..204c2c6 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -5,7 +5,6 @@ add_llvm_library(LLVMTransformUtils
BreakCriticalEdges.cpp
BuildLibCalls.cpp
CloneFunction.cpp
- CloneLoop.cpp
CloneModule.cpp
CodeExtractor.cpp
DemoteRegToStack.cpp
@@ -15,6 +14,7 @@ add_llvm_library(LLVMTransformUtils
Local.cpp
LoopSimplify.cpp
LoopUnroll.cpp
+ LowerExpectIntrinsic.cpp
LowerInvoke.cpp
LowerSwitch.cpp
Mem2Reg.cpp
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index d967ceb..6ea831f 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -140,7 +140,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Function *llvm::CloneFunction(const Function *F, ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
ClonedCodeInfo *CodeInfo) {
- std::vector<const Type*> ArgTypes;
+ std::vector<Type*> ArgTypes;
// The user might be deleting arguments to the function by specifying them in
// the VMap. If so, we need to not add the arguments to the arg ty vector
@@ -342,18 +342,6 @@ ConstantFoldMappedInstruction(const Instruction *I) {
Ops.size(), TD);
}
-static DebugLoc
-UpdateInlinedAtInfo(const DebugLoc &InsnDL, const DebugLoc &TheCallDL,
- LLVMContext &Ctx) {
- DebugLoc NewLoc = TheCallDL;
- if (MDNode *IA = InsnDL.getInlinedAt(Ctx))
- NewLoc = UpdateInlinedAtInfo(DebugLoc::getFromDILocation(IA), TheCallDL,
- Ctx);
-
- return DebugLoc::get(InsnDL.getLine(), InsnDL.getCol(),
- InsnDL.getScope(Ctx), NewLoc.getAsMDNode(Ctx));
-}
-
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
/// effect of this is to copy significantly less code in cases where (for
@@ -418,50 +406,14 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
if (PHINode *PN = dyn_cast<PHINode>(I)) {
// Skip over all PHI nodes, remembering them for later.
BasicBlock::const_iterator OldI = BI->begin();
- for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
- if (I->hasMetadata()) {
- if (!TheCallDL.isUnknown()) {
- DebugLoc IDL = I->getDebugLoc();
- if (!IDL.isUnknown()) {
- DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
- I->getContext());
- I->setDebugLoc(NewDL);
- }
- } else {
- // The cloned instruction has dbg info but the call instruction
- // does not have dbg info. Remove dbg info from cloned instruction.
- I->setDebugLoc(DebugLoc());
- }
- }
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
PHIToResolve.push_back(cast<PHINode>(OldI));
- }
}
- // FIXME:
- // FIXME:
- // FIXME: Unclone all this metadata stuff.
- // FIXME:
- // FIXME:
-
// Otherwise, remap the rest of the instructions normally.
- for (; I != NewBB->end(); ++I) {
- if (I->hasMetadata()) {
- if (!TheCallDL.isUnknown()) {
- DebugLoc IDL = I->getDebugLoc();
- if (!IDL.isUnknown()) {
- DebugLoc NewDL = UpdateInlinedAtInfo(IDL, TheCallDL,
- I->getContext());
- I->setDebugLoc(NewDL);
- }
- } else {
- // The cloned instruction has dbg info but the call instruction
- // does not have dbg info. Remove dbg info from cloned instruction.
- I->setDebugLoc(DebugLoc());
- }
- }
+ for (; I != NewBB->end(); ++I)
RemapInstruction(I, VMap,
ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
- }
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
@@ -572,12 +524,12 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// removed, so we just need to splice the blocks.
BI->eraseFromParent();
- // Move all the instructions in the succ to the pred.
- I->getInstList().splice(I->end(), Dest->getInstList());
-
// Make all PHI nodes that referred to Dest now refer to I as their source.
Dest->replaceAllUsesWith(I);
+ // Move all the instructions in the succ to the pred.
+ I->getInstList().splice(I->end(), Dest->getInstList());
+
// Remove the dest block.
Dest->eraseFromParent();
diff --git a/lib/Transforms/Utils/CloneLoop.cpp b/lib/Transforms/Utils/CloneLoop.cpp
deleted file mode 100644
index 87dd141..0000000
--- a/lib/Transforms/Utils/CloneLoop.cpp
+++ /dev/null
@@ -1,128 +0,0 @@
-//===- CloneLoop.cpp - Clone loop nest ------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the CloneLoop interface which makes a copy of a loop.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Analysis/Dominators.h"
-
-
-using namespace llvm;
-
-/// CloneDominatorInfo - Clone a basic block's dominator tree. It is expected
-/// that the basic block is already cloned.
-static void CloneDominatorInfo(BasicBlock *BB,
- ValueToValueMapTy &VMap,
- DominatorTree *DT) {
-
- assert (DT && "DominatorTree is not available");
- ValueToValueMapTy::iterator BI = VMap.find(BB);
- assert (BI != VMap.end() && "BasicBlock clone is missing");
- BasicBlock *NewBB = cast<BasicBlock>(BI->second);
-
- // NewBB already got dominator info.
- if (DT->getNode(NewBB))
- return;
-
- assert (DT->getNode(BB) && "BasicBlock does not have dominator info");
- // Entry block is not expected here. Infinite loops are not to be cloned.
- assert (DT->getNode(BB)->getIDom() && "BasicBlock does not have immediate dominator");
- BasicBlock *BBDom = DT->getNode(BB)->getIDom()->getBlock();
-
- // NewBB's dominator is either BB's dominator or BB's dominator's clone.
- BasicBlock *NewBBDom = BBDom;
- ValueToValueMapTy::iterator BBDomI = VMap.find(BBDom);
- if (BBDomI != VMap.end()) {
- NewBBDom = cast<BasicBlock>(BBDomI->second);
- if (!DT->getNode(NewBBDom))
- CloneDominatorInfo(BBDom, VMap, DT);
- }
- DT->addNewBlock(NewBB, NewBBDom);
-}
-
-/// CloneLoop - Clone Loop. Clone dominator info. Populate VMap
-/// using old blocks to new blocks mapping.
-Loop *llvm::CloneLoop(Loop *OrigL, LPPassManager *LPM, LoopInfo *LI,
- ValueToValueMapTy &VMap, Pass *P) {
-
- DominatorTree *DT = NULL;
- if (P)
- DT = P->getAnalysisIfAvailable<DominatorTree>();
-
- SmallVector<BasicBlock *, 16> NewBlocks;
-
- // Populate loop nest.
- SmallVector<Loop *, 8> LoopNest;
- LoopNest.push_back(OrigL);
-
-
- Loop *NewParentLoop = NULL;
- do {
- Loop *L = LoopNest.pop_back_val();
- Loop *NewLoop = new Loop();
-
- if (!NewParentLoop)
- NewParentLoop = NewLoop;
-
- LPM->insertLoop(NewLoop, L->getParentLoop());
-
- // Clone Basic Blocks.
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".clone");
- VMap[BB] = NewBB;
- if (P)
- LPM->cloneBasicBlockSimpleAnalysis(BB, NewBB, L);
- NewLoop->addBasicBlockToLoop(NewBB, LI->getBase());
- NewBlocks.push_back(NewBB);
- }
-
- // Clone dominator info.
- if (DT)
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- CloneDominatorInfo(BB, VMap, DT);
- }
-
- // Process sub loops
- for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
- LoopNest.push_back(*I);
- } while (!LoopNest.empty());
-
- // Remap instructions to reference operands from VMap.
- for(SmallVector<BasicBlock *, 16>::iterator NBItr = NewBlocks.begin(),
- NBE = NewBlocks.end(); NBItr != NBE; ++NBItr) {
- BasicBlock *NB = *NBItr;
- for(BasicBlock::iterator BI = NB->begin(), BE = NB->end();
- BI != BE; ++BI) {
- Instruction *Insn = BI;
- for (unsigned index = 0, num_ops = Insn->getNumOperands();
- index != num_ops; ++index) {
- Value *Op = Insn->getOperand(index);
- ValueToValueMapTy::iterator OpItr = VMap.find(Op);
- if (OpItr != VMap.end())
- Insn->setOperand(index, OpItr->second);
- }
- }
- }
-
- BasicBlock *Latch = OrigL->getLoopLatch();
- Function *F = Latch->getParent();
- F->getBasicBlockList().insert(OrigL->getHeader(),
- NewBlocks.begin(), NewBlocks.end());
-
-
- return NewParentLoop;
-}
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp
index 1046c38..a08fa35 100644
--- a/lib/Transforms/Utils/CloneModule.cpp
+++ b/lib/Transforms/Utils/CloneModule.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Module.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/TypeSymbolTable.h"
#include "llvm/Constant.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
@@ -32,20 +31,13 @@ Module *llvm::CloneModule(const Module *M) {
return CloneModule(M, VMap);
}
-Module *llvm::CloneModule(const Module *M,
- ValueToValueMapTy &VMap) {
- // First off, we need to create the new module...
+Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) {
+ // First off, we need to create the new module.
Module *New = new Module(M->getModuleIdentifier(), M->getContext());
New->setDataLayout(M->getDataLayout());
New->setTargetTriple(M->getTargetTriple());
New->setModuleInlineAsm(M->getModuleInlineAsm());
-
- // Copy all of the type symbol table entries over.
- const TypeSymbolTable &TST = M->getTypeSymbolTable();
- for (TypeSymbolTable::const_iterator TI = TST.begin(), TE = TST.end();
- TI != TE; ++TI)
- New->addTypeName(TI->first, TI->second);
-
+
// Copy all of the dependent libraries over.
for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I)
New->addLibrary(*I);
@@ -88,8 +80,7 @@ Module *llvm::CloneModule(const Module *M,
I != E; ++I) {
GlobalVariable *GV = cast<GlobalVariable>(VMap[I]);
if (I->hasInitializer())
- GV->setInitializer(cast<Constant>(MapValue(I->getInitializer(),
- VMap, RF_None)));
+ GV->setInitializer(MapValue(I->getInitializer(), VMap));
GV->setLinkage(I->getLinkage());
GV->setThreadLocal(I->isThreadLocal());
GV->setConstant(I->isConstant());
@@ -119,8 +110,8 @@ Module *llvm::CloneModule(const Module *M,
I != E; ++I) {
GlobalAlias *GA = cast<GlobalAlias>(VMap[I]);
GA->setLinkage(I->getLinkage());
- if (const Constant* C = I->getAliasee())
- GA->setAliasee(cast<Constant>(MapValue(C, VMap, RF_None)));
+ if (const Constant *C = I->getAliasee())
+ GA->setAliasee(MapValue(C, VMap));
}
// And named metadata....
@@ -129,8 +120,7 @@ Module *llvm::CloneModule(const Module *M,
const NamedMDNode &NMD = *I;
NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());
for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i)
- NewNMD->addOperand(cast<MDNode>(MapValue(NMD.getOperand(i), VMap,
- RF_None)));
+ NewNMD->addOperand(MapValue(NMD.getOperand(i), VMap));
}
return New;
diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp
index 8c133ea..0813523 100644
--- a/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/lib/Transforms/Utils/CodeExtractor.cpp
@@ -258,7 +258,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
default: RetTy = Type::getInt16Ty(header->getContext()); break;
}
- std::vector<const Type*> paramTy;
+ std::vector<Type*> paramTy;
// Add the types of the input values to the function's argument list
for (Values::const_iterator i = inputs.begin(),
@@ -279,7 +279,7 @@ Function *CodeExtractor::constructFunction(const Values &inputs,
}
DEBUG(dbgs() << "Function type: " << *RetTy << " f(");
- for (std::vector<const Type*>::iterator i = paramTy.begin(),
+ for (std::vector<Type*>::iterator i = paramTy.begin(),
e = paramTy.end(); i != e; ++i)
DEBUG(dbgs() << **i << ", ");
DEBUG(dbgs() << ")\n");
@@ -403,7 +403,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
AllocaInst *Struct = 0;
if (AggregateArgs && (inputs.size() + outputs.size() > 0)) {
- std::vector<const Type*> ArgTypes;
+ std::vector<Type*> ArgTypes;
for (Values::iterator v = StructValues.begin(),
ve = StructValues.end(); v != ve; ++v)
ArgTypes.push_back((*v)->getType());
@@ -429,7 +429,7 @@ emitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer,
}
// Emit the call to the function
- CallInst *call = CallInst::Create(newFunction, params.begin(), params.end(),
+ CallInst *call = CallInst::Create(newFunction, params,
NumExitBlocks > 1 ? "targetBlock" : "");
codeReplacer->getInstList().push_back(call);
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 8416170..d5b382e 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -449,11 +449,8 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
for (unsigned i = 2, e = Outer->getNumArgOperands(); i != e; ++i)
NewSelector.push_back(Outer->getArgOperand(i));
- CallInst *NewInner = CallInst::Create(Inner->getCalledValue(),
- NewSelector.begin(),
- NewSelector.end(),
- "",
- Inner);
+ CallInst *NewInner =
+ IRBuilder<>(Inner).CreateCall(Inner->getCalledValue(), NewSelector);
// No need to copy attributes, calling convention, etc.
NewInner->takeName(Inner);
Inner->replaceAllUsesWith(NewInner);
@@ -489,8 +486,7 @@ static bool HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB,
InvokeInst *II =
InvokeInst::Create(CI->getCalledValue(), Split,
Invoke.getOuterUnwindDest(),
- InvokeArgs.begin(), InvokeArgs.end(),
- CI->getName(), BB);
+ InvokeArgs, CI->getName(), BB);
II->setCallingConv(CI->getCallingConv());
II->setAttributes(CI->getAttributes());
@@ -664,7 +660,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
LLVMContext &Context = Arg->getContext();
- const Type *VoidPtrTy = Type::getInt8PtrTy(Context);
+ Type *VoidPtrTy = Type::getInt8PtrTy(Context);
// Create the alloca. If we have TargetData, use nice alignment.
unsigned Align = 1;
@@ -681,10 +677,10 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
Value *NewAlloca = new AllocaInst(AggTy, 0, Align, Arg->getName(),
&*Caller->begin()->begin());
// Emit a memcpy.
- const Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
+ Type *Tys[3] = {VoidPtrTy, VoidPtrTy, Type::getInt64Ty(Context)};
Function *MemCpyFn = Intrinsic::getDeclaration(Caller->getParent(),
Intrinsic::memcpy,
- Tys, 3);
+ Tys);
Value *DestCast = new BitCastInst(NewAlloca, VoidPtrTy, "tmp", TheCall);
Value *SrcCast = new BitCastInst(Arg, VoidPtrTy, "tmp", TheCall);
@@ -703,7 +699,7 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
ConstantInt::get(Type::getInt32Ty(Context), 1),
ConstantInt::getFalse(Context) // isVolatile
};
- CallInst::Create(MemCpyFn, CallArgs, CallArgs+5, "", TheCall);
+ IRBuilder<>(TheCall).CreateCall(MemCpyFn, CallArgs);
// Uses of the argument in the function should use our new alloca
// instead.
@@ -734,17 +730,52 @@ static bool hasLifetimeMarkers(AllocaInst *AI) {
if (AI->getType() == Int8PtrTy)
return isUsedByLifetimeMarker(AI);
- // Do a scan to find all the bitcasts to i8*.
+ // Do a scan to find all the casts to i8*.
for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); I != E;
++I) {
if (I->getType() != Int8PtrTy) continue;
- if (!isa<BitCastInst>(*I)) continue;
+ if (I->stripPointerCasts() != AI) continue;
if (isUsedByLifetimeMarker(*I))
return true;
}
return false;
}
+/// updateInlinedAtInfo - Helper function used by fixupLineNumbers to recursively
+/// update InlinedAtEntry of a DebugLoc.
+static DebugLoc updateInlinedAtInfo(const DebugLoc &DL,
+ const DebugLoc &InlinedAtDL,
+ LLVMContext &Ctx) {
+ if (MDNode *IA = DL.getInlinedAt(Ctx)) {
+ DebugLoc NewInlinedAtDL
+ = updateInlinedAtInfo(DebugLoc::getFromDILocation(IA), InlinedAtDL, Ctx);
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ NewInlinedAtDL.getAsMDNode(Ctx));
+ }
+
+ return DebugLoc::get(DL.getLine(), DL.getCol(), DL.getScope(Ctx),
+ InlinedAtDL.getAsMDNode(Ctx));
+}
+
+
+/// fixupLineNumbers - Update inlined instructions' line numbers to
+/// encode the location where these instructions are inlined.
+static void fixupLineNumbers(Function *Fn, Function::iterator FI,
+ Instruction *TheCall) {
+ DebugLoc TheCallDL = TheCall->getDebugLoc();
+ if (TheCallDL.isUnknown())
+ return;
+
+ for (; FI != Fn->end(); ++FI) {
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
+ BI != BE; ++BI) {
+ DebugLoc DL = BI->getDebugLoc();
+ if (!DL.isUnknown())
+ BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext()));
+ }
+ }
+}
+
// InlineFunction - This function inlines the called function into the basic
// block of the caller. This returns false if it is not possible to inline this
// call. The program is still in a well defined state if this occurs though.
@@ -847,6 +878,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Update the callgraph if requested.
if (IFI.CG)
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
+
+ // Update inlined instructions' line number information.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall);
}
// If there are any alloca instructions in the block that used to be the entry
@@ -920,13 +954,13 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
Function *StackRestore=Intrinsic::getDeclaration(M,Intrinsic::stackrestore);
// Insert the llvm.stacksave.
- CallInst *SavedPtr = CallInst::Create(StackSave, "savedstack",
- FirstNewBlock->begin());
+ CallInst *SavedPtr = IRBuilder<>(FirstNewBlock, FirstNewBlock->begin())
+ .CreateCall(StackSave, "savedstack");
// Insert a call to llvm.stackrestore before any return instructions in the
// inlined function.
for (unsigned i = 0, e = Returns.size(); i != e; ++i) {
- CallInst::Create(StackRestore, SavedPtr, "", Returns[i]);
+ IRBuilder<>(Returns[i]).CreateCall(StackRestore, SavedPtr);
}
// Count the number of StackRestore calls we insert.
@@ -938,7 +972,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
for (Function::iterator BB = FirstNewBlock, E = Caller->end();
BB != E; ++BB)
if (UnwindInst *UI = dyn_cast<UnwindInst>(BB->getTerminator())) {
- CallInst::Create(StackRestore, SavedPtr, "", UI);
+ IRBuilder<>(UI).CreateCall(StackRestore, SavedPtr);
++NumStackRestores;
}
}
@@ -1098,15 +1132,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
TheCall->replaceAllUsesWith(Returns[0]->getReturnValue());
}
+ // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
+ BasicBlock *ReturnBB = Returns[0]->getParent();
+ ReturnBB->replaceAllUsesWith(AfterCallBB);
+
// Splice the code from the return block into the block that it will return
// to, which contains the code that was after the call.
- BasicBlock *ReturnBB = Returns[0]->getParent();
AfterCallBB->getInstList().splice(AfterCallBB->begin(),
ReturnBB->getInstList());
- // Update PHI nodes that use the ReturnBB to use the AfterCallBB.
- ReturnBB->replaceAllUsesWith(AfterCallBB);
-
// Delete the return instruction now and empty ReturnBB now.
Returns[0]->eraseFromParent();
ReturnBB->eraseFromParent();
@@ -1126,8 +1160,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI) {
// Splice the code entry block into calling block, right before the
// unconditional branch.
- OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes
+ OrigBB->getInstList().splice(Br, CalleeEntry->getInstList());
// Remove the unconditional branch.
OrigBB->getInstList().erase(Br);
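
updateInlinedAtInfo re-bases an instruction's whole inlined-at chain on the call site's location, so nested inlining composes instead of losing the inner frames. A worked sketch, with hypothetical source locations:

    // I was produced by inlining C into B: "line 7 of C, inlinedAt = loc(B)".
    // Now B is inlined into A at TheCall ("line 42 of A"). Bottom-up rebase:
    //   loc(B)         -> loc(B) with inlinedAt = line 42 of A
    //   I's DebugLoc   -> line 7 of C, inlinedAt = the rebased loc(B)
    DebugLoc NewDL = updateInlinedAtInfo(I->getDebugLoc(),
                                         TheCall->getDebugLoc(),
                                         I->getContext());
    I->setDebugLoc(NewDL);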
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 3bdbaa5..0f6d9ae 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -427,10 +427,6 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
BasicBlock *PredBB = DestBB->getSinglePredecessor();
assert(PredBB && "Block doesn't have a single predecessor!");
- // Splice all the instructions from PredBB to DestBB.
- PredBB->getTerminator()->eraseFromParent();
- DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
-
// Zap anything that took the address of DestBB. Not doing this will give the
// address an invalid value.
if (DestBB->hasAddressTaken()) {
@@ -445,6 +441,10 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
// Anything that branched to PredBB now branches to DestBB.
PredBB->replaceAllUsesWith(DestBB);
+ // Splice all the instructions from PredBB to DestBB.
+ PredBB->getTerminator()->eraseFromParent();
+ DestBB->getInstList().splice(DestBB->begin(), PredBB->getInstList());
+
if (P) {
DominatorTree *DT = P->getAnalysisIfAvailable<DominatorTree>();
if (DT) {
@@ -536,9 +536,9 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
/// unconditional branch, and contains no instructions other than PHI nodes,
-/// potential debug intrinsics and the branch. If possible, eliminate BB by
-/// rewriting all the predecessors to branch to the successor block and return
-/// true. If we can't transform, return false.
+/// potential side-effect-free intrinsics and the branch. If possible,
+/// eliminate BB by rewriting all the predecessors to branch to the successor
+/// block and return true. If we can't transform, return false.
bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
assert(BB != &BB->getParent()->getEntryBlock() &&
"TryToSimplifyUncondBranchFromEmptyBlock called on entry block!");
@@ -613,13 +613,15 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
}
}
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- if (Succ->getSinglePredecessor()) {
- // BB is the only predecessor of Succ, so Succ will end up with exactly
- // the same predecessors BB had.
- Succ->getInstList().splice(Succ->begin(),
- BB->getInstList(), BB->begin());
- } else {
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+
+ // Copy over any phi, debug or lifetime instruction.
+ BB->getTerminator()->eraseFromParent();
+ Succ->getInstList().splice(Succ->getFirstNonPHI(), BB->getInstList());
+ } else {
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
// We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
assert(PN->use_empty() && "There shouldn't be any uses here!");
PN->eraseFromParent();
@@ -642,7 +644,7 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
bool Changed = false;
// This implementation doesn't currently consider undef operands
- // specially. Theroetically, two phis which are identical except for
+ // specially. Theoretically, two phis which are identical except for
// one having an undef where the other doesn't could be collapsed.
// Map from PHI hash values to PHI nodes. If multiple PHIs have
@@ -660,12 +662,17 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
// them, which helps expose duplicates, but we have to check all the
// operands to be safe in case instcombine hasn't run.
uintptr_t Hash = 0;
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
- // This hash algorithm is quite weak as hash functions go, but it seems
- // to do a good enough job for this particular purpose, and is very quick.
Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
}
+ for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
+ I != E; ++I) {
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
// Avoid colliding with the DenseMap sentinels ~0 and ~0-1.
Hash >>= 1;
// If we've never seen this hash value before, it's a unique PHI.
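
The added block loop closes a blind spot in the hash: two PHIs can list the same values in the same order while drawing them from different blocks, e.g. phi [%x,%bb1],[%y,%bb2] versus phi [%x,%bb2],[%y,%bb1]. Folding the incoming blocks in separates such pairs up front rather than leaving them to the later full comparison. The complete mix, as a free-standing sketch:

    static uintptr_t hashPHI(PHINode *PN) {
      uintptr_t Hash = 0;
      // Mix the incoming values.
      for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
        Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
        Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
      }
      // Mix the incoming blocks as well.
      for (PHINode::block_iterator I = PN->block_begin(), E = PN->block_end();
           I != E; ++I) {
        Hash ^= reinterpret_cast<uintptr_t>(static_cast<BasicBlock *>(*I));
        Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
      }
      return Hash >> 1; // stay clear of the DenseMap sentinels ~0 and ~0-1
    }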
@@ -706,39 +713,15 @@ bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
///
static unsigned enforceKnownAlignment(Value *V, unsigned Align,
unsigned PrefAlign) {
+ V = V->stripPointerCasts();
- User *U = dyn_cast<User>(V);
- if (!U) return Align;
-
- switch (Operator::getOpcode(U)) {
- default: break;
- case Instruction::BitCast:
- return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- case Instruction::GetElementPtr: {
- // If all indexes are zero, it is just the alignment of the base pointer.
- bool AllZeroOperands = true;
- for (User::op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e; ++i)
- if (!isa<Constant>(*i) ||
- !cast<Constant>(*i)->isNullValue()) {
- AllZeroOperands = false;
- break;
- }
-
- if (AllZeroOperands) {
- // Treat this like a bitcast.
- return enforceKnownAlignment(U->getOperand(0), Align, PrefAlign);
- }
- return Align;
- }
- case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(V);
+ if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
// If there is a requested alignment and if this is an alloca, round up.
if (AI->getAlignment() >= PrefAlign)
return AI->getAlignment();
AI->setAlignment(PrefAlign);
return PrefAlign;
}
- }
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
// If there is a large requested alignment and we can, bump up the alignment
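
The rewrite of enforceKnownAlignment above leans on Value::stripPointerCasts, which already walks bitcasts and all-zero-index GEPs down to the underlying object, exactly the two cases the deleted switch handled by hand. Condensed:

    V = V->stripPointerCasts(); // looks through bitcasts and zero-index GEPs
    if (AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
      if (AI->getAlignment() >= PrefAlign)
        return AI->getAlignment();
      AI->setAlignment(PrefAlign);
      return PrefAlign;
    }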
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index f02ffd2..e79fb5a 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -375,6 +375,7 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
".preheader", this);
+ NewBB->getTerminator()->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
DEBUG(dbgs() << "LoopSimplify: Creating pre-header " << NewBB->getName()
<< "\n");
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 7da7271..6772511 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -47,6 +47,14 @@ static inline void RemapInstruction(Instruction *I,
if (It != VMap.end())
I->setOperand(op, It->second);
}
+
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
+ if (It != VMap.end())
+ PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
+ }
+ }
}
/// FoldBlockIntoPredecessor - Folds a basic block into its predecessor if it
@@ -75,13 +83,13 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
// Delete the unconditional branch from the predecessor...
OnlyPred->getInstList().pop_back();
- // Move all definitions in the successor to the predecessor...
- OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
-
// Make all PHI nodes that referred to BB now refer to Pred as their
// source...
BB->replaceAllUsesWith(OnlyPred);
+ // Move all definitions in the successor to the predecessor...
+ OnlyPred->getInstList().splice(OnlyPred->end(), BB->getInstList());
+
std::string OldName = BB->getName();
// Erase basic block from the function...
@@ -247,16 +255,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// the successor of the latch block. The successor of the exit block will
// be updated specially after unrolling all the way.
if (*BB != LatchBlock)
- for (Value::use_iterator UI = (*BB)->use_begin(), UE = (*BB)->use_end();
- UI != UE;) {
- Instruction *UseInst = cast<Instruction>(*UI);
- ++UI;
- if (isa<PHINode>(UseInst) && !L->contains(UseInst)) {
- PHINode *phi = cast<PHINode>(UseInst);
- Value *Incoming = phi->getIncomingValueForBlock(*BB);
- phi->addIncoming(Incoming, New);
- }
- }
+ for (succ_iterator SI = succ_begin(*BB), SE = succ_end(*BB); SI != SE;
+ ++SI)
+ if (!L->contains(*SI))
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *Incoming = phi->getIncomingValueForBlock(*BB);
+ phi->addIncoming(Incoming, New);
+ }
// Keep track of new headers and latches as we create them, so that
// we can insert the proper branches later.
@@ -288,24 +294,20 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// successor blocks, update them to use the appropriate values computed as the
// last iteration of the loop.
if (Count != 1) {
- SmallPtrSet<PHINode*, 8> Users;
- for (Value::use_iterator UI = LatchBlock->use_begin(),
- UE = LatchBlock->use_end(); UI != UE; ++UI)
- if (PHINode *phi = dyn_cast<PHINode>(*UI))
- Users.insert(phi);
-
BasicBlock *LastIterationBB = cast<BasicBlock>(LastValueMap[LatchBlock]);
- for (SmallPtrSet<PHINode*,8>::iterator SI = Users.begin(), SE = Users.end();
+ for (succ_iterator SI = succ_begin(LatchBlock), SE = succ_end(LatchBlock);
SI != SE; ++SI) {
- PHINode *PN = *SI;
- Value *InVal = PN->removeIncomingValue(LatchBlock, false);
- // If this value was defined in the loop, take the value defined by the
- // last iteration of the loop.
- if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
- if (L->contains(InValI))
- InVal = LastValueMap[InVal];
+ for (BasicBlock::iterator BBI = (*SI)->begin();
+ PHINode *PN = dyn_cast<PHINode>(BBI); ++BBI) {
+ Value *InVal = PN->removeIncomingValue(LatchBlock, false);
+ // If this value was defined in the loop, take the value defined by the
+ // last iteration of the loop.
+ if (Instruction *InValI = dyn_cast<Instruction>(InVal)) {
+ if (L->contains(InValI))
+ InVal = LastValueMap[InVal];
+ }
+ PN->addIncoming(InVal, LastIterationBB);
}
- PN->addIncoming(InVal, LastIterationBB);
}
}
@@ -352,11 +354,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count,
// Replace the conditional branch with an unconditional one.
BranchInst::Create(Dest, Term);
Term->eraseFromParent();
- // Merge adjacent basic blocks, if possible.
- if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI)) {
+ }
+ }
+
+ // Merge adjacent basic blocks, if possible.
+ for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
+ BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());
+ if (Term->isUnconditional()) {
+ BasicBlock *Dest = Term->getSuccessor(0);
+ if (BasicBlock *Fold = FoldBlockIntoPredecessor(Dest, LI))
std::replace(Latches.begin(), Latches.end(), Dest, Fold);
- std::replace(Headers.begin(), Headers.end(), Dest, Fold);
- }
}
}
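
The local RemapInstruction gains the PHI fix-up because unrolling clones whole blocks: a cloned PHI must name the cloned predecessors, not the originals, and remapping operands alone leaves the incoming-block list stale. The fix-up in isolation:

    if (PHINode *PN = dyn_cast<PHINode>(I)) {
      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
        // Retarget each incoming block through the clone map, if present.
        ValueToValueMapTy::iterator It = VMap.find(PN->getIncomingBlock(i));
        if (It != VMap.end())
          PN->setIncomingBlock(i, cast<BasicBlock>(It->second));
      }
    }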
diff --git a/lib/Transforms/Utils/LowerExpectIntrinsic.cpp b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
new file mode 100644
index 0000000..c1213fa
--- /dev/null
+++ b/lib/Transforms/Utils/LowerExpectIntrinsic.cpp
@@ -0,0 +1,166 @@
+#define DEBUG_TYPE "lower-expect-intrinsic"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/BasicBlock.h"
+#include "llvm/LLVMContext.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Metadata.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/ADT/Statistic.h"
+#include <vector>
+
+using namespace llvm;
+
+STATISTIC(IfHandled, "Number of 'expect' intrinsic instructions handled");
+
+static cl::opt<uint32_t>
+LikelyBranchWeight("likely-branch-weight", cl::Hidden, cl::init(64),
+ cl::desc("Weight of the branch likely to be taken (default = 64)"));
+static cl::opt<uint32_t>
+UnlikelyBranchWeight("unlikely-branch-weight", cl::Hidden, cl::init(4),
+ cl::desc("Weight of the branch unlikely to be taken (default = 4)"));
+
+namespace {
+
+ class LowerExpectIntrinsic : public FunctionPass {
+
+ bool HandleSwitchExpect(SwitchInst *SI);
+
+ bool HandleIfExpect(BranchInst *BI);
+
+ public:
+ static char ID;
+ LowerExpectIntrinsic() : FunctionPass(ID) {
+ initializeLowerExpectIntrinsicPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F);
+ };
+}
+
+
+bool LowerExpectIntrinsic::HandleSwitchExpect(SwitchInst *SI) {
+ CallInst *CI = dyn_cast<CallInst>(SI->getCondition());
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ LLVMContext &Context = CI->getContext();
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+
+ unsigned caseNo = SI->findCaseValue(ExpectedValue);
+ std::vector<Value *> Vec;
+ unsigned n = SI->getNumCases();
+ Vec.resize(n + 1); // +1 for MDString
+
+ Vec[0] = MDString::get(Context, "branch_weights");
+ for (unsigned i = 0; i < n; ++i) {
+ Vec[i + 1] = ConstantInt::get(Int32Ty, i == caseNo ? LikelyBranchWeight : UnlikelyBranchWeight);
+ }
+
+ MDNode *WeightsNode = llvm::MDNode::get(Context, Vec);
+ SI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+ SI->setCondition(ArgValue);
+ return true;
+}
+
+
+bool LowerExpectIntrinsic::HandleIfExpect(BranchInst *BI) {
+ if (BI->isUnconditional())
+ return false;
+
+ // Handle non-optimized IR code like:
+ // %expval = call i64 @llvm.expect.i64.i64(i64 %conv1, i64 1)
+ // %tobool = icmp ne i64 %expval, 0
+ // br i1 %tobool, label %if.then, label %if.end
+
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(BI->getCondition());
+ if (!CmpI || CmpI->getPredicate() != CmpInst::ICMP_NE)
+ return false;
+
+ CallInst *CI = dyn_cast<CallInst>(CmpI->getOperand(0));
+ if (!CI)
+ return false;
+
+ Function *Fn = CI->getCalledFunction();
+ if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
+ return false;
+
+ Value *ArgValue = CI->getArgOperand(0);
+ ConstantInt *ExpectedValue = dyn_cast<ConstantInt>(CI->getArgOperand(1));
+ if (!ExpectedValue)
+ return false;
+
+ LLVMContext &Context = CI->getContext();
+ const Type *Int32Ty = Type::getInt32Ty(Context);
+ bool Likely = ExpectedValue->isOne();
+
+ // If the expect value is 1, we are more likely to take branch 0;
+ // otherwise branch 1 is the more likely one.
+ Value *Ops[] = {
+ MDString::get(Context, "branch_weights"),
+ ConstantInt::get(Int32Ty, Likely ? LikelyBranchWeight : UnlikelyBranchWeight),
+ ConstantInt::get(Int32Ty, Likely ? UnlikelyBranchWeight : LikelyBranchWeight)
+ };
+
+ MDNode *WeightsNode = MDNode::get(Context, Ops);
+ BI->setMetadata(LLVMContext::MD_prof, WeightsNode);
+
+ CmpI->setOperand(0, ArgValue);
+ return true;
+}
+
+
+bool LowerExpectIntrinsic::runOnFunction(Function &F) {
+ for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
+ BasicBlock *BB = I++;
+
+ // Create "block_weights" metadata.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ if (HandleIfExpect(BI))
+ IfHandled++;
+ } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+ if (HandleSwitchExpect(SI))
+ IfHandled++;
+ }
+
+ // Remove llvm.expect intrinsics.
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end();
+ BI != BE; ) {
+ CallInst *CI = dyn_cast<CallInst>(BI++);
+ if (!CI)
+ continue;
+
+ Function *Fn = CI->getCalledFunction();
+ if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+ Value *Exp = CI->getArgOperand(0);
+ CI->replaceAllUsesWith(Exp);
+ CI->eraseFromParent();
+ }
+ }
+ }
+
+ return false;
+}
+
+
+char LowerExpectIntrinsic::ID = 0;
+INITIALIZE_PASS(LowerExpectIntrinsic, "lower-expect", "Lower 'expect' "
+ "Intrinsics", false, false)
+
+FunctionPass *llvm::createLowerExpectIntrinsicPass() {
+ return new LowerExpectIntrinsic();
+}
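
Consumers read these annotations back from the prof metadata attached to the terminator. A minimal sketch of hand-building the same node under the default 64/4 weights, with Ctx and BI assumed to be the context and a conditional branch:

    // Attaches profile metadata: branch_weights = {64, 4}.
    Type *Int32Ty = Type::getInt32Ty(Ctx);
    Value *Ops[] = {
      MDString::get(Ctx, "branch_weights"),
      ConstantInt::get(Int32Ty, 64), // weight of successor 0 (likely)
      ConstantInt::get(Int32Ty, 4)   // weight of successor 1 (unlikely)
    };
    BI->setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Ops));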
diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp
index 025ae0d..f77d19d 100644
--- a/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/lib/Transforms/Utils/LowerInvoke.cpp
@@ -66,7 +66,7 @@ namespace {
Constant *AbortFn;
// Used for expensive EH support.
- const Type *JBLinkTy;
+ StructType *JBLinkTy;
GlobalVariable *JBListHead;
Constant *SetJmpFn, *LongJmpFn, *StackSaveFn, *StackRestoreFn;
bool useExpensiveEHSupport;
@@ -120,24 +120,16 @@ FunctionPass *llvm::createLowerInvokePass(const TargetLowering *TLI,
// doInitialization - Make sure that there is a prototype for abort in the
// current module.
bool LowerInvoke::doInitialization(Module &M) {
- const Type *VoidPtrTy =
- Type::getInt8PtrTy(M.getContext());
+ const Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext());
if (useExpensiveEHSupport) {
// Insert a type for the linked list of jump buffers.
unsigned JBSize = TLI ? TLI->getJumpBufSize() : 0;
JBSize = JBSize ? JBSize : 200;
- const Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
-
- { // The type is recursive, so use a type holder.
- std::vector<const Type*> Elements;
- Elements.push_back(JmpBufTy);
- OpaqueType *OT = OpaqueType::get(M.getContext());
- Elements.push_back(PointerType::getUnqual(OT));
- PATypeHolder JBLType(StructType::get(M.getContext(), Elements));
- OT->refineAbstractTypeTo(JBLType.get()); // Complete the cycle.
- JBLinkTy = JBLType.get();
- M.addTypeName("llvm.sjljeh.jmpbufty", JBLinkTy);
- }
+ Type *JmpBufTy = ArrayType::get(VoidPtrTy, JBSize);
+
+ JBLinkTy = StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
+ Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
+ JBLinkTy->setBody(Elts);
const Type *PtrJBList = PointerType::getUnqual(JBLinkTy);
@@ -184,8 +176,7 @@ bool LowerInvoke::insertCheapEHSupport(Function &F) {
SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
// Insert a normal call instruction...
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
- CallArgs.begin(), CallArgs.end(),
- "",II);
+ CallArgs, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -265,8 +256,7 @@ void LowerInvoke::rewriteExpensiveInvoke(InvokeInst *II, unsigned InvokeNo,
// Insert a normal call instruction.
SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
CallInst *NewCall = CallInst::Create(II->getCalledValue(),
- CallArgs.begin(), CallArgs.end(), "",
- II);
+ CallArgs, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -573,7 +563,7 @@ bool LowerInvoke::insertExpensiveEHSupport(Function &F) {
Type::getInt8PtrTy(F.getContext()),
"tmp", UnwindBlock);
Idx[1] = ConstantInt::get(Type::getInt32Ty(F.getContext()), 1);
- CallInst::Create(LongJmpFn, &Idx[0], &Idx[2], "", UnwindBlock);
+ CallInst::Create(LongJmpFn, Idx, "", UnwindBlock);
new UnreachableInst(F.getContext(), UnwindBlock);
// Set up the term block ("throw without a catch").
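
This hunk shows the new idiom for recursive types that replaces the OpaqueType/refineAbstractTypeTo cycle: create the named struct first, then install a body that may point back at it. In isolation (JBSize assumed):

    // struct jmpbuf_list { i8* buf[JBSize]; struct jmpbuf_list *next; };
    StructType *JBLinkTy =
        StructType::createNamed(M.getContext(), "llvm.sjljeh.jmpbufty");
    Type *JmpBufTy = ArrayType::get(Type::getInt8PtrTy(M.getContext()), JBSize);
    Type *Elts[] = { JmpBufTy, PointerType::getUnqual(JBLinkTy) };
    JBLinkTy->setBody(Elts); // the body legally refers to the type itself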
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index a1736b9..e5a00f4 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -38,6 +38,7 @@
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -92,6 +93,22 @@ bool llvm::isAllocaPromotable(const AllocaInst *AI) {
return false; // Don't allow a store OF the AI, only INTO the AI.
if (SI->isVolatile())
return false;
+ } else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ return false;
+ } else if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
+ if (BCI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!onlyUsedByLifetimeMarkers(BCI))
+ return false;
+ } else if (const GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) {
+ if (GEPI->getType() != Type::getInt8PtrTy(U->getContext()))
+ return false;
+ if (!GEPI->hasAllZeroIndices())
+ return false;
+ if (!onlyUsedByLifetimeMarkers(GEPI))
+ return false;
} else {
return false;
}
@@ -335,6 +352,31 @@ namespace {
};
} // end of anonymous namespace
+static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {
+ // Knowing that this alloca is promotable, we know that it's safe to kill all
+ // instructions except for load and store.
+
+ for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
+ UI != UE;) {
+ Instruction *I = cast<Instruction>(*UI);
+ ++UI;
+ if (isa<LoadInst>(I) || isa<StoreInst>(I))
+ continue;
+
+ if (!I->getType()->isVoidTy()) {
+ // The only users of this bitcast/GEP instruction are lifetime intrinsics.
+ // Follow the use/def chain to erase them now instead of leaving it for
+ // dead code elimination later.
+ for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
+ UI != UE;) {
+ Instruction *Inst = cast<Instruction>(*UI);
+ ++UI;
+ Inst->eraseFromParent();
+ }
+ }
+ I->eraseFromParent();
+ }
+}
void PromoteMem2Reg::run() {
Function &F = *DT.getRoot()->getParent();
@@ -353,6 +395,8 @@ void PromoteMem2Reg::run() {
assert(AI->getParent()->getParent() == &F &&
"All allocas should be in the same function, which is same as DF!");
+ removeLifetimeIntrinsicUsers(AI);
+
if (AI->use_empty()) {
// If there are no uses of the alloca, just delete it now.
if (AST) AST->deleteValue(AI);
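
Taken together, these checks mean an alloca stays promotable when lifetime markers reach it only through an i8* bitcast or an all-zero-index GEP; removeLifetimeIntrinsicUsers then strips those helpers before SSA construction so that only loads and stores remain. The acceptance test for the bitcast case, condensed from the hunk above:

    if (const BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // An i8* cast is fine iff every transitive user is a lifetime marker.
      if (BCI->getType() != Type::getInt8PtrTy(U->getContext()) ||
          !onlyUsedByLifetimeMarkers(BCI))
        return false;
    }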
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index b336194..b47a7cc 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -16,7 +16,6 @@
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
@@ -358,8 +357,7 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
LoadAndStorePromoter::
LoadAndStorePromoter(const SmallVectorImpl<Instruction*> &Insts,
- SSAUpdater &S, DbgDeclareInst *DD, DIBuilder *DB,
- StringRef BaseName) : SSA(S), DDI(DD), DIB(DB) {
+ SSAUpdater &S, StringRef BaseName) : SSA(S) {
if (Insts.empty()) return;
Value *SomeVal;
@@ -407,8 +405,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
if (BlockUses.size() == 1) {
// If it is a store, it is a trivial def of the value in the block.
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
- if (DDI)
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ updateDebugInfo(SI);
SSA.AddAvailableValue(BB, SI->getOperand(0));
} else
// Otherwise it is a load, queue it to rewrite as a live-in load.
@@ -462,9 +459,7 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
if (StoreInst *SI = dyn_cast<StoreInst>(II)) {
// If this is a store to an unrelated pointer, ignore it.
if (!isInstInList(SI, Insts)) continue;
-
- if (DDI)
- ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
+ updateDebugInfo(SI);
// Remember that this is the active value in the block.
StoredValue = SI->getOperand(0);
@@ -522,7 +517,4 @@ run(const SmallVectorImpl<Instruction*> &Insts) const {
instructionDeleted(User);
User->eraseFromParent();
}
-
- if (DDI)
- DDI->eraseFromParent();
}
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 6df846c..9d9c324 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2211,8 +2211,7 @@ bool SimplifyCFGOpt::SimplifyUnwind(UnwindInst *UI, IRBuilder<> &Builder) {
SmallVector<Value*,8> Args(II->op_begin(), II->op_end()-3);
Builder.SetInsertPoint(BI);
CallInst *CI = Builder.CreateCall(II->getCalledValue(),
- Args.begin(), Args.end(),
- II->getName());
+ Args, II->getName());
CI->setCallingConv(II->getCallingConv());
CI->setAttributes(II->getAttributes());
// If the invoke produced a value, the Call now does instead.
@@ -2355,8 +2354,7 @@ bool SimplifyCFGOpt::SimplifyUnreachable(UnreachableInst *UI) {
SmallVector<Value*, 8> Args(II->op_begin(), II->op_end()-3);
Builder.SetInsertPoint(BI);
CallInst *CI = Builder.CreateCall(II->getCalledValue(),
- Args.begin(), Args.end(),
- II->getName());
+ Args, II->getName());
CI->setCallingConv(II->getCallingConv());
CI->setAttributes(II->getAttributes());
// If the invoke produced a value, the call does now instead.
@@ -2450,6 +2448,77 @@ static bool EliminateDeadSwitchCases(SwitchInst *SI) {
return !DeadCases.empty();
}
+/// FindPHIForConditionForwarding - If BB would be eligible for simplification
+/// by TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
+/// by an unconditional branch), look at the phi node for BB in the successor
+/// block and see if the incoming value is equal to CaseValue. If so, return
+/// the phi node, and set PhiIndex to BB's index in the phi node.
+static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
+ BasicBlock *BB,
+ int *PhiIndex) {
+ if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
+ return NULL; // BB must be empty to be a candidate for simplification.
+ if (!BB->getSinglePredecessor())
+ return NULL; // BB must be dominated by the switch.
+
+ BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!Branch || !Branch->isUnconditional())
+ return NULL; // Terminator must be unconditional branch.
+
+ BasicBlock *Succ = Branch->getSuccessor(0);
+
+ BasicBlock::iterator I = Succ->begin();
+ while (PHINode *PHI = dyn_cast<PHINode>(I++)) {
+ int Idx = PHI->getBasicBlockIndex(BB);
+ assert(Idx >= 0 && "PHI has no entry for predecessor?");
+
+ Value *InValue = PHI->getIncomingValue(Idx);
+ if (InValue != CaseValue) continue;
+
+ *PhiIndex = Idx;
+ return PHI;
+ }
+
+ return NULL;
+}
+
+/// ForwardSwitchConditionToPHI - Try to forward the condition of a switch
+/// instruction to a phi node dominated by the switch, if that would mean that
+/// some of the destination blocks of the switch can be folded away.
+/// Returns true if a change is made.
+static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
+ typedef DenseMap<PHINode*, SmallVector<int,4> > ForwardingNodesMap;
+ ForwardingNodesMap ForwardingNodes;
+
+ for (unsigned I = 1; I < SI->getNumCases(); ++I) { // 0 is the default case.
+ ConstantInt *CaseValue = SI->getCaseValue(I);
+ BasicBlock *CaseDest = SI->getSuccessor(I);
+
+ int PhiIndex;
+ PHINode *PHI = FindPHIForConditionForwarding(CaseValue, CaseDest,
+ &PhiIndex);
+ if (!PHI) continue;
+
+ ForwardingNodes[PHI].push_back(PhiIndex);
+ }
+
+ bool Changed = false;
+
+ for (ForwardingNodesMap::iterator I = ForwardingNodes.begin(),
+ E = ForwardingNodes.end(); I != E; ++I) {
+ PHINode *Phi = I->first;
+ SmallVector<int,4> &Indexes = I->second;
+
+ if (Indexes.size() < 2) continue;
+
+ for (size_t I = 0, E = Indexes.size(); I != E; ++I)
+ Phi->setIncomingValue(Indexes[I], SI->getCondition());
+ Changed = true;
+ }
+
+ return Changed;
+}
+
bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
// If this switch is too complex to want to look at, ignore it.
if (!isValueEqualityComparison(SI))
@@ -2486,6 +2555,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (EliminateDeadSwitchCases(SI))
return SimplifyCFG(BB) | true;
+ if (ForwardSwitchConditionToPHI(SI))
+ return SimplifyCFG(BB) | true;
+
return false;
}
@@ -2530,7 +2602,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){
BasicBlock *BB = BI->getParent();
// If the Terminator is the only non-phi instruction, simplify the block.
- BasicBlock::iterator I = BB->getFirstNonPHIOrDbg();
+ BasicBlock::iterator I = BB->getFirstNonPHIOrDbgOrLifetime();
if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
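
A hypothetical before/after for ForwardSwitchConditionToPHI, written as comments around the one mutation it performs: once at least two PHI entries qualify, each case-specific constant is replaced by the switch condition itself, which empties the case blocks for later folding.

    // Before: %c1 and %c2 exist only to feed constants into the merge PHI.
    //   switch i32 %x, label %def [ i32 1, label %c1
    //                               i32 2, label %c2 ]
    //   merge: %r = phi i32 [ 1, %c1 ], [ 2, %c2 ], [ 0, %def ]
    // After: both entries read %x, and SimplifyCFG can fold %c1/%c2 away.
    //   merge: %r = phi i32 [ %x, %c1 ], [ %x, %c2 ], [ 0, %def ]
    Phi->setIncomingValue(Indexes[I], SI->getCondition());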
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index a73bf04..973b105 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -13,15 +13,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Type.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
+#include "llvm/InlineAsm.h"
+#include "llvm/Instructions.h"
#include "llvm/Metadata.h"
-#include "llvm/ADT/SmallVector.h"
using namespace llvm;
-Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
- RemapFlags Flags) {
+// Out-of-line method to anchor the vtable for the class.
+void ValueMapTypeRemapper::Anchor() {}
+
+Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM, RemapFlags Flags,
+ ValueMapTypeRemapper *TypeMapper) {
ValueToValueMapTy::iterator I = VM.find(V);
// If the value already exists in the map, use it.
@@ -29,8 +32,23 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
// Global values do not need to be seeded into the VM if they
// are using the identity mapping.
- if (isa<GlobalValue>(V) || isa<InlineAsm>(V) || isa<MDString>(V))
+ if (isa<GlobalValue>(V) || isa<MDString>(V))
return VM[V] = const_cast<Value*>(V);
+
+ if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+ // Inline asm may need *type* remapping.
+ FunctionType *NewTy = IA->getFunctionType();
+ if (TypeMapper) {
+ NewTy = cast<FunctionType>(TypeMapper->remapType(NewTy));
+
+ if (NewTy != IA->getFunctionType())
+ V = InlineAsm::get(NewTy, IA->getAsmString(), IA->getConstraintString(),
+ IA->hasSideEffects(), IA->isAlignStack());
+ }
+
+ return VM[V] = const_cast<Value*>(V);
+ }
+
if (const MDNode *MD = dyn_cast<MDNode>(V)) {
// If this is a module-level metadata and we know that nothing at the module
@@ -45,14 +63,14 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
// Check all operands to see if any need to be remapped.
for (unsigned i = 0, e = MD->getNumOperands(); i != e; ++i) {
Value *OP = MD->getOperand(i);
- if (OP == 0 || MapValue(OP, VM, Flags) == OP) continue;
+ if (OP == 0 || MapValue(OP, VM, Flags, TypeMapper) == OP) continue;
// Ok, at least one operand needs remapping.
SmallVector<Value*, 4> Elts;
Elts.reserve(MD->getNumOperands());
for (i = 0; i != e; ++i) {
Value *Op = MD->getOperand(i);
- Elts.push_back(Op ? MapValue(Op, VM, Flags) : 0);
+ Elts.push_back(Op ? MapValue(Op, VM, Flags, TypeMapper) : 0);
}
MDNode *NewMD = MDNode::get(V->getContext(), Elts);
Dummy->replaceAllUsesWith(NewMD);
@@ -75,51 +93,75 @@ Value *llvm::MapValue(const Value *V, ValueToValueMapTy &VM,
return 0;
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
- Function *F = cast<Function>(MapValue(BA->getFunction(), VM, Flags));
+ Function *F =
+ cast<Function>(MapValue(BA->getFunction(), VM, Flags, TypeMapper));
BasicBlock *BB = cast_or_null<BasicBlock>(MapValue(BA->getBasicBlock(), VM,
- Flags));
+ Flags, TypeMapper));
return VM[V] = BlockAddress::get(F, BB ? BB : BA->getBasicBlock());
}
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
- Value *Op = C->getOperand(i);
- Value *Mapped = MapValue(Op, VM, Flags);
- if (Mapped == C) continue;
-
- // Okay, the operands don't all match. We've already processed some or all
- // of the operands, set them up now.
- std::vector<Constant*> Ops;
- Ops.reserve(C->getNumOperands());
- for (unsigned j = 0; j != i; ++j)
- Ops.push_back(cast<Constant>(C->getOperand(i)));
+ // Otherwise, we have some other constant to remap. Start by checking to see
+ // if all operands have an identity remapping.
+ unsigned OpNo = 0, NumOperands = C->getNumOperands();
+ Value *Mapped = 0;
+ for (; OpNo != NumOperands; ++OpNo) {
+ Value *Op = C->getOperand(OpNo);
+ Mapped = MapValue(Op, VM, Flags, TypeMapper);
+ if (Mapped != C) break;
+ }
+
+ // See if the type mapper wants to remap the type as well.
+ Type *NewTy = C->getType();
+ if (TypeMapper)
+ NewTy = TypeMapper->remapType(NewTy);
+
+ // If the result type and all operands match up, then just insert an identity
+ // mapping.
+ if (OpNo == NumOperands && NewTy == C->getType())
+ return VM[V] = C;
+
+ // Okay, we need to create a new constant. We've already processed some or
+ // all of the operands, set them all up now.
+ SmallVector<Constant*, 8> Ops;
+ Ops.reserve(NumOperands);
+ for (unsigned j = 0; j != OpNo; ++j)
+ Ops.push_back(cast<Constant>(C->getOperand(j)));
+
+  // If one of the operands mismatched, push it and the other mapped operands.
+ if (OpNo != NumOperands) {
Ops.push_back(cast<Constant>(Mapped));
-
+
// Map the rest of the operands that aren't processed yet.
- for (++i; i != e; ++i)
- Ops.push_back(cast<Constant>(MapValue(C->getOperand(i), VM, Flags)));
-
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- return VM[V] = CE->getWithOperands(Ops);
- if (ConstantArray *CA = dyn_cast<ConstantArray>(C))
- return VM[V] = ConstantArray::get(CA->getType(), Ops);
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(C))
- return VM[V] = ConstantStruct::get(CS->getType(), Ops);
- assert(isa<ConstantVector>(C) && "Unknown mapped constant type");
- return VM[V] = ConstantVector::get(Ops);
+ for (++OpNo; OpNo != NumOperands; ++OpNo)
+ Ops.push_back(MapValue(cast<Constant>(C->getOperand(OpNo)), VM,
+ Flags, TypeMapper));
}
-
- // If we reach here, all of the operands of the constant match.
- return VM[V] = C;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
+ return VM[V] = CE->getWithOperands(Ops, NewTy);
+ if (isa<ConstantArray>(C))
+ return VM[V] = ConstantArray::get(cast<ArrayType>(NewTy), Ops);
+ if (isa<ConstantStruct>(C))
+ return VM[V] = ConstantStruct::get(cast<StructType>(NewTy), Ops);
+ if (isa<ConstantVector>(C))
+ return VM[V] = ConstantVector::get(Ops);
+ // If this is a no-operand constant, it must be because the type was remapped.
+ if (isa<UndefValue>(C))
+ return VM[V] = UndefValue::get(NewTy);
+ if (isa<ConstantAggregateZero>(C))
+ return VM[V] = ConstantAggregateZero::get(NewTy);
+ assert(isa<ConstantPointerNull>(C));
+ return VM[V] = ConstantPointerNull::get(cast<PointerType>(NewTy));
}
/// RemapInstruction - Convert the instruction operands from referencing the
/// current values into those specified by VMap.
///
void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
- RemapFlags Flags) {
+ RemapFlags Flags, ValueMapTypeRemapper *TypeMapper){
// Remap operands.
for (User::op_iterator op = I->op_begin(), E = I->op_end(); op != E; ++op) {
- Value *V = MapValue(*op, VMap, Flags);
+ Value *V = MapValue(*op, VMap, Flags, TypeMapper);
// If we aren't ignoring missing entries, assert that something happened.
if (V != 0)
*op = V;
@@ -128,14 +170,32 @@ void llvm::RemapInstruction(Instruction *I, ValueToValueMapTy &VMap,
"Referenced value not in value map!");
}
- // Remap attached metadata.
+ // Remap phi nodes' incoming blocks.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *V = MapValue(PN->getIncomingBlock(i), VMap, Flags);
+ // If we aren't ignoring missing entries, assert that something happened.
+ if (V != 0)
+ PN->setIncomingBlock(i, cast<BasicBlock>(V));
+ else
+ assert((Flags & RF_IgnoreMissingEntries) &&
+ "Referenced block not in value map!");
+ }
+ }
+
+  // Remap attached metadata.  Don't bother remapping DebugLoc, since it can
+  // never have mappings to do.
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
- I->getAllMetadata(MDs);
+ I->getAllMetadataOtherThanDebugLoc(MDs);
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI) {
- Value *Old = MI->second;
- Value *New = MapValue(Old, VMap, Flags);
+ MDNode *Old = MI->second;
+ MDNode *New = MapValue(Old, VMap, Flags, TypeMapper);
if (New != Old)
- I->setMetadata(MI->first, cast<MDNode>(New));
+ I->setMetadata(MI->first, New);
}
+
+ // If the instruction's type is being remapped, do so now.
+ if (TypeMapper)
+ I->mutateType(TypeMapper->remapType(I->getType()));
}
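
The new TypeMapper parameter threads a ValueMapTypeRemapper through every recursive MapValue call. Below is a minimal sketch of wiring one into RemapInstruction, assuming only the single remapType hook this patch introduces; IdentityTypeMapper and remapClonedBody are illustrative names, not part of the patch.

namespace {
// Identity mapper: returns every type unchanged. A real client (e.g. a
// module linker) would translate source-module types to destination types.
struct IdentityTypeMapper : public ValueMapTypeRemapper {
  virtual Type *remapType(Type *SrcTy) { return SrcTy; }
};
}

static void remapClonedBody(Function *NewF, ValueToValueMapTy &VMap) {
  IdentityTypeMapper TM;
  for (Function::iterator BB = NewF->begin(), BE = NewF->end(); BB != BE; ++BB)
    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
      RemapInstruction(&*I, VMap, RF_IgnoreMissingEntries, &TM);
}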
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 844284d..94794c3 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -26,13 +26,11 @@
#include "llvm/Operator.h"
#include "llvm/Module.h"
#include "llvm/ValueSymbolTable.h"
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
@@ -42,11 +40,6 @@
#include <cctype>
using namespace llvm;
-static cl::opt<bool>
-EnableDebugInfoComment("enable-debug-info-comment", cl::Hidden,
- cl::desc("Enable debug info comments"));
-
-
// Make virtual table appear in this compilation unit.
AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {}
@@ -140,60 +133,60 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) {
// TypePrinting Class: Type printing machinery
//===----------------------------------------------------------------------===//
-static DenseMap<const Type *, std::string> &getTypeNamesMap(void *M) {
- return *static_cast<DenseMap<const Type *, std::string>*>(M);
-}
-
-void TypePrinting::clear() {
- getTypeNamesMap(TypeNames).clear();
-}
+/// TypePrinting - Type printing machinery.
+namespace {
+class TypePrinting {
+ TypePrinting(const TypePrinting &); // DO NOT IMPLEMENT
+ void operator=(const TypePrinting&); // DO NOT IMPLEMENT
+public:
-bool TypePrinting::hasTypeName(const Type *Ty) const {
- return getTypeNamesMap(TypeNames).count(Ty);
-}
+ /// NamedTypes - The named types that are used by the current module.
+ std::vector<StructType*> NamedTypes;
+
+ /// NumberedTypes - The numbered types, along with their value.
+ DenseMap<StructType*, unsigned> NumberedTypes;
+
-void TypePrinting::addTypeName(const Type *Ty, const std::string &N) {
- getTypeNamesMap(TypeNames).insert(std::make_pair(Ty, N));
-}
+ TypePrinting() {}
+ ~TypePrinting() {}
+
+ void incorporateTypes(const Module &M);
+
+ void print(Type *Ty, raw_ostream &OS);
+
+ void printStructBody(StructType *Ty, raw_ostream &OS);
+};
+} // end anonymous namespace.
-TypePrinting::TypePrinting() {
- TypeNames = new DenseMap<const Type *, std::string>();
+void TypePrinting::incorporateTypes(const Module &M) {
+ M.findUsedStructTypes(NamedTypes);
+
+  // The list of struct types we got back includes all the struct types; split
+  // the unnamed ones out to a numbering and remove the anonymous structs.
+ unsigned NextNumber = 0;
+
+ std::vector<StructType*>::iterator NextToUse = NamedTypes.begin(), I, E;
+ for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) {
+ StructType *STy = *I;
+
+ // Ignore anonymous types.
+ if (STy->isAnonymous())
+ continue;
+
+ if (STy->getName().empty())
+ NumberedTypes[STy] = NextNumber++;
+ else
+ *NextToUse++ = STy;
+ }
+
+ NamedTypes.erase(NextToUse, NamedTypes.end());
}
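
incorporateTypes filters NamedTypes in place with the classic two-iterator idiom: keepers are copied forward over dropped slots and the stale tail is erased. The same idiom in isolation (this helper is illustrative, not part of the patch; std::remove_if with the inverted predicate is equivalent):

#include <vector>

template <typename T, typename Pred>
void filterInPlace(std::vector<T> &Vec, Pred Keep) {
  typename std::vector<T>::iterator NextToUse = Vec.begin();
  for (typename std::vector<T>::iterator I = Vec.begin(), E = Vec.end();
       I != E; ++I)
    if (Keep(*I))
      *NextToUse++ = *I;              // copy each keeper forward
  Vec.erase(NextToUse, Vec.end());    // discard the stale tail
}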
-TypePrinting::~TypePrinting() {
- delete &getTypeNamesMap(TypeNames);
-}
/// CalcTypeName - Write the specified type to the specified raw_ostream, making
/// use of type names or up references to shorten the type name where possible.
-void TypePrinting::CalcTypeName(const Type *Ty,
- SmallVectorImpl<const Type *> &TypeStack,
- raw_ostream &OS, bool IgnoreTopLevelName) {
- // Check to see if the type is named.
- if (!IgnoreTopLevelName) {
- DenseMap<const Type *, std::string> &TM = getTypeNamesMap(TypeNames);
- DenseMap<const Type *, std::string>::iterator I = TM.find(Ty);
- if (I != TM.end()) {
- OS << I->second;
- return;
- }
- }
-
- // Check to see if the Type is already on the stack...
- unsigned Slot = 0, CurSize = TypeStack.size();
- while (Slot < CurSize && TypeStack[Slot] != Ty) ++Slot; // Scan for type
-
- // This is another base case for the recursion. In this case, we know
- // that we have looped back to a type that we have previously visited.
- // Generate the appropriate upreference to handle this.
- if (Slot < CurSize) {
- OS << '\\' << unsigned(CurSize-Slot); // Here's the upreference
- return;
- }
-
- TypeStack.push_back(Ty); // Recursive case: Add us to the stack..
-
+void TypePrinting::print(Type *Ty, raw_ostream &OS) {
switch (Ty->getTypeID()) {
case Type::VoidTyID: OS << "void"; break;
case Type::FloatTyID: OS << "float"; break;
@@ -206,259 +199,96 @@ void TypePrinting::CalcTypeName(const Type *Ty,
case Type::X86_MMXTyID: OS << "x86_mmx"; break;
case Type::IntegerTyID:
OS << 'i' << cast<IntegerType>(Ty)->getBitWidth();
- break;
+ return;
case Type::FunctionTyID: {
- const FunctionType *FTy = cast<FunctionType>(Ty);
- CalcTypeName(FTy->getReturnType(), TypeStack, OS);
+ FunctionType *FTy = cast<FunctionType>(Ty);
+ print(FTy->getReturnType(), OS);
OS << " (";
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I) {
if (I != FTy->param_begin())
OS << ", ";
- CalcTypeName(*I, TypeStack, OS);
+ print(*I, OS);
}
if (FTy->isVarArg()) {
if (FTy->getNumParams()) OS << ", ";
OS << "...";
}
OS << ')';
- break;
+ return;
}
case Type::StructTyID: {
- const StructType *STy = cast<StructType>(Ty);
- if (STy->isPacked())
- OS << '<';
- OS << '{';
- for (StructType::element_iterator I = STy->element_begin(),
- E = STy->element_end(); I != E; ++I) {
- OS << ' ';
- CalcTypeName(*I, TypeStack, OS);
- if (llvm::next(I) == STy->element_end())
- OS << ' ';
- else
- OS << ',';
- }
- OS << '}';
- if (STy->isPacked())
- OS << '>';
- break;
+ StructType *STy = cast<StructType>(Ty);
+
+ if (STy->isAnonymous())
+ return printStructBody(STy, OS);
+
+ if (!STy->getName().empty())
+ return PrintLLVMName(OS, STy->getName(), LocalPrefix);
+
+ DenseMap<StructType*, unsigned>::iterator I = NumberedTypes.find(STy);
+ if (I != NumberedTypes.end())
+ OS << '%' << I->second;
+ else // Not enumerated, print the hex address.
+ OS << "%\"type 0x" << STy << '\"';
+ return;
}
case Type::PointerTyID: {
- const PointerType *PTy = cast<PointerType>(Ty);
- CalcTypeName(PTy->getElementType(), TypeStack, OS);
+ PointerType *PTy = cast<PointerType>(Ty);
+ print(PTy->getElementType(), OS);
if (unsigned AddressSpace = PTy->getAddressSpace())
OS << " addrspace(" << AddressSpace << ')';
OS << '*';
- break;
+ return;
}
case Type::ArrayTyID: {
- const ArrayType *ATy = cast<ArrayType>(Ty);
+ ArrayType *ATy = cast<ArrayType>(Ty);
OS << '[' << ATy->getNumElements() << " x ";
- CalcTypeName(ATy->getElementType(), TypeStack, OS);
+ print(ATy->getElementType(), OS);
OS << ']';
- break;
+ return;
}
case Type::VectorTyID: {
- const VectorType *PTy = cast<VectorType>(Ty);
+ VectorType *PTy = cast<VectorType>(Ty);
OS << "<" << PTy->getNumElements() << " x ";
- CalcTypeName(PTy->getElementType(), TypeStack, OS);
+ print(PTy->getElementType(), OS);
OS << '>';
- break;
+ return;
}
- case Type::OpaqueTyID:
- OS << "opaque";
- break;
default:
OS << "<unrecognized-type>";
- break;
+ return;
}
-
- TypeStack.pop_back(); // Remove self from stack.
}
-/// printTypeInt - The internal guts of printing out a type that has a
-/// potentially named portion.
-///
-void TypePrinting::print(const Type *Ty, raw_ostream &OS,
- bool IgnoreTopLevelName) {
- // Check to see if the type is named.
- DenseMap<const Type*, std::string> &TM = getTypeNamesMap(TypeNames);
- if (!IgnoreTopLevelName) {
- DenseMap<const Type*, std::string>::iterator I = TM.find(Ty);
- if (I != TM.end()) {
- OS << I->second;
- return;
- }
+void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) {
+ if (STy->isOpaque()) {
+ OS << "opaque";
+ return;
}
-
- // Otherwise we have a type that has not been named but is a derived type.
- // Carefully recurse the type hierarchy to print out any contained symbolic
- // names.
- SmallVector<const Type *, 16> TypeStack;
- std::string TypeName;
-
- raw_string_ostream TypeOS(TypeName);
- CalcTypeName(Ty, TypeStack, TypeOS, IgnoreTopLevelName);
- OS << TypeOS.str();
-
- // Cache type name for later use.
- if (!IgnoreTopLevelName)
- TM.insert(std::make_pair(Ty, TypeOS.str()));
-}
-
-namespace {
- class TypeFinder {
- // To avoid walking constant expressions multiple times and other IR
- // objects, we keep several helper maps.
- DenseSet<const Value*> VisitedConstants;
- DenseSet<const Type*> VisitedTypes;
-
- TypePrinting &TP;
- std::vector<const Type*> &NumberedTypes;
- public:
- TypeFinder(TypePrinting &tp, std::vector<const Type*> &numberedTypes)
- : TP(tp), NumberedTypes(numberedTypes) {}
-
- void Run(const Module &M) {
-      // Get types from the type symbol table.  This gets opaque types referenced
- // only through derived named types.
- const TypeSymbolTable &ST = M.getTypeSymbolTable();
- for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
- TI != E; ++TI)
- IncorporateType(TI->second);
-
- // Get types from global variables.
- for (Module::const_global_iterator I = M.global_begin(),
- E = M.global_end(); I != E; ++I) {
- IncorporateType(I->getType());
- if (I->hasInitializer())
- IncorporateValue(I->getInitializer());
- }
-
- // Get types from aliases.
- for (Module::const_alias_iterator I = M.alias_begin(),
- E = M.alias_end(); I != E; ++I) {
- IncorporateType(I->getType());
- IncorporateValue(I->getAliasee());
- }
-
- // Get types from functions.
- for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
- IncorporateType(FI->getType());
-
- for (Function::const_iterator BB = FI->begin(), E = FI->end();
- BB != E;++BB)
- for (BasicBlock::const_iterator II = BB->begin(),
- E = BB->end(); II != E; ++II) {
- const Instruction &I = *II;
- // Incorporate the type of the instruction and all its operands.
- IncorporateType(I.getType());
- for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
- OI != OE; ++OI)
- IncorporateValue(*OI);
- }
- }
- }
-
- private:
- void IncorporateType(const Type *Ty) {
- // Check to see if we're already visited this type.
- if (!VisitedTypes.insert(Ty).second)
- return;
-
- // If this is a structure or opaque type, add a name for the type.
- if (((Ty->isStructTy() && cast<StructType>(Ty)->getNumElements())
- || Ty->isOpaqueTy()) && !TP.hasTypeName(Ty)) {
- TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size())));
- NumberedTypes.push_back(Ty);
- }
-
- // Recursively walk all contained types.
- for (Type::subtype_iterator I = Ty->subtype_begin(),
- E = Ty->subtype_end(); I != E; ++I)
- IncorporateType(*I);
- }
-
- /// IncorporateValue - This method is used to walk operand lists finding
- /// types hiding in constant expressions and other operands that won't be
- /// walked in other ways. GlobalValues, basic blocks, instructions, and
- /// inst operands are all explicitly enumerated.
- void IncorporateValue(const Value *V) {
- if (V == 0 || !isa<Constant>(V) || isa<GlobalValue>(V)) return;
-
- // Already visited?
- if (!VisitedConstants.insert(V).second)
- return;
-
- // Check this type.
- IncorporateType(V->getType());
-
- // Look in operands for types.
- const Constant *C = cast<Constant>(V);
- for (Constant::const_op_iterator I = C->op_begin(),
- E = C->op_end(); I != E;++I)
- IncorporateValue(*I);
- }
- };
-} // end anonymous namespace
-
-
-/// AddModuleTypesToPrinter - Add all of the symbolic type names for types in
-/// the specified module to the TypePrinter and all numbered types to it and the
-/// NumberedTypes table.
-static void AddModuleTypesToPrinter(TypePrinting &TP,
- std::vector<const Type*> &NumberedTypes,
- const Module *M) {
- if (M == 0) return;
-
- // If the module has a symbol table, take all global types and stuff their
- // names into the TypeNames map.
- const TypeSymbolTable &ST = M->getTypeSymbolTable();
- for (TypeSymbolTable::const_iterator TI = ST.begin(), E = ST.end();
- TI != E; ++TI) {
- const Type *Ty = cast<Type>(TI->second);
-
- // As a heuristic, don't insert pointer to primitive types, because
- // they are used too often to have a single useful name.
- if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
- const Type *PETy = PTy->getElementType();
- if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) &&
- !PETy->isOpaqueTy())
- continue;
+
+ if (STy->isPacked())
+ OS << '<';
+
+ if (STy->getNumElements() == 0) {
+ OS << "{}";
+ } else {
+ StructType::element_iterator I = STy->element_begin();
+ OS << "{ ";
+ print(*I++, OS);
+ for (StructType::element_iterator E = STy->element_end(); I != E; ++I) {
+ OS << ", ";
+ print(*I, OS);
}
-
- // Likewise don't insert primitives either.
- if (Ty->isIntegerTy() || Ty->isPrimitiveType())
- continue;
-
- // Get the name as a string and insert it into TypeNames.
- std::string NameStr;
- raw_string_ostream NameROS(NameStr);
- formatted_raw_ostream NameOS(NameROS);
- PrintLLVMName(NameOS, TI->first, LocalPrefix);
- NameOS.flush();
- TP.addTypeName(Ty, NameStr);
+
+ OS << " }";
}
-
- // Walk the entire module to find references to unnamed structure and opaque
- // types. This is required for correctness by opaque types (because multiple
-  // uses of an unnamed opaque type need to be referred to by the same ID) and
- // it shrinks complex recursive structure types substantially in some cases.
- TypeFinder(TP, NumberedTypes).Run(*M);
+ if (STy->isPacked())
+ OS << '>';
}
-/// WriteTypeSymbolic - This attempts to write the specified type as a symbolic
-/// type, iff there is an entry in the module's symbol table for the specified
-/// type or one of its component types.
-///
-void llvm::WriteTypeSymbolic(raw_ostream &OS, const Type *Ty, const Module *M) {
- TypePrinting Printer;
- std::vector<const Type*> NumberedTypes;
- AddModuleTypesToPrinter(Printer, NumberedTypes, M);
- Printer.print(Ty, OS);
-}
//===----------------------------------------------------------------------===//
// SlotTracker Class: Enumerate slot numbers for unnamed values
@@ -481,11 +311,11 @@ private:
const Function* TheFunction;
bool FunctionProcessed;
- /// mMap - The TypePlanes map for the module level data.
+ /// mMap - The slot map for the module level data.
ValueMap mMap;
unsigned mNext;
- /// fMap - The TypePlanes map for the function level data.
+ /// fMap - The slot map for the function level data.
ValueMap fMap;
unsigned fNext;
@@ -706,7 +536,7 @@ int SlotTracker::getGlobalSlot(const GlobalValue *V) {
// Check for uninitialized state and do lazy initialization.
initialize();
- // Find the type plane in the module map
+ // Find the value in the module map
ValueMap::iterator MI = mMap.find(V);
return MI == mMap.end() ? -1 : (int)MI->second;
}
@@ -716,7 +546,7 @@ int SlotTracker::getMetadataSlot(const MDNode *N) {
// Check for uninitialized state and do lazy initialization.
initialize();
- // Find the type plane in the module map
+ // Find the MDNode in the module map
mdn_iterator MI = mdnMap.find(N);
return MI == mdnMap.end() ? -1 : (int)MI->second;
}
@@ -978,7 +808,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
// As a special case, print the array as a string if it is an array of
// i8 with ConstantInt values.
//
- const Type *ETy = CA->getType()->getElementType();
+ Type *ETy = CA->getType()->getElementType();
if (CA->isString()) {
Out << "c\"";
PrintEscapedString(CA->getAsString(), Out);
@@ -1035,7 +865,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
if (const ConstantVector *CP = dyn_cast<ConstantVector>(CV)) {
- const Type *ETy = CP->getType()->getElementType();
+ Type *ETy = CP->getType()->getElementType();
assert(CP->getNumOperands() > 0 &&
"Number of operands for a PackedConst must be > 0");
Out << '<';
@@ -1233,8 +1063,8 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
if (Context == 0) Context = getModuleFromVal(V);
TypePrinting TypePrinter;
- std::vector<const Type*> NumberedTypes;
- AddModuleTypesToPrinter(TypePrinter, NumberedTypes, Context);
+ if (Context)
+ TypePrinter.incorporateTypes(*Context);
if (PrintType) {
TypePrinter.print(V->getType(), Out);
Out << ' ';
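
For reference, a hedged usage sketch of WriteAsOperand after this change: pass the module when you have it, so the printer can resolve named struct types; with a null module, struct types fall back to their numbered or hex-address form.

// Print "<type> <operand>" the way an instruction would reference V.
static void printOperandExample(raw_ostream &OS, const Value *V,
                                const Module *M) {
  WriteAsOperand(OS, V, /*PrintType=*/true, M);
  OS << '\n';
}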
@@ -1251,14 +1081,14 @@ class AssemblyWriter {
const Module *TheModule;
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
- std::vector<const Type*> NumberedTypes;
public:
inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
const Module *M,
AssemblyAnnotationWriter *AAW)
: Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
- AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M);
+ if (M)
+ TypePrinter.incorporateTypes(*M);
}
void printMDNodeBody(const MDNode *MD);
@@ -1271,7 +1101,7 @@ public:
void writeAllMDNodes();
- void printTypeSymbolTable(const TypeSymbolTable &ST);
+ void printTypeIdentities();
void printGlobal(const GlobalVariable *GV);
void printAlias(const GlobalAlias *GV);
void printFunction(const Function *F);
@@ -1366,9 +1196,7 @@ void AssemblyWriter::printModule(const Module *M) {
Out << " ]";
}
- // Loop over the symbol table, emitting all id'd types.
- if (!M->getTypeSymbolTable().empty() || !NumberedTypes.empty()) Out << '\n';
- printTypeSymbolTable(M->getTypeSymbolTable());
+ printTypeIdentities();
// Output all globals.
if (!M->global_empty()) Out << '\n';
@@ -1401,7 +1229,25 @@ void AssemblyWriter::printModule(const Module *M) {
}
void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) {
- Out << "!" << NMD->getName() << " = !{";
+ Out << '!';
+ StringRef Name = NMD->getName();
+ if (Name.empty()) {
+ Out << "<empty name> ";
+ } else {
+ if (isalpha(Name[0]) || Name[0] == '-' || Name[0] == '$' ||
+ Name[0] == '.' || Name[0] == '_')
+ Out << Name[0];
+ else
+ Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F);
+ for (unsigned i = 1, e = Name.size(); i != e; ++i) {
+ unsigned char C = Name[i];
+ if (isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_')
+ Out << C;
+ else
+ Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
+ }
+ }
+ Out << " = !{";
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
if (i) Out << ", ";
int Slot = Machine.getMetadataSlot(NMD->getOperand(i));
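
The escaping rule above, restated as a standalone helper for clarity (illustrative only; hexdigit comes from llvm/ADT/StringExtras.h): position 0 admits a letter or one of -$._, later positions also admit digits, and any other byte is emitted as a backslash plus two hex digits.

static void printEscapedMDName(raw_ostream &OS, StringRef Name) {
  assert(!Name.empty() && "caller prints <empty name> for empty names");
  for (unsigned i = 0, e = Name.size(); i != e; ++i) {
    unsigned char C = Name[i];
    bool Plain = (i == 0 ? isalpha(C) : isalnum(C)) ||
                 C == '-' || C == '$' || C == '.' || C == '_';
    if (Plain)
      OS << C;
    else
      OS << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F);
  }
}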
@@ -1508,7 +1354,10 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
const Constant *Aliasee = GA->getAliasee();
- if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
+ if (Aliasee == 0) {
+ TypePrinter.print(GA->getType(), Out);
+ Out << " <<NULL ALIASEE>>";
+ } else if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(Aliasee)) {
TypePrinter.print(GV->getType(), Out);
Out << ' ';
PrintLLVMName(Out, GV);
@@ -1534,26 +1383,40 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) {
Out << '\n';
}
-void AssemblyWriter::printTypeSymbolTable(const TypeSymbolTable &ST) {
+void AssemblyWriter::printTypeIdentities() {
+ if (TypePrinter.NumberedTypes.empty() &&
+ TypePrinter.NamedTypes.empty())
+ return;
+
+ Out << '\n';
+
+  // We know the number assigned to each type and we know the numbering is a
+  // dense assignment.  Convert the map to an index table.
+ std::vector<StructType*> NumberedTypes(TypePrinter.NumberedTypes.size());
+ for (DenseMap<StructType*, unsigned>::iterator I =
+ TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end();
+ I != E; ++I) {
+ assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?");
+ NumberedTypes[I->second] = I->first;
+ }
+
// Emit all numbered types.
for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) {
Out << '%' << i << " = type ";
-
+
// Make sure we print out at least one level of the type structure, so
// that we do not get %2 = type %2
- TypePrinter.printAtLeastOneLevel(NumberedTypes[i], Out);
+ TypePrinter.printStructBody(NumberedTypes[i], Out);
Out << '\n';
}
-
- // Print the named types.
- for (TypeSymbolTable::const_iterator TI = ST.begin(), TE = ST.end();
- TI != TE; ++TI) {
- PrintLLVMName(Out, TI->first, LocalPrefix);
+
+ for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) {
+ PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix);
Out << " = type ";
// Make sure we print out at least one level of the type structure, so
// that we do not get %FILE = type %FILE
- TypePrinter.printAtLeastOneLevel(TI->second, Out);
+ TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out);
Out << '\n';
}
}
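
To make the new output concrete, a hedged sketch under the in-flux struct API of this period (StructType::createNamed/setBody; the names and expected output here are illustrative):

static void typeIdentityDemo(LLVMContext &Ctx) {
  StructType *Pair = StructType::createNamed(Ctx, "pair");
  std::vector<Type*> Fields(2, Type::getInt32Ty(Ctx));
  Pair->setBody(Fields, /*isPacked=*/false);
  // Once a module uses %pair, printTypeIdentities emits lines like:
  //   %pair = type { i32, i32 }
  // while unnamed (but non-anonymous) structs get dense numbers:
  //   %0 = type { i8* }
}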
@@ -1735,18 +1598,6 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) {
if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out);
}
-/// printDebugLoc - Print DebugLoc.
-static void printDebugLoc(const DebugLoc &DL, formatted_raw_ostream &OS) {
- OS << DL.getLine() << ":" << DL.getCol();
- if (MDNode *N = DL.getInlinedAt(getGlobalContext())) {
- DebugLoc IDL = DebugLoc::getFromDILocation(N);
- if (!IDL.isUnknown()) {
- OS << "@";
- printDebugLoc(IDL,OS);
- }
- }
-}
-
/// printInfoComment - Print a little comment after the instruction indicating
/// which slot it occupies.
///
@@ -1754,43 +1605,6 @@ void AssemblyWriter::printInfoComment(const Value &V) {
if (AnnotationWriter) {
AnnotationWriter->printInfoComment(V, Out);
return;
- } else if (EnableDebugInfoComment) {
- bool Padded = false;
- if (const Instruction *I = dyn_cast<Instruction>(&V)) {
- const DebugLoc &DL = I->getDebugLoc();
- if (!DL.isUnknown()) {
- if (!Padded) {
- Out.PadToColumn(50);
- Padded = true;
- Out << ";";
- }
- Out << " [debug line = ";
- printDebugLoc(DL,Out);
- Out << "]";
- }
- if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I)) {
- const MDNode *Var = DDI->getVariable();
- if (!Padded) {
- Out.PadToColumn(50);
- Padded = true;
- Out << ";";
- }
- if (Var && Var->getNumOperands() >= 2)
- if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2)))
- Out << " [debug variable = " << MDS->getString() << "]";
- }
- else if (const DbgValueInst *DVI = dyn_cast<DbgValueInst>(I)) {
- const MDNode *Var = DVI->getVariable();
- if (!Padded) {
- Out.PadToColumn(50);
- Padded = true;
- Out << ";";
- }
- if (Var && Var->getNumOperands() >= 2)
- if (MDString *MDS = dyn_cast_or_null<MDString>(Var->getOperand(2)))
- Out << " [debug variable = " << MDS->getString() << "]";
- }
- }
}
}
@@ -1873,16 +1687,16 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
writeOperand(I.getOperand(i), true);
}
Out << ']';
- } else if (isa<PHINode>(I)) {
+ } else if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
Out << ' ';
TypePrinter.print(I.getType(), Out);
Out << ' ';
- for (unsigned op = 0, Eop = I.getNumOperands(); op < Eop; op += 2) {
+ for (unsigned op = 0, Eop = PN->getNumIncomingValues(); op < Eop; ++op) {
if (op) Out << ", ";
Out << "[ ";
- writeOperand(I.getOperand(op ), false); Out << ", ";
- writeOperand(I.getOperand(op+1), false); Out << " ]";
+ writeOperand(PN->getIncomingValue(op), false); Out << ", ";
+ writeOperand(PN->getIncomingBlock(op), false); Out << " ]";
}
} else if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&I)) {
Out << ' ';
@@ -1916,9 +1730,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
Operand = CI->getCalledValue();
- const PointerType *PTy = cast<PointerType>(Operand->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- const Type *RetTy = FTy->getReturnType();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
const AttrListPtr &PAL = CI->getAttributes();
if (PAL.getRetAttributes() != Attribute::None)
@@ -1949,9 +1763,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
Out << ' ' << Attribute::getAsString(PAL.getFnAttributes());
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
Operand = II->getCalledValue();
- const PointerType *PTy = cast<PointerType>(Operand->getType());
- const FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
- const Type *RetTy = FTy->getReturnType();
+ PointerType *PTy = cast<PointerType>(Operand->getType());
+ FunctionType *FTy = cast<FunctionType>(PTy->getElementType());
+ Type *RetTy = FTy->getReturnType();
const AttrListPtr &PAL = II->getAttributes();
// Print the calling convention being used.
@@ -2034,7 +1848,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// omit the type from all but the first operand. If the instruction has
// different type operands (for example br), then they are all printed.
bool PrintAllTypes = false;
- const Type *TheType = Operand->getType();
+ Type *TheType = Operand->getType();
// Select, Store and ShuffleVector always print all types.
if (isa<SelectInst>(I) || isa<StoreInst>(I) || isa<ShuffleVectorInst>(I)
@@ -2154,7 +1968,15 @@ void Type::print(raw_ostream &OS) const {
OS << "<null Type>";
return;
}
- TypePrinting().print(this, OS);
+ TypePrinting TP;
+ TP.print(const_cast<Type*>(this), OS);
+
+ // If the type is a named struct type, print the body as well.
+ if (StructType *STy = dyn_cast<StructType>(const_cast<Type*>(this)))
+ if (!STy->isAnonymous()) {
+ OS << " = type ";
+ TP.printStructBody(STy, OS);
+ }
}
void Value::print(raw_ostream &ROS, AssemblyAnnotationWriter *AAW) const {
@@ -2210,14 +2032,7 @@ void Value::printCustom(raw_ostream &OS) const {
void Value::dump() const { print(dbgs()); dbgs() << '\n'; }
// Type::dump - allow easy printing of Types from the debugger.
-// This one uses type names from the given context module
-void Type::dump(const Module *Context) const {
- WriteTypeSymbolic(dbgs(), this, Context);
- dbgs() << '\n';
-}
-
-// Type::dump - allow easy printing of Types from the debugger.
-void Type::dump() const { dump(0); }
+void Type::dump() const { print(dbgs()); }
// Module::dump() - Allow printing of Modules from the debugger.
void Module::dump() const { print(dbgs(), 0); }
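
With the context-taking overload gone, Type::dump is now just print(dbgs()), and a named struct prints both its identity and its body per the new Type::print above. A small sketch; note that no trailing newline is appended:

static void dumpTypeExample(Type *Ty) {
  Ty->dump();         // to dbgs(); e.g. %pair = type { i32, i32 }
  Ty->print(outs());  // same text on a caller-chosen stream
}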
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index ee257db..bf6efa1 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -74,6 +74,8 @@ std::string Attribute::getAsString(Attributes Attrs) {
Result += "naked ";
if (Attrs & Attribute::Hotpatch)
Result += "hotpatch ";
+ if (Attrs & Attribute::NonLazyBind)
+ Result += "nonlazybind ";
if (Attrs & Attribute::StackAlignment) {
Result += "alignstack(";
Result += utostr(Attribute::getStackAlignmentFromAttrs(Attrs));
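
A usage sketch for the new attribute bit, assuming the existing Function attribute helpers (this snippet is illustrative, not from the patch):

static void markNonLazyBind(Function &F) {
  F.addFnAttr(Attribute::NonLazyBind);       // prints as "nonlazybind"
  assert(F.hasFnAttr(Attribute::NonLazyBind));
}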
diff --git a/lib/VMCore/AutoUpgrade.cpp b/lib/VMCore/AutoUpgrade.cpp
index f8f15ca..9e93ff3 100644
--- a/lib/VMCore/AutoUpgrade.cpp
+++ b/lib/VMCore/AutoUpgrade.cpp
@@ -28,558 +28,77 @@ using namespace llvm;
static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
assert(F && "Illegal to upgrade a non-existent Function.");
- // Get the Function's name.
- const std::string& Name = F->getName();
-
- // Convenience
- const FunctionType *FTy = F->getFunctionType();
-
// Quickly eliminate it, if it's not a candidate.
- if (Name.length() <= 8 || Name[0] != 'l' || Name[1] != 'l' ||
- Name[2] != 'v' || Name[3] != 'm' || Name[4] != '.')
+ StringRef Name = F->getName();
+ if (Name.size() <= 8 || !Name.startswith("llvm."))
return false;
+ Name = Name.substr(5); // Strip off "llvm."
+ const FunctionType *FTy = F->getFunctionType();
Module *M = F->getParent();
- switch (Name[5]) {
+
+ switch (Name[0]) {
default: break;
- case 'a':
- // This upgrades the llvm.atomic.lcs, llvm.atomic.las, llvm.atomic.lss,
-    // and atomics with default address spaces to their new function names
-    // (e.g. llvm.atomic.add.i32 => llvm.atomic.add.i32.p0i32)
- if (Name.compare(5,7,"atomic.",7) == 0) {
- if (Name.compare(12,3,"lcs",3) == 0) {
- std::string::size_type delim = Name.find('.',12);
- F->setName("llvm.atomic.cmp.swap" + Name.substr(delim) +
- ".p0" + Name.substr(delim+1));
- NewFn = F;
- return true;
- }
- else if (Name.compare(12,3,"las",3) == 0) {
- std::string::size_type delim = Name.find('.',12);
- F->setName("llvm.atomic.load.add"+Name.substr(delim)
- + ".p0" + Name.substr(delim+1));
- NewFn = F;
- return true;
- }
- else if (Name.compare(12,3,"lss",3) == 0) {
- std::string::size_type delim = Name.find('.',12);
- F->setName("llvm.atomic.load.sub"+Name.substr(delim)
- + ".p0" + Name.substr(delim+1));
- NewFn = F;
- return true;
- }
- else if (Name.rfind(".p") == std::string::npos) {
- // We don't have an address space qualifier so this has be upgraded
- // to the new name. Copy the type name at the end of the intrinsic
- // and add to it
- std::string::size_type delim = Name.find_last_of('.');
- assert(delim != std::string::npos && "can not find type");
- F->setName(Name + ".p0" + Name.substr(delim+1));
- NewFn = F;
- return true;
- }
- } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
- if (((Name.compare(14, 5, "vmovl", 5) == 0 ||
- Name.compare(14, 5, "vaddl", 5) == 0 ||
- Name.compare(14, 5, "vsubl", 5) == 0 ||
- Name.compare(14, 5, "vaddw", 5) == 0 ||
- Name.compare(14, 5, "vsubw", 5) == 0 ||
- Name.compare(14, 5, "vmlal", 5) == 0 ||
- Name.compare(14, 5, "vmlsl", 5) == 0 ||
- Name.compare(14, 5, "vabdl", 5) == 0 ||
- Name.compare(14, 5, "vabal", 5) == 0) &&
- (Name.compare(19, 2, "s.", 2) == 0 ||
- Name.compare(19, 2, "u.", 2) == 0)) ||
-
- (Name.compare(14, 4, "vaba", 4) == 0 &&
- (Name.compare(18, 2, "s.", 2) == 0 ||
- Name.compare(18, 2, "u.", 2) == 0)) ||
-
- (Name.compare(14, 6, "vmovn.", 6) == 0)) {
-
- // Calls to these are transformed into IR without intrinsics.
- NewFn = 0;
- return true;
- }
- // Old versions of NEON ld/st intrinsics are missing alignment arguments.
- bool isVLd = (Name.compare(14, 3, "vld", 3) == 0);
- bool isVSt = (Name.compare(14, 3, "vst", 3) == 0);
- if (isVLd || isVSt) {
- unsigned NumVecs = Name.at(17) - '0';
- if (NumVecs == 0 || NumVecs > 4)
- return false;
- bool isLaneOp = (Name.compare(18, 5, "lane.", 5) == 0);
- if (!isLaneOp && Name.at(18) != '.')
- return false;
- unsigned ExpectedArgs = 2; // for the address and alignment
- if (isVSt || isLaneOp)
- ExpectedArgs += NumVecs;
- if (isLaneOp)
- ExpectedArgs += 1; // for the lane number
- unsigned NumP = FTy->getNumParams();
- if (NumP != ExpectedArgs - 1)
- return false;
-
- // Change the name of the old (bad) intrinsic, because
- // its type is incorrect, but we cannot overload that name.
- F->setName("");
-
- // One argument is missing: add the alignment argument.
- std::vector<const Type*> NewParams;
- for (unsigned p = 0; p < NumP; ++p)
- NewParams.push_back(FTy->getParamType(p));
- NewParams.push_back(Type::getInt32Ty(F->getContext()));
- FunctionType *NewFTy = FunctionType::get(FTy->getReturnType(),
- NewParams, false);
- NewFn = cast<Function>(M->getOrInsertFunction(Name, NewFTy));
- return true;
- }
- }
- break;
- case 'b':
- // This upgrades the name of the llvm.bswap intrinsic function to only use
- // a single type name for overloading. We only care about the old format
- // 'llvm.bswap.i*.i*', so check for 'bswap.' and then for there being
- // a '.' after 'bswap.'
- if (Name.compare(5,6,"bswap.",6) == 0) {
- std::string::size_type delim = Name.find('.',11);
-
- if (delim != std::string::npos) {
- // Construct the new name as 'llvm.bswap' + '.i*'
- F->setName(Name.substr(0,10)+Name.substr(delim));
- NewFn = F;
- return true;
- }
- }
- break;
+ case 'p':
+ // This upgrades the llvm.prefetch intrinsic to accept one more parameter,
+    // which is an instruction / data cache identifier. The old version only
+ // implicitly accepted the data version.
+ if (Name == "prefetch") {
+ // Don't do anything if it has the correct number of arguments already
+ if (FTy->getNumParams() == 4)
+ break;
- case 'c':
- // We only want to fix the 'llvm.ct*' intrinsics which do not have the
- // correct return type, so we check for the name, and then check if the
- // return type does not match the parameter type.
- if ( (Name.compare(5,5,"ctpop",5) == 0 ||
- Name.compare(5,4,"ctlz",4) == 0 ||
- Name.compare(5,4,"cttz",4) == 0) &&
- FTy->getReturnType() != FTy->getParamType(0)) {
- // We first need to change the name of the old (bad) intrinsic, because
- // its type is incorrect, but we cannot overload that name. We
- // arbitrarily unique it here allowing us to construct a correctly named
+ assert(FTy->getNumParams() == 3 && "old prefetch takes 3 args!");
+ // We first need to change the name of the old (bad) intrinsic, because
+ // its type is incorrect, but we cannot overload that name. We
+ // arbitrarily unique it here allowing us to construct a correctly named
// and typed function below.
+ std::string NameTmp = F->getName();
F->setName("");
-
- // Now construct the new intrinsic with the correct name and type. We
- // leave the old function around in order to query its type, whatever it
- // may be, and correctly convert up to the new type.
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
+ NewFn = cast<Function>(M->getOrInsertFunction(NameTmp,
+ FTy->getReturnType(),
FTy->getParamType(0),
- FTy->getParamType(0),
- (Type *)0));
+ FTy->getParamType(1),
+ FTy->getParamType(2),
+ FTy->getParamType(2),
+ (Type*)0));
return true;
}
- break;
- case 'e':
- // The old llvm.eh.selector.i32 is equivalent to the new llvm.eh.selector.
- if (Name.compare("llvm.eh.selector.i32") == 0) {
- F->setName("llvm.eh.selector");
- NewFn = F;
- return true;
- }
- // The old llvm.eh.typeid.for.i32 is equivalent to llvm.eh.typeid.for.
- if (Name.compare("llvm.eh.typeid.for.i32") == 0) {
- F->setName("llvm.eh.typeid.for");
- NewFn = F;
- return true;
- }
- // Convert the old llvm.eh.selector.i64 to a call to llvm.eh.selector.
- if (Name.compare("llvm.eh.selector.i64") == 0) {
- NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_selector);
- return true;
- }
- // Convert the old llvm.eh.typeid.for.i64 to a call to llvm.eh.typeid.for.
- if (Name.compare("llvm.eh.typeid.for.i64") == 0) {
- NewFn = Intrinsic::getDeclaration(M, Intrinsic::eh_typeid_for);
- return true;
- }
break;
-
- case 'm': {
- // This upgrades the llvm.memcpy, llvm.memmove, and llvm.memset to the
- // new format that allows overloading the pointer for different address
- // space (e.g., llvm.memcpy.i16 => llvm.memcpy.p0i8.p0i8.i16)
- const char* NewFnName = NULL;
- if (Name.compare(5,8,"memcpy.i",8) == 0) {
- if (Name[13] == '8')
- NewFnName = "llvm.memcpy.p0i8.p0i8.i8";
- else if (Name.compare(13,2,"16") == 0)
- NewFnName = "llvm.memcpy.p0i8.p0i8.i16";
- else if (Name.compare(13,2,"32") == 0)
- NewFnName = "llvm.memcpy.p0i8.p0i8.i32";
- else if (Name.compare(13,2,"64") == 0)
- NewFnName = "llvm.memcpy.p0i8.p0i8.i64";
- } else if (Name.compare(5,9,"memmove.i",9) == 0) {
- if (Name[14] == '8')
- NewFnName = "llvm.memmove.p0i8.p0i8.i8";
- else if (Name.compare(14,2,"16") == 0)
- NewFnName = "llvm.memmove.p0i8.p0i8.i16";
- else if (Name.compare(14,2,"32") == 0)
- NewFnName = "llvm.memmove.p0i8.p0i8.i32";
- else if (Name.compare(14,2,"64") == 0)
- NewFnName = "llvm.memmove.p0i8.p0i8.i64";
- }
- else if (Name.compare(5,8,"memset.i",8) == 0) {
- if (Name[13] == '8')
- NewFnName = "llvm.memset.p0i8.i8";
- else if (Name.compare(13,2,"16") == 0)
- NewFnName = "llvm.memset.p0i8.i16";
- else if (Name.compare(13,2,"32") == 0)
- NewFnName = "llvm.memset.p0i8.i32";
- else if (Name.compare(13,2,"64") == 0)
- NewFnName = "llvm.memset.p0i8.i64";
- }
+ case 'x': {
+ const char *NewFnName = NULL;
+ // This fixes the poorly named crc32 intrinsics.
+ if (Name == "x86.sse42.crc32.8")
+ NewFnName = "llvm.x86.sse42.crc32.32.8";
+ else if (Name == "x86.sse42.crc32.16")
+ NewFnName = "llvm.x86.sse42.crc32.32.16";
+ else if (Name == "x86.sse42.crc32.32")
+ NewFnName = "llvm.x86.sse42.crc32.32.32";
+ else if (Name == "x86.sse42.crc64.8")
+ NewFnName = "llvm.x86.sse42.crc32.64.8";
+ else if (Name == "x86.sse42.crc64.64")
+ NewFnName = "llvm.x86.sse42.crc32.64.64";
+
if (NewFnName) {
- NewFn = cast<Function>(M->getOrInsertFunction(NewFnName,
- FTy->getReturnType(),
- FTy->getParamType(0),
- FTy->getParamType(1),
- FTy->getParamType(2),
- FTy->getParamType(3),
- Type::getInt1Ty(F->getContext()),
- (Type *)0));
+ F->setName(NewFnName);
+ NewFn = F;
return true;
}
- break;
- }
- case 'p':
- // This upgrades the llvm.part.select overloaded intrinsic names to only
- // use one type specifier in the name. We only care about the old format
- // 'llvm.part.select.i*.i*', and solve as above with bswap.
- if (Name.compare(5,12,"part.select.",12) == 0) {
- std::string::size_type delim = Name.find('.',17);
-
- if (delim != std::string::npos) {
- // Construct a new name as 'llvm.part.select' + '.i*'
- F->setName(Name.substr(0,16)+Name.substr(delim));
- NewFn = F;
- return true;
- }
- break;
- }
- // This upgrades the llvm.part.set intrinsics similarly as above, however
- // we care about 'llvm.part.set.i*.i*.i*', but only the first two types
- // must match. There is an additional type specifier after these two
- // matching types that we must retain when upgrading. Thus, we require
- // finding 2 periods, not just one, after the intrinsic name.
- if (Name.compare(5,9,"part.set.",9) == 0) {
- std::string::size_type delim = Name.find('.',14);
-
- if (delim != std::string::npos &&
- Name.find('.',delim+1) != std::string::npos) {
- // Construct a new name as 'llvm.part.select' + '.i*.i*'
- F->setName(Name.substr(0,13)+Name.substr(delim));
- NewFn = F;
- return true;
- }
- break;
- }
-
- break;
- case 'x':
- // This fixes the poorly named crc32 intrinsics
- if (Name.compare(5, 13, "x86.sse42.crc", 13) == 0) {
- const char* NewFnName = NULL;
- if (Name.compare(18, 2, "32", 2) == 0) {
- if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) {
- NewFnName = "llvm.x86.sse42.crc32.32.8";
- } else if (Name.compare(20, 3, ".16") == 0 && Name.length() == 23) {
- NewFnName = "llvm.x86.sse42.crc32.32.16";
- } else if (Name.compare(20, 3, ".32") == 0 && Name.length() == 23) {
- NewFnName = "llvm.x86.sse42.crc32.32.32";
- }
- }
- else if (Name.compare(18, 2, "64", 2) == 0) {
- if (Name.compare(20, 2, ".8") == 0 && Name.length() == 22) {
- NewFnName = "llvm.x86.sse42.crc32.64.8";
- } else if (Name.compare(20, 3, ".64") == 0 && Name.length() == 23) {
- NewFnName = "llvm.x86.sse42.crc32.64.64";
- }
- }
- if (NewFnName) {
- F->setName(NewFnName);
- NewFn = F;
- return true;
- }
- }
-
- // This fixes all MMX shift intrinsic instructions to take a
- // x86_mmx instead of a v1i64, v2i32, v4i16, or v8i8.
- if (Name.compare(5, 8, "x86.mmx.", 8) == 0) {
- const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
-
- if (Name.compare(13, 4, "padd", 4) == 0 ||
- Name.compare(13, 4, "psub", 4) == 0 ||
- Name.compare(13, 4, "pmul", 4) == 0 ||
- Name.compare(13, 5, "pmadd", 5) == 0 ||
- Name.compare(13, 4, "pand", 4) == 0 ||
- Name.compare(13, 3, "por", 3) == 0 ||
- Name.compare(13, 4, "pxor", 4) == 0 ||
- Name.compare(13, 4, "pavg", 4) == 0 ||
- Name.compare(13, 4, "pmax", 4) == 0 ||
- Name.compare(13, 4, "pmin", 4) == 0 ||
- Name.compare(13, 4, "psad", 4) == 0 ||
- Name.compare(13, 4, "psll", 4) == 0 ||
- Name.compare(13, 4, "psrl", 4) == 0 ||
- Name.compare(13, 4, "psra", 4) == 0 ||
- Name.compare(13, 4, "pack", 4) == 0 ||
- Name.compare(13, 6, "punpck", 6) == 0 ||
- Name.compare(13, 4, "pcmp", 4) == 0) {
- assert(FTy->getNumParams() == 2 && "MMX intrinsic takes 2 args!");
- const Type *SecondParamTy = X86_MMXTy;
-
- if (Name.compare(13, 5, "pslli", 5) == 0 ||
- Name.compare(13, 5, "psrli", 5) == 0 ||
- Name.compare(13, 5, "psrai", 5) == 0)
- SecondParamTy = FTy->getParamType(1);
-
- // Don't do anything if it has the correct types.
- if (FTy->getReturnType() == X86_MMXTy &&
- FTy->getParamType(0) == X86_MMXTy &&
- FTy->getParamType(1) == SecondParamTy)
- break;
-
- // We first need to change the name of the old (bad) intrinsic, because
- // its type is incorrect, but we cannot overload that name. We
- // arbitrarily unique it here allowing us to construct a correctly named
- // and typed function below.
- F->setName("");
-
- // Now construct the new intrinsic with the correct name and type. We
- // leave the old function around in order to query its type, whatever it
- // may be, and correctly convert up to the new type.
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- X86_MMXTy, X86_MMXTy,
- SecondParamTy, (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 8, "maskmovq", 8) == 0) {
- // Don't do anything if it has the correct types.
- if (FTy->getParamType(0) == X86_MMXTy &&
- FTy->getParamType(1) == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- FTy->getReturnType(),
- X86_MMXTy,
- X86_MMXTy,
- FTy->getParamType(2),
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 8, "pmovmskb", 8) == 0) {
- if (FTy->getParamType(0) == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- FTy->getReturnType(),
- X86_MMXTy,
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 5, "movnt", 5) == 0) {
- if (FTy->getParamType(1) == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- FTy->getReturnType(),
- FTy->getParamType(0),
- X86_MMXTy,
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 7, "palignr", 7) == 0) {
- if (FTy->getReturnType() == X86_MMXTy &&
- FTy->getParamType(0) == X86_MMXTy &&
- FTy->getParamType(1) == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- X86_MMXTy,
- X86_MMXTy,
- X86_MMXTy,
- FTy->getParamType(2),
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 5, "pextr", 5) == 0) {
- if (FTy->getParamType(0) == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- FTy->getReturnType(),
- X86_MMXTy,
- FTy->getParamType(1),
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 5, "pinsr", 5) == 0) {
- if (FTy->getReturnType() == X86_MMXTy &&
- FTy->getParamType(0) == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- X86_MMXTy,
- X86_MMXTy,
- FTy->getParamType(1),
- FTy->getParamType(2),
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 12, "cvtsi32.si64", 12) == 0) {
- if (FTy->getReturnType() == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- X86_MMXTy,
- FTy->getParamType(0),
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 12, "cvtsi64.si32", 12) == 0) {
- if (FTy->getParamType(0) == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- FTy->getReturnType(),
- X86_MMXTy,
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 8, "vec.init", 8) == 0) {
- if (FTy->getReturnType() == X86_MMXTy)
- break;
-
- F->setName("");
-
- if (Name.compare(21, 2, ".b", 2) == 0)
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- X86_MMXTy,
- FTy->getParamType(0),
- FTy->getParamType(1),
- FTy->getParamType(2),
- FTy->getParamType(3),
- FTy->getParamType(4),
- FTy->getParamType(5),
- FTy->getParamType(6),
- FTy->getParamType(7),
- (Type*)0));
- else if (Name.compare(21, 2, ".w", 2) == 0)
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- X86_MMXTy,
- FTy->getParamType(0),
- FTy->getParamType(1),
- FTy->getParamType(2),
- FTy->getParamType(3),
- (Type*)0));
- else if (Name.compare(21, 2, ".d", 2) == 0)
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- X86_MMXTy,
- FTy->getParamType(0),
- FTy->getParamType(1),
- (Type*)0));
- return true;
- }
-
-
- if (Name.compare(13, 9, "vec.ext.d", 9) == 0) {
- if (FTy->getReturnType() == X86_MMXTy &&
- FTy->getParamType(0) == X86_MMXTy)
- break;
-
- F->setName("");
- NewFn = cast<Function>(M->getOrInsertFunction(Name,
- X86_MMXTy,
- X86_MMXTy,
- FTy->getParamType(1),
- (Type*)0));
- return true;
- }
-
- if (Name.compare(13, 9, "emms", 4) == 0 ||
- Name.compare(13, 9, "femms", 5) == 0) {
- NewFn = 0;
- break;
- }
-
- // We really shouldn't get here ever.
- assert(0 && "Invalid MMX intrinsic!");
- break;
- } else if (Name.compare(5,17,"x86.sse2.loadh.pd",17) == 0 ||
- Name.compare(5,17,"x86.sse2.loadl.pd",17) == 0 ||
- Name.compare(5,16,"x86.sse2.movl.dq",16) == 0 ||
- Name.compare(5,15,"x86.sse2.movs.d",15) == 0 ||
- Name.compare(5,16,"x86.sse2.shuf.pd",16) == 0 ||
- Name.compare(5,18,"x86.sse2.unpckh.pd",18) == 0 ||
- Name.compare(5,18,"x86.sse2.unpckl.pd",18) == 0 ||
- Name.compare(5,20,"x86.sse2.punpckh.qdq",20) == 0 ||
- Name.compare(5,20,"x86.sse2.punpckl.qdq",20) == 0) {
- // Calls to these intrinsics are transformed into ShuffleVector's.
- NewFn = 0;
- return true;
- } else if (Name.compare(5, 16, "x86.sse41.pmulld", 16) == 0) {
- // Calls to these intrinsics are transformed into vector multiplies.
- NewFn = 0;
+ // Calls to these instructions are transformed into unaligned loads.
+ if (Name == "x86.sse.loadu.ps" || Name == "x86.sse2.loadu.dq" ||
+ Name == "x86.sse2.loadu.pd")
return true;
- } else if (Name.compare(5, 18, "x86.ssse3.palign.r", 18) == 0 ||
- Name.compare(5, 22, "x86.ssse3.palign.r.128", 22) == 0) {
- // Calls to these intrinsics are transformed into vector shuffles, shifts,
- // or 0.
- NewFn = 0;
- return true;
- } else if (Name.compare(5, 16, "x86.sse.loadu.ps", 16) == 0 ||
- Name.compare(5, 17, "x86.sse2.loadu.dq", 17) == 0 ||
- Name.compare(5, 17, "x86.sse2.loadu.pd", 17) == 0) {
- // Calls to these instructions are transformed into unaligned loads.
- NewFn = 0;
- return true;
- } else if (Name.compare(5, 16, "x86.sse.movnt.ps", 16) == 0 ||
- Name.compare(5, 17, "x86.sse2.movnt.dq", 17) == 0 ||
- Name.compare(5, 17, "x86.sse2.movnt.pd", 17) == 0 ||
- Name.compare(5, 17, "x86.sse2.movnt.i", 16) == 0) {
- // Calls to these instructions are transformed into nontemporal stores.
- NewFn = 0;
- return true;
- } else if (Name.compare(5, 17, "x86.ssse3.pshuf.w", 17) == 0) {
- // This is an SSE/MMX instruction.
- const Type *X86_MMXTy = VectorType::getX86_MMXTy(FTy->getContext());
- NewFn =
- cast<Function>(M->getOrInsertFunction("llvm.x86.sse.pshuf.w",
- X86_MMXTy,
- X86_MMXTy,
- Type::getInt8Ty(F->getContext()),
- (Type*)0));
+
+ // Calls to these instructions are transformed into nontemporal stores.
+ if (Name == "x86.sse.movnt.ps" || Name == "x86.sse2.movnt.dq" ||
+ Name == "x86.sse2.movnt.pd" || Name == "x86.sse2.movnt.i")
return true;
- }
break;
}
+ }
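
The rewrite trades offset-based std::string::compare calls for StringRef tests, which are both safer and easier to read. The pattern in isolation (isOldCRC32Intrinsic is an illustrative name):

static bool isOldCRC32Intrinsic(StringRef Name) {
  if (!Name.startswith("llvm."))
    return false;
  Name = Name.substr(5);                   // mirror the dispatch above
  return Name.startswith("x86.sse42.crc32.") ||
         Name.startswith("x86.sse42.crc64.");
}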
// This may not belong here. This function is effectively being overloaded
// to both detect an intrinsic which needs upgrading, and to provide the
@@ -601,105 +120,10 @@ bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) {
}
bool llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
- StringRef Name(GV->getName());
-
- // We are only upgrading one symbol here.
- if (Name == ".llvm.eh.catch.all.value") {
- GV->setName("llvm.eh.catch.all.value");
- return true;
- }
-
+ // Nothing to do yet.
return false;
}
-/// ExtendNEONArgs - For NEON "long" and "wide" operations, where the results
-/// have vector elements twice as big as one or both source operands, do the
-/// sign- or zero-extension that used to be handled by intrinsics. The
-/// extended values are returned via V0 and V1.
-static void ExtendNEONArgs(CallInst *CI, Value *Arg0, Value *Arg1,
- Value *&V0, Value *&V1) {
- Function *F = CI->getCalledFunction();
- const std::string& Name = F->getName();
- bool isLong = (Name.at(18) == 'l');
- bool isSigned = (Name.at(19) == 's');
-
- if (isSigned) {
- if (isLong)
- V0 = new SExtInst(Arg0, CI->getType(), "", CI);
- else
- V0 = Arg0;
- V1 = new SExtInst(Arg1, CI->getType(), "", CI);
- } else {
- if (isLong)
- V0 = new ZExtInst(Arg0, CI->getType(), "", CI);
- else
- V0 = Arg0;
- V1 = new ZExtInst(Arg1, CI->getType(), "", CI);
- }
-}
-
-/// CallVABD - As part of expanding a call to one of the old NEON vabdl, vaba,
-/// or vabal intrinsics, construct a call to a vabd intrinsic. Examine the
-/// name of the old intrinsic to determine whether to use a signed or unsigned
-/// vabd intrinsic. Get the type from the old call instruction, adjusted for
-/// half-size vector elements if the old intrinsic was vabdl or vabal.
-static Instruction *CallVABD(CallInst *CI, Value *Arg0, Value *Arg1) {
- Function *F = CI->getCalledFunction();
- const std::string& Name = F->getName();
- bool isLong = (Name.at(18) == 'l');
- bool isSigned = (Name.at(isLong ? 19 : 18) == 's');
-
- Intrinsic::ID intID;
- if (isSigned)
- intID = Intrinsic::arm_neon_vabds;
- else
- intID = Intrinsic::arm_neon_vabdu;
-
- const Type *Ty = CI->getType();
- if (isLong)
- Ty = VectorType::getTruncatedElementVectorType(cast<const VectorType>(Ty));
-
- Function *VABD = Intrinsic::getDeclaration(F->getParent(), intID, &Ty, 1);
- Value *Operands[2];
- Operands[0] = Arg0;
- Operands[1] = Arg1;
- return CallInst::Create(VABD, Operands, Operands+2,
- "upgraded."+CI->getName(), CI);
-}
-
-/// ConstructNewCallInst - Construct a new CallInst with the signature of NewFn.
-static void ConstructNewCallInst(Function *NewFn, CallInst *OldCI,
- Value **Operands, unsigned NumOps,
- bool AssignName = true) {
- // Construct a new CallInst.
- CallInst *NewCI =
- CallInst::Create(NewFn, Operands, Operands + NumOps,
- AssignName ? "upgraded." + OldCI->getName() : "", OldCI);
-
- NewCI->setTailCall(OldCI->isTailCall());
- NewCI->setCallingConv(OldCI->getCallingConv());
-
- // Handle any uses of the old CallInst. If the type has changed, add a cast.
- if (!OldCI->use_empty()) {
- if (OldCI->getType() != NewCI->getType()) {
- Function *OldFn = OldCI->getCalledFunction();
- CastInst *RetCast =
- CastInst::Create(CastInst::getCastOpcode(NewCI, true,
- OldFn->getReturnType(), true),
- NewCI, OldFn->getReturnType(), NewCI->getName(),OldCI);
-
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- OldCI->replaceAllUsesWith(RetCast);
- } else {
- OldCI->replaceAllUsesWith(NewCI);
- }
- }
-
- // Clean up the old call now that it has been completely upgraded.
- OldCI->eraseFromParent();
-}
-
// UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
// upgraded intrinsic. All argument and return casting must be provided in
// order to seamlessly integrate with existing context.
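
A hedged sketch of the usual driver for these two entry points, mirroring what the bitcode reader does (upgradeModuleIntrinsics is an illustrative name; exact housekeeping may differ):

static void upgradeModuleIntrinsics(Module &M) {
  for (Module::iterator I = M.begin(), E = M.end(); I != E; ) {
    Function *F = &*I++;                   // advance first: F may be erased
    Function *NewFn = 0;
    if (!UpgradeIntrinsicFunction(F, NewFn))
      continue;
    for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
         UI != UE; )
      if (CallInst *CI = dyn_cast<CallInst>(*UI++))
        UpgradeIntrinsicCall(CI, NewFn);   // may erase CI
    if (NewFn && NewFn != F)
      F->eraseFromParent();                // old declaration is now dead
  }
}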
@@ -711,284 +135,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
assert(F && "CallInst has no function associated with it.");
if (!NewFn) {
- // Get the Function's name.
- const std::string& Name = F->getName();
-
- // Upgrade ARM NEON intrinsics.
- if (Name.compare(5, 9, "arm.neon.", 9) == 0) {
- Instruction *NewI;
- Value *V0, *V1;
- if (Name.compare(14, 7, "vmovls.", 7) == 0) {
- NewI = new SExtInst(CI->getArgOperand(0), CI->getType(),
- "upgraded." + CI->getName(), CI);
- } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) {
- NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(),
- "upgraded." + CI->getName(), CI);
- } else if (Name.compare(14, 4, "vadd", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
- NewI = BinaryOperator::CreateAdd(V0, V1, "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 4, "vsub", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
- NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
- } else if (Name.compare(14, 4, "vmul", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(0), CI->getArgOperand(1), V0, V1);
- NewI = BinaryOperator::CreateMul(V0, V1,"upgraded."+CI->getName(),CI);
- } else if (Name.compare(14, 4, "vmla", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
- Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
- NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), MulI,
- "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 4, "vmls", 4) == 0) {
- ExtendNEONArgs(CI, CI->getArgOperand(1), CI->getArgOperand(2), V0, V1);
- Instruction *MulI = BinaryOperator::CreateMul(V0, V1, "", CI);
- NewI = BinaryOperator::CreateSub(CI->getArgOperand(0), MulI,
- "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 4, "vabd", 4) == 0) {
- NewI = CallVABD(CI, CI->getArgOperand(0), CI->getArgOperand(1));
- NewI = new ZExtInst(NewI, CI->getType(), "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 4, "vaba", 4) == 0) {
- NewI = CallVABD(CI, CI->getArgOperand(1), CI->getArgOperand(2));
- if (Name.at(18) == 'l')
- NewI = new ZExtInst(NewI, CI->getType(), "", CI);
- NewI = BinaryOperator::CreateAdd(CI->getArgOperand(0), NewI,
- "upgraded."+CI->getName(), CI);
- } else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
- NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
- "upgraded." + CI->getName(), CI);
- } else {
- llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
- }
- // Replace any uses of the old CallInst.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(NewI);
- CI->eraseFromParent();
- return;
- }
-
- bool isLoadH = false, isLoadL = false, isMovL = false;
- bool isMovSD = false, isShufPD = false;
- bool isUnpckhPD = false, isUnpcklPD = false;
- bool isPunpckhQPD = false, isPunpcklQPD = false;
- if (F->getName() == "llvm.x86.sse2.loadh.pd")
- isLoadH = true;
- else if (F->getName() == "llvm.x86.sse2.loadl.pd")
- isLoadL = true;
- else if (F->getName() == "llvm.x86.sse2.movl.dq")
- isMovL = true;
- else if (F->getName() == "llvm.x86.sse2.movs.d")
- isMovSD = true;
- else if (F->getName() == "llvm.x86.sse2.shuf.pd")
- isShufPD = true;
- else if (F->getName() == "llvm.x86.sse2.unpckh.pd")
- isUnpckhPD = true;
- else if (F->getName() == "llvm.x86.sse2.unpckl.pd")
- isUnpcklPD = true;
- else if (F->getName() == "llvm.x86.sse2.punpckh.qdq")
- isPunpckhQPD = true;
- else if (F->getName() == "llvm.x86.sse2.punpckl.qdq")
- isPunpcklQPD = true;
-
- if (isLoadH || isLoadL || isMovL || isMovSD || isShufPD ||
- isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
- std::vector<Constant*> Idxs;
- Value *Op0 = CI->getArgOperand(0);
- ShuffleVectorInst *SI = NULL;
- if (isLoadH || isLoadL) {
- Value *Op1 = UndefValue::get(Op0->getType());
- Value *Addr = new BitCastInst(CI->getArgOperand(1),
- Type::getDoublePtrTy(C),
- "upgraded.", CI);
- Value *Load = new LoadInst(Addr, "upgraded.", false, 8, CI);
- Value *Idx = ConstantInt::get(Type::getInt32Ty(C), 0);
- Op1 = InsertElementInst::Create(Op1, Load, Idx, "upgraded.", CI);
-
- if (isLoadH) {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- } else {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- }
- Value *Mask = ConstantVector::get(Idxs);
- SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
- } else if (isMovL) {
- Constant *Zero = ConstantInt::get(Type::getInt32Ty(C), 0);
- Idxs.push_back(Zero);
- Idxs.push_back(Zero);
- Idxs.push_back(Zero);
- Idxs.push_back(Zero);
- Value *ZeroV = ConstantVector::get(Idxs);
-
- Idxs.clear();
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 4));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 5));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
- Value *Mask = ConstantVector::get(Idxs);
- SI = new ShuffleVectorInst(ZeroV, Op0, Mask, "upgraded.", CI);
- } else if (isMovSD ||
- isUnpckhPD || isUnpcklPD || isPunpckhQPD || isPunpcklQPD) {
- Value *Op1 = CI->getArgOperand(1);
- if (isMovSD) {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- } else if (isUnpckhPD || isPunpckhQPD) {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 3));
- } else {
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), 2));
- }
- Value *Mask = ConstantVector::get(Idxs);
- SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
- } else if (isShufPD) {
- Value *Op1 = CI->getArgOperand(1);
- unsigned MaskVal =
- cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C), MaskVal & 1));
- Idxs.push_back(ConstantInt::get(Type::getInt32Ty(C),
- ((MaskVal >> 1) & 1)+2));
- Value *Mask = ConstantVector::get(Idxs);
- SI = new ShuffleVectorInst(Op0, Op1, Mask, "upgraded.", CI);
- }
-
- assert(SI && "Unexpected!");
-
- // Handle any uses of the old CallInst.
- if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(SI);
-
- // Clean up the old call now that it has been completely upgraded.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.sse41.pmulld") {
- // Upgrade this set of intrinsics into vector multiplies.
- Instruction *Mul = BinaryOperator::CreateMul(CI->getArgOperand(0),
- CI->getArgOperand(1),
- CI->getName(),
- CI);
- // Fix up all the uses with our new multiply.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Mul);
-
- // Remove upgraded multiply.
- CI->eraseFromParent();
- } else if (F->getName() == "llvm.x86.ssse3.palign.r") {
- Value *Op1 = CI->getArgOperand(0);
- Value *Op2 = CI->getArgOperand(1);
- Value *Op3 = CI->getArgOperand(2);
- unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
- Value *Rep;
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- // If palignr is shifting the pair of input vectors less than 9 bytes,
- // emit a shuffle instruction.
- if (shiftVal <= 8) {
- const Type *IntTy = Type::getInt32Ty(C);
- const Type *EltTy = Type::getInt8Ty(C);
- const Type *VecTy = VectorType::get(EltTy, 8);
-
- Op2 = Builder.CreateBitCast(Op2, VecTy);
- Op1 = Builder.CreateBitCast(Op1, VecTy);
-
- llvm::SmallVector<llvm::Constant*, 8> Indices;
- for (unsigned i = 0; i != 8; ++i)
- Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
- Value *SV = ConstantVector::get(Indices);
- Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
- Rep = Builder.CreateBitCast(Rep, F->getReturnType());
- }
-
- // If palignr is shifting the pair of input vectors more than 8 but less
- // than 16 bytes, emit a logical right shift of the destination.
- else if (shiftVal < 16) {
- // MMX has these as 1 x i64 vectors for some odd optimization reasons.
- const Type *EltTy = Type::getInt64Ty(C);
- const Type *VecTy = VectorType::get(EltTy, 1);
-
- Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
- Op2 = ConstantInt::get(VecTy, (shiftVal-8) * 8);
-
-      // Get the MMX psrl.q intrinsic declaration and call it to do the shift.
- Function *I =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_mmx_psrl_q);
- Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
- }
-
-    // If palignr is shifting the pair of vectors 16 bytes or more, emit zero.
- else {
- Rep = Constant::getNullValue(F->getReturnType());
- }
-
- // Replace any uses with our new instruction.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Rep);
-
- // Remove upgraded instruction.
- CI->eraseFromParent();
-
- } else if (F->getName() == "llvm.x86.ssse3.palign.r.128") {
- Value *Op1 = CI->getArgOperand(0);
- Value *Op2 = CI->getArgOperand(1);
- Value *Op3 = CI->getArgOperand(2);
- unsigned shiftVal = cast<ConstantInt>(Op3)->getZExtValue();
- Value *Rep;
- IRBuilder<> Builder(C);
- Builder.SetInsertPoint(CI->getParent(), CI);
-
- // If palignr is shifting the pair of input vectors less than 17 bytes,
- // emit a shuffle instruction.
- if (shiftVal <= 16) {
- const Type *IntTy = Type::getInt32Ty(C);
- const Type *EltTy = Type::getInt8Ty(C);
- const Type *VecTy = VectorType::get(EltTy, 16);
-
- Op2 = Builder.CreateBitCast(Op2, VecTy);
- Op1 = Builder.CreateBitCast(Op1, VecTy);
-
- llvm::SmallVector<llvm::Constant*, 16> Indices;
- for (unsigned i = 0; i != 16; ++i)
- Indices.push_back(ConstantInt::get(IntTy, shiftVal + i));
-
- Value *SV = ConstantVector::get(Indices);
- Rep = Builder.CreateShuffleVector(Op2, Op1, SV, "palignr");
- Rep = Builder.CreateBitCast(Rep, F->getReturnType());
- }
-
- // If palignr is shifting the pair of input vectors more than 16 but less
- // than 32 bytes, emit a logical right shift of the destination.
- else if (shiftVal < 32) {
- const Type *EltTy = Type::getInt64Ty(C);
- const Type *VecTy = VectorType::get(EltTy, 2);
- const Type *IntTy = Type::getInt32Ty(C);
-
- Op1 = Builder.CreateBitCast(Op1, VecTy, "cast");
- Op2 = ConstantInt::get(IntTy, (shiftVal-16) * 8);
-
-      // Get the SSE2 psrl.dq intrinsic declaration and call it to do the shift.
- Function *I =
- Intrinsic::getDeclaration(F->getParent(), Intrinsic::x86_sse2_psrl_dq);
- Rep = Builder.CreateCall2(I, Op1, Op2, "palignr");
- }
-
-    // If palignr is shifting the pair of vectors 32 bytes or more, emit zero.
- else {
- Rep = Constant::getNullValue(F->getReturnType());
- }
-
- // Replace any uses with our new instruction.
- if (!CI->use_empty())
- CI->replaceAllUsesWith(Rep);
-
- // Remove upgraded instruction.
- CI->eraseFromParent();
-
- } else if (F->getName() == "llvm.x86.sse.loadu.ps" ||
- F->getName() == "llvm.x86.sse2.loadu.dq" ||
- F->getName() == "llvm.x86.sse2.loadu.pd") {
+ if (F->getName() == "llvm.x86.sse.loadu.ps" ||
+ F->getName() == "llvm.x86.sse2.loadu.dq" ||
+ F->getName() == "llvm.x86.sse2.loadu.pd") {
// Convert to a native, unaligned load.
const Type *VecTy = CI->getType();
const Type *IntTy = IntegerType::get(C, 128);
@@ -1040,306 +189,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
}
switch (NewFn->getIntrinsicID()) {
- default: llvm_unreachable("Unknown function for CallInst upgrade.");
- case Intrinsic::arm_neon_vld1:
- case Intrinsic::arm_neon_vld2:
- case Intrinsic::arm_neon_vld3:
- case Intrinsic::arm_neon_vld4:
- case Intrinsic::arm_neon_vst1:
- case Intrinsic::arm_neon_vst2:
- case Intrinsic::arm_neon_vst3:
- case Intrinsic::arm_neon_vst4:
- case Intrinsic::arm_neon_vld2lane:
- case Intrinsic::arm_neon_vld3lane:
- case Intrinsic::arm_neon_vld4lane:
- case Intrinsic::arm_neon_vst2lane:
- case Intrinsic::arm_neon_vst3lane:
- case Intrinsic::arm_neon_vst4lane: {
- // Add a default alignment argument of 1.
- SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
- Operands.push_back(ConstantInt::get(Type::getInt32Ty(C), 1));
- CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
- CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
-
- // Handle any uses of the old CallInst.
- if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(NewCI);
-
- // Clean up the old call now that it has been completely upgraded.
- CI->eraseFromParent();
- break;
- }
-
- case Intrinsic::x86_mmx_padd_b:
- case Intrinsic::x86_mmx_padd_w:
- case Intrinsic::x86_mmx_padd_d:
- case Intrinsic::x86_mmx_padd_q:
- case Intrinsic::x86_mmx_padds_b:
- case Intrinsic::x86_mmx_padds_w:
- case Intrinsic::x86_mmx_paddus_b:
- case Intrinsic::x86_mmx_paddus_w:
- case Intrinsic::x86_mmx_psub_b:
- case Intrinsic::x86_mmx_psub_w:
- case Intrinsic::x86_mmx_psub_d:
- case Intrinsic::x86_mmx_psub_q:
- case Intrinsic::x86_mmx_psubs_b:
- case Intrinsic::x86_mmx_psubs_w:
- case Intrinsic::x86_mmx_psubus_b:
- case Intrinsic::x86_mmx_psubus_w:
- case Intrinsic::x86_mmx_pmulh_w:
- case Intrinsic::x86_mmx_pmull_w:
- case Intrinsic::x86_mmx_pmulhu_w:
- case Intrinsic::x86_mmx_pmulu_dq:
- case Intrinsic::x86_mmx_pmadd_wd:
- case Intrinsic::x86_mmx_pand:
- case Intrinsic::x86_mmx_pandn:
- case Intrinsic::x86_mmx_por:
- case Intrinsic::x86_mmx_pxor:
- case Intrinsic::x86_mmx_pavg_b:
- case Intrinsic::x86_mmx_pavg_w:
- case Intrinsic::x86_mmx_pmaxu_b:
- case Intrinsic::x86_mmx_pmaxs_w:
- case Intrinsic::x86_mmx_pminu_b:
- case Intrinsic::x86_mmx_pmins_w:
- case Intrinsic::x86_mmx_psad_bw:
- case Intrinsic::x86_mmx_psll_w:
- case Intrinsic::x86_mmx_psll_d:
- case Intrinsic::x86_mmx_psll_q:
- case Intrinsic::x86_mmx_pslli_w:
- case Intrinsic::x86_mmx_pslli_d:
- case Intrinsic::x86_mmx_pslli_q:
- case Intrinsic::x86_mmx_psrl_w:
- case Intrinsic::x86_mmx_psrl_d:
- case Intrinsic::x86_mmx_psrl_q:
- case Intrinsic::x86_mmx_psrli_w:
- case Intrinsic::x86_mmx_psrli_d:
- case Intrinsic::x86_mmx_psrli_q:
- case Intrinsic::x86_mmx_psra_w:
- case Intrinsic::x86_mmx_psra_d:
- case Intrinsic::x86_mmx_psrai_w:
- case Intrinsic::x86_mmx_psrai_d:
- case Intrinsic::x86_mmx_packsswb:
- case Intrinsic::x86_mmx_packssdw:
- case Intrinsic::x86_mmx_packuswb:
- case Intrinsic::x86_mmx_punpckhbw:
- case Intrinsic::x86_mmx_punpckhwd:
- case Intrinsic::x86_mmx_punpckhdq:
- case Intrinsic::x86_mmx_punpcklbw:
- case Intrinsic::x86_mmx_punpcklwd:
- case Intrinsic::x86_mmx_punpckldq:
- case Intrinsic::x86_mmx_pcmpeq_b:
- case Intrinsic::x86_mmx_pcmpeq_w:
- case Intrinsic::x86_mmx_pcmpeq_d:
- case Intrinsic::x86_mmx_pcmpgt_b:
- case Intrinsic::x86_mmx_pcmpgt_w:
- case Intrinsic::x86_mmx_pcmpgt_d: {
- Value *Operands[2];
-
- // Cast the operand to the X86 MMX type.
- Operands[0] = new BitCastInst(CI->getArgOperand(0),
- NewFn->getFunctionType()->getParamType(0),
- "upgraded.", CI);
-
- switch (NewFn->getIntrinsicID()) {
- default:
- // Cast to the X86 MMX type.
- Operands[1] = new BitCastInst(CI->getArgOperand(1),
- NewFn->getFunctionType()->getParamType(1),
- "upgraded.", CI);
- break;
- case Intrinsic::x86_mmx_pslli_w:
- case Intrinsic::x86_mmx_pslli_d:
- case Intrinsic::x86_mmx_pslli_q:
- case Intrinsic::x86_mmx_psrli_w:
- case Intrinsic::x86_mmx_psrli_d:
- case Intrinsic::x86_mmx_psrli_q:
- case Intrinsic::x86_mmx_psrai_w:
- case Intrinsic::x86_mmx_psrai_d:
- // These take an i32 as their second parameter.
- Operands[1] = CI->getArgOperand(1);
- break;
- }
-
- ConstructNewCallInst(NewFn, CI, Operands, 2);
- break;
- }
- case Intrinsic::x86_mmx_maskmovq: {
- Value *Operands[3];
-
- // Cast the operands to the X86 MMX type.
- Operands[0] = new BitCastInst(CI->getArgOperand(0),
- NewFn->getFunctionType()->getParamType(0),
- "upgraded.", CI);
- Operands[1] = new BitCastInst(CI->getArgOperand(1),
- NewFn->getFunctionType()->getParamType(1),
- "upgraded.", CI);
- Operands[2] = CI->getArgOperand(2);
-
- ConstructNewCallInst(NewFn, CI, Operands, 3, false);
- break;
- }
- case Intrinsic::x86_mmx_pmovmskb: {
- Value *Operands[1];
-
- // Cast the operand to the X86 MMX type.
- Operands[0] = new BitCastInst(CI->getArgOperand(0),
- NewFn->getFunctionType()->getParamType(0),
- "upgraded.", CI);
-
- ConstructNewCallInst(NewFn, CI, Operands, 1);
- break;
- }
- case Intrinsic::x86_mmx_movnt_dq: {
- Value *Operands[2];
-
- Operands[0] = CI->getArgOperand(0);
-
- // Cast the operand to the X86 MMX type.
- Operands[1] = new BitCastInst(CI->getArgOperand(1),
- NewFn->getFunctionType()->getParamType(1),
- "upgraded.", CI);
-
- ConstructNewCallInst(NewFn, CI, Operands, 2, false);
- break;
- }
- case Intrinsic::x86_mmx_palignr_b: {
- Value *Operands[3];
-
- // Cast the operands to the X86 MMX type.
- Operands[0] = new BitCastInst(CI->getArgOperand(0),
- NewFn->getFunctionType()->getParamType(0),
- "upgraded.", CI);
- Operands[1] = new BitCastInst(CI->getArgOperand(1),
- NewFn->getFunctionType()->getParamType(1),
- "upgraded.", CI);
- Operands[2] = CI->getArgOperand(2);
-
- ConstructNewCallInst(NewFn, CI, Operands, 3);
- break;
- }
- case Intrinsic::x86_mmx_pextr_w: {
- Value *Operands[2];
-
- // Cast the operands to the X86 MMX type.
- Operands[0] = new BitCastInst(CI->getArgOperand(0),
- NewFn->getFunctionType()->getParamType(0),
- "upgraded.", CI);
- Operands[1] = CI->getArgOperand(1);
-
- ConstructNewCallInst(NewFn, CI, Operands, 2);
- break;
- }
- case Intrinsic::x86_mmx_pinsr_w: {
- Value *Operands[3];
-
- // Cast the operands to the X86 MMX type.
- Operands[0] = new BitCastInst(CI->getArgOperand(0),
- NewFn->getFunctionType()->getParamType(0),
- "upgraded.", CI);
- Operands[1] = CI->getArgOperand(1);
- Operands[2] = CI->getArgOperand(2);
-
- ConstructNewCallInst(NewFn, CI, Operands, 3);
- break;
- }
- case Intrinsic::x86_sse_pshuf_w: {
+ case Intrinsic::prefetch: {
IRBuilder<> Builder(C);
Builder.SetInsertPoint(CI->getParent(), CI);
+ const llvm::Type *I32Ty = llvm::Type::getInt32Ty(CI->getContext());
- // Cast the operand to the X86 MMX type.
- Value *Operands[2];
- Operands[0] =
- Builder.CreateBitCast(CI->getArgOperand(0),
- NewFn->getFunctionType()->getParamType(0),
- "upgraded.");
- Operands[1] =
- Builder.CreateTrunc(CI->getArgOperand(1),
- Type::getInt8Ty(C),
- "upgraded.");
-
- ConstructNewCallInst(NewFn, CI, Operands, 2);
- break;
- }
-
- case Intrinsic::ctlz:
- case Intrinsic::ctpop:
- case Intrinsic::cttz: {
- // Build a small vector of the original arguments.
- SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
-
- // Construct a new CallInst
- CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
- "upgraded."+CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
-
- // Handle any uses of the old CallInst.
- if (!CI->use_empty()) {
- // Check for sign extend parameter attributes on the return values.
- bool SrcSExt = NewFn->getAttributes().paramHasAttr(0, Attribute::SExt);
- bool DestSExt = F->getAttributes().paramHasAttr(0, Attribute::SExt);
-
- // Construct an appropriate cast from the new return type to the old.
- CastInst *RetCast = CastInst::Create(
- CastInst::getCastOpcode(NewCI, SrcSExt,
- F->getReturnType(),
- DestSExt),
- NewCI, F->getReturnType(),
- NewCI->getName(), CI);
- NewCI->moveBefore(RetCast);
-
- // Replace all uses of the old call with the new cast which has the
- // correct type.
- CI->replaceAllUsesWith(RetCast);
- }
-
- // Clean up the old call now that it has been completely upgraded.
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::eh_selector:
- case Intrinsic::eh_typeid_for: {
- // Only the return type changed.
- SmallVector<Value*, 8> Operands(CS.arg_begin(), CS.arg_end());
- CallInst *NewCI = CallInst::Create(NewFn, Operands.begin(), Operands.end(),
- "upgraded." + CI->getName(), CI);
- NewCI->setTailCall(CI->isTailCall());
- NewCI->setCallingConv(CI->getCallingConv());
-
- // Handle any uses of the old CallInst.
- if (!CI->use_empty()) {
- // Construct an appropriate cast from the new return type to the old.
- CastInst *RetCast =
- CastInst::Create(CastInst::getCastOpcode(NewCI, true,
- F->getReturnType(), true),
- NewCI, F->getReturnType(), NewCI->getName(), CI);
- CI->replaceAllUsesWith(RetCast);
- }
- CI->eraseFromParent();
- }
- break;
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset: {
- // Add isVolatile
- const llvm::Type *I1Ty = llvm::Type::getInt1Ty(CI->getContext());
- Value *Operands[5] = { CI->getArgOperand(0), CI->getArgOperand(1),
- CI->getArgOperand(2), CI->getArgOperand(3),
- llvm::ConstantInt::get(I1Ty, 0) };
- CallInst *NewCI = CallInst::Create(NewFn, Operands, Operands+5,
+ // Add the extra "data cache" argument
+ Value *Operands[4] = { CI->getArgOperand(0), CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ llvm::ConstantInt::get(I32Ty, 1) };
+ CallInst *NewCI = CallInst::Create(NewFn, Operands,
CI->getName(), CI);
NewCI->setTailCall(CI->isTailCall());
NewCI->setCallingConv(CI->getCallingConv());
// Handle any uses of the old CallInst.
if (!CI->use_empty())
- // Replace all uses of the old call with the new cast which has the
+      // Replace all uses of the old call with the new call, which has the
// correct type.
CI->replaceAllUsesWith(NewCI);
-
+
// Clean up the old call now that it has been completely upgraded.
CI->eraseFromParent();
break;
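// Illustrative before/after of the upgrade above (the old operand meanings
// are an assumption for clarity; not part of this patch). The rewrite
// appends a trailing "cache type" operand, with 1 selecting the data cache:
//   old: call void @llvm.prefetch(i8* %p, i32 0, i32 3)
//   new: call void @llvm.prefetch(i8* %p, i32 0, i32 3, i32 1)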
@@ -1354,13 +222,13 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
// Upgrade the function and check if it is a totally new function.
- Function* NewFn;
+ Function *NewFn;
if (UpgradeIntrinsicFunction(F, NewFn)) {
if (NewFn != F) {
// Replace all uses to the old function with the new one if necessary.
for (Value::use_iterator UI = F->use_begin(), UE = F->use_end();
UI != UE; ) {
- if (CallInst* CI = dyn_cast<CallInst>(*UI++))
+ if (CallInst *CI = dyn_cast<CallInst>(*UI++))
UpgradeIntrinsicCall(CI, NewFn);
}
// Remove old function, no longer used, from the module.
@@ -1373,37 +241,27 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) {
/// If an llvm.dbg.declare intrinsic is invalid, then this function simply
/// strips that use.
void llvm::CheckDebugInfoIntrinsics(Module *M) {
-
-
if (Function *FuncStart = M->getFunction("llvm.dbg.func.start")) {
- while (!FuncStart->use_empty()) {
- CallInst *CI = cast<CallInst>(FuncStart->use_back());
- CI->eraseFromParent();
- }
+ while (!FuncStart->use_empty())
+ cast<CallInst>(FuncStart->use_back())->eraseFromParent();
FuncStart->eraseFromParent();
}
if (Function *StopPoint = M->getFunction("llvm.dbg.stoppoint")) {
- while (!StopPoint->use_empty()) {
- CallInst *CI = cast<CallInst>(StopPoint->use_back());
- CI->eraseFromParent();
- }
+ while (!StopPoint->use_empty())
+ cast<CallInst>(StopPoint->use_back())->eraseFromParent();
StopPoint->eraseFromParent();
}
if (Function *RegionStart = M->getFunction("llvm.dbg.region.start")) {
- while (!RegionStart->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionStart->use_back());
- CI->eraseFromParent();
- }
+ while (!RegionStart->use_empty())
+ cast<CallInst>(RegionStart->use_back())->eraseFromParent();
RegionStart->eraseFromParent();
}
if (Function *RegionEnd = M->getFunction("llvm.dbg.region.end")) {
- while (!RegionEnd->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionEnd->use_back());
- CI->eraseFromParent();
- }
+ while (!RegionEnd->use_empty())
+ cast<CallInst>(RegionEnd->use_back())->eraseFromParent();
RegionEnd->eraseFromParent();
}
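// A minimal sketch of the replace-and-erase pattern every case in
// UpgradeIntrinsicCall follows (the helper name is hypothetical, not part
// of this patch): build the replacement value, rewire any uses, then erase
// the old call.
static void replaceCallWith(CallInst *CI, Value *Replacement) {
  // Replace all uses of the old call with the replacement value, which must
  // already have the correct type.
  if (!CI->use_empty())
    CI->replaceAllUsesWith(Replacement);
  // Clean up the old call now that it has been completely upgraded.
  CI->eraseFromParent();
}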
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 955a028..70265c8 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -147,6 +147,26 @@ Instruction* BasicBlock::getFirstNonPHIOrDbg() {
return &*i;
}
+Instruction* BasicBlock::getFirstNonPHIOrDbgOrLifetime() {
+ // All valid basic blocks should have a terminator,
+ // which is not a PHINode. If we have an invalid basic
+ // block we'll get an assertion failure when dereferencing
+ // a past-the-end iterator.
+ BasicBlock::iterator i = begin();
+ for (;; ++i) {
+ if (isa<PHINode>(i) || isa<DbgInfoIntrinsic>(i))
+ continue;
+
+ const IntrinsicInst *II = dyn_cast<IntrinsicInst>(i);
+ if (!II)
+ break;
+ if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
+ II->getIntrinsicID() != Intrinsic::lifetime_end)
+ break;
+ }
+ return &*i;
+}
+
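// Illustrative usage sketch (hypothetical, not part of this patch): a
// transform that wants an insertion point at the top of a block, below any
// PHI nodes, debug intrinsics, and lifetime markers, can now write:
//   Instruction *InsertPt = BB->getFirstNonPHIOrDbgOrLifetime();
//   NewInst->insertBefore(InsertPt);
// keeping lifetime.start/lifetime.end markers grouped at the block entry.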
void BasicBlock::dropAllReferences() {
for(iterator I = begin(), E = end(); I != E; ++I)
I->dropAllReferences();
@@ -227,8 +247,8 @@ void BasicBlock::removePredecessor(BasicBlock *Pred,
// If the PHI _HAD_ two incoming values, replace it with its now *single* value
if (max_idx == 2) {
- if (PN->getOperand(0) != PN)
- PN->replaceAllUsesWith(PN->getOperand(0));
+ if (PN->getIncomingValue(0) != PN)
+ PN->replaceAllUsesWith(PN->getIncomingValue(0));
else
// We are left with an infinite loop with no entries: kill the PHI.
PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
@@ -308,3 +328,19 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
return New;
}
+void BasicBlock::replaceSuccessorsPhiUsesWith(BasicBlock *New) {
+ TerminatorInst *TI = getTerminator();
+ if (!TI)
+ // Cope with being called on a BasicBlock that doesn't have a terminator
+ // yet. Clang's CodeGenFunction::EmitReturnBlock() likes to do this.
+ return;
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *Succ = TI->getSuccessor(i);
+ for (iterator II = Succ->begin(); PHINode *PN = dyn_cast<PHINode>(II);
+ ++II) {
+ int i;
+ while ((i = PN->getBasicBlockIndex(this)) >= 0)
+ PN->setIncomingBlock(i, New);
+ }
+ }
+}
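// Illustrative usage sketch (hypothetical, not part of this patch): when a
// block's terminator is about to be moved into a freshly created block, the
// successors' PHI nodes must be retargeted first:
//   Old->replaceSuccessorsPhiUsesWith(New);
// Every incoming entry in a successor PHI that named Old then names New.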
diff --git a/lib/VMCore/CMakeLists.txt b/lib/VMCore/CMakeLists.txt
index 6bde263..f60dd06 100644
--- a/lib/VMCore/CMakeLists.txt
+++ b/lib/VMCore/CMakeLists.txt
@@ -29,7 +29,6 @@ add_llvm_library(LLVMCore
PassRegistry.cpp
PrintModulePass.cpp
Type.cpp
- TypeSymbolTable.cpp
Use.cpp
User.cpp
Value.cpp
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 9985ada..323e2a2 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -559,7 +559,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i)
res.push_back(ConstantExpr::getCast(opc,
CV->getOperand(i), DstEltTy));
- return ConstantVector::get(DestVecTy, res);
+ return ConstantVector::get(res);
}
// We actually have to do a cast now. Perform the cast according to the
@@ -730,9 +730,12 @@ Constant *llvm::ConstantFoldSelectInstruction(Constant *Cond,
}
+ if (isa<UndefValue>(Cond)) {
+ if (isa<UndefValue>(V1)) return V1;
+ return V2;
+ }
if (isa<UndefValue>(V1)) return V2;
if (isa<UndefValue>(V2)) return V1;
- if (isa<UndefValue>(Cond)) return V1;
if (V1 == V2) return V1;
if (ConstantExpr *TrueVal = dyn_cast<ConstantExpr>(V1)) {
@@ -877,42 +880,38 @@ Constant *llvm::ConstantFoldShuffleVectorInstruction(Constant *V1,
}
Constant *llvm::ConstantFoldExtractValueInstruction(Constant *Agg,
- const unsigned *Idxs,
- unsigned NumIdx) {
+ ArrayRef<unsigned> Idxs) {
// Base case: no indices, so return the entire value.
- if (NumIdx == 0)
+ if (Idxs.empty())
return Agg;
if (isa<UndefValue>(Agg)) // ev(undef, x) -> undef
return UndefValue::get(ExtractValueInst::getIndexedType(Agg->getType(),
- Idxs,
- Idxs + NumIdx));
+ Idxs));
if (isa<ConstantAggregateZero>(Agg)) // ev(0, x) -> 0
return
Constant::getNullValue(ExtractValueInst::getIndexedType(Agg->getType(),
- Idxs,
- Idxs + NumIdx));
+ Idxs));
// Otherwise recurse.
if (ConstantStruct *CS = dyn_cast<ConstantStruct>(Agg))
- return ConstantFoldExtractValueInstruction(CS->getOperand(*Idxs),
- Idxs+1, NumIdx-1);
+ return ConstantFoldExtractValueInstruction(CS->getOperand(Idxs[0]),
+ Idxs.slice(1));
if (ConstantArray *CA = dyn_cast<ConstantArray>(Agg))
- return ConstantFoldExtractValueInstruction(CA->getOperand(*Idxs),
- Idxs+1, NumIdx-1);
+ return ConstantFoldExtractValueInstruction(CA->getOperand(Idxs[0]),
+ Idxs.slice(1));
ConstantVector *CV = cast<ConstantVector>(Agg);
- return ConstantFoldExtractValueInstruction(CV->getOperand(*Idxs),
- Idxs+1, NumIdx-1);
+ return ConstantFoldExtractValueInstruction(CV->getOperand(Idxs[0]),
+ Idxs.slice(1));
}
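// A minimal sketch of the ArrayRef recursion idiom adopted above (the helper
// name is hypothetical): each step peels the leading index with Idxs[0] and
// recurses on Idxs.slice(1), replacing the old (pointer, count) bookkeeping.
static Constant *descendOneLevel(Constant *Agg, ArrayRef<unsigned> Idxs) {
  if (Idxs.empty())  // base case: no indices left, return the aggregate
    return Agg;
  Constant *Member = cast<Constant>(Agg->getOperand(Idxs[0]));
  return ConstantFoldExtractValueInstruction(Member, Idxs.slice(1));
}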
Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
Constant *Val,
- const unsigned *Idxs,
- unsigned NumIdx) {
+ ArrayRef<unsigned> Idxs) {
// Base case: no indices, so replace the entire value.
- if (NumIdx == 0)
+ if (Idxs.empty())
return Val;
if (isa<UndefValue>(Agg)) {
@@ -934,15 +933,15 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
for (unsigned i = 0; i < numOps; ++i) {
const Type *MemberTy = AggTy->getTypeAtIndex(i);
Constant *Op =
- (*Idxs == i) ?
+ (Idxs[0] == i) ?
ConstantFoldInsertValueInstruction(UndefValue::get(MemberTy),
- Val, Idxs+1, NumIdx-1) :
+ Val, Idxs.slice(1)) :
UndefValue::get(MemberTy);
Ops[i] = Op;
}
if (const StructType* ST = dyn_cast<StructType>(AggTy))
- return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+ return ConstantStruct::get(ST, Ops);
return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
@@ -965,15 +964,15 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
for (unsigned i = 0; i < numOps; ++i) {
const Type *MemberTy = AggTy->getTypeAtIndex(i);
Constant *Op =
- (*Idxs == i) ?
+ (Idxs[0] == i) ?
ConstantFoldInsertValueInstruction(Constant::getNullValue(MemberTy),
- Val, Idxs+1, NumIdx-1) :
+ Val, Idxs.slice(1)) :
Constant::getNullValue(MemberTy);
Ops[i] = Op;
}
if (const StructType *ST = dyn_cast<StructType>(AggTy))
- return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+ return ConstantStruct::get(ST, Ops);
return ConstantArray::get(cast<ArrayType>(AggTy), Ops);
}
@@ -982,13 +981,13 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
std::vector<Constant*> Ops(Agg->getNumOperands());
for (unsigned i = 0; i < Agg->getNumOperands(); ++i) {
Constant *Op = cast<Constant>(Agg->getOperand(i));
- if (*Idxs == i)
- Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs+1, NumIdx-1);
+ if (Idxs[0] == i)
+ Op = ConstantFoldInsertValueInstruction(Op, Val, Idxs.slice(1));
Ops[i] = Op;
}
if (const StructType* ST = dyn_cast<StructType>(Agg->getType()))
- return ConstantStruct::get(ST->getContext(), Ops, ST->isPacked());
+ return ConstantStruct::get(ST, Ops);
return ConstantArray::get(cast<ArrayType>(Agg->getType()), Ops);
}
@@ -1014,20 +1013,38 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
case Instruction::Add:
case Instruction::Sub:
return UndefValue::get(C1->getType());
- case Instruction::Mul:
case Instruction::And:
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef & undef -> undef
+ return C1;
+ return Constant::getNullValue(C1->getType()); // undef & X -> 0
+ case Instruction::Mul: {
+ ConstantInt *CI;
+ // X * undef -> undef if X is odd or undef
+ if (((CI = dyn_cast<ConstantInt>(C1)) && CI->getValue()[0]) ||
+ ((CI = dyn_cast<ConstantInt>(C2)) && CI->getValue()[0]) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
+ return UndefValue::get(C1->getType());
+
+ // X * undef -> 0 otherwise
return Constant::getNullValue(C1->getType());
+ }
case Instruction::UDiv:
case Instruction::SDiv:
+ // undef / 1 -> undef
+ if (Opcode == Instruction::UDiv || Opcode == Instruction::SDiv)
+ if (ConstantInt *CI2 = dyn_cast<ConstantInt>(C2))
+ if (CI2->isOne())
+ return C1;
+ // FALL THROUGH
case Instruction::URem:
case Instruction::SRem:
if (!isa<UndefValue>(C2)) // undef / X -> 0
return Constant::getNullValue(C1->getType());
return C2; // X / undef -> undef
case Instruction::Or: // X | undef -> -1
- if (const VectorType *PTy = dyn_cast<VectorType>(C1->getType()))
- return Constant::getAllOnesValue(PTy);
- return Constant::getAllOnesValue(C1->getType());
+ if (isa<UndefValue>(C1) && isa<UndefValue>(C2)) // undef | undef -> undef
+ return C1;
+ return Constant::getAllOnesValue(C1->getType()); // undef | X -> ~0
case Instruction::LShr:
if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
return C1; // undef lshr undef -> undef
@@ -1041,6 +1058,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
else
return C1; // X ashr undef --> X
case Instruction::Shl:
+ if (isa<UndefValue>(C2) && isa<UndefValue>(C1))
+ return C1; // undef shl undef -> undef
// undef << X -> 0 or X << undef -> 0
return Constant::getNullValue(C1->getType());
}
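// Worked examples of the undef folding rules above (derived from this hunk;
// illustrative only, not part of the patch):
//   and  i32 undef, %x    --> 0      ; undef may be chosen as 0
//   and  i32 undef, undef --> undef
//   mul  i32 undef, 3     --> undef  ; odd factor: every value is reachable
//   mul  i32 undef, 2     --> 0      ; even factor: low bit is known zero
//   udiv i32 undef, 1     --> undef  ; X / 1 == X
//   or   i32 undef, %x    --> -1     ; undef may be chosen as all-ones
//   shl  i32 undef, undef --> undef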
@@ -1443,8 +1462,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
/// isMaybeZeroSizedType - This type may be zero sized if it's an array or
/// structure of zero sized types. The only leaf zero sized type is an empty
/// structure.
static bool isMaybeZeroSizedType(const Type *Ty) {
- if (Ty->isOpaqueTy()) return true; // Can't say.
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
+ if (STy->isOpaque()) return true; // Can't say.
// If all of elements have zero size, this does too.
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
@@ -1831,7 +1850,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
if (isa<UndefValue>(C1) || isa<UndefValue>(C2)) {
// For EQ and NE, we can always pick a value for the undef to make the
// predicate pass or fail, so we can return undef.
- if (ICmpInst::isEquality(ICmpInst::Predicate(pred)))
+ // Also, if both operands are undef, we can return undef.
+ if (ICmpInst::isEquality(ICmpInst::Predicate(pred)) ||
+ (isa<UndefValue>(C1) && isa<UndefValue>(C2)))
return UndefValue::get(ResultTy);
// Otherwise, pick the same value as the non-undef operand, and fold
// it to true or false.
@@ -2147,9 +2168,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C,
bool inBounds,
IndexTy const *Idxs,
unsigned NumIdx) {
+ if (NumIdx == 0) return C;
Constant *Idx0 = cast<Constant>(Idxs[0]);
- if (NumIdx == 0 ||
- (NumIdx == 1 && Idx0->isNullValue()))
+ if ((NumIdx == 1 && Idx0->isNullValue()))
return C;
if (isa<UndefValue>(C)) {
diff --git a/lib/VMCore/ConstantFold.h b/lib/VMCore/ConstantFold.h
index 0ecd7b4..653a1c3 100644
--- a/lib/VMCore/ConstantFold.h
+++ b/lib/VMCore/ConstantFold.h
@@ -19,6 +19,8 @@
#ifndef CONSTANTFOLDING_H
#define CONSTANTFOLDING_H
+#include "llvm/ADT/ArrayRef.h"
+
namespace llvm {
class Value;
class Constant;
@@ -38,11 +40,9 @@ namespace llvm {
Constant *ConstantFoldShuffleVectorInstruction(Constant *V1, Constant *V2,
Constant *Mask);
Constant *ConstantFoldExtractValueInstruction(Constant *Agg,
- const unsigned *Idxs,
- unsigned NumIdx);
+ ArrayRef<unsigned> Idxs);
Constant *ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val,
- const unsigned *Idxs,
- unsigned NumIdx);
+ ArrayRef<unsigned> Idxs);
Constant *ConstantFoldBinaryInstruction(unsigned Opcode, Constant *V1,
Constant *V2);
Constant *ConstantFoldCompareInstruction(unsigned short predicate,
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 15d7793..316c884 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <cstdarg>
using namespace llvm;
@@ -39,6 +40,28 @@ using namespace llvm;
// Constant Class
//===----------------------------------------------------------------------===//
+bool Constant::isNegativeZeroValue() const {
+ // Floating point values have an explicit -0.0 value.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && CFP->isNegative();
+
+ // Otherwise, just use +0.0.
+ return isNullValue();
+}
+
+bool Constant::isNullValue() const {
+ // 0 is null.
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(this))
+ return CI->isZero();
+
+ // +0.0 is null.
+ if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this))
+ return CFP->isZero() && !CFP->isNegative();
+
+  // ConstantAggregateZero is null for aggregates and ConstantPointerNull is
+  // null for pointers.
+ return isa<ConstantAggregateZero>(this) || isa<ConstantPointerNull>(this);
+}
+
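// Illustrative behavior of the unified Constant::isNullValue (derived from
// the code above; not part of the patch):
//   ConstantInt 0         -> true
//   ConstantFP +0.0       -> true
//   ConstantFP -0.0       -> false  (isNegativeZeroValue() returns true)
//   ConstantAggregateZero -> true
//   ConstantPointerNull   -> true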
// Constructor to create a '0' constant of arbitrary type...
Constant *Constant::getNullValue(const Type *Ty) {
switch (Ty->getTypeID()) {
@@ -541,11 +564,7 @@ ConstantFP::ConstantFP(const Type *Ty, const APFloat& V)
"FP type Mismatch");
}
-bool ConstantFP::isNullValue() const {
- return Val.isZero() && !Val.isNegative();
-}
-
-bool ConstantFP::isExactlyValue(const APFloat& V) const {
+bool ConstantFP::isExactlyValue(const APFloat &V) const {
return Val.bitwiseIsEqual(V);
}
@@ -571,8 +590,7 @@ ConstantArray::ConstantArray(const ArrayType *T,
}
}
-Constant *ConstantArray::get(const ArrayType *Ty,
- const std::vector<Constant*> &V) {
+Constant *ConstantArray::get(const ArrayType *Ty, ArrayRef<Constant*> V) {
for (unsigned i = 0, e = V.size(); i != e; ++i) {
assert(V[i]->getType() == Ty->getElementType() &&
"Wrong type in array element initializer");
@@ -592,13 +610,6 @@ Constant *ConstantArray::get(const ArrayType *Ty,
return ConstantAggregateZero::get(Ty);
}
-
-Constant *ConstantArray::get(const ArrayType* T, Constant *const* Vals,
- unsigned NumVals) {
- // FIXME: make this the primary ctor method.
- return get(T, std::vector<Constant*>(Vals, Vals+NumVals));
-}
-
/// ConstantArray::get(const string&) - Return an array that is initialized to
/// contain the specified string. If length is zero then a null terminator is
/// added to the specified string so that it may be used in a natural way.
@@ -621,63 +632,64 @@ Constant *ConstantArray::get(LLVMContext &Context, StringRef Str,
return get(ATy, ElementVals);
}
+/// getTypeForElements - Return an anonymous struct type to use for a constant
+/// with the specified set of elements. The list must not be empty.
+StructType *ConstantStruct::getTypeForElements(LLVMContext &Context,
+ ArrayRef<Constant*> V,
+ bool Packed) {
+ SmallVector<Type*, 16> EltTypes;
+ for (unsigned i = 0, e = V.size(); i != e; ++i)
+ EltTypes.push_back(V[i]->getType());
+
+ return StructType::get(Context, EltTypes, Packed);
+}
+
+
+StructType *ConstantStruct::getTypeForElements(ArrayRef<Constant*> V,
+ bool Packed) {
+ assert(!V.empty() &&
+ "ConstantStruct::getTypeForElements cannot be called on empty list");
+ return getTypeForElements(V[0]->getContext(), V, Packed);
+}
+
+
ConstantStruct::ConstantStruct(const StructType *T,
const std::vector<Constant*> &V)
: Constant(T, ConstantStructVal,
OperandTraits<ConstantStruct>::op_end(this) - V.size(),
V.size()) {
- assert(V.size() == T->getNumElements() &&
+ assert((T->isOpaque() || V.size() == T->getNumElements()) &&
"Invalid initializer vector for constant structure");
Use *OL = OperandList;
for (std::vector<Constant*>::const_iterator I = V.begin(), E = V.end();
I != E; ++I, ++OL) {
Constant *C = *I;
- assert(C->getType() == T->getElementType(I-V.begin()) &&
+ assert((T->isOpaque() || C->getType() == T->getElementType(I-V.begin())) &&
"Initializer for struct element doesn't match struct element type!");
*OL = C;
}
}
// ConstantStruct accessors.
-Constant *ConstantStruct::get(const StructType* T,
- const std::vector<Constant*>& V) {
- LLVMContextImpl* pImpl = T->getContext().pImpl;
-
- // Create a ConstantAggregateZero value if all elements are zeros...
+Constant *ConstantStruct::get(const StructType *ST, ArrayRef<Constant*> V) {
+ // Create a ConstantAggregateZero value if all elements are zeros.
for (unsigned i = 0, e = V.size(); i != e; ++i)
if (!V[i]->isNullValue())
- return pImpl->StructConstants.getOrCreate(T, V);
-
- return ConstantAggregateZero::get(T);
-}
-
-Constant *ConstantStruct::get(LLVMContext &Context,
- const std::vector<Constant*>& V, bool packed) {
- std::vector<const Type*> StructEls;
- StructEls.reserve(V.size());
- for (unsigned i = 0, e = V.size(); i != e; ++i)
- StructEls.push_back(V[i]->getType());
- return get(StructType::get(Context, StructEls, packed), V);
-}
+ return ST->getContext().pImpl->StructConstants.getOrCreate(ST, V);
-Constant *ConstantStruct::get(LLVMContext &Context,
- Constant *const *Vals, unsigned NumVals,
- bool Packed) {
- // FIXME: make this the primary ctor method.
- return get(Context, std::vector<Constant*>(Vals, Vals+NumVals), Packed);
+ assert((ST->isOpaque() || ST->getNumElements() == V.size()) &&
+ "Incorrect # elements specified to ConstantStruct::get");
+ return ConstantAggregateZero::get(ST);
}
-Constant* ConstantStruct::get(LLVMContext &Context, bool Packed,
- Constant * Val, ...) {
+Constant* ConstantStruct::get(const StructType *T, ...) {
va_list ap;
- std::vector<Constant*> Values;
- va_start(ap, Val);
- while (Val) {
+ SmallVector<Constant*, 8> Values;
+ va_start(ap, T);
+ while (Constant *Val = va_arg(ap, llvm::Constant*))
Values.push_back(Val);
- Val = va_arg(ap, llvm::Constant*);
- }
va_end(ap);
- return get(Context, Values, Packed);
+ return get(T, Values);
}
ConstantVector::ConstantVector(const VectorType *T,
@@ -696,9 +708,9 @@ ConstantVector::ConstantVector(const VectorType *T,
}
// ConstantVector accessors.
-Constant *ConstantVector::get(const VectorType *T,
- const std::vector<Constant*> &V) {
+Constant *ConstantVector::get(ArrayRef<Constant*> V) {
assert(!V.empty() && "Vectors can't be empty");
+ const VectorType *T = VectorType::get(V.front()->getType(), V.size());
LLVMContextImpl *pImpl = T->getContext().pImpl;
// If this is an all-undef or all-zero vector, return a
@@ -723,12 +735,6 @@ Constant *ConstantVector::get(const VectorType *T,
return pImpl->VectorConstants.getOrCreate(T, V);
}
-Constant *ConstantVector::get(ArrayRef<Constant*> V) {
- // FIXME: make this the primary ctor method.
- assert(!V.empty() && "Vectors cannot be empty");
- return get(VectorType::get(V.front()->getType(), V.size()), V.vec());
-}
-
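// Illustrative usage sketch (not part of this patch; Ctx stands in for an
// LLVMContext): with the ArrayRef overload now primary, callers build a
// vector constant straight from its elements, and the VectorType is derived
// from V.front()->getType():
//   SmallVector<Constant*, 4> Elts;
//   for (unsigned i = 0; i != 4; ++i)
//     Elts.push_back(ConstantInt::get(Type::getInt32Ty(Ctx), i));
//   Constant *Mask = ConstantVector::get(Elts);  // <4 x i32> <0, 1, 2, 3>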
// Utility function for determining if a ConstantExpr is a CastOp or not. This
// can't be inline because we don't want to #include Instruction.h into
// Constant.h
@@ -779,8 +785,7 @@ ArrayRef<unsigned> ConstantExpr::getIndices() const {
}
unsigned ConstantExpr::getPredicate() const {
- assert(getOpcode() == Instruction::FCmp ||
- getOpcode() == Instruction::ICmp);
+ assert(isCompare());
return ((const CompareConstantExpr*)this)->predicate;
}
@@ -851,17 +856,15 @@ ConstantExpr::getWithOperandReplaced(unsigned OpNo, Constant *Op) const {
}
/// getWithOperands - This returns the current constant expression with the
-/// operands replaced with the specified values. The specified operands must
-/// match count and type with the existing ones.
+/// operands replaced with the specified values. The specified array must
+/// have the same number of operands as our current one.
Constant *ConstantExpr::
-getWithOperands(ArrayRef<Constant*> Ops) const {
+getWithOperands(ArrayRef<Constant*> Ops, const Type *Ty) const {
assert(Ops.size() == getNumOperands() && "Operand count mismatch!");
- bool AnyChange = false;
- for (unsigned i = 0; i != Ops.size(); ++i) {
- assert(Ops[i]->getType() == getOperand(i)->getType() &&
- "Operand type mismatch!");
+ bool AnyChange = Ty != getType();
+ for (unsigned i = 0; i != Ops.size(); ++i)
AnyChange |= Ops[i] != getOperand(i);
- }
+
if (!AnyChange) // No operands changed, return self.
return const_cast<ConstantExpr*>(this);
@@ -878,7 +881,7 @@ getWithOperands(ArrayRef<Constant*> Ops) const {
case Instruction::PtrToInt:
case Instruction::IntToPtr:
case Instruction::BitCast:
- return ConstantExpr::getCast(getOpcode(), Ops[0], getType());
+ return ConstantExpr::getCast(getOpcode(), Ops[0], Ty);
case Instruction::Select:
return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]);
case Instruction::InsertElement:
@@ -976,14 +979,14 @@ ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantAggregateZero::destroyConstant() {
- getRawType()->getContext().pImpl->AggZeroConstants.remove(this);
+ getType()->getContext().pImpl->AggZeroConstants.remove(this);
destroyConstantImpl();
}
/// destroyConstant - Remove the constant from the constant table...
///
void ConstantArray::destroyConstant() {
- getRawType()->getContext().pImpl->ArrayConstants.remove(this);
+ getType()->getContext().pImpl->ArrayConstants.remove(this);
destroyConstantImpl();
}
@@ -1023,44 +1026,54 @@ bool ConstantArray::isCString() const {
}
-/// getAsString - If the sub-element type of this array is i8
-/// then this method converts the array to an std::string and returns it.
-/// Otherwise, it asserts out.
+/// convertToString - Helper function for getAsString() and getAsCString().
+static std::string convertToString(const User *U, unsigned len) {
+ std::string Result;
+ Result.reserve(len);
+ for (unsigned i = 0; i != len; ++i)
+ Result.push_back((char)cast<ConstantInt>(U->getOperand(i))->getZExtValue());
+ return Result;
+}
+
+/// getAsString - If isString() is true for this array, this method converts the
+/// array to an std::string and returns it. Otherwise, it asserts out.
///
std::string ConstantArray::getAsString() const {
assert(isString() && "Not a string!");
- std::string Result;
- Result.reserve(getNumOperands());
- for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- Result.push_back((char)cast<ConstantInt>(getOperand(i))->getZExtValue());
- return Result;
+ return convertToString(this, getNumOperands());
}
-//---- ConstantStruct::get() implementation...
-//
+/// getAsCString - If isCString() is true for this array, this method converts the
+/// array (without the trailing null byte) to an std::string and returns it.
+/// Otherwise, it asserts out.
+///
+std::string ConstantArray::getAsCString() const {
+ assert(isCString() && "Not a string!");
+ return convertToString(this, getNumOperands() - 1);
+}
-namespace llvm {
-}
+//---- ConstantStruct::get() implementation...
+//
// destroyConstant - Remove the constant from the constant table...
//
void ConstantStruct::destroyConstant() {
- getRawType()->getContext().pImpl->StructConstants.remove(this);
+ getType()->getContext().pImpl->StructConstants.remove(this);
destroyConstantImpl();
}
// destroyConstant - Remove the constant from the constant table...
//
void ConstantVector::destroyConstant() {
- getRawType()->getContext().pImpl->VectorConstants.remove(this);
+ getType()->getContext().pImpl->VectorConstants.remove(this);
destroyConstantImpl();
}
/// This function will return true iff every element in this vector constant
/// is set to all ones.
-/// @returns true iff this constant's emements are all set to all ones.
+/// @returns true iff this constant's elements are all set to all ones.
/// @brief Determine if the value is all ones.
bool ConstantVector::isAllOnesValue() const {
// Check out first element.
@@ -1068,9 +1081,10 @@ bool ConstantVector::isAllOnesValue() const {
const ConstantInt *CI = dyn_cast<ConstantInt>(Elt);
if (!CI || !CI->isAllOnesValue()) return false;
// Then make sure all remaining elements point to the same value.
- for (unsigned I = 1, E = getNumOperands(); I < E; ++I) {
- if (getOperand(I) != Elt) return false;
- }
+ for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
+ if (getOperand(I) != Elt)
+ return false;
+
return true;
}
@@ -1081,7 +1095,8 @@ Constant *ConstantVector::getSplatValue() const {
Constant *Elt = getOperand(0);
// Then make sure all remaining elements point to the same value.
for (unsigned I = 1, E = getNumOperands(); I < E; ++I)
- if (getOperand(I) != Elt) return 0;
+ if (getOperand(I) != Elt)
+ return 0;
return Elt;
}
@@ -1095,7 +1110,7 @@ ConstantPointerNull *ConstantPointerNull::get(const PointerType *Ty) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantPointerNull::destroyConstant() {
- getRawType()->getContext().pImpl->NullPtrConstants.remove(this);
+ getType()->getContext().pImpl->NullPtrConstants.remove(this);
destroyConstantImpl();
}
@@ -1110,7 +1125,7 @@ UndefValue *UndefValue::get(const Type *Ty) {
// destroyConstant - Remove the constant from the constant table.
//
void UndefValue::destroyConstant() {
- getRawType()->getContext().pImpl->UndefValueConstants.remove(this);
+ getType()->getContext().pImpl->UndefValueConstants.remove(this);
destroyConstantImpl();
}
@@ -1144,7 +1159,7 @@ BlockAddress::BlockAddress(Function *F, BasicBlock *BB)
// destroyConstant - Remove the constant from the constant table.
//
void BlockAddress::destroyConstant() {
- getFunction()->getRawType()->getContext().pImpl
+ getFunction()->getType()->getContext().pImpl
->BlockAddresses.erase(std::make_pair(getFunction(), getBasicBlock()));
getBasicBlock()->AdjustBlockAddressRefCount(-1);
destroyConstantImpl();
@@ -1183,7 +1198,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) {
assert(NewBA != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
- uncheckedReplaceAllUsesWith(NewBA);
+ replaceAllUsesWith(NewBA);
destroyConstant();
}
@@ -1420,49 +1435,15 @@ Constant *ConstantExpr::getBitCast(Constant *C, const Type *DstTy) {
return getFoldedCast(Instruction::BitCast, C, DstTy);
}
-Constant *ConstantExpr::getTy(const Type *ReqTy, unsigned Opcode,
- Constant *C1, Constant *C2,
- unsigned Flags) {
- // Check the operands for consistency first
+Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
+ unsigned Flags) {
+ // Check the operands for consistency first.
assert(Opcode >= Instruction::BinaryOpsBegin &&
Opcode < Instruction::BinaryOpsEnd &&
"Invalid opcode in binary constant expression");
assert(C1->getType() == C2->getType() &&
"Operand types in binary constant expression should match");
-
- if (ReqTy == C1->getType() || ReqTy == Type::getInt1Ty(ReqTy->getContext()))
- if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
- return FC; // Fold a few common cases...
-
- std::vector<Constant*> argVec(1, C1); argVec.push_back(C2);
- ExprMapKeyType Key(Opcode, argVec, 0, Flags);
- LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
-Constant *ConstantExpr::getCompareTy(unsigned short predicate,
- Constant *C1, Constant *C2) {
- switch (predicate) {
- default: llvm_unreachable("Invalid CmpInst predicate");
- case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
- case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
- case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
- case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
- case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
- case CmpInst::FCMP_TRUE:
- return getFCmp(predicate, C1, C2);
-
- case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
- case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
- case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
- case CmpInst::ICMP_SLE:
- return getICmp(predicate, C1, C2);
- }
-}
-
-Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
- unsigned Flags) {
#ifndef NDEBUG
switch (Opcode) {
case Instruction::Add:
@@ -1521,7 +1502,15 @@ Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2,
}
#endif
- return getTy(C1->getType(), Opcode, C1, C2, Flags);
+ if (Constant *FC = ConstantFoldBinaryInstruction(Opcode, C1, C2))
+ return FC; // Fold a few common cases.
+
+ std::vector<Constant*> argVec(1, C1);
+ argVec.push_back(C2);
+ ExprMapKeyType Key(Opcode, argVec, 0, Flags);
+
+ LLVMContextImpl *pImpl = C1->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(C1->getType(), Key);
}
Constant *ConstantExpr::getSizeOf(const Type* Ty) {
@@ -1537,8 +1526,8 @@ Constant *ConstantExpr::getSizeOf(const Type* Ty) {
Constant *ConstantExpr::getAlignOf(const Type* Ty) {
// alignof is implemented as: (i64) gep ({i1,Ty}*)null, 0, 1
// Note that a non-inbounds gep is used, as null isn't within any object.
- const Type *AligningTy = StructType::get(Ty->getContext(),
- Type::getInt1Ty(Ty->getContext()), Ty, NULL);
+ const Type *AligningTy =
+ StructType::get(Type::getInt1Ty(Ty->getContext()), Ty, NULL);
Constant *NullPtr = Constant::getNullValue(AligningTy->getPointerTo());
Constant *Zero = ConstantInt::get(Type::getInt64Ty(Ty->getContext()), 0);
Constant *One = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 1);
@@ -1566,41 +1555,55 @@ Constant *ConstantExpr::getOffsetOf(const Type* Ty, Constant *FieldNo) {
Type::getInt64Ty(Ty->getContext()));
}
-Constant *ConstantExpr::getCompare(unsigned short pred,
- Constant *C1, Constant *C2) {
+Constant *ConstantExpr::getCompare(unsigned short Predicate,
+ Constant *C1, Constant *C2) {
assert(C1->getType() == C2->getType() && "Op types should be identical!");
- return getCompareTy(pred, C1, C2);
+
+ switch (Predicate) {
+ default: llvm_unreachable("Invalid CmpInst predicate");
+ case CmpInst::FCMP_FALSE: case CmpInst::FCMP_OEQ: case CmpInst::FCMP_OGT:
+ case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLT: case CmpInst::FCMP_OLE:
+ case CmpInst::FCMP_ONE: case CmpInst::FCMP_ORD: case CmpInst::FCMP_UNO:
+ case CmpInst::FCMP_UEQ: case CmpInst::FCMP_UGT: case CmpInst::FCMP_UGE:
+ case CmpInst::FCMP_ULT: case CmpInst::FCMP_ULE: case CmpInst::FCMP_UNE:
+ case CmpInst::FCMP_TRUE:
+ return getFCmp(Predicate, C1, C2);
+
+ case CmpInst::ICMP_EQ: case CmpInst::ICMP_NE: case CmpInst::ICMP_UGT:
+ case CmpInst::ICMP_UGE: case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE:
+ case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: case CmpInst::ICMP_SLT:
+ case CmpInst::ICMP_SLE:
+ return getICmp(Predicate, C1, C2);
+ }
}
-Constant *ConstantExpr::getSelectTy(const Type *ReqTy, Constant *C,
- Constant *V1, Constant *V2) {
+Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2) {
assert(!SelectInst::areInvalidOperands(C, V1, V2)&&"Invalid select operands");
- if (ReqTy == V1->getType())
- if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
- return SC; // Fold common cases
+ if (Constant *SC = ConstantFoldSelectInstruction(C, V1, V2))
+ return SC; // Fold common cases
std::vector<Constant*> argVec(3, C);
argVec[1] = V1;
argVec[2] = V2;
ExprMapKeyType Key(Instruction::Select, argVec);
- LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(V1->getType(), Key);
}
-template<typename IndexTy>
-Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
- IndexTy const *Idxs,
- unsigned NumIdx, bool InBounds) {
- assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
- Idxs+NumIdx) ==
- cast<PointerType>(ReqTy)->getElementType() &&
- "GEP indices invalid!");
-
+Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
+ unsigned NumIdx, bool InBounds) {
if (Constant *FC = ConstantFoldGetElementPtr(C, InBounds, Idxs, NumIdx))
return FC; // Fold a few common cases.
+ // Get the result type of the getelementptr!
+ const Type *Ty =
+ GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
+ assert(Ty && "GEP indices invalid!");
+ unsigned AS = cast<PointerType>(C->getType())->getAddressSpace();
+ Type *ReqTy = Ty->getPointerTo(AS);
+
assert(C->getType()->isPointerTy() &&
"Non-pointer type for constant GetElementPtr expression");
// Look up the constant in the table first to ensure uniqueness
@@ -1611,32 +1614,11 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
ArgVec.push_back(cast<Constant>(Idxs[i]));
const ExprMapKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
InBounds ? GEPOperator::IsInBounds : 0);
-
- LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+
+ LLVMContextImpl *pImpl = C->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
-template<typename IndexTy>
-Constant *ConstantExpr::getGetElementPtrImpl(Constant *C, IndexTy const *Idxs,
- unsigned NumIdx, bool InBounds) {
- // Get the result type of the getelementptr!
- const Type *Ty =
- GetElementPtrInst::getIndexedType(C->getType(), Idxs, Idxs+NumIdx);
- assert(Ty && "GEP indices invalid!");
- unsigned As = cast<PointerType>(C->getType())->getAddressSpace();
- return getGetElementPtrTy(PointerType::get(Ty, As), C, Idxs, NumIdx,InBounds);
-}
-
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Value* const *Idxs,
- unsigned NumIdx, bool InBounds) {
- return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds);
-}
-
-Constant *ConstantExpr::getGetElementPtr(Constant *C, Constant *const *Idxs,
- unsigned NumIdx, bool InBounds) {
- return getGetElementPtrImpl(C, Idxs, NumIdx, InBounds);
-}
-
Constant *
ConstantExpr::getICmp(unsigned short pred, Constant *LHS, Constant *RHS) {
assert(LHS->getType() == RHS->getType());
@@ -1684,39 +1666,22 @@ ConstantExpr::getFCmp(unsigned short pred, Constant *LHS, Constant *RHS) {
return pImpl->ExprConstants.getOrCreate(ResultTy, Key);
}
-Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
- Constant *Idx) {
- if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
- return FC; // Fold a few common cases.
- // Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec(1, Val);
- ArgVec.push_back(Idx);
- const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
-
- LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
-}
-
Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
assert(Val->getType()->isVectorTy() &&
"Tried to create extractelement operation on non-vector type!");
assert(Idx->getType()->isIntegerTy(32) &&
"Extractelement index must be i32 type!");
- return getExtractElementTy(cast<VectorType>(Val->getType())->getElementType(),
- Val, Idx);
-}
-
-Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
- Constant *Elt, Constant *Idx) {
- if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+
+ if (Constant *FC = ConstantFoldExtractElementInstruction(Val, Idx))
return FC; // Fold a few common cases.
+
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec(1, Val);
- ArgVec.push_back(Elt);
ArgVec.push_back(Idx);
- const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
+ const ExprMapKeyType Key(Instruction::ExtractElement,ArgVec);
- LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ Type *ReqTy = cast<VectorType>(Val->getType())->getElementType();
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
}
@@ -1728,21 +1693,17 @@ Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
&& "Insertelement types must match!");
assert(Idx->getType()->isIntegerTy(32) &&
"Insertelement index must be i32 type!");
- return getInsertElementTy(Val->getType(), Val, Elt, Idx);
-}
-Constant *ConstantExpr::getShuffleVectorTy(const Type *ReqTy, Constant *V1,
- Constant *V2, Constant *Mask) {
- if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
- return FC; // Fold a few common cases...
+ if (Constant *FC = ConstantFoldInsertElementInstruction(Val, Elt, Idx))
+ return FC; // Fold a few common cases.
// Look up the constant in the table first to ensure uniqueness
- std::vector<Constant*> ArgVec(1, V1);
- ArgVec.push_back(V2);
- ArgVec.push_back(Mask);
- const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+ std::vector<Constant*> ArgVec(1, Val);
+ ArgVec.push_back(Elt);
+ ArgVec.push_back(Idx);
+ const ExprMapKeyType Key(Instruction::InsertElement,ArgVec);
- LLVMContextImpl *pImpl = ReqTy->getContext().pImpl;
- return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
+ LLVMContextImpl *pImpl = Val->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(Val->getType(), Key);
}
Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
@@ -1750,62 +1711,49 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2,
assert(ShuffleVectorInst::isValidOperands(V1, V2, Mask) &&
"Invalid shuffle vector constant expr operands!");
+ if (Constant *FC = ConstantFoldShuffleVectorInstruction(V1, V2, Mask))
+ return FC; // Fold a few common cases.
+
unsigned NElts = cast<VectorType>(Mask->getType())->getNumElements();
const Type *EltTy = cast<VectorType>(V1->getType())->getElementType();
const Type *ShufTy = VectorType::get(EltTy, NElts);
- return getShuffleVectorTy(ShufTy, V1, V2, Mask);
-}
-Constant *ConstantExpr::getInsertValueTy(const Type *ReqTy, Constant *Agg,
- Constant *Val,
- const unsigned *Idxs, unsigned NumIdx) {
- assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs,
- Idxs+NumIdx) == Val->getType() &&
- "insertvalue indices invalid!");
- assert(Agg->getType() == ReqTy &&
- "insertvalue type invalid!");
- assert(Agg->getType()->isFirstClassType() &&
- "Non-first-class type for constant InsertValue expression");
- Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs, NumIdx);
- assert(FC && "InsertValue constant expr couldn't be folded!");
- return FC;
+ // Look up the constant in the table first to ensure uniqueness
+ std::vector<Constant*> ArgVec(1, V1);
+ ArgVec.push_back(V2);
+ ArgVec.push_back(Mask);
+ const ExprMapKeyType Key(Instruction::ShuffleVector,ArgVec);
+
+ LLVMContextImpl *pImpl = ShufTy->getContext().pImpl;
+ return pImpl->ExprConstants.getOrCreate(ShufTy, Key);
}
Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val,
- const unsigned *IdxList, unsigned NumIdx) {
- assert(Agg->getType()->isFirstClassType() &&
- "Tried to create insertelement operation on non-first-class type!");
-
- const Type *ReqTy = Agg->getType();
-#ifndef NDEBUG
- const Type *ValTy =
- ExtractValueInst::getIndexedType(Agg->getType(), IdxList, IdxList+NumIdx);
-#endif
- assert(ValTy == Val->getType() && "insertvalue indices invalid!");
- return getInsertValueTy(ReqTy, Agg, Val, IdxList, NumIdx);
-}
-
-Constant *ConstantExpr::getExtractValueTy(const Type *ReqTy, Constant *Agg,
- const unsigned *Idxs, unsigned NumIdx) {
- assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs,
- Idxs+NumIdx) == ReqTy &&
- "extractvalue indices invalid!");
+ ArrayRef<unsigned> Idxs) {
+ assert(ExtractValueInst::getIndexedType(Agg->getType(),
+ Idxs) == Val->getType() &&
+ "insertvalue indices invalid!");
assert(Agg->getType()->isFirstClassType() &&
- "Non-first-class type for constant extractvalue expression");
- Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs, NumIdx);
- assert(FC && "ExtractValue constant expr couldn't be folded!");
+ "Non-first-class type for constant insertvalue expression");
+ Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs);
+ assert(FC && "insertvalue constant expr couldn't be folded!");
return FC;
}
Constant *ConstantExpr::getExtractValue(Constant *Agg,
- const unsigned *IdxList, unsigned NumIdx) {
+ ArrayRef<unsigned> Idxs) {
assert(Agg->getType()->isFirstClassType() &&
"Tried to create extractelement operation on non-first-class type!");
- const Type *ReqTy =
- ExtractValueInst::getIndexedType(Agg->getType(), IdxList, IdxList+NumIdx);
+ const Type *ReqTy = ExtractValueInst::getIndexedType(Agg->getType(), Idxs);
+ (void)ReqTy;
assert(ReqTy && "extractvalue indices invalid!");
- return getExtractValueTy(ReqTy, Agg, IdxList, NumIdx);
+
+ assert(Agg->getType()->isFirstClassType() &&
+ "Non-first-class type for constant extractvalue expression");
+ Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs);
+ assert(FC && "ExtractValue constant expr couldn't be folded!");
+ return FC;
}
Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) {
@@ -1918,7 +1866,7 @@ Constant *ConstantExpr::getAShr(Constant *C1, Constant *C2, bool isExact) {
// destroyConstant - Remove the constant from the constant table...
//
void ConstantExpr::destroyConstant() {
- getRawType()->getContext().pImpl->ExprConstants.remove(this);
+ getType()->getContext().pImpl->ExprConstants.remove(this);
destroyConstantImpl();
}
@@ -1959,10 +1907,10 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
Constant *ToC = cast<Constant>(To);
- LLVMContextImpl *pImpl = getRawType()->getContext().pImpl;
+ LLVMContextImpl *pImpl = getType()->getContext().pImpl;
std::pair<LLVMContextImpl::ArrayConstantsTy::MapKey, ConstantArray*> Lookup;
- Lookup.first.first = cast<ArrayType>(getRawType());
+ Lookup.first.first = cast<ArrayType>(getType());
Lookup.second = this;
std::vector<Constant*> &Values = Lookup.first.second;
@@ -1996,7 +1944,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
Constant *Replacement = 0;
if (isAllZeros) {
- Replacement = ConstantAggregateZero::get(getRawType());
+ Replacement = ConstantAggregateZero::get(getType());
} else {
// Check to see if we have this array type already.
bool Exists;
@@ -2032,7 +1980,7 @@ void ConstantArray::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
- uncheckedReplaceAllUsesWith(Replacement);
+ replaceAllUsesWith(Replacement);
// Delete the old constant!
destroyConstant();
@@ -2047,7 +1995,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(getOperand(OperandToUpdate) == From && "ReplaceAllUsesWith broken!");
std::pair<LLVMContextImpl::StructConstantsTy::MapKey, ConstantStruct*> Lookup;
- Lookup.first.first = cast<StructType>(getRawType());
+ Lookup.first.first = cast<StructType>(getType());
Lookup.second = this;
std::vector<Constant*> &Values = Lookup.first.second;
Values.reserve(getNumOperands()); // Build replacement struct.
@@ -2069,11 +2017,11 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
}
Values[OperandToUpdate] = ToC;
- LLVMContextImpl *pImpl = getRawType()->getContext().pImpl;
+ LLVMContextImpl *pImpl = getContext().pImpl;
Constant *Replacement = 0;
if (isAllZeros) {
- Replacement = ConstantAggregateZero::get(getRawType());
+ Replacement = ConstantAggregateZero::get(getType());
} else {
// Check to see if we have this struct type already.
bool Exists;
@@ -2098,7 +2046,7 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
- uncheckedReplaceAllUsesWith(Replacement);
+ replaceAllUsesWith(Replacement);
// Delete the old constant!
destroyConstant();
@@ -2116,11 +2064,11 @@ void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
Values.push_back(Val);
}
- Constant *Replacement = get(cast<VectorType>(getRawType()), Values);
+ Constant *Replacement = get(Values);
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
- uncheckedReplaceAllUsesWith(Replacement);
+ replaceAllUsesWith(Replacement);
// Delete the old constant!
destroyConstant();
@@ -2151,8 +2099,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
if (Agg == From) Agg = To;
ArrayRef<unsigned> Indices = getIndices();
- Replacement = ConstantExpr::getExtractValue(Agg,
- &Indices[0], Indices.size());
+ Replacement = ConstantExpr::getExtractValue(Agg, Indices);
} else if (getOpcode() == Instruction::InsertValue) {
Constant *Agg = getOperand(0);
Constant *Val = getOperand(1);
@@ -2160,11 +2107,10 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
if (Val == From) Val = To;
ArrayRef<unsigned> Indices = getIndices();
- Replacement = ConstantExpr::getInsertValue(Agg, Val,
- &Indices[0], Indices.size());
+ Replacement = ConstantExpr::getInsertValue(Agg, Val, Indices);
} else if (isCast()) {
assert(getOperand(0) == From && "Cast only has one use!");
- Replacement = ConstantExpr::getCast(getOpcode(), To, getRawType());
+ Replacement = ConstantExpr::getCast(getOpcode(), To, getType());
} else if (getOpcode() == Instruction::Select) {
Constant *C1 = getOperand(0);
Constant *C2 = getOperand(1);
@@ -2220,7 +2166,7 @@ void ConstantExpr::replaceUsesOfWithOnConstant(Value *From, Value *ToV,
assert(Replacement != this && "I didn't contain From!");
// Everyone using this now uses the replacement.
- uncheckedReplaceAllUsesWith(Replacement);
+ replaceAllUsesWith(Replacement);
// Delete the old constant!
destroyConstant();
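
Taken together, the Constants.cpp hunks above fold the old private getFooTy
helpers into the public entry points and move index lists to ArrayRef. A
minimal call-site sketch of the resulting API, assuming an aggregate constant
Agg of type {i32, [4 x i8]} is already in scope:

    #include "llvm/Constants.h"
    #include "llvm/ADT/ArrayRef.h"
    using namespace llvm;

    Constant *firstField(Constant *Agg) {
      unsigned Idxs[] = { 0 };
      // Old API: ConstantExpr::getExtractValue(Agg, Idxs, 1);
      // New API: the ArrayRef carries pointer and length together.
      return ConstantExpr::getExtractValue(Agg, Idxs);
    }
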
diff --git a/lib/VMCore/ConstantsContext.h b/lib/VMCore/ConstantsContext.h
index 1395754..bd134d9 100644
--- a/lib/VMCore/ConstantsContext.h
+++ b/lib/VMCore/ConstantsContext.h
@@ -568,15 +568,13 @@ struct ConstantKeyData<InlineAsm> {
}
};
-template<class ValType, class TypeClass, class ConstantClass,
+template<class ValType, class ValRefType, class TypeClass, class ConstantClass,
bool HasLargeKey = false /*true for arrays and structs*/ >
-class ConstantUniqueMap : public AbstractTypeUser {
+class ConstantUniqueMap {
public:
typedef std::pair<const TypeClass*, ValType> MapKey;
typedef std::map<MapKey, ConstantClass *> MapTy;
typedef std::map<ConstantClass *, typename MapTy::iterator> InverseMapTy;
- typedef std::map<const DerivedType*, typename MapTy::iterator>
- AbstractTypeMapTy;
private:
/// Map - This is the main map from the element descriptor to the Constants.
/// This is the primary way we avoid creating two of the same shape
@@ -589,10 +587,6 @@ private:
/// through the map with very large keys.
InverseMapTy InverseMap;
- /// AbstractTypeMap - Map for abstract type constants.
- ///
- AbstractTypeMapTy AbstractTypeMap;
-
public:
typename MapTy::iterator map_begin() { return Map.begin(); }
typename MapTy::iterator map_end() { return Map.end(); }
@@ -629,7 +623,7 @@ private:
}
typename MapTy::iterator I =
- Map.find(MapKey(static_cast<const TypeClass*>(CP->getRawType()),
+ Map.find(MapKey(static_cast<const TypeClass*>(CP->getType()),
ConstantKeyData<ConstantClass>::getValType(CP)));
if (I == Map.end() || I->second != CP) {
// FIXME: This should not use a linear scan. If this gets to be a
@@ -639,24 +633,8 @@ private:
}
return I;
}
-
- void AddAbstractTypeUser(const Type *Ty, typename MapTy::iterator I) {
- // If the type of the constant is abstract, make sure that an entry
- // exists for it in the AbstractTypeMap.
- if (Ty->isAbstract()) {
- const DerivedType *DTy = static_cast<const DerivedType *>(Ty);
- typename AbstractTypeMapTy::iterator TI = AbstractTypeMap.find(DTy);
-
- if (TI == AbstractTypeMap.end()) {
- // Add ourselves to the ATU list of the type.
- cast<DerivedType>(DTy)->addAbstractTypeUser(this);
-
- AbstractTypeMap.insert(TI, std::make_pair(DTy, I));
- }
- }
- }
- ConstantClass* Create(const TypeClass *Ty, const ValType &V,
+ ConstantClass *Create(const TypeClass *Ty, ValRefType V,
typename MapTy::iterator I) {
ConstantClass* Result =
ConstantCreator<ConstantClass,TypeClass,ValType>::create(Ty, V);
@@ -667,15 +645,13 @@ private:
if (HasLargeKey) // Remember the reverse mapping if needed.
InverseMap.insert(std::make_pair(Result, I));
- AddAbstractTypeUser(Ty, I);
-
return Result;
}
public:
/// getOrCreate - Return the specified constant from the map, creating it if
/// necessary.
- ConstantClass *getOrCreate(const TypeClass *Ty, const ValType &V) {
+ ConstantClass *getOrCreate(const TypeClass *Ty, ValRefType V) {
MapKey Lookup(Ty, V);
ConstantClass* Result = 0;
@@ -692,43 +668,6 @@ public:
return Result;
}
- void UpdateAbstractTypeMap(const DerivedType *Ty,
- typename MapTy::iterator I) {
- assert(AbstractTypeMap.count(Ty) &&
- "Abstract type not in AbstractTypeMap?");
- typename MapTy::iterator &ATMEntryIt = AbstractTypeMap[Ty];
- if (ATMEntryIt == I) {
- // Yes, we are removing the representative entry for this type.
- // See if there are any other entries of the same type.
- typename MapTy::iterator TmpIt = ATMEntryIt;
-
- // First check the entry before this one...
- if (TmpIt != Map.begin()) {
- --TmpIt;
- if (TmpIt->first.first != Ty) // Not the same type, move back...
- ++TmpIt;
- }
-
- // If we didn't find the same type, try to move forward...
- if (TmpIt == ATMEntryIt) {
- ++TmpIt;
- if (TmpIt == Map.end() || TmpIt->first.first != Ty)
- --TmpIt; // No entry afterwards with the same type
- }
-
- // If there is another entry in the map of the same abstract type,
- // update the AbstractTypeMap entry now.
- if (TmpIt != ATMEntryIt) {
- ATMEntryIt = TmpIt;
- } else {
- // Otherwise, we are removing the last instance of this type
- // from the table. Remove from the ATM, and from user list.
- cast<DerivedType>(Ty)->removeAbstractTypeUser(this);
- AbstractTypeMap.erase(Ty);
- }
- }
- }
-
void remove(ConstantClass *CP) {
typename MapTy::iterator I = FindExistingElement(CP);
assert(I != Map.end() && "Constant not found in constant table!");
@@ -736,12 +675,6 @@ public:
if (HasLargeKey) // Remember the reverse mapping if needed.
InverseMap.erase(CP);
-
- // Now that we found the entry, make sure this isn't the entry that
- // the AbstractTypeMap points to.
- const TypeClass *Ty = I->first.first;
- if (Ty->isAbstract())
- UpdateAbstractTypeMap(static_cast<const DerivedType *>(Ty), I);
Map.erase(I);
}
@@ -755,22 +688,7 @@ public:
assert(OldI != Map.end() && "Constant not found in constant table!");
assert(OldI->second == C && "Didn't find correct element?");
- // If this constant is the representative element for its abstract type,
- // update the AbstractTypeMap so that the representative element is I.
- //
- // This must use getRawType() because if the type is under refinement, we
- // will get the refineAbstractType callback below, and we don't want to
- // kick union find in on the constant.
- if (C->getRawType()->isAbstract()) {
- typename AbstractTypeMapTy::iterator ATI =
- AbstractTypeMap.find(cast<DerivedType>(C->getRawType()));
- assert(ATI != AbstractTypeMap.end() &&
- "Abstract type not in AbstractTypeMap?");
- if (ATI->second == OldI)
- ATI->second = I;
- }
-
- // Remove the old entry from the map.
+ // Remove the old entry from the map.
Map.erase(OldI);
// Update the inverse map so that we know that this constant is now
@@ -780,58 +698,6 @@ public:
InverseMap[C] = I;
}
}
-
- void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
- typename AbstractTypeMapTy::iterator I = AbstractTypeMap.find(OldTy);
-
- assert(I != AbstractTypeMap.end() &&
- "Abstract type not in AbstractTypeMap?");
-
- // Convert a constant at a time until the last one is gone. The last one
- // leaving will remove() itself, causing the AbstractTypeMapEntry to be
- // eliminated eventually.
- do {
- ConstantClass *C = I->second->second;
- MapKey Key(cast<TypeClass>(NewTy),
- ConstantKeyData<ConstantClass>::getValType(C));
-
- std::pair<typename MapTy::iterator, bool> IP =
- Map.insert(std::make_pair(Key, C));
- if (IP.second) {
- // The map didn't previously have an appropriate constant in the
- // new type.
-
- // Remove the old entry.
- typename MapTy::iterator OldI =
- Map.find(MapKey(cast<TypeClass>(OldTy), IP.first->first.second));
- assert(OldI != Map.end() && "Constant not in map!");
- UpdateAbstractTypeMap(OldTy, OldI);
- Map.erase(OldI);
-
- // Set the constant's type. This is done in place!
- setType(C, NewTy);
-
- // Update the inverse map so that we know that this constant is now
- // located at descriptor I.
- if (HasLargeKey)
- InverseMap[C] = IP.first;
-
- AddAbstractTypeUser(NewTy, IP.first);
- } else {
- // The map already had an appropriate constant in the new type, so
- // there's no longer a need for the old constant.
- C->uncheckedReplaceAllUsesWith(IP.first->second);
- C->destroyConstant(); // This constant is now dead, destroy it.
- }
- I = AbstractTypeMap.find(OldTy);
- } while (I != AbstractTypeMap.end());
- }
-
- // If the type became concrete without being refined to any other existing
- // type, we just remove ourselves from the ATU list.
- void typeBecameConcrete(const DerivedType *AbsTy) {
- AbsTy->removeAbstractTypeUser(this);
- }
void dump() const {
DEBUG(dbgs() << "Constant.cpp: ConstantUniqueMap\n");
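
With the AbstractTypeUser machinery gone, what remains of ConstantUniqueMap
is an ordinary uniquing table. A standalone sketch (not LLVM code) of that
remaining scheme, with hypothetical Expr and UniqueMap names standing in for
the real template parameters:

    #include <map>
    #include <utility>
    #include <vector>

    struct Expr { int Opcode; std::vector<int> Ops; };

    class UniqueMap {
      typedef std::pair<int, std::vector<int> > Key;
      std::map<Key, Expr*> Map; // (opcode, operands) -> canonical object
    public:
      Expr *getOrCreate(int Opcode, const std::vector<int> &Ops) {
        Key K(Opcode, Ops);
        std::map<Key, Expr*>::iterator I = Map.lower_bound(K);
        if (I != Map.end() && I->first == K)
          return I->second;               // reuse the existing entry
        Expr *E = new Expr();
        E->Opcode = Opcode;
        E->Ops = Ops;
        return Map.insert(I, std::make_pair(K, E))->second;
      }
    };
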
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 92f9440..2a816e1 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -19,7 +19,6 @@
#include "llvm/GlobalVariable.h"
#include "llvm/GlobalAlias.h"
#include "llvm/LLVMContext.h"
-#include "llvm/TypeSymbolTable.h"
#include "llvm/InlineAsm.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/PassManager.h"
@@ -111,27 +110,6 @@ void LLVMSetTarget(LLVMModuleRef M, const char *Triple) {
unwrap(M)->setTargetTriple(Triple);
}
-/*--.. Type names ..........................................................--*/
-LLVMBool LLVMAddTypeName(LLVMModuleRef M, const char *Name, LLVMTypeRef Ty) {
- return unwrap(M)->addTypeName(Name, unwrap(Ty));
-}
-
-void LLVMDeleteTypeName(LLVMModuleRef M, const char *Name) {
- TypeSymbolTable &TST = unwrap(M)->getTypeSymbolTable();
-
- TypeSymbolTable::iterator I = TST.find(Name);
- if (I != TST.end())
- TST.remove(I);
-}
-
-LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
- return wrap(unwrap(M)->getTypeByName(Name));
-}
-
-const char *LLVMGetTypeName(LLVMModuleRef M, LLVMTypeRef Ty) {
- return unwrap(M)->getTypeName(unwrap(Ty)).c_str();
-}
-
void LLVMDumpModule(LLVMModuleRef M) {
unwrap(M)->dump();
}
@@ -182,8 +160,6 @@ LLVMTypeKind LLVMGetTypeKind(LLVMTypeRef Ty) {
return LLVMArrayTypeKind;
case Type::PointerTyID:
return LLVMPointerTypeKind;
- case Type::OpaqueTyID:
- return LLVMOpaqueTypeKind;
case Type::VectorTyID:
return LLVMVectorTypeKind;
case Type::X86_MMXTyID:
@@ -284,10 +260,7 @@ LLVMTypeRef LLVMX86MMXType(void) {
LLVMTypeRef LLVMFunctionType(LLVMTypeRef ReturnType,
LLVMTypeRef *ParamTypes, unsigned ParamCount,
LLVMBool IsVarArg) {
- std::vector<const Type*> Tys;
- for (LLVMTypeRef *I = ParamTypes, *E = ParamTypes + ParamCount; I != E; ++I)
- Tys.push_back(unwrap(*I));
-
+ ArrayRef<Type*> Tys(unwrap(ParamTypes), ParamCount);
return wrap(FunctionType::get(unwrap(ReturnType), Tys, IsVarArg != 0));
}
@@ -314,11 +287,7 @@ void LLVMGetParamTypes(LLVMTypeRef FunctionTy, LLVMTypeRef *Dest) {
LLVMTypeRef LLVMStructTypeInContext(LLVMContextRef C, LLVMTypeRef *ElementTypes,
unsigned ElementCount, LLVMBool Packed) {
- std::vector<const Type*> Tys;
- for (LLVMTypeRef *I = ElementTypes,
- *E = ElementTypes + ElementCount; I != E; ++I)
- Tys.push_back(unwrap(*I));
-
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
return wrap(StructType::get(*unwrap(C), Tys, Packed != 0));
}
@@ -328,6 +297,16 @@ LLVMTypeRef LLVMStructType(LLVMTypeRef *ElementTypes,
ElementCount, Packed);
}
+LLVMTypeRef LLVMStructCreateNamed(LLVMContextRef C, const char *Name) {
+ return wrap(StructType::createNamed(*unwrap(C), Name));
+}
+
+void LLVMStructSetBody(LLVMTypeRef StructTy, LLVMTypeRef *ElementTypes,
+ unsigned ElementCount, LLVMBool Packed) {
+ ArrayRef<Type*> Tys(unwrap(ElementTypes), ElementCount);
+ unwrap<StructType>(StructTy)->setBody(Tys, Packed != 0);
+}
unsigned LLVMCountStructElementTypes(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->getNumElements();
@@ -344,6 +323,14 @@ LLVMBool LLVMIsPackedStruct(LLVMTypeRef StructTy) {
return unwrap<StructType>(StructTy)->isPacked();
}
+LLVMBool LLVMIsOpaqueStruct(LLVMTypeRef StructTy) {
+ return unwrap<StructType>(StructTy)->isOpaque();
+}
+
+LLVMTypeRef LLVMGetTypeByName(LLVMModuleRef M, const char *Name) {
+ return wrap(unwrap(M)->getTypeByName(Name));
+}
+
/*--.. Operations on array, pointer, and vector types (sequence types) .....--*/
LLVMTypeRef LLVMArrayType(LLVMTypeRef ElementType, unsigned ElementCount) {
@@ -382,9 +369,6 @@ LLVMTypeRef LLVMVoidTypeInContext(LLVMContextRef C) {
LLVMTypeRef LLVMLabelTypeInContext(LLVMContextRef C) {
return wrap(Type::getLabelTy(*unwrap(C)));
}
-LLVMTypeRef LLVMOpaqueTypeInContext(LLVMContextRef C) {
- return wrap(OpaqueType::get(*unwrap(C)));
-}
LLVMTypeRef LLVMVoidType(void) {
return LLVMVoidTypeInContext(LLVMGetGlobalContext());
@@ -392,28 +376,6 @@ LLVMTypeRef LLVMVoidType(void) {
LLVMTypeRef LLVMLabelType(void) {
return LLVMLabelTypeInContext(LLVMGetGlobalContext());
}
-LLVMTypeRef LLVMOpaqueType(void) {
- return LLVMOpaqueTypeInContext(LLVMGetGlobalContext());
-}
-
-/*--.. Operations on type handles ..........................................--*/
-
-LLVMTypeHandleRef LLVMCreateTypeHandle(LLVMTypeRef PotentiallyAbstractTy) {
- return wrap(new PATypeHolder(unwrap(PotentiallyAbstractTy)));
-}
-
-void LLVMDisposeTypeHandle(LLVMTypeHandleRef TypeHandle) {
- delete unwrap(TypeHandle);
-}
-
-LLVMTypeRef LLVMResolveTypeHandle(LLVMTypeHandleRef TypeHandle) {
- return wrap(unwrap(TypeHandle)->get());
-}
-
-void LLVMRefineType(LLVMTypeRef AbstractTy, LLVMTypeRef ConcreteTy) {
- unwrap<DerivedType>(AbstractTy)->refineAbstractTypeTo(unwrap(ConcreteTy));
-}
-
/*===-- Operations on values ----------------------------------------------===*/
@@ -612,9 +574,10 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str,
LLVMValueRef LLVMConstStructInContext(LLVMContextRef C,
LLVMValueRef *ConstantVals,
unsigned Count, LLVMBool Packed) {
- return wrap(ConstantStruct::get(*unwrap(C),
- unwrap<Constant>(ConstantVals, Count),
- Count, Packed != 0));
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ return wrap(ConstantStruct::getAnon(*unwrap(C),
+ ArrayRef<Constant*>(Elements, Count),
+ Packed != 0));
}
LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
@@ -624,15 +587,24 @@ LLVMValueRef LLVMConstString(const char *Str, unsigned Length,
}
LLVMValueRef LLVMConstArray(LLVMTypeRef ElementTy,
LLVMValueRef *ConstantVals, unsigned Length) {
- return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length),
- unwrap<Constant>(ConstantVals, Length),
- Length));
+ ArrayRef<Constant*> V(unwrap<Constant>(ConstantVals, Length), Length);
+ return wrap(ConstantArray::get(ArrayType::get(unwrap(ElementTy), Length), V));
}
LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
LLVMBool Packed) {
return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
Packed);
}
+
+LLVMValueRef LLVMConstNamedStruct(LLVMTypeRef StructTy,
+ LLVMValueRef *ConstantVals,
+ unsigned Count) {
+ Constant **Elements = unwrap<Constant>(ConstantVals, Count);
+ const StructType *Ty = cast<StructType>(unwrap(StructTy));
+
+ return wrap(ConstantStruct::get(Ty, ArrayRef<Constant*>(Elements, Count)));
+}
+
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
return wrap(ConstantVector::get(ArrayRef<Constant*>(
unwrap<Constant>(ScalarConstantVals, Size), Size)));
@@ -962,7 +934,8 @@ LLVMValueRef LLVMConstShuffleVector(LLVMValueRef VectorAConstant,
LLVMValueRef LLVMConstExtractValue(LLVMValueRef AggConstant, unsigned *IdxList,
unsigned NumIdx) {
return wrap(ConstantExpr::getExtractValue(unwrap<Constant>(AggConstant),
- IdxList, NumIdx));
+ ArrayRef<unsigned>(IdxList,
+ NumIdx)));
}
LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
@@ -970,7 +943,8 @@ LLVMValueRef LLVMConstInsertValue(LLVMValueRef AggConstant,
unsigned *IdxList, unsigned NumIdx) {
return wrap(ConstantExpr::getInsertValue(unwrap<Constant>(AggConstant),
unwrap<Constant>(ElementValueConstant),
- IdxList, NumIdx));
+ ArrayRef<unsigned>(IdxList,
+ NumIdx)));
}
LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
@@ -1706,7 +1680,7 @@ LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
const char *Name) {
return wrap(unwrap(B)->CreateInvoke(unwrap(Fn), unwrap(Then), unwrap(Catch),
- unwrap(Args), unwrap(Args) + NumArgs,
+ ArrayRef<Value *>(unwrap(Args), NumArgs),
Name));
}
@@ -2089,8 +2063,9 @@ LLVMValueRef LLVMBuildPhi(LLVMBuilderRef B, LLVMTypeRef Ty, const char *Name) {
LLVMValueRef LLVMBuildCall(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
const char *Name) {
- return wrap(unwrap(B)->CreateCall(unwrap(Fn), unwrap(Args),
- unwrap(Args) + NumArgs, Name));
+ return wrap(unwrap(B)->CreateCall(unwrap(Fn),
+ ArrayRef<Value *>(unwrap(Args), NumArgs),
+ Name));
}
LLVMValueRef LLVMBuildSelect(LLVMBuilderRef B, LLVMValueRef If,
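
The named-struct entry points added to Core.cpp above replace the deleted
type-name and type-handle APIs. A usage sketch, written as C++ only for
consistency with the rest of this patch (the interface itself is plain C):

    #include "llvm-c/Core.h"

    void buildNodeType(void) {
      LLVMContextRef C = LLVMContextCreate();
      LLVMTypeRef Node = LLVMStructCreateNamed(C, "Node"); // opaque so far
      LLVMTypeRef Fields[] = { LLVMInt32TypeInContext(C),
                               LLVMPointerType(Node, 0) }; // recursive field
      LLVMStructSetBody(Node, Fields, 2, /*Packed=*/0);
      // LLVMIsOpaqueStruct(Node) returns false from here on.
      LLVMContextDispose(C);
    }
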
diff --git a/lib/VMCore/DebugLoc.cpp b/lib/VMCore/DebugLoc.cpp
index 520333c..4ff6b2c 100644
--- a/lib/VMCore/DebugLoc.cpp
+++ b/lib/VMCore/DebugLoc.cpp
@@ -128,6 +128,38 @@ DebugLoc DebugLoc::getFromDILocation(MDNode *N) {
return get(LineNo, ColNo, Scope, dyn_cast_or_null<MDNode>(N->getOperand(3)));
}
+/// getFromDILexicalBlock - Translate the DILexicalBlock into a DebugLoc.
+DebugLoc DebugLoc::getFromDILexicalBlock(MDNode *N) {
+  if (N == 0 || N->getNumOperands() < 4) return DebugLoc();
+
+ MDNode *Scope = dyn_cast_or_null<MDNode>(N->getOperand(1));
+ if (Scope == 0) return DebugLoc();
+
+ unsigned LineNo = 0, ColNo = 0;
+ if (ConstantInt *Line = dyn_cast_or_null<ConstantInt>(N->getOperand(2)))
+ LineNo = Line->getZExtValue();
+ if (ConstantInt *Col = dyn_cast_or_null<ConstantInt>(N->getOperand(3)))
+ ColNo = Col->getZExtValue();
+
+  return get(LineNo, ColNo, Scope, 0);
+}
+
+void DebugLoc::dump(const LLVMContext &Ctx) const {
+#ifndef NDEBUG
+ if (!isUnknown()) {
+ dbgs() << getLine();
+ if (getCol() != 0)
+ dbgs() << ',' << getCol();
+ DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(getInlinedAt(Ctx));
+ if (!InlinedAtDL.isUnknown()) {
+ dbgs() << " @ ";
+ InlinedAtDL.dump(Ctx);
+ } else
+ dbgs() << "\n";
+ }
+#endif
+}
+
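
A usage sketch for the new dump overload, assuming an Instruction pointer is
in scope; output only appears in asserts-enabled builds, matching the
#ifndef NDEBUG guard above:

    void printLoc(const Instruction *I, const LLVMContext &Ctx) {
      const DebugLoc &DL = I->getDebugLoc();
      if (!DL.isUnknown())
        DL.dump(Ctx); // prints "line[,col]" plus any inlined-at chain
    }
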
//===----------------------------------------------------------------------===//
// DenseMap specialization
//===----------------------------------------------------------------------===//
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 0ae0bdb..6536bcd 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -134,7 +134,7 @@ LLVMContext &Function::getContext() const {
return getType()->getContext();
}
-const FunctionType *Function::getFunctionType() const {
+FunctionType *Function::getFunctionType() const {
return cast<FunctionType>(getType()->getElementType());
}
@@ -142,7 +142,7 @@ bool Function::isVarArg() const {
return getFunctionType()->isVarArg();
}
-const Type *Function::getReturnType() const {
+Type *Function::getReturnType() const {
return getFunctionType()->getReturnType();
}
@@ -163,7 +163,7 @@ Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
: GlobalValue(PointerType::getUnqual(Ty),
Value::FunctionVal, 0, 0, Linkage, name) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
- !getReturnType()->isOpaqueTy() && "invalid return type");
+ "invalid return type");
SymTab = new ValueSymbolTable();
// If the function has arguments, mark them as lazily built.
@@ -333,7 +333,7 @@ unsigned Function::getIntrinsicID() const {
return 0;
}
-std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) {
+std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
assert(id < num_intrinsics && "Invalid intrinsic ID!");
static const char * const Table[] = {
"not_intrinsic",
@@ -341,10 +341,10 @@ std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) {
#include "llvm/Intrinsics.gen"
#undef GET_INTRINSIC_NAME_TABLE
};
- if (numTys == 0)
+ if (Tys.empty())
return Table[id];
std::string Result(Table[id]);
- for (unsigned i = 0; i < numTys; ++i) {
+ for (unsigned i = 0; i < Tys.size(); ++i) {
if (const PointerType* PTyp = dyn_cast<PointerType>(Tys[i])) {
Result += ".p" + llvm::utostr(PTyp->getAddressSpace()) +
EVT::getEVT(PTyp->getElementType()).getEVTString();
@@ -356,10 +356,9 @@ std::string Intrinsic::getName(ID id, const Type **Tys, unsigned numTys) {
}
const FunctionType *Intrinsic::getType(LLVMContext &Context,
- ID id, const Type **Tys,
- unsigned numTys) {
+ ID id, ArrayRef<Type*> Tys) {
const Type *ResultTy = NULL;
- std::vector<const Type*> ArgTys;
+ std::vector<Type*> ArgTys;
bool IsVarArg = false;
#define GET_INTRINSIC_GENERATOR
@@ -384,14 +383,12 @@ bool Intrinsic::isOverloaded(ID id) {
#include "llvm/Intrinsics.gen"
#undef GET_INTRINSIC_ATTRIBUTES
-Function *Intrinsic::getDeclaration(Module *M, ID id, const Type **Tys,
- unsigned numTys) {
+Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) {
  // There can never be multiple globals with the same name but different
  // types, because intrinsics must have a specific type.
return
- cast<Function>(M->getOrInsertFunction(getName(id, Tys, numTys),
- getType(M->getContext(),
- id, Tys, numTys)));
+ cast<Function>(M->getOrInsertFunction(getName(id, Tys),
+ getType(M->getContext(), id, Tys)));
}
// This defines the "Intrinsic::getIntrinsicForGCCBuiltin()" method.
@@ -417,7 +414,7 @@ bool Function::hasAddressTaken(const User* *PutOffender) const {
/// setjmp or other function that gcc recognizes as "returning twice".
///
/// FIXME: Remove after <rdar://problem/8031714> is fixed.
-/// FIXME: Is the obove FIXME valid?
+/// FIXME: Is the above FIXME valid?
bool Function::callsFunctionThatReturnsTwice() const {
const Module *M = this->getParent();
static const char *ReturnsTwiceFns[] = {
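
A call-site sketch for the ArrayRef-based Intrinsic::getDeclaration above;
the two types pick the memset overload, mirroring the IRBuilder change later
in this patch (assumes a Module *M is available):

    Function *getMemset(Module *M) {
      Type *Tys[] = { Type::getInt8PtrTy(M->getContext()),
                      Type::getInt64Ty(M->getContext()) };
      // Old API: Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
      return Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
    }
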
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index 60000ad..db008e0 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -51,6 +51,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
setAlignment(Src->getAlignment());
setSection(Src->getSection());
setVisibility(Src->getVisibility());
+ setUnnamedAddr(Src->hasUnnamedAddr());
}
void GlobalValue::setAlignment(unsigned Align) {
@@ -60,6 +61,20 @@ void GlobalValue::setAlignment(unsigned Align) {
Alignment = Log2_32(Align) + 1;
assert(getAlignment() == Align && "Alignment representation error!");
}
+
+bool GlobalValue::isDeclaration() const {
+  // Global variables are declarations if they lack an initializer.
+  if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(this))
+    return GV->getNumOperands() == 0;
+
+  // Functions are declarations if they lack a body.
+ if (const Function *F = dyn_cast<Function>(this))
+ return F->empty();
+
+ // Aliases are always definitions.
+ assert(isa<GlobalAlias>(this));
+ return false;
+}
//===----------------------------------------------------------------------===//
// GlobalVariable Implementation
@@ -201,39 +216,26 @@ void GlobalAlias::eraseFromParent() {
getParent()->getAliasList().erase(this);
}
-bool GlobalAlias::isDeclaration() const {
- const GlobalValue* AV = getAliasedGlobal();
- if (AV)
- return AV->isDeclaration();
- else
- return false;
-}
-
-void GlobalAlias::setAliasee(Constant *Aliasee)
-{
- if (Aliasee)
- assert(Aliasee->getType() == getType() &&
- "Alias and aliasee types should match!");
+void GlobalAlias::setAliasee(Constant *Aliasee) {
+ assert((!Aliasee || Aliasee->getType() == getType()) &&
+ "Alias and aliasee types should match!");
setOperand(0, Aliasee);
}
const GlobalValue *GlobalAlias::getAliasedGlobal() const {
const Constant *C = getAliasee();
- if (C) {
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return GV;
- else {
- const ConstantExpr *CE = 0;
- if ((CE = dyn_cast<ConstantExpr>(C)) &&
- (CE->getOpcode() == Instruction::BitCast ||
- CE->getOpcode() == Instruction::GetElementPtr))
- return dyn_cast<GlobalValue>(CE->getOperand(0));
- else
- llvm_unreachable("Unsupported aliasee");
- }
- }
- return 0;
+ if (C == 0) return 0;
+
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+ return GV;
+
+ const ConstantExpr *CE = cast<ConstantExpr>(C);
+ assert((CE->getOpcode() == Instruction::BitCast ||
+ CE->getOpcode() == Instruction::GetElementPtr) &&
+ "Unsupported aliasee");
+
+ return dyn_cast<GlobalValue>(CE->getOperand(0));
}
const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
@@ -254,7 +256,7 @@ const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
GV = GA->getAliasedGlobal();
if (!Visited.insert(GV))
- return NULL;
+ return 0;
}
return GV;
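
With isDeclaration() now centralized on GlobalValue, a sketch of a typical
caller; listExternals is a hypothetical helper, not part of this patch:

    void listExternals(Module &M) {
      for (Module::global_iterator I = M.global_begin(), E = M.global_end();
           I != E; ++I)
        if (I->isDeclaration())
          errs() << I->getName() << " has no initializer\n";
    }
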
diff --git a/lib/VMCore/IRBuilder.cpp b/lib/VMCore/IRBuilder.cpp
index f2d469a..ffe961f 100644
--- a/lib/VMCore/IRBuilder.cpp
+++ b/lib/VMCore/IRBuilder.cpp
@@ -34,7 +34,7 @@ Value *IRBuilderBase::CreateGlobalString(StringRef Str, const Twine &Name) {
return GV;
}
-const Type *IRBuilderBase::getCurrentFunctionReturnType() const {
+Type *IRBuilderBase::getCurrentFunctionReturnType() const {
assert(BB && BB->getParent() && "No current function!");
return BB->getParent()->getReturnType();
}
@@ -52,9 +52,9 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) {
return BCI;
}
-static CallInst *createCallHelper(Value *Callee, Value *const* Ops,
- unsigned NumOps, IRBuilderBase *Builder) {
- CallInst *CI = CallInst::Create(Callee, Ops, Ops + NumOps, "");
+static CallInst *createCallHelper(Value *Callee, ArrayRef<Value *> Ops,
+ IRBuilderBase *Builder) {
+ CallInst *CI = CallInst::Create(Callee, Ops, "");
Builder->GetInsertBlock()->getInstList().insert(Builder->GetInsertPoint(),CI);
Builder->SetInstDebugLocation(CI);
return CI;
@@ -65,11 +65,11 @@ CreateMemSet(Value *Ptr, Value *Val, Value *Size, unsigned Align,
bool isVolatile, MDNode *TBAATag) {
Ptr = getCastedInt8PtrValue(Ptr);
Value *Ops[] = { Ptr, Val, Size, getInt32(Align), getInt1(isVolatile) };
- const Type *Tys[] = { Ptr->getType(), Size->getType() };
+ Type *Tys[] = { Ptr->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys, 2);
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memset, Tys);
- CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
// Set the TBAA info if present.
if (TBAATag)
@@ -85,11 +85,11 @@ CreateMemCpy(Value *Dst, Value *Src, Value *Size, unsigned Align,
Src = getCastedInt8PtrValue(Src);
Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
- const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys, 3);
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcpy, Tys);
- CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
// Set the TBAA info if present.
if (TBAATag)
@@ -105,11 +105,11 @@ CreateMemMove(Value *Dst, Value *Src, Value *Size, unsigned Align,
Src = getCastedInt8PtrValue(Src);
Value *Ops[] = { Dst, Src, Size, getInt32(Align), getInt1(isVolatile) };
- const Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
+ Type *Tys[] = { Dst->getType(), Src->getType(), Size->getType() };
Module *M = BB->getParent()->getParent();
- Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys, 3);
+ Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memmove, Tys);
- CallInst *CI = createCallHelper(TheFn, Ops, 5, this);
+ CallInst *CI = createCallHelper(TheFn, Ops, this);
// Set the TBAA info if present.
if (TBAATag)
@@ -130,7 +130,7 @@ CallInst *IRBuilderBase::CreateLifetimeStart(Value *Ptr, ConstantInt *Size) {
Value *Ops[] = { Size, Ptr };
Module *M = BB->getParent()->getParent();
Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_start);
- return createCallHelper(TheFn, Ops, 2, this);
+ return createCallHelper(TheFn, Ops, this);
}
CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
@@ -145,5 +145,5 @@ CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
Value *Ops[] = { Size, Ptr };
Module *M = BB->getParent()->getParent();
Value *TheFn = Intrinsic::getDeclaration(M, Intrinsic::lifetime_end);
- return createCallHelper(TheFn, Ops, 2, this);
+ return createCallHelper(TheFn, Ops, this);
}
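
None of the IRBuilder changes above alter the public surface; the memory
intrinsic helpers keep their signatures. A minimal sketch, assuming Ptr
points at a buffer of at least 64 bytes:

    void zeroBuffer(BasicBlock *BB, Value *Ptr) {
      IRBuilder<> B(BB);
      B.CreateMemSet(Ptr, B.getInt8(0), B.getInt64(64), /*Align=*/1);
    }
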
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index bd3667d..4a03b39 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -47,11 +47,11 @@ InlineAsm::InlineAsm(const PointerType *Ty, const std::string &asmString,
}
void InlineAsm::destroyConstant() {
- getRawType()->getContext().pImpl->InlineAsms.remove(this);
+ getType()->getContext().pImpl->InlineAsms.remove(this);
delete this;
}
-const FunctionType *InlineAsm::getFunctionType() const {
+FunctionType *InlineAsm::getFunctionType() const {
return cast<FunctionType>(getType()->getElementType());
}
diff --git a/lib/VMCore/Instruction.cpp b/lib/VMCore/Instruction.cpp
index 2c8b8b2..02c0757 100644
--- a/lib/VMCore/Instruction.cpp
+++ b/lib/VMCore/Instruction.cpp
@@ -204,22 +204,10 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
if (const InvokeInst *CI = dyn_cast<InvokeInst>(this))
return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
CI->getAttributes() == cast<InvokeInst>(I)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
- if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
- return false;
- for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
- if (IVI->idx_begin()[i] != cast<InsertValueInst>(I)->idx_begin()[i])
- return false;
- return true;
- }
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) {
- if (EVI->getNumIndices() != cast<ExtractValueInst>(I)->getNumIndices())
- return false;
- for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
- if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I)->idx_begin()[i])
- return false;
- return true;
- }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
return true;
}
@@ -256,22 +244,10 @@ bool Instruction::isSameOperationAs(const Instruction *I) const {
return CI->getCallingConv() == cast<InvokeInst>(I)->getCallingConv() &&
CI->getAttributes() ==
cast<InvokeInst>(I)->getAttributes();
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this)) {
- if (IVI->getNumIndices() != cast<InsertValueInst>(I)->getNumIndices())
- return false;
- for (unsigned i = 0, e = IVI->getNumIndices(); i != e; ++i)
- if (IVI->idx_begin()[i] != cast<InsertValueInst>(I)->idx_begin()[i])
- return false;
- return true;
- }
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this)) {
- if (EVI->getNumIndices() != cast<ExtractValueInst>(I)->getNumIndices())
- return false;
- for (unsigned i = 0, e = EVI->getNumIndices(); i != e; ++i)
- if (EVI->idx_begin()[i] != cast<ExtractValueInst>(I)->idx_begin()[i])
- return false;
- return true;
- }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(this))
+ return IVI->getIndices() == cast<InsertValueInst>(I)->getIndices();
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(this))
+ return EVI->getIndices() == cast<ExtractValueInst>(I)->getIndices();
return true;
}
@@ -429,8 +405,10 @@ Instruction *Instruction::clone() const {
// Otherwise, enumerate and copy over metadata from the old instruction to the
// new one.
SmallVector<std::pair<unsigned, MDNode*>, 4> TheMDs;
- getAllMetadata(TheMDs);
+ getAllMetadataOtherThanDebugLoc(TheMDs);
for (unsigned i = 0, e = TheMDs.size(); i != e; ++i)
New->setMetadata(TheMDs[i].first, TheMDs[i].second);
+
+ New->setDebugLoc(getDebugLoc());
return New;
}
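
A sketch of the behavioral fix in clone() above: the debug location now
survives cloning along with the other metadata kinds. cloneWithLoc is a
hypothetical wrapper used only for illustration:

    Instruction *cloneWithLoc(const Instruction *I) {
      Instruction *New = I->clone();
      assert(New->getDebugLoc() == I->getDebugLoc() && "clone dropped loc");
      return New;
    }
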
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 8f4eabe..9baad09 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -87,11 +87,8 @@ PHINode::PHINode(const PHINode &PN)
: Instruction(PN.getType(), Instruction::PHI,
allocHungoffUses(PN.getNumOperands()), PN.getNumOperands()),
ReservedSpace(PN.getNumOperands()) {
- Use *OL = OperandList;
- for (unsigned i = 0, e = PN.getNumOperands(); i != e; i+=2) {
- OL[i] = PN.getOperand(i);
- OL[i+1] = PN.getOperand(i+1);
- }
+ std::copy(PN.op_begin(), PN.op_end(), op_begin());
+ std::copy(PN.block_begin(), PN.block_end(), block_begin());
SubclassOptionalData = PN.SubclassOptionalData;
}
@@ -99,31 +96,37 @@ PHINode::~PHINode() {
dropHungoffUses();
}
+Use *PHINode::allocHungoffUses(unsigned N) const {
+ // Allocate the array of Uses of the incoming values, followed by a pointer
+ // (with bottom bit set) to the User, followed by the array of pointers to
+ // the incoming basic blocks.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef)
+ + N * sizeof(BasicBlock*);
+ Use *Begin = static_cast<Use*>(::operator new(size));
+ Use *End = Begin + N;
+ (void) new(End) Use::UserRef(const_cast<PHINode*>(this), 1);
+ return Use::initTags(Begin, End);
+}
+
// removeIncomingValue - Remove an incoming value. This is useful if a
// predecessor basic block is deleted.
Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
- unsigned NumOps = getNumOperands();
- Use *OL = OperandList;
- assert(Idx*2 < NumOps && "BB not in PHI node!");
- Value *Removed = OL[Idx*2];
+ Value *Removed = getIncomingValue(Idx);
// Move everything after this operand down.
//
// FIXME: we could just swap with the end of the list, then erase. However,
- // client might not expect this to happen. The code as it is thrashes the
+ // clients might not expect this to happen. The code as it is thrashes the
// use/def lists, which is kinda lame.
- for (unsigned i = (Idx+1)*2; i != NumOps; i += 2) {
- OL[i-2] = OL[i];
- OL[i-2+1] = OL[i+1];
- }
+ std::copy(op_begin() + Idx + 1, op_end(), op_begin() + Idx);
+ std::copy(block_begin() + Idx + 1, block_end(), block_begin() + Idx);
// Nuke the last value.
- OL[NumOps-2].set(0);
- OL[NumOps-2+1].set(0);
- NumOperands = NumOps-2;
+ Op<-1>().set(0);
+ --NumOperands;
// If the PHI node is dead, because it has zero entries, nuke it now.
- if (NumOps == 2 && DeletePHIIfEmpty) {
+ if (getNumOperands() == 0 && DeletePHIIfEmpty) {
// If anyone is using this PHI, make them use a dummy value instead...
replaceAllUsesWith(UndefValue::get(getType()));
eraseFromParent();
@@ -137,15 +140,18 @@ Value *PHINode::removeIncomingValue(unsigned Idx, bool DeletePHIIfEmpty) {
///
void PHINode::growOperands() {
unsigned e = getNumOperands();
- // Multiply by 1.5 and round down so the result is still even.
- unsigned NumOps = e + e / 4 * 2;
- if (NumOps < 4) NumOps = 4; // 4 op PHI nodes are VERY common.
+  unsigned NumOps = e + e / 2; // Grow by ~1.5x.
+ if (NumOps < 2) NumOps = 2; // 2 op PHI nodes are VERY common.
+
+ Use *OldOps = op_begin();
+ BasicBlock **OldBlocks = block_begin();
ReservedSpace = NumOps;
- Use *OldOps = OperandList;
- Use *NewOps = allocHungoffUses(NumOps);
- std::copy(OldOps, OldOps + e, NewOps);
- OperandList = NewOps;
+ OperandList = allocHungoffUses(ReservedSpace);
+
+ std::copy(OldOps, OldOps + e, op_begin());
+ std::copy(OldBlocks, OldBlocks + e, block_begin());
+
Use::zap(OldOps, OldOps + e, true);
}
@@ -168,95 +174,42 @@ Value *PHINode::hasConstantValue() const {
CallInst::~CallInst() {
}
-void CallInst::init(Value *Func, Value* const *Params, unsigned NumParams) {
- assert(NumOperands == NumParams+1 && "NumOperands not set up?");
+void CallInst::init(Value *Func, ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == Args.size() + 1 && "NumOperands not set up?");
Op<-1>() = Func;
+#ifndef NDEBUG
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- (void)FTy; // silence warning.
- assert((NumParams == FTy->getNumParams() ||
- (FTy->isVarArg() && NumParams > FTy->getNumParams())) &&
+ assert((Args.size() == FTy->getNumParams() ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
"Calling a function with bad signature!");
- for (unsigned i = 0; i != NumParams; ++i) {
+
+ for (unsigned i = 0; i != Args.size(); ++i)
assert((i >= FTy->getNumParams() ||
- FTy->getParamType(i) == Params[i]->getType()) &&
+ FTy->getParamType(i) == Args[i]->getType()) &&
"Calling a function with a bad signature!");
- OperandList[i] = Params[i];
- }
-}
-
-void CallInst::init(Value *Func, Value *Actual1, Value *Actual2) {
- assert(NumOperands == 3 && "NumOperands not set up?");
- Op<-1>() = Func;
- Op<0>() = Actual1;
- Op<1>() = Actual2;
-
- const FunctionType *FTy =
- cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- (void)FTy; // silence warning.
-
- assert((FTy->getNumParams() == 2 ||
- (FTy->isVarArg() && FTy->getNumParams() < 2)) &&
- "Calling a function with bad signature");
- assert((0 >= FTy->getNumParams() ||
- FTy->getParamType(0) == Actual1->getType()) &&
- "Calling a function with a bad signature!");
- assert((1 >= FTy->getNumParams() ||
- FTy->getParamType(1) == Actual2->getType()) &&
- "Calling a function with a bad signature!");
-}
-
-void CallInst::init(Value *Func, Value *Actual) {
- assert(NumOperands == 2 && "NumOperands not set up?");
- Op<-1>() = Func;
- Op<0>() = Actual;
-
- const FunctionType *FTy =
- cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- (void)FTy; // silence warning.
+#endif
- assert((FTy->getNumParams() == 1 ||
- (FTy->isVarArg() && FTy->getNumParams() == 0)) &&
- "Calling a function with bad signature");
- assert((0 == FTy->getNumParams() ||
- FTy->getParamType(0) == Actual->getType()) &&
- "Calling a function with a bad signature!");
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
}
-void CallInst::init(Value *Func) {
+void CallInst::init(Value *Func, const Twine &NameStr) {
assert(NumOperands == 1 && "NumOperands not set up?");
Op<-1>() = Func;
+#ifndef NDEBUG
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Func->getType())->getElementType());
- (void)FTy; // silence warning.
assert(FTy->getNumParams() == 0 && "Calling a function with bad signature");
-}
+#endif
-CallInst::CallInst(Value *Func, Value* Actual, const Twine &Name,
- Instruction *InsertBefore)
- : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - 2,
- 2, InsertBefore) {
- init(Func, Actual);
- setName(Name);
+ setName(NameStr);
}
-CallInst::CallInst(Value *Func, Value* Actual, const Twine &Name,
- BasicBlock *InsertAtEnd)
- : Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
- ->getElementType())->getReturnType(),
- Instruction::Call,
- OperandTraits<CallInst>::op_end(this) - 2,
- 2, InsertAtEnd) {
- init(Func, Actual);
- setName(Name);
-}
CallInst::CallInst(Value *Func, const Twine &Name,
Instruction *InsertBefore)
: Instruction(cast<FunctionType>(cast<PointerType>(Func->getType())
@@ -264,8 +217,7 @@ CallInst::CallInst(Value *Func, const Twine &Name,
Instruction::Call,
OperandTraits<CallInst>::op_end(this) - 1,
1, InsertBefore) {
- init(Func);
- setName(Name);
+ init(Func, Name);
}
CallInst::CallInst(Value *Func, const Twine &Name,
@@ -275,8 +227,7 @@ CallInst::CallInst(Value *Func, const Twine &Name,
Instruction::Call,
OperandTraits<CallInst>::op_end(this) - 1,
1, InsertAtEnd) {
- init(Func);
- setName(Name);
+ init(Func, Name);
}
CallInst::CallInst(const CallInst &CI)
@@ -287,10 +238,7 @@ CallInst::CallInst(const CallInst &CI)
setTailCall(CI.isTailCall());
setCallingConv(CI.getCallingConv());
- Use *OL = OperandList;
- Use *InOL = CI.OperandList;
- for (unsigned i = 0, e = CI.getNumOperands(); i != e; ++i)
- OL[i] = InOL[i];
+ std::copy(CI.op_begin(), CI.op_end(), op_begin());
SubclassOptionalData = CI.SubclassOptionalData;
}
@@ -366,7 +314,7 @@ static Instruction *createMalloc(Instruction *InsertBefore,
// Create the call to Malloc.
BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
Module* M = BB->getParent()->getParent();
- const Type *BPTy = Type::getInt8PtrTy(BB->getContext());
+ Type *BPTy = Type::getInt8PtrTy(BB->getContext());
Value *MallocFunc = MallocF;
if (!MallocFunc)
// prototype malloc as "void *malloc(size_t)"
@@ -481,27 +429,28 @@ Instruction* CallInst::CreateFree(Value* Source, BasicBlock *InsertAtEnd) {
//===----------------------------------------------------------------------===//
void InvokeInst::init(Value *Fn, BasicBlock *IfNormal, BasicBlock *IfException,
- Value* const *Args, unsigned NumArgs) {
- assert(NumOperands == 3+NumArgs && "NumOperands not set up?");
+ ArrayRef<Value *> Args, const Twine &NameStr) {
+ assert(NumOperands == 3 + Args.size() && "NumOperands not set up?");
Op<-3>() = Fn;
Op<-2>() = IfNormal;
Op<-1>() = IfException;
+
+#ifndef NDEBUG
const FunctionType *FTy =
cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType());
- (void)FTy; // silence warning.
- assert(((NumArgs == FTy->getNumParams()) ||
- (FTy->isVarArg() && NumArgs > FTy->getNumParams())) &&
+ assert(((Args.size() == FTy->getNumParams()) ||
+ (FTy->isVarArg() && Args.size() > FTy->getNumParams())) &&
"Invoking a function with bad signature");
- Use *OL = OperandList;
- for (unsigned i = 0, e = NumArgs; i != e; i++) {
+ for (unsigned i = 0, e = Args.size(); i != e; i++)
assert((i >= FTy->getNumParams() ||
FTy->getParamType(i) == Args[i]->getType()) &&
"Invoking a function with a bad signature!");
-
- OL[i] = Args[i];
- }
+#endif
+
+ std::copy(Args.begin(), Args.end(), op_begin());
+ setName(NameStr);
}
InvokeInst::InvokeInst(const InvokeInst &II)
@@ -511,9 +460,7 @@ InvokeInst::InvokeInst(const InvokeInst &II)
II.getNumOperands()) {
setAttributes(II.getAttributes());
setCallingConv(II.getCallingConv());
- Use *OL = OperandList, *InOL = II.OperandList;
- for (unsigned i = 0, e = II.getNumOperands(); i != e; ++i)
- OL[i] = InOL[i];
+ std::copy(II.op_begin(), II.op_end(), op_begin());
SubclassOptionalData = II.SubclassOptionalData;
}
@@ -817,7 +764,7 @@ bool AllocaInst::isArrayAllocation() const {
return true;
}
-const Type *AllocaInst::getAllocatedType() const {
+Type *AllocaInst::getAllocatedType() const {
return getType()->getElementType();
}
@@ -1092,7 +1039,7 @@ GetElementPtrInst::GetElementPtrInst(const GetElementPtrInst &GEPI)
GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
const Twine &Name, Instruction *InBe)
: Instruction(PointerType::get(
- checkType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)),
+ checkGEPType(getIndexedType(Ptr->getType(),Idx)), retrieveAddrSpace(Ptr)),
GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - 2,
2, InBe) {
@@ -1102,7 +1049,7 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
const Twine &Name, BasicBlock *IAE)
: Instruction(PointerType::get(
- checkType(getIndexedType(Ptr->getType(),Idx)),
+ checkGEPType(getIndexedType(Ptr->getType(),Idx)),
retrieveAddrSpace(Ptr)),
GetElementPtr,
OperandTraits<GetElementPtrInst>::op_end(this) - 2,
@@ -1120,60 +1067,50 @@ GetElementPtrInst::GetElementPtrInst(Value *Ptr, Value *Idx,
/// pointer type.
///
template <typename IndexTy>
-static const Type* getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
- unsigned NumIdx) {
+static Type *getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
+ unsigned NumIdx) {
const PointerType *PTy = dyn_cast<PointerType>(Ptr);
if (!PTy) return 0; // Type isn't a pointer type!
- const Type *Agg = PTy->getElementType();
+ Type *Agg = PTy->getElementType();
// Handle the special case of the empty set index set, which is always valid.
if (NumIdx == 0)
return Agg;
// If there is at least one index, the top level type must be sized, otherwise
- // it cannot be 'stepped over'. We explicitly allow abstract types (those
- // that contain opaque types) under the assumption that it will be resolved to
- // a sane type later.
- if (!Agg->isSized() && !Agg->isAbstract())
+ // it cannot be 'stepped over'.
+ if (!Agg->isSized())
return 0;
unsigned CurIdx = 1;
for (; CurIdx != NumIdx; ++CurIdx) {
- const CompositeType *CT = dyn_cast<CompositeType>(Agg);
+ CompositeType *CT = dyn_cast<CompositeType>(Agg);
if (!CT || CT->isPointerTy()) return 0;
IndexTy Index = Idxs[CurIdx];
if (!CT->indexValid(Index)) return 0;
Agg = CT->getTypeAtIndex(Index);
-
- // If the new type forwards to another type, then it is in the middle
- // of being refined to another type (and hence, may have dropped all
- // references to what it was using before). So, use the new forwarded
- // type.
- if (const Type *Ty = Agg->getForwardedType())
- Agg = Ty;
}
return CurIdx == NumIdx ? Agg : 0;
}
-const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
- Value* const *Idxs,
- unsigned NumIdx) {
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value* const *Idxs,
+ unsigned NumIdx) {
return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
}
-const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
- Constant* const *Idxs,
- unsigned NumIdx) {
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
+ Constant* const *Idxs,
+ unsigned NumIdx) {
return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
}
-const Type* GetElementPtrInst::getIndexedType(const Type *Ptr,
- uint64_t const *Idxs,
- unsigned NumIdx) {
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr,
+ uint64_t const *Idxs,
+ unsigned NumIdx) {
return getIndexedTypeInternal(Ptr, Idxs, NumIdx);
}
-const Type* GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) {
+Type *GetElementPtrInst::getIndexedType(const Type *Ptr, Value *Idx) {
const PointerType *PTy = dyn_cast<PointerType>(Ptr);
if (!PTy) return 0; // Type isn't a pointer type!
@@ -1390,27 +1327,22 @@ int ShuffleVectorInst::getMaskValue(unsigned i) const {
// InsertValueInst Class
//===----------------------------------------------------------------------===//
-void InsertValueInst::init(Value *Agg, Value *Val, const unsigned *Idx,
- unsigned NumIdx, const Twine &Name) {
+void InsertValueInst::init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+ const Twine &Name) {
assert(NumOperands == 2 && "NumOperands not initialized?");
- assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx, Idx + NumIdx) ==
- Val->getType() && "Inserted value must match indexed type!");
- Op<0>() = Agg;
- Op<1>() = Val;
- Indices.append(Idx, Idx + NumIdx);
- setName(Name);
-}
+  // There's no fundamental reason why we require at least one index
+  // (other than weirdness with &*IdxBegin being invalid; see
+  // getelementptr's init routine for an example). But there's no
+  // present need to support zero indices.
+ assert(Idxs.size() > 0 && "InsertValueInst must have at least one index");
-void InsertValueInst::init(Value *Agg, Value *Val, unsigned Idx,
- const Twine &Name) {
- assert(NumOperands == 2 && "NumOperands not initialized?");
- assert(ExtractValueInst::getIndexedType(Agg->getType(), Idx) == Val->getType()
- && "Inserted value must match indexed type!");
+ assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) ==
+ Val->getType() && "Inserted value must match indexed type!");
Op<0>() = Agg;
Op<1>() = Val;
- Indices.push_back(Idx);
+ Indices.append(Idxs.begin(), Idxs.end());
setName(Name);
}
@@ -1423,44 +1355,18 @@ InsertValueInst::InsertValueInst(const InsertValueInst &IVI)
SubclassOptionalData = IVI.SubclassOptionalData;
}
-InsertValueInst::InsertValueInst(Value *Agg,
- Value *Val,
- unsigned Idx,
- const Twine &Name,
- Instruction *InsertBefore)
- : Instruction(Agg->getType(), InsertValue,
- OperandTraits<InsertValueInst>::op_begin(this),
- 2, InsertBefore) {
- init(Agg, Val, Idx, Name);
-}
-
-InsertValueInst::InsertValueInst(Value *Agg,
- Value *Val,
- unsigned Idx,
- const Twine &Name,
- BasicBlock *InsertAtEnd)
- : Instruction(Agg->getType(), InsertValue,
- OperandTraits<InsertValueInst>::op_begin(this),
- 2, InsertAtEnd) {
- init(Agg, Val, Idx, Name);
-}
-
//===----------------------------------------------------------------------===//
// ExtractValueInst Class
//===----------------------------------------------------------------------===//
-void ExtractValueInst::init(const unsigned *Idx, unsigned NumIdx,
- const Twine &Name) {
+void ExtractValueInst::init(ArrayRef<unsigned> Idxs, const Twine &Name) {
assert(NumOperands == 1 && "NumOperands not initialized?");
- Indices.append(Idx, Idx + NumIdx);
- setName(Name);
-}
-
-void ExtractValueInst::init(unsigned Idx, const Twine &Name) {
- assert(NumOperands == 1 && "NumOperands not initialized?");
+ // There's no fundamental reason why we require at least one index.
+ // But there's no present need to support it.
+ assert(Idxs.size() > 0 && "ExtractValueInst must have at least one index");
- Indices.push_back(Idx);
+ Indices.append(Idxs.begin(), Idxs.end());
setName(Name);
}
@@ -1476,10 +1382,9 @@ ExtractValueInst::ExtractValueInst(const ExtractValueInst &EVI)
// A null type is returned if the indices are invalid for the specified
// pointer type.
//
-const Type* ExtractValueInst::getIndexedType(const Type *Agg,
- const unsigned *Idxs,
- unsigned NumIdx) {
- for (unsigned CurIdx = 0; CurIdx != NumIdx; ++CurIdx) {
+Type *ExtractValueInst::getIndexedType(const Type *Agg,
+ ArrayRef<unsigned> Idxs) {
+ for (unsigned CurIdx = 0; CurIdx != Idxs.size(); ++CurIdx) {
unsigned Index = Idxs[CurIdx];
// We can't use CompositeType::indexValid(Index) here.
// indexValid() always returns true for arrays because getelementptr allows
@@ -1499,20 +1404,8 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
}
Agg = cast<CompositeType>(Agg)->getTypeAtIndex(Index);
-
- // If the new type forwards to another type, then it is in the middle
- // of being refined to another type (and hence, may have dropped all
- // references to what it was using before). So, use the new forwarded
- // type.
- if (const Type *Ty = Agg->getForwardedType())
- Agg = Ty;
}
- return Agg;
-}
-
-const Type* ExtractValueInst::getIndexedType(const Type *Agg,
- unsigned Idx) {
- return getIndexedType(Agg, &Idx, 1);
+ return const_cast<Type*>(Agg);
}
//===----------------------------------------------------------------------===//
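
A call-site sketch for the ArrayRef-based index helpers above, assuming Agg
has type {i32, [4 x i8]} and Before is an existing insertion point:

    Instruction *extractLeaf(Value *Agg, Instruction *Before) {
      unsigned Path[] = { 1, 2 };
      assert(ExtractValueInst::getIndexedType(Agg->getType(), Path) &&
             "invalid index path");
      return ExtractValueInst::Create(Agg, Path, "leaf", Before);
    }
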
diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp
index 1bd497d..ebd1e0a 100644
--- a/lib/VMCore/LLVMContext.cpp
+++ b/lib/VMCore/LLVMContext.cpp
@@ -39,6 +39,10 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
// Create the 'tbaa' metadata kind.
unsigned TBAAID = getMDKindID("tbaa");
assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
+
+ // Create the 'prof' metadata kind.
+ unsigned ProfID = getMDKindID("prof");
+ assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
}
LLVMContext::~LLVMContext() { delete pImpl; }
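[editor's note] A hedged sketch of how the new 'prof' kind is meant to be consumed, following the BranchWeightMetadata document added alongside this change; Ctx (the LLVMContext) and BI (a conditional BranchInst) are assumed from a hypothetical caller:

    Value *Ops[] = {
      MDString::get(Ctx, "branch_weights"),
      ConstantInt::get(Type::getInt32Ty(Ctx), 64),  // taken-edge weight
      ConstantInt::get(Type::getInt32Ty(Ctx), 4)    // fallthrough-edge weight
    };
    BI->setMetadata(LLVMContext::MD_prof, MDNode::get(Ctx, Ops));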
diff --git a/lib/VMCore/LLVMContextImpl.cpp b/lib/VMCore/LLVMContextImpl.cpp
index ccb8dc5..504b372 100644
--- a/lib/VMCore/LLVMContextImpl.cpp
+++ b/lib/VMCore/LLVMContextImpl.cpp
@@ -13,6 +13,7 @@
#include "LLVMContextImpl.h"
#include "llvm/Module.h"
+#include "llvm/ADT/STLExtras.h"
#include <algorithm>
using namespace llvm;
@@ -31,14 +32,10 @@ LLVMContextImpl::LLVMContextImpl(LLVMContext &C)
Int8Ty(C, 8),
Int16Ty(C, 16),
Int32Ty(C, 32),
- Int64Ty(C, 64),
- AlwaysOpaqueTy(new OpaqueType(C)) {
+ Int64Ty(C, 64) {
InlineAsmDiagHandler = 0;
InlineAsmDiagContext = 0;
-
- // Make sure the AlwaysOpaqueTy stays alive as long as the Context.
- AlwaysOpaqueTy->addRef();
- OpaqueTypes.insert(AlwaysOpaqueTy);
+ NamedStructTypesUniqueID = 0;
}
namespace {
@@ -58,9 +55,7 @@ LLVMContextImpl::~LLVMContextImpl() {
// will try to remove itself from the OwnedModules set. This would cause
// iterator invalidation if we iterated on the set directly.
std::vector<Module*> Modules(OwnedModules.begin(), OwnedModules.end());
- for (std::vector<Module*>::iterator I = Modules.begin(), E = Modules.end();
- I != E; ++I)
- delete *I;
+ DeleteContainerPointers(Modules);
std::for_each(ExprConstants.map_begin(), ExprConstants.map_end(),
DropReferences());
@@ -78,38 +73,22 @@ LLVMContextImpl::~LLVMContextImpl() {
NullPtrConstants.freeConstants();
UndefValueConstants.freeConstants();
InlineAsms.freeConstants();
- for (IntMapTy::iterator I = IntConstants.begin(), E = IntConstants.end();
- I != E; ++I) {
- delete I->second;
- }
- for (FPMapTy::iterator I = FPConstants.begin(), E = FPConstants.end();
- I != E; ++I) {
- delete I->second;
- }
- AlwaysOpaqueTy->dropRef();
- for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end();
- I != E; ++I) {
- (*I)->AbstractTypeUsers.clear();
- delete *I;
- }
+ DeleteContainerSeconds(IntConstants);
+ DeleteContainerSeconds(FPConstants);
+
// Destroy MDNodes. ~MDNode can move and remove nodes between the MDNodeSet
// and the NonUniquedMDNodes sets, so copy the values out first.
SmallVector<MDNode*, 8> MDNodes;
MDNodes.reserve(MDNodeSet.size() + NonUniquedMDNodes.size());
for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
- I != E; ++I) {
+ I != E; ++I)
MDNodes.push_back(&*I);
- }
MDNodes.append(NonUniquedMDNodes.begin(), NonUniquedMDNodes.end());
for (SmallVectorImpl<MDNode *>::iterator I = MDNodes.begin(),
- E = MDNodes.end(); I != E; ++I) {
+ E = MDNodes.end(); I != E; ++I)
(*I)->destroy();
- }
assert(MDNodeSet.empty() && NonUniquedMDNodes.empty() &&
"Destroying all MDNodes didn't empty the Context's sets.");
// Destroy MDStrings.
- for (StringMap<MDString*>::iterator I = MDStringCache.begin(),
- E = MDStringCache.end(); I != E; ++I) {
- delete I->second;
- }
+ DeleteContainerSeconds(MDStringCache);
}
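[editor's note] For reference, the STLExtras helpers the destructor now uses behave roughly like this sketch (see llvm/ADT/STLExtras.h for the real definitions):

    template <typename Container>
    void DeleteContainerPointers(Container &C) {
      for (typename Container::iterator I = C.begin(), E = C.end(); I != E; ++I)
        delete *I;          // delete each element
      C.clear();
    }
    template <typename Container>
    void DeleteContainerSeconds(Container &C) {
      for (typename Container::iterator I = C.begin(), E = C.end(); I != E; ++I)
        delete I->second;   // delete each mapped value
      C.clear();
    }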
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 6ea4b48..06a6f2a 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -15,17 +15,16 @@
#ifndef LLVM_LLVMCONTEXT_IMPL_H
#define LLVM_LLVMCONTEXT_IMPL_H
+#include "llvm/LLVMContext.h"
#include "ConstantsContext.h"
#include "LeaksContext.h"
-#include "TypesContext.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Metadata.h"
-#include "llvm/Assembly/Writer.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -139,27 +138,30 @@ public:
// on Context destruction.
SmallPtrSet<MDNode*, 1> NonUniquedMDNodes;
- ConstantUniqueMap<char, Type, ConstantAggregateZero> AggZeroConstants;
+ ConstantUniqueMap<char, char, Type, ConstantAggregateZero> AggZeroConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, ArrayType,
- ConstantArray, true /*largekey*/> ArrayConstantsTy;
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ ArrayType, ConstantArray, true /*largekey*/> ArrayConstantsTy;
ArrayConstantsTy ArrayConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, StructType,
- ConstantStruct, true /*largekey*/> StructConstantsTy;
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ StructType, ConstantStruct, true /*largekey*/> StructConstantsTy;
StructConstantsTy StructConstants;
- typedef ConstantUniqueMap<std::vector<Constant*>, VectorType,
- ConstantVector> VectorConstantsTy;
+ typedef ConstantUniqueMap<std::vector<Constant*>, ArrayRef<Constant*>,
+ VectorType, ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
- ConstantUniqueMap<char, PointerType, ConstantPointerNull> NullPtrConstants;
- ConstantUniqueMap<char, Type, UndefValue> UndefValueConstants;
+ ConstantUniqueMap<char, char, PointerType, ConstantPointerNull>
+ NullPtrConstants;
+ ConstantUniqueMap<char, char, Type, UndefValue> UndefValueConstants;
DenseMap<std::pair<Function*, BasicBlock*> , BlockAddress*> BlockAddresses;
- ConstantUniqueMap<ExprMapKeyType, Type, ConstantExpr> ExprConstants;
+ ConstantUniqueMap<ExprMapKeyType, const ExprMapKeyType&, Type, ConstantExpr>
+ ExprConstants;
- ConstantUniqueMap<InlineAsmKeyType, PointerType, InlineAsm> InlineAsms;
+ ConstantUniqueMap<InlineAsmKeyType, const InlineAsmKeyType&, PointerType,
+ InlineAsm> InlineAsms;
ConstantInt *TheTrueVal;
ConstantInt *TheFalseVal;
@@ -167,41 +169,27 @@ public:
LeakDetectorImpl<Value> LLVMObjects;
// Basic type instances.
- const Type VoidTy;
- const Type LabelTy;
- const Type FloatTy;
- const Type DoubleTy;
- const Type MetadataTy;
- const Type X86_FP80Ty;
- const Type FP128Ty;
- const Type PPC_FP128Ty;
- const Type X86_MMXTy;
- const IntegerType Int1Ty;
- const IntegerType Int8Ty;
- const IntegerType Int16Ty;
- const IntegerType Int32Ty;
- const IntegerType Int64Ty;
-
- // Concrete/Abstract TypeDescriptions - We lazily calculate type descriptions
- // for types as they are needed. Because resolution of types must invalidate
- // all of the abstract type descriptions, we keep them in a separate map to
- // make this easy.
- TypePrinting ConcreteTypeDescriptions;
- TypePrinting AbstractTypeDescriptions;
-
- TypeMap<ArrayValType, ArrayType> ArrayTypes;
- TypeMap<VectorValType, VectorType> VectorTypes;
- TypeMap<PointerValType, PointerType> PointerTypes;
- TypeMap<FunctionValType, FunctionType> FunctionTypes;
- TypeMap<StructValType, StructType> StructTypes;
- TypeMap<IntegerValType, IntegerType> IntegerTypes;
-
- // Opaque types are not structurally uniqued, so don't use TypeMap.
- typedef SmallPtrSet<const OpaqueType*, 8> OpaqueTypesTy;
- OpaqueTypesTy OpaqueTypes;
-
- /// Used as an abstract type that will never be resolved.
- OpaqueType *const AlwaysOpaqueTy;
+ Type VoidTy, LabelTy, FloatTy, DoubleTy, MetadataTy;
+ Type X86_FP80Ty, FP128Ty, PPC_FP128Ty, X86_MMXTy;
+ IntegerType Int1Ty, Int8Ty, Int16Ty, Int32Ty, Int64Ty;
+
+
+ /// TypeAllocator - All dynamically allocated types are allocated from this.
+  /// They live until the context is torn down.
+ BumpPtrAllocator TypeAllocator;
+
+ DenseMap<unsigned, IntegerType*> IntegerTypes;
+
+ // TODO: Optimize FunctionTypes/AnonStructTypes!
+ std::map<std::vector<Type*>, FunctionType*> FunctionTypes;
+ std::map<std::vector<Type*>, StructType*> AnonStructTypes;
+ StringMap<StructType*> NamedStructTypes;
+ unsigned NamedStructTypesUniqueID;
+
+ DenseMap<std::pair<Type *, uint64_t>, ArrayType*> ArrayTypes;
+ DenseMap<std::pair<Type *, unsigned>, VectorType*> VectorTypes;
+ DenseMap<Type*, PointerType*> PointerTypes; // Pointers in AddrSpace = 0
+ DenseMap<std::pair<Type*, unsigned>, PointerType*> ASPointerTypes;
/// ValueHandles - This map keeps track of all of the value handles that are
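[editor's note] A sketch of how the split pointer maps above are meant to be consulted, mirroring the allocate-once uniquing pattern used throughout this patch; getPointerUniqued is a hypothetical helper, not code from the change:

    PointerType *getPointerUniqued(LLVMContextImpl &Impl, Type *EltTy,
                                   unsigned AS) {
      // Address space 0 gets the cheaper Type*-keyed map; all other address
      // spaces key on the (element type, address space) pair.
      PointerType *&Entry = AS == 0
          ? Impl.PointerTypes[EltTy]
          : Impl.ASPointerTypes[std::make_pair(EltTy, AS)];
      if (Entry == 0)
        Entry = new (Impl.TypeAllocator) PointerType(EltTy, AS);
      return Entry;
    }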
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index eb719e5..ace4dc2 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/ValueHandle.h"
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index 341e527..be2fcb8 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -17,12 +17,12 @@
#include "llvm/DerivedTypes.h"
#include "llvm/GVMaterializer.h"
#include "llvm/LLVMContext.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/LeakDetector.h"
#include "SymbolTableListTraitsImpl.h"
-#include "llvm/TypeSymbolTable.h"
#include <algorithm>
#include <cstdarg>
#include <cstdlib>
@@ -60,7 +60,6 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>;
Module::Module(StringRef MID, LLVMContext& C)
: Context(C), Materializer(NULL), ModuleID(MID) {
ValSymTab = new ValueSymbolTable();
- TypeSymTab = new TypeSymbolTable();
NamedMDSymTab = new StringMap<NamedMDNode *>();
Context.addModule(this);
}
@@ -74,11 +73,10 @@ Module::~Module() {
LibraryList.clear();
NamedMDList.clear();
delete ValSymTab;
- delete TypeSymTab;
delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab);
}
-/// Target endian information...
+/// Target endian information.
Module::Endianness Module::getEndianness() const {
StringRef temp = DataLayout;
Module::Endianness ret = AnyEndianness;
@@ -218,8 +216,8 @@ Constant *Module::getOrInsertFunction(StringRef Name,
va_start(Args, RetTy);
// Build the list of argument types...
- std::vector<const Type*> ArgTys;
- while (const Type *ArgTy = va_arg(Args, const Type*))
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
ArgTys.push_back(ArgTy);
va_end(Args);
@@ -236,8 +234,8 @@ Constant *Module::getOrInsertFunction(StringRef Name,
va_start(Args, RetTy);
// Build the list of argument types...
- std::vector<const Type*> ArgTys;
- while (const Type *ArgTy = va_arg(Args, const Type*))
+ std::vector<Type*> ArgTys;
+ while (Type *ArgTy = va_arg(Args, Type*))
ArgTys.push_back(ArgTy);
va_end(Args);
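[editor's note] Callers are unchanged apart from constness; a usage sketch of the vararg overload, assuming a Module *M and LLVMContext &Ctx, with a hypothetical function name. The type list is still null-terminated:

    Constant *F = M->getOrInsertFunction("my_helper",
                                         Type::getVoidTy(Ctx),
                                         Type::getInt8PtrTy(Ctx),
                                         Type::getInt32Ty(Ctx),
                                         (Type *)0);  // terminator for va_arg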
@@ -340,51 +338,6 @@ void Module::eraseNamedMetadata(NamedMDNode *NMD) {
NamedMDList.erase(NMD);
}
-//===----------------------------------------------------------------------===//
-// Methods for easy access to the types in the module.
-//
-
-
-// addTypeName - Insert an entry in the symbol table mapping Str to Type. If
-// there is already an entry for this name, true is returned and the symbol
-// table is not modified.
-//
-bool Module::addTypeName(StringRef Name, const Type *Ty) {
- TypeSymbolTable &ST = getTypeSymbolTable();
-
- if (ST.lookup(Name)) return true; // Already in symtab...
-
- // Not in symbol table? Set the name with the Symtab as an argument so the
- // type knows what to update...
- ST.insert(Name, Ty);
-
- return false;
-}
-
-/// getTypeByName - Return the type with the specified name in this module, or
-/// null if there is none by that name.
-const Type *Module::getTypeByName(StringRef Name) const {
- const TypeSymbolTable &ST = getTypeSymbolTable();
- return cast_or_null<Type>(ST.lookup(Name));
-}
-
-// getTypeName - If there is at least one entry in the symbol table for the
-// specified type, return it.
-//
-std::string Module::getTypeName(const Type *Ty) const {
- const TypeSymbolTable &ST = getTypeSymbolTable();
-
- TypeSymbolTable::const_iterator TI = ST.begin();
- TypeSymbolTable::const_iterator TE = ST.end();
- if ( TI == TE ) return ""; // No names for types
-
- while (TI != TE && TI->second != Ty)
- ++TI;
-
- if (TI != TE) // Must have found an entry!
- return TI->first;
- return ""; // Must not have found anything...
-}
//===----------------------------------------------------------------------===//
// Methods to control the materialization of GlobalValues in the Module.
@@ -471,3 +424,130 @@ void Module::removeLibrary(StringRef Lib) {
return;
}
}
+
+//===----------------------------------------------------------------------===//
+// Type finding functionality.
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// TypeFinder - Walk over a module, identifying all of the types that are
+ /// used by the module.
+ class TypeFinder {
+    // To avoid walking constant expressions and other IR objects multiple
+    // times, we keep several helper maps.
+ DenseSet<const Value*> VisitedConstants;
+ DenseSet<const Type*> VisitedTypes;
+
+ std::vector<StructType*> &StructTypes;
+ public:
+ TypeFinder(std::vector<StructType*> &structTypes)
+ : StructTypes(structTypes) {}
+
+ void run(const Module &M) {
+ // Get types from global variables.
+ for (Module::const_global_iterator I = M.global_begin(),
+ E = M.global_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (I->hasInitializer())
+ incorporateValue(I->getInitializer());
+ }
+
+ // Get types from aliases.
+ for (Module::const_alias_iterator I = M.alias_begin(),
+ E = M.alias_end(); I != E; ++I) {
+ incorporateType(I->getType());
+ if (const Value *Aliasee = I->getAliasee())
+ incorporateValue(Aliasee);
+ }
+
+ SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+ // Get types from functions.
+ for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
+ incorporateType(FI->getType());
+
+ for (Function::const_iterator BB = FI->begin(), E = FI->end();
+ BB != E;++BB)
+ for (BasicBlock::const_iterator II = BB->begin(),
+ E = BB->end(); II != E; ++II) {
+ const Instruction &I = *II;
+ // Incorporate the type of the instruction and all its operands.
+ incorporateType(I.getType());
+ for (User::const_op_iterator OI = I.op_begin(), OE = I.op_end();
+ OI != OE; ++OI)
+ incorporateValue(*OI);
+
+ // Incorporate types hiding in metadata.
+ I.getAllMetadataOtherThanDebugLoc(MDForInst);
+ for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+ incorporateMDNode(MDForInst[i].second);
+ MDForInst.clear();
+ }
+ }
+
+ for (Module::const_named_metadata_iterator I = M.named_metadata_begin(),
+ E = M.named_metadata_end(); I != E; ++I) {
+ const NamedMDNode *NMD = I;
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ incorporateMDNode(NMD->getOperand(i));
+ }
+ }
+
+ private:
+ void incorporateType(Type *Ty) {
+      // Check to see if we've already visited this type.
+ if (!VisitedTypes.insert(Ty).second)
+ return;
+
+      // If this is a structure type, record it in the output list.
+ if (StructType *STy = dyn_cast<StructType>(Ty))
+ StructTypes.push_back(STy);
+
+ // Recursively walk all contained types.
+ for (Type::subtype_iterator I = Ty->subtype_begin(),
+ E = Ty->subtype_end(); I != E; ++I)
+ incorporateType(*I);
+ }
+
+ /// incorporateValue - This method is used to walk operand lists finding
+ /// types hiding in constant expressions and other operands that won't be
+ /// walked in other ways. GlobalValues, basic blocks, instructions, and
+ /// inst operands are all explicitly enumerated.
+ void incorporateValue(const Value *V) {
+ if (const MDNode *M = dyn_cast<MDNode>(V))
+ return incorporateMDNode(M);
+ if (!isa<Constant>(V) || isa<GlobalValue>(V)) return;
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Check this type.
+ incorporateType(V->getType());
+
+ // Look in operands for types.
+ const User *U = cast<User>(V);
+ for (Constant::const_op_iterator I = U->op_begin(),
+ E = U->op_end(); I != E;++I)
+ incorporateValue(*I);
+ }
+
+ void incorporateMDNode(const MDNode *V) {
+
+ // Already visited?
+ if (!VisitedConstants.insert(V).second)
+ return;
+
+ // Look in operands for types.
+ for (unsigned i = 0, e = V->getNumOperands(); i != e; ++i)
+ if (Value *Op = V->getOperand(i))
+ incorporateValue(Op);
+ }
+ };
+} // end anonymous namespace
+
+void Module::findUsedStructTypes(std::vector<StructType*> &StructTypes) const {
+ TypeFinder(StructTypes).run(*this);
+}
+
+
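[editor's note] Usage sketch for the new entry point, assuming a populated Module M:

    std::vector<StructType*> Structs;
    M.findUsedStructTypes(Structs);
    for (unsigned i = 0, e = Structs.size(); i != e; ++i)
      if (!Structs[i]->isAnonymous())
        errs() << "struct: " << Structs[i]->getName() << "\n";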
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index e4496db..f874d1b 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -12,95 +12,17 @@
//===----------------------------------------------------------------------===//
#include "LLVMContextImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Constants.h"
-#include "llvm/Assembly/Writer.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Metadata.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/SCCIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Threading.h"
+#include "llvm/Module.h"
#include <algorithm>
#include <cstdarg>
+#include "llvm/ADT/SmallString.h"
using namespace llvm;
-// DEBUG_MERGE_TYPES - Enable this #define to see how and when derived types are
-// created and later destroyed, all in an effort to make sure that there is only
-// a single canonical version of a type.
-//
-// #define DEBUG_MERGE_TYPES 1
-
-AbstractTypeUser::~AbstractTypeUser() {}
-
-void AbstractTypeUser::setType(Value *V, const Type *NewTy) {
- V->VTy = NewTy;
-}
-
//===----------------------------------------------------------------------===//
// Type Class Implementation
//===----------------------------------------------------------------------===//
-/// Because of the way Type subclasses are allocated, this function is necessary
-/// to use the correct kind of "delete" operator to deallocate the Type object.
-/// Some type objects (FunctionTy, StructTy) allocate additional space
-/// after the space for their derived type to hold the contained types array of
-/// PATypeHandles. Using this allocation scheme means all the PATypeHandles are
-/// allocated with the type object, decreasing allocations and eliminating the
-/// need for a std::vector to be used in the Type class itself.
-/// @brief Type destruction function
-void Type::destroy() const {
- // Nothing calls getForwardedType from here on.
- if (ForwardType && ForwardType->isAbstract()) {
- ForwardType->dropRef();
- ForwardType = NULL;
- }
-
- // Structures and Functions allocate their contained types past the end of
- // the type object itself. These need to be destroyed differently than the
- // other types.
- if (this->isFunctionTy() || this->isStructTy()) {
- // First, make sure we destruct any PATypeHandles allocated by these
- // subclasses. They must be manually destructed.
- for (unsigned i = 0; i < NumContainedTys; ++i)
- ContainedTys[i].PATypeHandle::~PATypeHandle();
-
- // Now call the destructor for the subclass directly because we're going
- // to delete this as an array of char.
- if (this->isFunctionTy())
- static_cast<const FunctionType*>(this)->FunctionType::~FunctionType();
- else {
- assert(isStructTy());
- static_cast<const StructType*>(this)->StructType::~StructType();
- }
-
- // Finally, remove the memory as an array deallocation of the chars it was
- // constructed from.
- operator delete(const_cast<Type *>(this));
-
- return;
- } else if (const OpaqueType *opaque_this = dyn_cast<OpaqueType>(this)) {
- LLVMContextImpl *pImpl = this->getContext().pImpl;
- pImpl->OpaqueTypes.erase(opaque_this);
- }
-
-  // For all the other type subclasses, there are either no contained types
-  // or just one (all Sequentials). For Sequentials, the PATypeHandle is not
-  // allocated past the type object; it's included directly in the SequentialType
- // class. This means we can safely just do "normal" delete of this object and
- // all the destructors that need to run will be run.
- delete this;
-}
-
-const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
+Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
switch (IDNumber) {
case VoidTyID : return getVoidTy(C);
case FloatTyID : return getFloatTy(C);
@@ -116,15 +38,6 @@ const Type *Type::getPrimitiveType(LLVMContext &C, TypeID IDNumber) {
}
}
-const Type *Type::getVAArgsPromotedType(LLVMContext &C) const {
- if (ID == IntegerTyID && getSubclassData() < 32)
- return Type::getInt32Ty(C);
- else if (ID == FloatTyID)
- return Type::getDoubleTy(C);
- else
- return this;
-}
-
/// getScalarType - If this is a vector type, return the element type,
/// otherwise return this.
const Type *Type::getScalarType() const {
@@ -262,13 +175,17 @@ bool Type::isSizedDerivedType() const {
if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
return ATy->getElementType()->isSized();
- if (const VectorType *PTy = dyn_cast<VectorType>(this))
- return PTy->getElementType()->isSized();
+ if (const VectorType *VTy = dyn_cast<VectorType>(this))
+ return VTy->getElementType()->isSized();
if (!this->isStructTy())
return false;
- // Okay, our struct is sized if all of the elements are...
+ // Opaque structs have no size.
+ if (cast<StructType>(this)->isOpaque())
+ return false;
+
+ // Okay, our struct is sized if all of the elements are.
for (subtype_iterator I = subtype_begin(), E = subtype_end(); I != E; ++I)
if (!(*I)->isSized())
return false;
@@ -276,696 +193,335 @@ bool Type::isSizedDerivedType() const {
return true;
}
-/// getForwardedTypeInternal - This method is used to implement the union-find
-/// algorithm for when a type is being forwarded to another type.
-const Type *Type::getForwardedTypeInternal() const {
- assert(ForwardType && "This type is not being forwarded to another type!");
-
- // Check to see if the forwarded type has been forwarded on. If so, collapse
- // the forwarding links.
- const Type *RealForwardedType = ForwardType->getForwardedType();
- if (!RealForwardedType)
- return ForwardType; // No it's not forwarded again
-
- // Yes, it is forwarded again. First thing, add the reference to the new
- // forward type.
- if (RealForwardedType->isAbstract())
- RealForwardedType->addRef();
-
- // Now drop the old reference. This could cause ForwardType to get deleted.
- // ForwardType must be abstract because only abstract types can have their own
- // ForwardTypes.
- ForwardType->dropRef();
-
- // Return the updated type.
- ForwardType = RealForwardedType;
- return ForwardType;
-}
-
-void Type::refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
- llvm_unreachable("Attempting to refine a derived type!");
-}
-void Type::typeBecameConcrete(const DerivedType *AbsTy) {
- llvm_unreachable("DerivedType is already a concrete type!");
-}
-
-
-std::string Type::getDescription() const {
- LLVMContextImpl *pImpl = getContext().pImpl;
- TypePrinting &Map =
- isAbstract() ?
- pImpl->AbstractTypeDescriptions :
- pImpl->ConcreteTypeDescriptions;
-
- std::string DescStr;
- raw_string_ostream DescOS(DescStr);
- Map.print(this, DescOS);
- return DescOS.str();
-}
-
-
-bool StructType::indexValid(const Value *V) const {
- // Structure indexes require 32-bit integer constants.
- if (V->getType()->isIntegerTy(32))
- if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
- return indexValid(CU->getZExtValue());
- return false;
-}
-
-bool StructType::indexValid(unsigned V) const {
- return V < NumContainedTys;
-}
-
-// getTypeAtIndex - Given an index value into the type, return the type of the
-// element. For a structure type, this must be a constant value...
-//
-const Type *StructType::getTypeAtIndex(const Value *V) const {
- unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
- return getTypeAtIndex(Idx);
-}
-
-const Type *StructType::getTypeAtIndex(unsigned Idx) const {
- assert(indexValid(Idx) && "Invalid structure index!");
- return ContainedTys[Idx];
-}
-
-
//===----------------------------------------------------------------------===//
// Primitive 'Type' data
//===----------------------------------------------------------------------===//
-const Type *Type::getVoidTy(LLVMContext &C) {
- return &C.pImpl->VoidTy;
-}
-
-const Type *Type::getLabelTy(LLVMContext &C) {
- return &C.pImpl->LabelTy;
-}
-
-const Type *Type::getFloatTy(LLVMContext &C) {
- return &C.pImpl->FloatTy;
-}
-
-const Type *Type::getDoubleTy(LLVMContext &C) {
- return &C.pImpl->DoubleTy;
-}
-
-const Type *Type::getMetadataTy(LLVMContext &C) {
- return &C.pImpl->MetadataTy;
-}
-
-const Type *Type::getX86_FP80Ty(LLVMContext &C) {
- return &C.pImpl->X86_FP80Ty;
-}
-
-const Type *Type::getFP128Ty(LLVMContext &C) {
- return &C.pImpl->FP128Ty;
-}
-
-const Type *Type::getPPC_FP128Ty(LLVMContext &C) {
- return &C.pImpl->PPC_FP128Ty;
-}
-
-const Type *Type::getX86_MMXTy(LLVMContext &C) {
- return &C.pImpl->X86_MMXTy;
-}
-
-const IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
+Type *Type::getVoidTy(LLVMContext &C) { return &C.pImpl->VoidTy; }
+Type *Type::getLabelTy(LLVMContext &C) { return &C.pImpl->LabelTy; }
+Type *Type::getFloatTy(LLVMContext &C) { return &C.pImpl->FloatTy; }
+Type *Type::getDoubleTy(LLVMContext &C) { return &C.pImpl->DoubleTy; }
+Type *Type::getMetadataTy(LLVMContext &C) { return &C.pImpl->MetadataTy; }
+Type *Type::getX86_FP80Ty(LLVMContext &C) { return &C.pImpl->X86_FP80Ty; }
+Type *Type::getFP128Ty(LLVMContext &C) { return &C.pImpl->FP128Ty; }
+Type *Type::getPPC_FP128Ty(LLVMContext &C) { return &C.pImpl->PPC_FP128Ty; }
+Type *Type::getX86_MMXTy(LLVMContext &C) { return &C.pImpl->X86_MMXTy; }
+
+IntegerType *Type::getInt1Ty(LLVMContext &C) { return &C.pImpl->Int1Ty; }
+IntegerType *Type::getInt8Ty(LLVMContext &C) { return &C.pImpl->Int8Ty; }
+IntegerType *Type::getInt16Ty(LLVMContext &C) { return &C.pImpl->Int16Ty; }
+IntegerType *Type::getInt32Ty(LLVMContext &C) { return &C.pImpl->Int32Ty; }
+IntegerType *Type::getInt64Ty(LLVMContext &C) { return &C.pImpl->Int64Ty; }
+
+IntegerType *Type::getIntNTy(LLVMContext &C, unsigned N) {
return IntegerType::get(C, N);
}
-const IntegerType *Type::getInt1Ty(LLVMContext &C) {
- return &C.pImpl->Int1Ty;
-}
-
-const IntegerType *Type::getInt8Ty(LLVMContext &C) {
- return &C.pImpl->Int8Ty;
-}
-
-const IntegerType *Type::getInt16Ty(LLVMContext &C) {
- return &C.pImpl->Int16Ty;
-}
-
-const IntegerType *Type::getInt32Ty(LLVMContext &C) {
- return &C.pImpl->Int32Ty;
-}
-
-const IntegerType *Type::getInt64Ty(LLVMContext &C) {
- return &C.pImpl->Int64Ty;
-}
-
-const PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getFloatPtrTy(LLVMContext &C, unsigned AS) {
return getFloatTy(C)->getPointerTo(AS);
}
-const PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getDoublePtrTy(LLVMContext &C, unsigned AS) {
return getDoubleTy(C)->getPointerTo(AS);
}
-const PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getX86_FP80PtrTy(LLVMContext &C, unsigned AS) {
return getX86_FP80Ty(C)->getPointerTo(AS);
}
-const PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getFP128PtrTy(LLVMContext &C, unsigned AS) {
return getFP128Ty(C)->getPointerTo(AS);
}
-const PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getPPC_FP128PtrTy(LLVMContext &C, unsigned AS) {
return getPPC_FP128Ty(C)->getPointerTo(AS);
}
-const PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getX86_MMXPtrTy(LLVMContext &C, unsigned AS) {
return getX86_MMXTy(C)->getPointerTo(AS);
}
-const PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
+PointerType *Type::getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS) {
return getIntNTy(C, N)->getPointerTo(AS);
}
-const PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt1PtrTy(LLVMContext &C, unsigned AS) {
return getInt1Ty(C)->getPointerTo(AS);
}
-const PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt8PtrTy(LLVMContext &C, unsigned AS) {
return getInt8Ty(C)->getPointerTo(AS);
}
-const PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt16PtrTy(LLVMContext &C, unsigned AS) {
return getInt16Ty(C)->getPointerTo(AS);
}
-const PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt32PtrTy(LLVMContext &C, unsigned AS) {
return getInt32Ty(C)->getPointerTo(AS);
}
-const PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
+PointerType *Type::getInt64PtrTy(LLVMContext &C, unsigned AS) {
return getInt64Ty(C)->getPointerTo(AS);
}
+
//===----------------------------------------------------------------------===//
-// Derived Type Constructors
+// IntegerType Implementation
//===----------------------------------------------------------------------===//
-/// isValidReturnType - Return true if the specified type is valid as a return
-/// type.
-bool FunctionType::isValidReturnType(const Type *RetTy) {
- return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
- !RetTy->isMetadataTy();
+IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
+ assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
+ assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
+
+ // Check for the built-in integer types
+ switch (NumBits) {
+ case 1: return cast<IntegerType>(Type::getInt1Ty(C));
+ case 8: return cast<IntegerType>(Type::getInt8Ty(C));
+ case 16: return cast<IntegerType>(Type::getInt16Ty(C));
+ case 32: return cast<IntegerType>(Type::getInt32Ty(C));
+ case 64: return cast<IntegerType>(Type::getInt64Ty(C));
+ default:
+ break;
+ }
+
+ IntegerType *&Entry = C.pImpl->IntegerTypes[NumBits];
+
+ if (Entry == 0)
+ Entry = new (C.pImpl->TypeAllocator) IntegerType(C, NumBits);
+
+ return Entry;
}
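[editor's note] The common widths return the statically allocated members; any other width is created once in the context's allocator and cached. A quick behavior sketch, with Ctx a hypothetical LLVMContext:

    IntegerType *I36 = IntegerType::get(Ctx, 36);      // allocated on first use
    assert(I36 == IntegerType::get(Ctx, 36));          // uniqued per context
    assert(IntegerType::get(Ctx, 32) == Type::getInt32Ty(Ctx));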
-/// isValidArgumentType - Return true if the specified type is valid as an
-/// argument type.
-bool FunctionType::isValidArgumentType(const Type *ArgTy) {
- return ArgTy->isFirstClassType() || ArgTy->isOpaqueTy();
+bool IntegerType::isPowerOf2ByteWidth() const {
+ unsigned BitWidth = getBitWidth();
+ return (BitWidth > 7) && isPowerOf2_32(BitWidth);
}
-FunctionType::FunctionType(const Type *Result,
- ArrayRef<const Type*> Params,
+APInt IntegerType::getMask() const {
+ return APInt::getAllOnesValue(getBitWidth());
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionType Implementation
+//===----------------------------------------------------------------------===//
+
+FunctionType::FunctionType(const Type *Result, ArrayRef<Type*> Params,
bool IsVarArgs)
- : DerivedType(Result->getContext(), FunctionTyID), isVarArgs(IsVarArgs) {
- ContainedTys = reinterpret_cast<PATypeHandle*>(this+1);
- NumContainedTys = Params.size() + 1; // + 1 for result type
+ : Type(Result->getContext(), FunctionTyID) {
+ Type **SubTys = reinterpret_cast<Type**>(this+1);
assert(isValidReturnType(Result) && "invalid return type for function");
+ setSubclassData(IsVarArgs);
+ SubTys[0] = const_cast<Type*>(Result);
- bool isAbstract = Result->isAbstract();
- new (&ContainedTys[0]) PATypeHandle(Result, this);
-
- for (unsigned i = 0; i != Params.size(); ++i) {
+ for (unsigned i = 0, e = Params.size(); i != e; ++i) {
assert(isValidArgumentType(Params[i]) &&
"Not a valid type for function argument!");
- new (&ContainedTys[i+1]) PATypeHandle(Params[i], this);
- isAbstract |= Params[i]->isAbstract();
+ SubTys[i+1] = Params[i];
}
- // Calculate whether or not this type is abstract
- setAbstract(isAbstract);
+ ContainedTys = SubTys;
+ NumContainedTys = Params.size() + 1; // + 1 for result type
}
-StructType::StructType(LLVMContext &C,
- ArrayRef<const Type*> Types, bool isPacked)
- : CompositeType(C, StructTyID) {
- ContainedTys = reinterpret_cast<PATypeHandle*>(this + 1);
- NumContainedTys = Types.size();
- setSubclassData(isPacked);
- bool isAbstract = false;
- for (unsigned i = 0; i < Types.size(); ++i) {
- assert(Types[i] && "<null> type for structure field!");
- assert(isValidElementType(Types[i]) &&
- "Invalid type for structure element!");
- new (&ContainedTys[i]) PATypeHandle(Types[i], this);
- isAbstract |= Types[i]->isAbstract();
+// FunctionType::get - The factory function for the FunctionType class.
+FunctionType *FunctionType::get(const Type *ReturnType,
+ ArrayRef<Type*> Params, bool isVarArg) {
+ // TODO: This is brutally slow.
+ std::vector<Type*> Key;
+ Key.reserve(Params.size()+2);
+ Key.push_back(const_cast<Type*>(ReturnType));
+ for (unsigned i = 0, e = Params.size(); i != e; ++i)
+ Key.push_back(const_cast<Type*>(Params[i]));
+ if (isVarArg)
+ Key.push_back(0);
+
+ LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
+ FunctionType *&FT = pImpl->FunctionTypes[Key];
+
+ if (FT == 0) {
+ FT = (FunctionType*) pImpl->TypeAllocator.
+ Allocate(sizeof(FunctionType) + sizeof(Type*)*(Params.size()+1),
+ AlignOf<FunctionType>::Alignment);
+ new (FT) FunctionType(ReturnType, Params, isVarArg);
}
- // Calculate whether or not this type is abstract
- setAbstract(isAbstract);
-}
-
-ArrayType::ArrayType(const Type *ElType, uint64_t NumEl)
- : SequentialType(ArrayTyID, ElType) {
- NumElements = NumEl;
-
- // Calculate whether or not this type is abstract
- setAbstract(ElType->isAbstract());
-}
-
-VectorType::VectorType(const Type *ElType, unsigned NumEl)
- : SequentialType(VectorTyID, ElType) {
- NumElements = NumEl;
- setAbstract(ElType->isAbstract());
- assert(NumEl > 0 && "NumEl of a VectorType must be greater than 0");
- assert(isValidElementType(ElType) &&
- "Elements of a VectorType must be a primitive type");
-
-}
-
-
-PointerType::PointerType(const Type *E, unsigned AddrSpace)
- : SequentialType(PointerTyID, E) {
- AddressSpace = AddrSpace;
- // Calculate whether or not this type is abstract
- setAbstract(E->isAbstract());
-}
-
-OpaqueType::OpaqueType(LLVMContext &C) : DerivedType(C, OpaqueTyID) {
- setAbstract(true);
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << *this << "\n");
-#endif
+ return FT;
}
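[editor's note] A usage sketch of the new signature; parameters now arrive as an ArrayRef<Type*> rather than a std::vector<const Type*>:

    Type *Params[] = { Type::getInt32Ty(Ctx), Type::getInt8PtrTy(Ctx) };
    FunctionType *FT =
        FunctionType::get(Type::getInt32Ty(Ctx), Params, /*isVarArg=*/false);
    assert(FT == FunctionType::get(Type::getInt32Ty(Ctx), Params, false));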
-void PATypeHolder::destroy() {
- Ty = 0;
-}
-// dropAllTypeUses - When this (abstract) type is resolved to be equal to
-// another (more concrete) type, we must eliminate all references to other
-// types, to avoid some circular reference problems.
-void DerivedType::dropAllTypeUses() {
- if (NumContainedTys != 0) {
- // The type must stay abstract. To do this, we insert a pointer to a type
- // that will never get resolved, thus will always be abstract.
- ContainedTys[0] = getContext().pImpl->AlwaysOpaqueTy;
-
- // Change the rest of the types to be Int32Ty's. It doesn't matter what we
- // pick so long as it doesn't point back to this type. We choose something
- // concrete to avoid overhead for adding to AbstractTypeUser lists and
- // stuff.
- const Type *ConcreteTy = Type::getInt32Ty(getContext());
- for (unsigned i = 1, e = NumContainedTys; i != e; ++i)
- ContainedTys[i] = ConcreteTy;
- }
+FunctionType *FunctionType::get(const Type *Result, bool isVarArg) {
+ return get(Result, ArrayRef<Type *>(), isVarArg);
}
-namespace {
-
-/// TypePromotionGraph and graph traits - this is designed to allow us to do
-/// efficient SCC processing of type graphs. This is the exact same as
-/// GraphTraits<Type*>, except that we pretend that concrete types have no
-/// children to avoid processing them.
-struct TypePromotionGraph {
- Type *Ty;
- TypePromotionGraph(Type *T) : Ty(T) {}
-};
-
-}
-
-namespace llvm {
- template <> struct GraphTraits<TypePromotionGraph> {
- typedef Type NodeType;
- typedef Type::subtype_iterator ChildIteratorType;
-
- static inline NodeType *getEntryNode(TypePromotionGraph G) { return G.Ty; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- if (N->isAbstract())
- return N->subtype_begin();
- // No need to process children of concrete types.
- return N->subtype_end();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->subtype_end();
- }
- };
+/// isValidReturnType - Return true if the specified type is valid as a return
+/// type.
+bool FunctionType::isValidReturnType(const Type *RetTy) {
+ return !RetTy->isFunctionTy() && !RetTy->isLabelTy() &&
+ !RetTy->isMetadataTy();
}
-
-// PromoteAbstractToConcrete - This is a recursive function that walks a type
-// graph calculating whether or not a type is abstract.
-//
-void Type::PromoteAbstractToConcrete() {
- if (!isAbstract()) return;
-
- scc_iterator<TypePromotionGraph> SI = scc_begin(TypePromotionGraph(this));
- scc_iterator<TypePromotionGraph> SE = scc_end (TypePromotionGraph(this));
-
- for (; SI != SE; ++SI) {
- std::vector<Type*> &SCC = *SI;
-
- // Concrete types are leaves in the tree. Since an SCC will either be all
- // abstract or all concrete, we only need to check one type.
- if (!SCC[0]->isAbstract()) continue;
-
- if (SCC[0]->isOpaqueTy())
- return; // Not going to be concrete, sorry.
-
- // If all of the children of all of the types in this SCC are concrete,
- // then this SCC is now concrete as well. If not, neither this SCC, nor
- // any parent SCCs will be concrete, so we might as well just exit.
- for (unsigned i = 0, e = SCC.size(); i != e; ++i)
- for (Type::subtype_iterator CI = SCC[i]->subtype_begin(),
- E = SCC[i]->subtype_end(); CI != E; ++CI)
- if ((*CI)->isAbstract())
- // If the child type is in our SCC, it doesn't make the entire SCC
- // abstract unless there is a non-SCC abstract type.
- if (std::find(SCC.begin(), SCC.end(), *CI) == SCC.end())
- return; // Not going to be concrete, sorry.
-
- // Okay, we just discovered this whole SCC is now concrete, mark it as
- // such!
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- assert(SCC[i]->isAbstract() && "Why are we processing concrete types?");
-
- SCC[i]->setAbstract(false);
- }
-
- for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
- assert(!SCC[i]->isAbstract() && "Concrete type became abstract?");
- // The type just became concrete, notify all users!
- cast<DerivedType>(SCC[i])->notifyUsesThatTypeBecameConcrete();
- }
- }
+/// isValidArgumentType - Return true if the specified type is valid as an
+/// argument type.
+bool FunctionType::isValidArgumentType(const Type *ArgTy) {
+ return ArgTy->isFirstClassType();
}
-
//===----------------------------------------------------------------------===//
-// Type Structural Equality Testing
+// StructType Implementation
//===----------------------------------------------------------------------===//
-// TypesEqual - Two types are considered structurally equal if they have the
-// same "shape": Every level and element of the types have identical primitive
-// ID's, and the graphs have the same edges/nodes in them. Nodes do not have to
-// be pointer equals to be equivalent though. This uses an optimistic algorithm
-// that assumes that two graphs are the same until proven otherwise.
-//
-static bool TypesEqual(const Type *Ty, const Type *Ty2,
- std::map<const Type *, const Type *> &EqTypes) {
- if (Ty == Ty2) return true;
- if (Ty->getTypeID() != Ty2->getTypeID()) return false;
- if (Ty->isOpaqueTy())
- return false; // Two unequal opaque types are never equal
-
- std::map<const Type*, const Type*>::iterator It = EqTypes.find(Ty);
- if (It != EqTypes.end())
- return It->second == Ty2; // Looping back on a type, check for equality
-
- // Otherwise, add the mapping to the table to make sure we don't get
- // recursion on the types...
- EqTypes.insert(It, std::make_pair(Ty, Ty2));
-
-  // Two really annoying special cases that break an otherwise nice, simple
-  // algorithm are that array types have sizes that differentiate types, and
-  // that function types can be varargs or not. Consider those now.
- //
- if (const IntegerType *ITy = dyn_cast<IntegerType>(Ty)) {
- const IntegerType *ITy2 = cast<IntegerType>(Ty2);
- return ITy->getBitWidth() == ITy2->getBitWidth();
- }
-
- if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
- const PointerType *PTy2 = cast<PointerType>(Ty2);
- return PTy->getAddressSpace() == PTy2->getAddressSpace() &&
- TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
+// Primitive Constructors.
+
+StructType *StructType::get(LLVMContext &Context, ArrayRef<Type*> ETypes,
+ bool isPacked) {
+ // FIXME: std::vector is horribly inefficient for this probe.
+ std::vector<Type*> Key;
+ for (unsigned i = 0, e = ETypes.size(); i != e; ++i) {
+ assert(isValidElementType(ETypes[i]) &&
+ "Invalid type for structure element!");
+ Key.push_back(ETypes[i]);
}
+ if (isPacked)
+ Key.push_back(0);
- if (const StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructType *STy2 = cast<StructType>(Ty2);
- if (STy->getNumElements() != STy2->getNumElements()) return false;
- if (STy->isPacked() != STy2->isPacked()) return false;
- for (unsigned i = 0, e = STy2->getNumElements(); i != e; ++i)
- if (!TypesEqual(STy->getElementType(i), STy2->getElementType(i), EqTypes))
- return false;
- return true;
- }
+ StructType *&ST = Context.pImpl->AnonStructTypes[Key];
+ if (ST) return ST;
- if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
- const ArrayType *ATy2 = cast<ArrayType>(Ty2);
- return ATy->getNumElements() == ATy2->getNumElements() &&
- TypesEqual(ATy->getElementType(), ATy2->getElementType(), EqTypes);
- }
+ // Value not found. Create a new type!
+ ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ ST->setSubclassData(SCDB_IsAnonymous); // Anonymous struct.
+ ST->setBody(ETypes, isPacked);
+ return ST;
+}
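[editor's note] Anonymous (literal) structs remain uniqued by element list and packing, so structurally identical requests return the same type. A behavior sketch:

    Type *Fields[] = { Type::getInt32Ty(Ctx), Type::getFloatTy(Ctx) };
    StructType *A = StructType::get(Ctx, Fields, /*isPacked=*/false);
    assert(A->isAnonymous());
    assert(A == StructType::get(Ctx, Fields, false));  // same uniqued type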
+
+void StructType::setBody(ArrayRef<Type*> Elements, bool isPacked) {
+ assert(isOpaque() && "Struct body already set!");
- if (const VectorType *PTy = dyn_cast<VectorType>(Ty)) {
- const VectorType *PTy2 = cast<VectorType>(Ty2);
- return PTy->getNumElements() == PTy2->getNumElements() &&
- TypesEqual(PTy->getElementType(), PTy2->getElementType(), EqTypes);
- }
+ setSubclassData(getSubclassData() | SCDB_HasBody);
+ if (isPacked)
+ setSubclassData(getSubclassData() | SCDB_Packed);
- if (const FunctionType *FTy = dyn_cast<FunctionType>(Ty)) {
- const FunctionType *FTy2 = cast<FunctionType>(Ty2);
- if (FTy->isVarArg() != FTy2->isVarArg() ||
- FTy->getNumParams() != FTy2->getNumParams() ||
- !TypesEqual(FTy->getReturnType(), FTy2->getReturnType(), EqTypes))
- return false;
- for (unsigned i = 0, e = FTy2->getNumParams(); i != e; ++i) {
- if (!TypesEqual(FTy->getParamType(i), FTy2->getParamType(i), EqTypes))
- return false;
- }
- return true;
- }
+ Type **Elts = getContext().pImpl->
+ TypeAllocator.Allocate<Type*>(Elements.size());
+ memcpy(Elts, Elements.data(), sizeof(Elements[0])*Elements.size());
- llvm_unreachable("Unknown derived type!");
- return false;
-}
-
-namespace llvm { // in namespace llvm so findable by ADL
-static bool TypesEqual(const Type *Ty, const Type *Ty2) {
- std::map<const Type *, const Type *> EqTypes;
- return ::TypesEqual(Ty, Ty2, EqTypes);
-}
-}
-
-// AbstractTypeHasCycleThrough - Return true if there is a path from CurTy to
-// TargetTy in the type graph. We know that Ty is an abstract type, so if we
-// ever reach a non-abstract type, we know that we don't need to search the
-// subgraph.
-static bool AbstractTypeHasCycleThrough(const Type *TargetTy, const Type *CurTy,
- SmallPtrSet<const Type*, 128> &VisitedTypes) {
- if (TargetTy == CurTy) return true;
- if (!CurTy->isAbstract()) return false;
-
- if (!VisitedTypes.insert(CurTy))
- return false; // Already been here.
-
- for (Type::subtype_iterator I = CurTy->subtype_begin(),
- E = CurTy->subtype_end(); I != E; ++I)
- if (AbstractTypeHasCycleThrough(TargetTy, *I, VisitedTypes))
- return true;
- return false;
+ ContainedTys = Elts;
+ NumContainedTys = Elements.size();
}
-static bool ConcreteTypeHasCycleThrough(const Type *TargetTy, const Type *CurTy,
- SmallPtrSet<const Type*, 128> &VisitedTypes) {
- if (TargetTy == CurTy) return true;
-
- if (!VisitedTypes.insert(CurTy))
- return false; // Already been here.
-
- for (Type::subtype_iterator I = CurTy->subtype_begin(),
- E = CurTy->subtype_end(); I != E; ++I)
- if (ConcreteTypeHasCycleThrough(TargetTy, *I, VisitedTypes))
- return true;
- return false;
+StructType *StructType::createNamed(LLVMContext &Context, StringRef Name) {
+ StructType *ST = new (Context.pImpl->TypeAllocator) StructType(Context);
+ if (!Name.empty())
+ ST->setName(Name);
+ return ST;
}
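[editor's note] This two-step create-then-setBody protocol is what replaces abstract type refinement for recursive types. A sketch, using the vararg setBody overload defined later in this file:

    // A self-referential list node, impossible to express in one shot.
    StructType *Node = StructType::createNamed(Ctx, "node");
    Node->setBody(Type::getInt32Ty(Ctx),
                  PointerType::getUnqual(Node),
                  (Type *)0);  // null terminator for the vararg overload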
-/// TypeHasCycleThroughItself - Return true if the specified type has
-/// a cycle back to itself.
+void StructType::setName(StringRef Name) {
+ if (Name == getName()) return;
-namespace llvm { // in namespace llvm so it's findable by ADL
-static bool TypeHasCycleThroughItself(const Type *Ty) {
- SmallPtrSet<const Type*, 128> VisitedTypes;
-
- if (Ty->isAbstract()) { // Optimized case for abstract types.
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I)
- if (AbstractTypeHasCycleThrough(Ty, *I, VisitedTypes))
- return true;
- } else {
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I)
- if (ConcreteTypeHasCycleThrough(Ty, *I, VisitedTypes))
- return true;
+ // If this struct already had a name, remove its symbol table entry.
+ if (SymbolTableEntry) {
+ getContext().pImpl->NamedStructTypes.erase(getName());
+ SymbolTableEntry = 0;
}
- return false;
-}
-}
-
-//===----------------------------------------------------------------------===//
-// Function Type Factory and Value Class...
-//
-const IntegerType *IntegerType::get(LLVMContext &C, unsigned NumBits) {
- assert(NumBits >= MIN_INT_BITS && "bitwidth too small");
- assert(NumBits <= MAX_INT_BITS && "bitwidth too large");
-
- // Check for the built-in integer types
- switch (NumBits) {
- case 1: return cast<IntegerType>(Type::getInt1Ty(C));
- case 8: return cast<IntegerType>(Type::getInt8Ty(C));
- case 16: return cast<IntegerType>(Type::getInt16Ty(C));
- case 32: return cast<IntegerType>(Type::getInt32Ty(C));
- case 64: return cast<IntegerType>(Type::getInt64Ty(C));
- default:
- break;
- }
-
- LLVMContextImpl *pImpl = C.pImpl;
- IntegerValType IVT(NumBits);
- IntegerType *ITy = 0;
+ // If this is just removing the name, we're done.
+ if (Name.empty())
+ return;
- // First, see if the type is already in the table, for which
- // a reader lock suffices.
- ITy = pImpl->IntegerTypes.get(IVT);
-
- if (!ITy) {
- // Value not found. Derive a new type!
- ITy = new IntegerType(C, NumBits);
- pImpl->IntegerTypes.add(IVT, ITy);
+ // Look up the entry for the name.
+ StringMapEntry<StructType*> *Entry =
+ &getContext().pImpl->NamedStructTypes.GetOrCreateValue(Name);
+
+  // While we have a name collision, append a counter-based suffix until the
+  // name is unique.
+ if (Entry->getValue()) {
+ SmallString<64> TempStr(Name);
+ TempStr.push_back('.');
+ raw_svector_ostream TmpStream(TempStr);
+
+ do {
+ TempStr.resize(Name.size()+1);
+ TmpStream.resync();
+ TmpStream << getContext().pImpl->NamedStructTypesUniqueID++;
+
+ Entry = &getContext().pImpl->
+ NamedStructTypes.GetOrCreateValue(TmpStream.str());
+ } while (Entry->getValue());
}
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << *ITy << "\n");
-#endif
- return ITy;
-}
-bool IntegerType::isPowerOf2ByteWidth() const {
- unsigned BitWidth = getBitWidth();
- return (BitWidth > 7) && isPowerOf2_32(BitWidth);
+ // Okay, we found an entry that isn't used. It's us!
+ Entry->setValue(this);
+
+ SymbolTableEntry = Entry;
}
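[editor's note] The collision path never fails; it draws suffixes from the per-context counter. Expected behavior, sketched:

    StructType *A = StructType::createNamed(Ctx, "node");
    StructType *B = StructType::createNamed(Ctx, "node");  // name collides
    assert(A != B && A->getName() == "node");
    assert(B->getName() != "node");  // renamed, e.g. to "node.0"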
-APInt IntegerType::getMask() const {
- return APInt::getAllOnesValue(getBitWidth());
-}
+//===----------------------------------------------------------------------===//
+// StructType Helper functions.
-FunctionValType FunctionValType::get(const FunctionType *FT) {
- // Build up a FunctionValType
- std::vector<const Type *> ParamTypes;
- ParamTypes.reserve(FT->getNumParams());
- for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i)
- ParamTypes.push_back(FT->getParamType(i));
- return FunctionValType(FT->getReturnType(), ParamTypes, FT->isVarArg());
+StructType *StructType::get(LLVMContext &Context, bool isPacked) {
+ return get(Context, llvm::ArrayRef<Type*>(), isPacked);
}
-
-// FunctionType::get - The factory function for the FunctionType class...
-FunctionType *FunctionType::get(const Type *ReturnType,
- ArrayRef<const Type*> Params,
- bool isVarArg) {
- FunctionValType VT(ReturnType, Params, isVarArg);
- FunctionType *FT = 0;
-
- LLVMContextImpl *pImpl = ReturnType->getContext().pImpl;
-
- FT = pImpl->FunctionTypes.get(VT);
-
- if (!FT) {
- FT = (FunctionType*) operator new(sizeof(FunctionType) +
- sizeof(PATypeHandle)*(Params.size()+1));
- new (FT) FunctionType(ReturnType, Params, isVarArg);
- pImpl->FunctionTypes.add(VT, FT);
+StructType *StructType::get(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
}
-
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << FT << "\n");
-#endif
- return FT;
+ return llvm::StructType::get(Ctx, StructFields);
}
-ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
- assert(ElementType && "Can't get array of <null> types!");
- assert(isValidElementType(ElementType) && "Invalid type for array element!");
-
- ArrayValType AVT(ElementType, NumElements);
- ArrayType *AT = 0;
-
- LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
-
- AT = pImpl->ArrayTypes.get(AVT);
-
- if (!AT) {
- // Value not found. Derive a new type!
- pImpl->ArrayTypes.add(AVT, AT = new ArrayType(ElementType, NumElements));
- }
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << *AT << "\n");
-#endif
- return AT;
+StructType *StructType::createNamed(LLVMContext &Context, StringRef Name,
+ ArrayRef<Type*> Elements, bool isPacked) {
+ StructType *ST = createNamed(Context, Name);
+ ST->setBody(Elements, isPacked);
+ return ST;
}
-bool ArrayType::isValidElementType(const Type *ElemTy) {
- return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
+StructType *StructType::createNamed(StringRef Name, ArrayRef<Type*> Elements,
+ bool isPacked) {
+ assert(!Elements.empty() &&
+ "This method may not be invoked with an empty list");
+ return createNamed(Elements[0]->getContext(), Name, Elements, isPacked);
}
-VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
- assert(ElementType && "Can't get vector of <null> types!");
-
- VectorValType PVT(ElementType, NumElements);
- VectorType *PT = 0;
-
- LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
-
- PT = pImpl->VectorTypes.get(PVT);
-
- if (!PT) {
- pImpl->VectorTypes.add(PVT, PT = new VectorType(ElementType, NumElements));
+StructType *StructType::createNamed(StringRef Name, Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
+ LLVMContext &Ctx = type->getContext();
+ va_list ap;
+ SmallVector<llvm::Type*, 8> StructFields;
+ va_start(ap, type);
+ while (type) {
+ StructFields.push_back(type);
+ type = va_arg(ap, llvm::Type*);
}
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << *PT << "\n");
-#endif
- return PT;
-}
-
-bool VectorType::isValidElementType(const Type *ElemTy) {
- return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() ||
- ElemTy->isOpaqueTy();
+ return llvm::StructType::createNamed(Ctx, Name, StructFields);
}
-//===----------------------------------------------------------------------===//
-// Struct Type Factory...
-//
-
-StructType *StructType::get(LLVMContext &Context,
- ArrayRef<const Type*> ETypes,
- bool isPacked) {
- StructValType STV(ETypes, isPacked);
- StructType *ST = 0;
-
- LLVMContextImpl *pImpl = Context.pImpl;
+StringRef StructType::getName() const {
+ assert(!isAnonymous() && "Anonymous structs never have names");
+ if (SymbolTableEntry == 0) return StringRef();
- ST = pImpl->StructTypes.get(STV);
-
- if (!ST) {
- // Value not found. Derive a new type!
- ST = (StructType*) operator new(sizeof(StructType) +
- sizeof(PATypeHandle) * ETypes.size());
- new (ST) StructType(Context, ETypes, isPacked);
- pImpl->StructTypes.add(STV, ST);
- }
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << *ST << "\n");
-#endif
- return ST;
+ return ((StringMapEntry<StructType*> *)SymbolTableEntry)->getKey();
}
-StructType *StructType::get(LLVMContext &Context, const Type *type, ...) {
+void StructType::setBody(Type *type, ...) {
+ assert(type != 0 && "Cannot create a struct type with no elements with this");
va_list ap;
- std::vector<const llvm::Type*> StructFields;
+ SmallVector<llvm::Type*, 8> StructFields;
va_start(ap, type);
while (type) {
StructFields.push_back(type);
type = va_arg(ap, llvm::Type*);
}
- return llvm::StructType::get(Context, StructFields);
+ setBody(StructFields);
}
bool StructType::isValidElementType(const Type *ElemTy) {
@@ -973,278 +529,159 @@ bool StructType::isValidElementType(const Type *ElemTy) {
!ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
}
-
-//===----------------------------------------------------------------------===//
-// Pointer Type Factory...
-//
-
-PointerType *PointerType::get(const Type *ValueType, unsigned AddressSpace) {
- assert(ValueType && "Can't get a pointer to <null> type!");
- assert(ValueType->getTypeID() != VoidTyID &&
- "Pointer to void is not valid, use i8* instead!");
- assert(isValidElementType(ValueType) && "Invalid type for pointer element!");
- PointerValType PVT(ValueType, AddressSpace);
-
- PointerType *PT = 0;
-
- LLVMContextImpl *pImpl = ValueType->getContext().pImpl;
+/// isLayoutIdentical - Return true if this is layout-identical to the
+/// specified struct.
+bool StructType::isLayoutIdentical(const StructType *Other) const {
+ if (this == Other) return true;
- PT = pImpl->PointerTypes.get(PVT);
+ if (isPacked() != Other->isPacked() ||
+ getNumElements() != Other->getNumElements())
+ return false;
- if (!PT) {
- // Value not found. Derive a new type!
- pImpl->PointerTypes.add(PVT, PT = new PointerType(ValueType, AddressSpace));
- }
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "Derived new type: " << *PT << "\n");
-#endif
- return PT;
+ return std::equal(element_begin(), element_end(), Other->element_begin());
}
-const PointerType *Type::getPointerTo(unsigned addrs) const {
- return PointerType::get(this, addrs);
-}
-bool PointerType::isValidElementType(const Type *ElemTy) {
- return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy();
+/// getTypeByName - Return the type with the specified name, or null if there
+/// is none by that name.
+StructType *Module::getTypeByName(StringRef Name) const {
+ StringMap<StructType*>::iterator I =
+ getContext().pImpl->NamedStructTypes.find(Name);
+ if (I != getContext().pImpl->NamedStructTypes.end())
+ return I->second;
+ return 0;
}
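[editor's note] Lookup now resolves through the context-level table, and only named struct types participate. Usage sketch with a hypothetical Module *M:

    if (StructType *Node = M->getTypeByName("node"))
      errs() << "found " << Node->getName() << "\n";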
//===----------------------------------------------------------------------===//
-// Opaque Type Factory...
-//
+// CompositeType Implementation
+//===----------------------------------------------------------------------===//
-OpaqueType *OpaqueType::get(LLVMContext &C) {
- OpaqueType *OT = new OpaqueType(C); // All opaque types are distinct.
- LLVMContextImpl *pImpl = C.pImpl;
- pImpl->OpaqueTypes.insert(OT);
- return OT;
+Type *CompositeType::getTypeAtIndex(const Value *V) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ unsigned Idx = (unsigned)cast<ConstantInt>(V)->getZExtValue();
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+Type *CompositeType::getTypeAtIndex(unsigned Idx) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ assert(indexValid(Idx) && "Invalid structure index!");
+ return STy->getElementType(Idx);
+ }
+
+ return cast<SequentialType>(this)->getElementType();
+}
+bool CompositeType::indexValid(const Value *V) const {
+ if (const StructType *STy = dyn_cast<StructType>(this)) {
+ // Structure indexes require 32-bit integer constants.
+ if (V->getType()->isIntegerTy(32))
+ if (const ConstantInt *CU = dyn_cast<ConstantInt>(V))
+ return CU->getZExtValue() < STy->getNumElements();
+ return false;
+ }
+
+ // Sequential types can be indexed by any integer.
+ return V->getType()->isIntegerTy();
}
+bool CompositeType::indexValid(unsigned Idx) const {
+ if (const StructType *STy = dyn_cast<StructType>(this))
+ return Idx < STy->getNumElements();
+ // Sequential types can be indexed by any integer.
+ return true;
+}
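[editor's note] A fragment sketching how the merged helpers support GEP-style index walking; Ty (a Type*) and Idxs (an ArrayRef<unsigned>) are assumed from a hypothetical caller returning Type*:

    Type *Cur = Ty;
    for (unsigned i = 0, e = Idxs.size(); i != e; ++i) {
      CompositeType *CT = dyn_cast<CompositeType>(Cur);
      if (!CT || !CT->indexValid(Idxs[i]))
        return 0;                          // invalid index path
      Cur = CT->getTypeAtIndex(Idxs[i]);
    }
    return Cur;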
//===----------------------------------------------------------------------===//
-// Derived Type Refinement Functions
+// ArrayType Implementation
//===----------------------------------------------------------------------===//
-// addAbstractTypeUser - Notify an abstract type that there is a new user of
-// it. This function is called primarily by the PATypeHandle class.
-void Type::addAbstractTypeUser(AbstractTypeUser *U) const {
- assert(isAbstract() && "addAbstractTypeUser: Current type not abstract!");
- AbstractTypeUsers.push_back(U);
+ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
+ : SequentialType(ArrayTyID, ElType) {
+ NumElements = NumEl;
}
-// removeAbstractTypeUser - Notify an abstract type that a user of the class
-// no longer has a handle to the type. This function is called primarily by
-// the PATypeHandle class. When there are no users of the abstract type, it
-// is annihilated, because there is no way to get a reference to it ever again.
-//
-void Type::removeAbstractTypeUser(AbstractTypeUser *U) const {
-
- // Search from back to front because we will notify users from back to
- // front. Also, it is likely that there will be a stack like behavior to
- // users that register and unregister users.
- //
- unsigned i;
- for (i = AbstractTypeUsers.size(); AbstractTypeUsers[i-1] != U; --i)
- assert(i != 0 && "AbstractTypeUser not in user list!");
-
- --i; // Convert to be in range 0 <= i < size()
- assert(i < AbstractTypeUsers.size() && "Index out of range!"); // Wraparound?
-
- AbstractTypeUsers.erase(AbstractTypeUsers.begin()+i);
-
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << " remAbstractTypeUser[" << (void*)this << ", "
- << *this << "][" << i << "] User = " << U << "\n");
-#endif
-
- if (AbstractTypeUsers.empty() && getRefCount() == 0 && isAbstract()) {
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "DELETEing unused abstract type: <" << *this
- << ">[" << (void*)this << "]" << "\n");
-#endif
+ArrayType *ArrayType::get(const Type *elementType, uint64_t NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(isValidElementType(ElementType) && "Invalid type for array element!");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+ ArrayType *&Entry =
+ pImpl->ArrayTypes[std::make_pair(ElementType, NumElements)];
- this->destroy();
- }
-}
-
-// refineAbstractTypeTo - This function is used when it is discovered
-// that the 'this' abstract type is actually equivalent to the NewType
-// specified. This causes all users of 'this' to switch to reference the more
-// concrete type NewType and for 'this' to be deleted. Only used by internal
-// callers.
-//
-void DerivedType::refineAbstractTypeTo(const Type *NewType) {
- assert(isAbstract() && "refineAbstractTypeTo: Current type is not abstract!");
- assert(this != NewType && "Can't refine to myself!");
- assert(ForwardType == 0 && "This type has already been refined!");
-
- LLVMContextImpl *pImpl = getContext().pImpl;
-
- // The descriptions may be out of date. Conservatively clear them all!
- pImpl->AbstractTypeDescriptions.clear();
-
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "REFINING abstract type [" << (void*)this << " "
- << *this << "] to [" << (void*)NewType << " "
- << *NewType << "]!\n");
-#endif
-
- // Make sure to put the type to be refined into a holder so that if it gets
- // refined, we will not continue using a dead reference...

- //
- PATypeHolder NewTy(NewType);
- // Any PATypeHolders referring to this type will now automatically forward to
- // the type we are resolved to.
- ForwardType = NewType;
- if (ForwardType->isAbstract())
- ForwardType->addRef();
-
- // Add a self use of the current type so that we don't delete ourself until
- // after the function exits.
- //
- PATypeHolder CurrentTy(this);
-
- // To make the situation simpler, we ask the subclass to remove this type from
- // the type map, and to replace any type uses with uses of non-abstract types.
- // This dramatically limits the amount of recursive type trouble we can find
- // ourselves in.
- dropAllTypeUses();
-
- // Iterate over all of the uses of this type, invoking callback. Each user
- // should remove itself from our use list automatically. We have to check to
- // make sure that NewTy doesn't _become_ 'this'. If it does, resolving types
- // will not cause users to drop off of the use list. If we resolve to ourself
- // we succeed!
- //
- while (!AbstractTypeUsers.empty() && NewTy != this) {
- AbstractTypeUser *User = AbstractTypeUsers.back();
-
- unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize;
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << " REFINING user " << OldSize-1 << "[" << (void*)User
- << "] of abstract type [" << (void*)this << " "
- << *this << "] to [" << (void*)NewTy.get() << " "
- << *NewTy << "]!\n");
-#endif
- User->refineAbstractType(this, NewTy);
-
- assert(AbstractTypeUsers.size() != OldSize &&
- "AbsTyUser did not remove self from user list!");
- }
-
- // If we were successful removing all users from the type, 'this' will be
- // deleted when the last PATypeHolder is destroyed or updated from this type.
- // This may occur on exit of this function, as the CurrentTy object is
- // destroyed.
-}
-
-// notifyUsesThatTypeBecameConcrete - Notify AbstractTypeUsers of this type that
-// the current type has transitioned from being abstract to being concrete.
-//
-void DerivedType::notifyUsesThatTypeBecameConcrete() {
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "typeIsREFINED type: " << (void*)this << " " << *this <<"\n");
-#endif
-
- unsigned OldSize = AbstractTypeUsers.size(); (void)OldSize;
- while (!AbstractTypeUsers.empty()) {
- AbstractTypeUser *ATU = AbstractTypeUsers.back();
- ATU->typeBecameConcrete(this);
-
- assert(AbstractTypeUsers.size() < OldSize-- &&
- "AbstractTypeUser did not remove itself from the use list!");
- }
-}
-
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void FunctionType::refineAbstractType(const DerivedType *OldType,
- const Type *NewType) {
- LLVMContextImpl *pImpl = OldType->getContext().pImpl;
- pImpl->FunctionTypes.RefineAbstractType(this, OldType, NewType);
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) ArrayType(ElementType, NumElements);
+ return Entry;
}
-void FunctionType::typeBecameConcrete(const DerivedType *AbsTy) {
- LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
- pImpl->FunctionTypes.TypeBecameConcrete(this, AbsTy);
+bool ArrayType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
}
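
(Since arrays are now uniqued in the context-owned map above, repeated gets return the same object. A minimal sketch; demoArrayUniquing is an invented name.)

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>
using namespace llvm;

void demoArrayUniquing(LLVMContext &C) {
  ArrayType *A = ArrayType::get(Type::getInt32Ty(C), 10); // [10 x i32]
  ArrayType *B = ArrayType::get(Type::getInt32Ty(C), 10);
  assert(A == B && "same (element, length) key, same object");
}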
+//===----------------------------------------------------------------------===//
+// VectorType Implementation
+//===----------------------------------------------------------------------===//
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void ArrayType::refineAbstractType(const DerivedType *OldType,
- const Type *NewType) {
- LLVMContextImpl *pImpl = OldType->getContext().pImpl;
- pImpl->ArrayTypes.RefineAbstractType(this, OldType, NewType);
+VectorType::VectorType(Type *ElType, unsigned NumEl)
+ : SequentialType(VectorTyID, ElType) {
+ NumElements = NumEl;
}
-void ArrayType::typeBecameConcrete(const DerivedType *AbsTy) {
- LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
- pImpl->ArrayTypes.TypeBecameConcrete(this, AbsTy);
+VectorType *VectorType::get(const Type *elementType, unsigned NumElements) {
+ Type *ElementType = const_cast<Type*>(elementType);
+ assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
+ assert(isValidElementType(ElementType) &&
+ "Elements of a VectorType must be a primitive type");
+
+ LLVMContextImpl *pImpl = ElementType->getContext().pImpl;
+  VectorType *&Entry =
+    pImpl->VectorTypes[std::make_pair(ElementType, NumElements)];
+
+ if (Entry == 0)
+ Entry = new (pImpl->TypeAllocator) VectorType(ElementType, NumElements);
+ return Entry;
}
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void VectorType::refineAbstractType(const DerivedType *OldType,
- const Type *NewType) {
- LLVMContextImpl *pImpl = OldType->getContext().pImpl;
- pImpl->VectorTypes.RefineAbstractType(this, OldType, NewType);
+bool VectorType::isValidElementType(const Type *ElemTy) {
+ return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy();
}
-void VectorType::typeBecameConcrete(const DerivedType *AbsTy) {
- LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
- pImpl->VectorTypes.TypeBecameConcrete(this, AbsTy);
-}
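
(Likewise for vectors, a hedged sketch; only integer and floating-point elements pass the validity check above. demoVectorUniquing is an invented name.)

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>
using namespace llvm;

void demoVectorUniquing(LLVMContext &C) {
  VectorType *V = VectorType::get(Type::getFloatTy(C), 4); // <4 x float>
  assert(V == VectorType::get(Type::getFloatTy(C), 4) && "uniqued");
  assert(VectorType::isValidElementType(Type::getInt1Ty(C)));
}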
+//===----------------------------------------------------------------------===//
+// PointerType Implementation
+//===----------------------------------------------------------------------===//
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void StructType::refineAbstractType(const DerivedType *OldType,
- const Type *NewType) {
- LLVMContextImpl *pImpl = OldType->getContext().pImpl;
- pImpl->StructTypes.RefineAbstractType(this, OldType, NewType);
-}
+PointerType *PointerType::get(const Type *eltTy, unsigned AddressSpace) {
+ Type *EltTy = const_cast<Type*>(eltTy);
+ assert(EltTy && "Can't get a pointer to <null> type!");
+ assert(isValidElementType(EltTy) && "Invalid type for pointer element!");
+
+ LLVMContextImpl *CImpl = EltTy->getContext().pImpl;
+
+ // Since AddressSpace #0 is the common case, we special case it.
+ PointerType *&Entry = AddressSpace == 0 ? CImpl->PointerTypes[EltTy]
+ : CImpl->ASPointerTypes[std::make_pair(EltTy, AddressSpace)];
-void StructType::typeBecameConcrete(const DerivedType *AbsTy) {
- LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
- pImpl->StructTypes.TypeBecameConcrete(this, AbsTy);
+ if (Entry == 0)
+ Entry = new (CImpl->TypeAllocator) PointerType(EltTy, AddressSpace);
+ return Entry;
}
-// refineAbstractType - Called when a contained type is found to be more
-// concrete - this could potentially change us from an abstract type to a
-// concrete type.
-//
-void PointerType::refineAbstractType(const DerivedType *OldType,
- const Type *NewType) {
- LLVMContextImpl *pImpl = OldType->getContext().pImpl;
- pImpl->PointerTypes.RefineAbstractType(this, OldType, NewType);
-}
-void PointerType::typeBecameConcrete(const DerivedType *AbsTy) {
- LLVMContextImpl *pImpl = AbsTy->getContext().pImpl;
- pImpl->PointerTypes.TypeBecameConcrete(this, AbsTy);
+PointerType::PointerType(Type *E, unsigned AddrSpace)
+ : SequentialType(PointerTyID, E) {
+ setSubclassData(AddrSpace);
}
-bool SequentialType::indexValid(const Value *V) const {
- if (V->getType()->isIntegerTy())
- return true;
- return false;
+PointerType *Type::getPointerTo(unsigned addrs) const {
+ return PointerType::get(this, addrs);
}
-namespace llvm {
-raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
- T.print(OS);
- return OS;
-}
+bool PointerType::isValidElementType(const Type *ElemTy) {
+ return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
+ !ElemTy->isMetadataTy();
}
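
(A usage sketch for the pointer factory; note the separate uniquing maps for address space 0 and everything else. demoPointerUniquing is an invented name.)

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include <cassert>
using namespace llvm;

void demoPointerUniquing(LLVMContext &C) {
  PointerType *P0 = Type::getInt8Ty(C)->getPointerTo();  // addrspace(0)
  PointerType *P1 = Type::getInt8Ty(C)->getPointerTo(1); // addrspace(1)
  assert(P0 != P1 && "address space is part of the uniquing key");
  assert(P0->getElementType() == P1->getElementType());
}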
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
deleted file mode 100644
index d68a44b..0000000
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-//===-- TypeSymbolTable.cpp - Implement the TypeSymbolTable class ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the TypeSymbolTable class for the VMCore library.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/TypeSymbolTable.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/ADT/StringExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/raw_ostream.h"
-#include <algorithm>
-using namespace llvm;
-
-#define DEBUG_SYMBOL_TABLE 0
-#define DEBUG_ABSTYPE 0
-
-TypeSymbolTable::~TypeSymbolTable() {
- // Drop all abstract type references in the type plane...
- for (iterator TI = tmap.begin(), TE = tmap.end(); TI != TE; ++TI) {
- if (TI->second->isAbstract()) // If abstract, drop the reference...
- cast<DerivedType>(TI->second)->removeAbstractTypeUser(this);
- }
-}
-
-std::string TypeSymbolTable::getUniqueName(StringRef BaseName) const {
- std::string TryName = BaseName;
-
- const_iterator End = tmap.end();
-
- // See if the name exists
- while (tmap.find(TryName) != End) // Loop until we find a free
- TryName = BaseName.str() + utostr(++LastUnique); // name in the symbol table
- return TryName;
-}
-
-// lookup a type by name - returns null on failure
-Type* TypeSymbolTable::lookup(StringRef Name) const {
- const_iterator TI = tmap.find(Name);
- Type* result = 0;
- if (TI != tmap.end())
- result = const_cast<Type*>(TI->second);
- return result;
-}
-
-// remove - Remove a type from the symbol table...
-Type* TypeSymbolTable::remove(iterator Entry) {
- assert(Entry != tmap.end() && "Invalid entry to remove!");
- const Type* Result = Entry->second;
-
-#if DEBUG_SYMBOL_TABLE
- dump();
- dbgs() << " Removing Value: " << Result->getDescription() << "\n";
-#endif
-
- tmap.erase(Entry);
-
- // If we are removing an abstract type, remove the symbol table from its use
- // list...
- if (Result->isAbstract()) {
-#if DEBUG_ABSTYPE
- dbgs() << "Removing abstract type from symtab"
- << Result->getDescription()
- << "\n";
-#endif
- cast<DerivedType>(Result)->removeAbstractTypeUser(this);
- }
-
- return const_cast<Type*>(Result);
-}
-
-
-// insert - Insert a type into the symbol table with the specified name...
-void TypeSymbolTable::insert(StringRef Name, const Type* T) {
- assert(T && "Can't insert null type into symbol table!");
-
- if (tmap.insert(std::make_pair(Name, T)).second) {
- // Type inserted fine with no conflict.
-
-#if DEBUG_SYMBOL_TABLE
- dump();
- dbgs() << " Inserted type: " << Name << ": " << T->getDescription() << "\n";
-#endif
- } else {
- // If there is a name conflict...
-
- // Check to see if there is a naming conflict. If so, rename this type!
- std::string UniqueName = Name;
- if (lookup(Name))
- UniqueName = getUniqueName(Name);
-
-#if DEBUG_SYMBOL_TABLE
- dump();
- dbgs() << " Inserting type: " << UniqueName << ": "
- << T->getDescription() << "\n";
-#endif
-
- // Insert the tmap entry
- tmap.insert(make_pair(UniqueName, T));
- }
-
- // If we are adding an abstract type, add the symbol table to its use list.
- if (T->isAbstract()) {
- cast<DerivedType>(T)->addAbstractTypeUser(this);
-#if DEBUG_ABSTYPE
- dbgs() << "Added abstract type to ST: " << T->getDescription() << "\n";
-#endif
- }
-}
-
-// This function is called when one of the types in the type plane is refined
-void TypeSymbolTable::refineAbstractType(const DerivedType *OldType,
- const Type *NewType) {
- // Loop over all of the types in the symbol table, replacing any references
- // to OldType with references to NewType. Note that there may be multiple
- // occurrences, and although we only need to remove one at a time, it's
- // faster to remove them all in one pass.
- //
- for (iterator I = begin(), E = end(); I != E; ++I) {
- // FIXME when Types aren't const.
- if (I->second == const_cast<DerivedType *>(OldType)) {
-#if DEBUG_ABSTYPE
- dbgs() << "Removing type " << OldType->getDescription() << "\n";
-#endif
- OldType->removeAbstractTypeUser(this);
-
- // TODO FIXME when types aren't const
- I->second = const_cast<Type *>(NewType);
- if (NewType->isAbstract()) {
-#if DEBUG_ABSTYPE
- dbgs() << "Added type " << NewType->getDescription() << "\n";
-#endif
- cast<DerivedType>(NewType)->addAbstractTypeUser(this);
- }
- }
- }
-}
-
-
-// Handle the situation where a type becomes concrete from abstract
-void TypeSymbolTable::typeBecameConcrete(const DerivedType *AbsTy) {
- // Loop over all of the types in the symbol table, dropping any abstract
- // type user entries for AbsTy which occur because there are names for the
- // type.
- for (iterator TI = begin(), TE = end(); TI != TE; ++TI)
- if (TI->second == const_cast<Type*>(static_cast<const Type*>(AbsTy)))
- AbsTy->removeAbstractTypeUser(this);
-}
-
-static void DumpTypes(const std::pair<const std::string, const Type*>& T ) {
- dbgs() << " '" << T.first << "' = ";
- T.second->dump();
- dbgs() << "\n";
-}
-
-void TypeSymbolTable::dump() const {
- dbgs() << "TypeSymbolPlane: ";
- for_each(tmap.begin(), tmap.end(), DumpTypes);
-}
-
diff --git a/lib/VMCore/TypesContext.h b/lib/VMCore/TypesContext.h
deleted file mode 100644
index ad09478..0000000
--- a/lib/VMCore/TypesContext.h
+++ /dev/null
@@ -1,426 +0,0 @@
-//===-- TypesContext.h - Types-related Context Internals ------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines various helper methods and classes used by
-// LLVMContextImpl for creating and managing types.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TYPESCONTEXT_H
-#define LLVM_TYPESCONTEXT_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include <map>
-
-
-//===----------------------------------------------------------------------===//
-// Derived Type Factory Functions
-//===----------------------------------------------------------------------===//
-namespace llvm {
-
-/// getSubElementHash - Generate a hash value for all of the subtypes of this
-/// type. The hash value is guaranteed to be zero if any of the subtypes are
-/// an opaque type. Otherwise we try to mix them in as well as possible, but do
-/// not look at the subtypes' subtypes.
-static unsigned getSubElementHash(const Type *Ty) {
- unsigned HashVal = 0;
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I) {
- HashVal *= 32;
- const Type *SubTy = I->get();
- HashVal += SubTy->getTypeID();
- switch (SubTy->getTypeID()) {
- default: break;
- case Type::OpaqueTyID: return 0; // Opaque -> hash = 0 no matter what.
- case Type::IntegerTyID:
- HashVal ^= (cast<IntegerType>(SubTy)->getBitWidth() << 3);
- break;
- case Type::FunctionTyID:
- HashVal ^= cast<FunctionType>(SubTy)->getNumParams()*2 +
- cast<FunctionType>(SubTy)->isVarArg();
- break;
- case Type::ArrayTyID:
- HashVal ^= cast<ArrayType>(SubTy)->getNumElements();
- break;
- case Type::VectorTyID:
- HashVal ^= cast<VectorType>(SubTy)->getNumElements();
- break;
- case Type::StructTyID:
- HashVal ^= cast<StructType>(SubTy)->getNumElements();
- break;
- case Type::PointerTyID:
- HashVal ^= cast<PointerType>(SubTy)->getAddressSpace();
- break;
- }
- }
- return HashVal ? HashVal : 1; // Do not return zero unless opaque subty.
-}
-
-//===----------------------------------------------------------------------===//
-// Integer Type Factory...
-//
-class IntegerValType {
- uint32_t bits;
-public:
- IntegerValType(uint32_t numbits) : bits(numbits) {}
-
- static IntegerValType get(const IntegerType *Ty) {
- return IntegerValType(Ty->getBitWidth());
- }
-
- static unsigned hashTypeStructure(const IntegerType *Ty) {
- return (unsigned)Ty->getBitWidth();
- }
-
- inline bool operator<(const IntegerValType &IVT) const {
- return bits < IVT.bits;
- }
-};
-
-// PointerValType - Define a class to hold the key that goes into the TypeMap
-//
-class PointerValType {
- const Type *ValTy;
- unsigned AddressSpace;
-public:
- PointerValType(const Type *val, unsigned as) : ValTy(val), AddressSpace(as) {}
-
- static PointerValType get(const PointerType *PT) {
- return PointerValType(PT->getElementType(), PT->getAddressSpace());
- }
-
- static unsigned hashTypeStructure(const PointerType *PT) {
- return getSubElementHash(PT);
- }
-
- bool operator<(const PointerValType &MTV) const {
- if (AddressSpace < MTV.AddressSpace) return true;
- return AddressSpace == MTV.AddressSpace && ValTy < MTV.ValTy;
- }
-};
-
-//===----------------------------------------------------------------------===//
-// Array Type Factory...
-//
-class ArrayValType {
- const Type *ValTy;
- uint64_t Size;
-public:
- ArrayValType(const Type *val, uint64_t sz) : ValTy(val), Size(sz) {}
-
- static ArrayValType get(const ArrayType *AT) {
- return ArrayValType(AT->getElementType(), AT->getNumElements());
- }
-
- static unsigned hashTypeStructure(const ArrayType *AT) {
- return (unsigned)AT->getNumElements();
- }
-
- inline bool operator<(const ArrayValType &MTV) const {
- if (Size < MTV.Size) return true;
- return Size == MTV.Size && ValTy < MTV.ValTy;
- }
-};
-
-//===----------------------------------------------------------------------===//
-// Vector Type Factory...
-//
-class VectorValType {
- const Type *ValTy;
- unsigned Size;
-public:
- VectorValType(const Type *val, int sz) : ValTy(val), Size(sz) {}
-
- static VectorValType get(const VectorType *PT) {
- return VectorValType(PT->getElementType(), PT->getNumElements());
- }
-
- static unsigned hashTypeStructure(const VectorType *PT) {
- return PT->getNumElements();
- }
-
- inline bool operator<(const VectorValType &MTV) const {
- if (Size < MTV.Size) return true;
- return Size == MTV.Size && ValTy < MTV.ValTy;
- }
-};
-
-// StructValType - Define a class to hold the key that goes into the TypeMap
-//
-class StructValType {
- std::vector<const Type*> ElTypes;
- bool packed;
-public:
- StructValType(ArrayRef<const Type*> args, bool isPacked)
- : ElTypes(args.vec()), packed(isPacked) {}
-
- static StructValType get(const StructType *ST) {
- std::vector<const Type *> ElTypes;
- ElTypes.reserve(ST->getNumElements());
- for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
- ElTypes.push_back(ST->getElementType(i));
-
- return StructValType(ElTypes, ST->isPacked());
- }
-
- static unsigned hashTypeStructure(const StructType *ST) {
- return ST->getNumElements();
- }
-
- inline bool operator<(const StructValType &STV) const {
- if (ElTypes < STV.ElTypes) return true;
- else if (ElTypes > STV.ElTypes) return false;
- else return (int)packed < (int)STV.packed;
- }
-};
-
-// FunctionValType - Define a class to hold the key that goes into the TypeMap
-//
-class FunctionValType {
- const Type *RetTy;
- std::vector<const Type*> ArgTypes;
- bool isVarArg;
-public:
- FunctionValType(const Type *ret, ArrayRef<const Type*> args, bool isVA)
- : RetTy(ret), ArgTypes(args.vec()), isVarArg(isVA) {}
-
- static FunctionValType get(const FunctionType *FT);
-
- static unsigned hashTypeStructure(const FunctionType *FT) {
- unsigned Result = FT->getNumParams()*2 + FT->isVarArg();
- return Result;
- }
-
- inline bool operator<(const FunctionValType &MTV) const {
- if (RetTy < MTV.RetTy) return true;
- if (RetTy > MTV.RetTy) return false;
- if (isVarArg < MTV.isVarArg) return true;
- if (isVarArg > MTV.isVarArg) return false;
- if (ArgTypes < MTV.ArgTypes) return true;
- if (ArgTypes > MTV.ArgTypes) return false;
- return false;
- }
-};
-
-class TypeMapBase {
-protected:
- /// TypesByHash - Keep track of types by their structure hash value. Note
- /// that we only keep track of types that have cycles through themselves in
- /// this map.
- ///
- std::multimap<unsigned, PATypeHolder> TypesByHash;
-
- ~TypeMapBase() {
- // PATypeHolder won't destroy non-abstract types.
- // We can't destroy them by simply iterating, because
- // they may contain references to each other.
- for (std::multimap<unsigned, PATypeHolder>::iterator I
- = TypesByHash.begin(), E = TypesByHash.end(); I != E; ++I) {
- Type *Ty = const_cast<Type*>(I->second.Ty);
- I->second.destroy();
- // We can't invoke destroy or delete, because the type may
- // contain references to already freed types.
- // So we have to destruct the object the ugly way.
- if (Ty) {
- Ty->AbstractTypeUsers.clear();
- static_cast<const Type*>(Ty)->Type::~Type();
- operator delete(Ty);
- }
- }
- }
-
-public:
- void RemoveFromTypesByHash(unsigned Hash, const Type *Ty) {
- std::multimap<unsigned, PATypeHolder>::iterator I =
- TypesByHash.lower_bound(Hash);
- for (; I != TypesByHash.end() && I->first == Hash; ++I) {
- if (I->second == Ty) {
- TypesByHash.erase(I);
- return;
- }
- }
-
- // This must be due to an opaque type that was resolved. Switch down to hash
- // code of zero.
- assert(Hash && "Didn't find type entry!");
- RemoveFromTypesByHash(0, Ty);
- }
-
- /// TypeBecameConcrete - When Ty gets a notification that TheType just became
- /// concrete, drop uses and make Ty non-abstract if we should.
- void TypeBecameConcrete(DerivedType *Ty, const DerivedType *TheType) {
- // If the element just became concrete, remove 'ty' from the abstract
- // type user list for the type. Do this for as many times as Ty uses
- // OldType.
- for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end();
- I != E; ++I)
- if (I->get() == TheType)
- TheType->removeAbstractTypeUser(Ty);
-
- // If the type is currently thought to be abstract, rescan all of our
- // subtypes to see if the type has just become concrete! Note that this
- // may send out notifications to AbstractTypeUsers that types become
- // concrete.
- if (Ty->isAbstract())
- Ty->PromoteAbstractToConcrete();
- }
-};
-
-// TypeMap - Make sure that only one instance of a particular type may be
-// created on any given run of the compiler... note that this involves updating
-// our map if an abstract type gets refined somehow.
-//
-template<class ValType, class TypeClass>
-class TypeMap : public TypeMapBase {
- std::map<ValType, PATypeHolder> Map;
-public:
- typedef typename std::map<ValType, PATypeHolder>::iterator iterator;
-
- inline TypeClass *get(const ValType &V) {
- iterator I = Map.find(V);
- return I != Map.end() ? cast<TypeClass>((Type*)I->second.get()) : 0;
- }
-
- inline void add(const ValType &V, TypeClass *Ty) {
- Map.insert(std::make_pair(V, Ty));
-
- // If this type has a cycle, remember it.
- TypesByHash.insert(std::make_pair(ValType::hashTypeStructure(Ty), Ty));
- print("add");
- }
-
- /// RefineAbstractType - This method is called after we have merged a type
- /// with another one. We must now either merge the type away with
- /// some other type or reinstall it in the map with its new configuration.
- void RefineAbstractType(TypeClass *Ty, const DerivedType *OldType,
- const Type *NewType) {
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "RefineAbstractType(" << (void*)OldType << "[" << *OldType
- << "], " << (void*)NewType << " [" << *NewType << "])\n");
-#endif
-
- // Otherwise, we are changing one subelement type into another. Clearly the
- // OldType must have been abstract, making us abstract.
- assert(Ty->isAbstract() && "Refining a non-abstract type!");
- assert(OldType != NewType);
-
- // Make a temporary type holder for the type so that it doesn't disappear on
- // us when we erase the entry from the map.
- PATypeHolder TyHolder = Ty;
-
- // The old record is now out-of-date, because one of the children has been
- // updated. Remove the obsolete entry from the map.
- unsigned NumErased = Map.erase(ValType::get(Ty));
- assert(NumErased && "Element not found!"); (void)NumErased;
-
- // Remember the structural hash for the type before we start hacking on it,
- // in case we need it later.
- unsigned OldTypeHash = ValType::hashTypeStructure(Ty);
-
- // Find the type element we are refining... and change it now!
- for (unsigned i = 0, e = Ty->getNumContainedTypes(); i != e; ++i)
- if (Ty->ContainedTys[i] == OldType)
- Ty->ContainedTys[i] = NewType;
- unsigned NewTypeHash = ValType::hashTypeStructure(Ty);
-
- // If there are no cycles going through this node, we can do a simple,
- // efficient lookup in the map, instead of an inefficient nasty linear
- // lookup.
- if (!TypeHasCycleThroughItself(Ty)) {
- typename std::map<ValType, PATypeHolder>::iterator I;
- bool Inserted;
-
- tie(I, Inserted) = Map.insert(std::make_pair(ValType::get(Ty), Ty));
- if (!Inserted) {
- // Refined to a different type altogether?
- RemoveFromTypesByHash(OldTypeHash, Ty);
-
- // We already have this type in the table. Get rid of the newly refined
- // type.
- TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
- Ty->refineAbstractTypeTo(NewTy);
- return;
- }
- } else {
- // Now we check to see if there is an existing entry in the table which is
- // structurally identical to the newly refined type. If so, this type
- // gets refined to the pre-existing type.
- //
- std::multimap<unsigned, PATypeHolder>::iterator I, E, Entry;
- tie(I, E) = TypesByHash.equal_range(NewTypeHash);
- Entry = E;
- for (; I != E; ++I) {
- if (I->second == Ty) {
- // Remember the position of the old type if we see it in our scan.
- Entry = I;
- continue;
- }
-
- if (!TypesEqual(Ty, I->second))
- continue;
-
- TypeClass *NewTy = cast<TypeClass>((Type*)I->second.get());
-
- // Remove the old entry from TypesByHash. If the hash values differ
- // now, remove it from the old place. Otherwise, continue scanning
- // within this hashcode to reduce work.
- if (NewTypeHash != OldTypeHash) {
- RemoveFromTypesByHash(OldTypeHash, Ty);
- } else {
- if (Entry == E) {
- // Find the location of Ty in the TypesByHash structure if we
- // haven't seen it already.
- while (I->second != Ty) {
- ++I;
- assert(I != E && "Structure doesn't contain type??");
- }
- Entry = I;
- }
- TypesByHash.erase(Entry);
- }
- Ty->refineAbstractTypeTo(NewTy);
- return;
- }
-
- // If there is no existing type of the same structure, we reinsert an
- // updated record into the map.
- Map.insert(std::make_pair(ValType::get(Ty), Ty));
- }
-
- // If the hash codes differ, update TypesByHash
- if (NewTypeHash != OldTypeHash) {
- RemoveFromTypesByHash(OldTypeHash, Ty);
- TypesByHash.insert(std::make_pair(NewTypeHash, Ty));
- }
-
- // If the type is currently thought to be abstract, rescan all of our
- // subtypes to see if the type has just become concrete! Note that this
- // may send out notifications to AbstractTypeUsers that types become
- // concrete.
- if (Ty->isAbstract())
- Ty->PromoteAbstractToConcrete();
- }
-
- void print(const char *Arg) const {
-#ifdef DEBUG_MERGE_TYPES
- DEBUG(dbgs() << "TypeMap<>::" << Arg << " table contents:\n");
- unsigned i = 0;
- for (typename std::map<ValType, PATypeHolder>::const_iterator I
- = Map.begin(), E = Map.end(); I != E; ++I)
- DEBUG(dbgs() << " " << (++i) << ". " << (void*)I->second.get() << " "
- << *I->second.get() << "\n");
-#endif
- }
-
- void dump() const { print("dump output"); }
-};
-}
-
-#endif
diff --git a/lib/VMCore/Use.cpp b/lib/VMCore/Use.cpp
index 2258b8d..359a151 100644
--- a/lib/VMCore/Use.cpp
+++ b/lib/VMCore/Use.cpp
@@ -135,11 +135,9 @@ void Use::zap(Use *Start, const Use *Stop, bool del) {
User *Use::getUser() const {
const Use *End = getImpliedUser();
- const PointerIntPair<User*, 1, unsigned>&
- ref(static_cast<const AugmentedUse*>(End - 1)->ref);
- User *She = ref.getPointer();
- return ref.getInt()
- ? She
+ const UserRef *ref = reinterpret_cast<const UserRef*>(End);
+ return ref->getInt()
+ ? ref->getPointer()
: (User*)End;
}
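
(A paraphrase of the decode above, assuming Use::UserRef is the PointerIntPair<User*, 1, unsigned> typedef this patch introduces; decodeUser is an invented name.)

#include "llvm/Use.h"
#include "llvm/ADT/PointerIntPair.h"
using namespace llvm;

// The word after the Use array is either a tagged pointer to the User
// (hung-off uses, low bit set) or the User object itself (co-allocated).
static User *decodeUser(const Use *End) {
  typedef PointerIntPair<User*, 1, unsigned> Ref; // stand-in for Use::UserRef
  const Ref *R = reinterpret_cast<const Ref*>(End);
  return R->getInt() ? R->getPointer() : (User*)End;
}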
diff --git a/lib/VMCore/User.cpp b/lib/VMCore/User.cpp
index 2f4587d..f01fa34 100644
--- a/lib/VMCore/User.cpp
+++ b/lib/VMCore/User.cpp
@@ -40,14 +40,12 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
//===----------------------------------------------------------------------===//
Use *User::allocHungoffUses(unsigned N) const {
- Use *Begin = static_cast<Use*>(::operator new(sizeof(Use) * N
- + sizeof(AugmentedUse)
- - sizeof(Use)));
+ // Allocate the array of Uses, followed by a pointer (with bottom bit set) to
+ // the User.
+ size_t size = N * sizeof(Use) + sizeof(Use::UserRef);
+ Use *Begin = static_cast<Use*>(::operator new(size));
Use *End = Begin + N;
- PointerIntPair<User*, 1, unsigned>&
- ref(static_cast<AugmentedUse&>(End[-1]).ref);
- ref.setPointer(const_cast<User*>(this));
- ref.setInt(1);
+ (void) new(End) Use::UserRef(const_cast<User*>(this), 1);
return Use::initTags(Begin, End);
}
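
(The resulting allocation layout, sketched under the same Use::UserRef assumption; hungoffAllocSize is an invented helper.)

#include "llvm/Use.h"
#include "llvm/ADT/PointerIntPair.h"
#include <cstddef>
using namespace llvm;

// N Uses laid out first, then one tagged back-pointer to the owning User.
static size_t hungoffAllocSize(unsigned N) {
  typedef PointerIntPair<User*, 1, unsigned> Ref; // stand-in for Use::UserRef
  return N * sizeof(Use) + sizeof(Ref);
}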
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 29f6a80..f1815e3 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -35,22 +35,21 @@ using namespace llvm;
// Value Class
//===----------------------------------------------------------------------===//
-static inline const Type *checkType(const Type *Ty) {
+static inline Type *checkType(const Type *Ty) {
assert(Ty && "Value defined with a null type: Error!");
- return Ty;
+ return const_cast<Type*>(Ty);
}
Value::Value(const Type *ty, unsigned scid)
: SubclassID(scid), HasValueHandle(0),
- SubclassOptionalData(0), SubclassData(0), VTy(checkType(ty)),
+ SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)),
UseList(0), Name(0) {
+ // FIXME: Why isn't this in the subclass gunk??
if (isa<CallInst>(this) || isa<InvokeInst>(this))
- assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
- ty->isOpaqueTy() || VTy->isStructTy()) &&
- "invalid CallInst type!");
+ assert((VTy->isFirstClassType() || VTy->isVoidTy() || VTy->isStructTy()) &&
+ "invalid CallInst type!");
else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
- assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
- ty->isOpaqueTy()) &&
+ assert((VTy->isFirstClassType() || VTy->isVoidTy()) &&
"Cannot create non-first-class values except for constants!");
}
@@ -281,17 +280,16 @@ void Value::takeName(Value *V) {
}
-// uncheckedReplaceAllUsesWith - This is exactly the same as replaceAllUsesWith,
-// except that it doesn't have all of the asserts. The asserts fail because we
-// are half-way done resolving types, which causes some types to exist as two
-// different Type*'s at the same time. This is a sledgehammer to work around
-// this problem.
-//
-void Value::uncheckedReplaceAllUsesWith(Value *New) {
+void Value::replaceAllUsesWith(Value *New) {
+ assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
+ assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
+ assert(New->getType() == getType() &&
+ "replaceAllUses of value with new value of different type!");
+
// Notify all ValueHandles (if present) that this value is going away.
if (HasValueHandle)
ValueHandleBase::ValueIsRAUWd(this, New);
-
+
while (!use_empty()) {
Use &U = *UseList;
// Must handle Constants specially, we cannot call replaceUsesOfWith on a
@@ -302,18 +300,12 @@ void Value::uncheckedReplaceAllUsesWith(Value *New) {
continue;
}
}
-
+
U.set(New);
}
-}
-
-void Value::replaceAllUsesWith(Value *New) {
- assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
- assert(New != this && "this->replaceAllUsesWith(this) is NOT valid!");
- assert(New->getType() == getType() &&
- "replaceAllUses of value with new value of different type!");
-
- uncheckedReplaceAllUsesWith(New);
+
+ if (BasicBlock *BB = dyn_cast<BasicBlock>(this))
+ BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
}
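
(The common caller pattern, for reference; replaceAndErase is an invented name. The BasicBlock branch above means block RAUW now also rewrites PHI entries in successor blocks.)

#include "llvm/Instructions.h"
using namespace llvm;

// Swap all uses over to a simplified value, then drop the old instruction.
static void replaceAndErase(Instruction *I, Value *Simplified) {
  I->replaceAllUsesWith(Simplified);
  I->eraseFromParent();
}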
Value *Value::stripPointerCasts() {
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 254bf06..f1c9703 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -25,7 +25,7 @@ ValueSymbolTable::~ValueSymbolTable() {
#ifndef NDEBUG // Only do this in -g mode...
for (iterator VI = vmap.begin(), VE = vmap.end(); VI != VE; ++VI)
dbgs() << "Value still in symbol table! Type = '"
- << VI->getValue()->getType()->getDescription() << "' Name = '"
+ << *VI->getValue()->getType() << "' Name = '"
<< VI->getKeyData() << "'\n";
assert(vmap.empty() && "Values remain in symbol table!");
#endif
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index c054ae4..21a1f03 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -133,6 +133,7 @@ std::string EVT::getEVTString() const {
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::Metadata:return "Metadata";
+ case MVT::untyped: return "untyped";
}
}
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 139e035..b146b89 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -49,7 +49,6 @@
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/PassManager.h"
-#include "llvm/TypeSymbolTable.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -109,54 +108,6 @@ INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification",
static char &PreVerifyID = PreVerifier::ID;
namespace {
- class TypeSet : public AbstractTypeUser {
- public:
- TypeSet() {}
-
- /// Insert a type into the set of types.
- bool insert(const Type *Ty) {
- if (!Types.insert(Ty))
- return false;
- if (Ty->isAbstract())
- Ty->addAbstractTypeUser(this);
- return true;
- }
-
- // Remove ourselves as abstract type listeners for any types that remain
- // abstract when the TypeSet is destroyed.
- ~TypeSet() {
- for (SmallSetVector<const Type *, 16>::iterator I = Types.begin(),
- E = Types.end(); I != E; ++I) {
- const Type *Ty = *I;
- if (Ty->isAbstract())
- Ty->removeAbstractTypeUser(this);
- }
- }
-
- // Abstract type user interface.
-
- /// Remove types from the set when refined. Do not insert the type it was
- /// refined to because that type hasn't been verified yet.
- void refineAbstractType(const DerivedType *OldTy, const Type *NewTy) {
- Types.remove(OldTy);
- OldTy->removeAbstractTypeUser(this);
- }
-
- /// Stop listening for changes to a type which is no longer abstract.
- void typeBecameConcrete(const DerivedType *AbsTy) {
- AbsTy->removeAbstractTypeUser(this);
- }
-
- void dump() const {}
-
- private:
- SmallSetVector<const Type *, 16> Types;
-
- // Disallow copying.
- TypeSet(const TypeSet &);
- TypeSet &operator=(const TypeSet &);
- };
-
struct Verifier : public FunctionPass, public InstVisitor<Verifier> {
static char ID; // Pass ID, replacement for typeid
bool Broken; // Is this module found to be broken?
@@ -176,9 +127,6 @@ namespace {
/// an instruction in the same block.
SmallPtrSet<Instruction*, 16> InstsInThisBlock;
- /// Types - keep track of the types that have been checked already.
- TypeSet Types;
-
/// MDNodes - keep track of the metadata nodes that have been checked
/// already.
SmallPtrSet<MDNode *, 32> MDNodes;
@@ -199,7 +147,6 @@ namespace {
bool doInitialization(Module &M) {
Mod = &M;
Context = &M.getContext();
- verifyTypeSymbolTable(M.getTypeSymbolTable());
// If this is a real pass, in a pass manager, we must abort before
// returning back to the pass manager, or else the pass manager may try to
@@ -285,7 +232,6 @@ namespace {
// Verification methods...
- void verifyTypeSymbolTable(TypeSymbolTable &ST);
void visitGlobalValue(GlobalValue &GV);
void visitGlobalVariable(GlobalVariable &GV);
void visitGlobalAlias(GlobalAlias &GA);
@@ -345,7 +291,6 @@ namespace {
bool isReturnValue, const Value *V);
void VerifyFunctionAttrs(const FunctionType *FT, const AttrListPtr &Attrs,
const Value *V);
- void VerifyType(const Type *Ty);
void WriteValue(const Value *V) {
if (!V) return;
@@ -359,8 +304,7 @@ namespace {
void WriteType(const Type *T) {
if (!T) return;
- MessagesStr << ' ';
- WriteTypeSymbolic(MessagesStr, T, Mod);
+ MessagesStr << ' ' << *T;
}
@@ -568,11 +512,6 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) {
}
}
-void Verifier::verifyTypeSymbolTable(TypeSymbolTable &ST) {
- for (TypeSymbolTable::iterator I = ST.begin(), E = ST.end(); I != E; ++I)
- VerifyType(I->second);
-}
-
// VerifyParameterAttrs - Check the given attributes for an argument or return
// value of the specified type. The value V is printed in error messages.
void Verifier::VerifyParameterAttrs(Attributes Attrs, const Type *Ty,
@@ -1139,9 +1078,6 @@ void Verifier::visitPHINode(PHINode &PN) {
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
Assert1(PN.getType() == PN.getIncomingValue(i)->getType(),
"PHI node operands are not the same type as the result!", &PN);
- Assert1(isa<BasicBlock>(PN.getOperand(
- PHINode::getOperandNumForIncomingBlock(i))),
- "PHI node incoming block is not a BasicBlock!", &PN);
}
// All other PHI node constraints are checked in the visitBasicBlock method.
@@ -1195,11 +1131,11 @@ void Verifier::VerifyCallSite(CallSite CS) {
}
// Verify that there's no metadata unless it's a direct call to an intrinsic.
- if (!CS.getCalledFunction() ||
+ if (CS.getCalledFunction() == 0 ||
!CS.getCalledFunction()->getName().startswith("llvm.")) {
for (FunctionType::param_iterator PI = FTy->param_begin(),
PE = FTy->param_end(); PI != PE; ++PI)
- Assert1(!PI->get()->isMetadataTy(),
+ Assert1(!(*PI)->isMetadataTy(),
"Function has metadata parameter but isn't an intrinsic", I);
}
@@ -1382,7 +1318,7 @@ void Verifier::visitAllocaInst(AllocaInst &AI) {
void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
Assert1(ExtractValueInst::getIndexedType(EVI.getAggregateOperand()->getType(),
- EVI.idx_begin(), EVI.idx_end()) ==
+ EVI.getIndices()) ==
EVI.getType(),
"Invalid ExtractValueInst operands!", &EVI);
@@ -1391,7 +1327,7 @@ void Verifier::visitExtractValueInst(ExtractValueInst &EVI) {
void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
Assert1(ExtractValueInst::getIndexedType(IVI.getAggregateOperand()->getType(),
- IVI.idx_begin(), IVI.idx_end()) ==
+ IVI.getIndices()) ==
IVI.getOperand(1)->getType(),
"Invalid InsertValueInst operands!", &IVI);
@@ -1482,8 +1418,10 @@ void Verifier::visitInstruction(Instruction &I) {
// PHI nodes differ from other nodes because they actually "use" the
// value in the predecessor basic blocks they correspond to.
BasicBlock *UseBlock = BB;
- if (isa<PHINode>(I))
- UseBlock = dyn_cast<BasicBlock>(I.getOperand(i+1));
+ if (PHINode *PN = dyn_cast<PHINode>(&I)) {
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ UseBlock = PN->getIncomingBlock(j);
+ }
Assert2(UseBlock, "Invoke operand is PHI node with bad incoming-BB",
Op, &I);
@@ -1515,10 +1453,11 @@ void Verifier::visitInstruction(Instruction &I) {
return;
}
}
- } else if (isa<PHINode>(I)) {
+ } else if (PHINode *PN = dyn_cast<PHINode>(&I)) {
// PHI nodes are more difficult than other nodes because they actually
// "use" the value in the predecessor basic blocks they correspond to.
- BasicBlock *PredBB = dyn_cast<BasicBlock>(I.getOperand(i+1));
+ unsigned j = PHINode::getIncomingValueNumForOperand(i);
+ BasicBlock *PredBB = PN->getIncomingBlock(j);
Assert2(PredBB && (DT->dominates(OpBlock, PredBB) ||
!DT->isReachableFromEntry(PredBB)),
"Instruction does not dominate all uses!", Op, &I);
@@ -1542,69 +1481,6 @@ void Verifier::visitInstruction(Instruction &I) {
}
}
InstsInThisBlock.insert(&I);
-
- VerifyType(I.getType());
-}
-
-/// VerifyType - Verify that a type is well formed.
-///
-void Verifier::VerifyType(const Type *Ty) {
- if (!Types.insert(Ty)) return;
-
- Assert1(Context == &Ty->getContext(),
- "Type context does not match Module context!", Ty);
-
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: {
- const FunctionType *FTy = cast<FunctionType>(Ty);
-
- const Type *RetTy = FTy->getReturnType();
- Assert2(FunctionType::isValidReturnType(RetTy),
- "Function type with invalid return type", RetTy, FTy);
- VerifyType(RetTy);
-
- for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) {
- const Type *ElTy = FTy->getParamType(i);
- Assert2(FunctionType::isValidArgumentType(ElTy),
- "Function type with invalid parameter type", ElTy, FTy);
- VerifyType(ElTy);
- }
- break;
- }
- case Type::StructTyID: {
- const StructType *STy = cast<StructType>(Ty);
- for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
- const Type *ElTy = STy->getElementType(i);
- Assert2(StructType::isValidElementType(ElTy),
- "Structure type with invalid element type", ElTy, STy);
- VerifyType(ElTy);
- }
- break;
- }
- case Type::ArrayTyID: {
- const ArrayType *ATy = cast<ArrayType>(Ty);
- Assert1(ArrayType::isValidElementType(ATy->getElementType()),
- "Array type with invalid element type", ATy);
- VerifyType(ATy->getElementType());
- break;
- }
- case Type::PointerTyID: {
- const PointerType *PTy = cast<PointerType>(Ty);
- Assert1(PointerType::isValidElementType(PTy->getElementType()),
- "Pointer type with invalid element type", PTy);
- VerifyType(PTy->getElementType());
- break;
- }
- case Type::VectorTyID: {
- const VectorType *VTy = cast<VectorType>(Ty);
- Assert1(VectorType::isValidElementType(VTy->getElementType()),
- "Vector type with invalid element type", VTy);
- VerifyType(VTy->getElementType());
- break;
- }
- default:
- break;
- }
}
// Flags used by TableGen to mark intrinsic parameters with the
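
(The PHI changes above all follow one pattern: incoming blocks are no longer fished out of the operand list with a cast, but read through the accessors. A hedged sketch; visitIncomingEdges is an invented name.)

#include "llvm/Instructions.h"
using namespace llvm;

static void visitIncomingEdges(PHINode *PN) {
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = PN->getIncomingBlock(i); // not a cast of operand i+1
    Value *V = PN->getIncomingValue(i);
    (void)Pred; (void)V;
  }
}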
diff --git a/runtime/libprofile/GCDAProfiling.c b/runtime/libprofile/GCDAProfiling.c
index 09a1aec..e066b22 100644
--- a/runtime/libprofile/GCDAProfiling.c
+++ b/runtime/libprofile/GCDAProfiling.c
@@ -89,7 +89,7 @@ static void recursive_mkdir(const char *filename) {
pathname = malloc(i + 1);
strncpy(pathname, filename, i);
pathname[i] = '\0';
-#ifdef _MSC_VER
+#ifdef _WIN32
_mkdir(pathname);
#else
mkdir(pathname, 0750); /* some of these will fail, ignore it. */
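
(Why the guard changed: _WIN32 is defined by every Windows toolchain, including MinGW and clang targeting Windows, while _MSC_VER is MSVC-only, so MinGW builds previously fell through to the POSIX branch. A standalone sketch of the pattern; make_dir is an invented wrapper.)

#ifdef _WIN32
#include <direct.h>   /* _mkdir */
#else
#include <sys/stat.h> /* mkdir */
#endif

static void make_dir(const char *path) {
#ifdef _WIN32
  _mkdir(path);
#else
  mkdir(path, 0750); /* some components may already exist; that's fine */
#endif
}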
diff --git a/runtime/libprofile/Makefile b/runtime/libprofile/Makefile
index eced5e5..cf31e46 100644
--- a/runtime/libprofile/Makefile
+++ b/runtime/libprofile/Makefile
@@ -20,3 +20,28 @@ EXTRA_DIST = libprofile.exports
EXPORTED_SYMBOL_FILE = $(PROJ_SRC_DIR)/libprofile.exports
include $(LEVEL)/Makefile.common
+
+ifeq ($(HOST_OS),Darwin)
+ # Special hack to allow libprofile_rt to have an offset version number.
+ PROFILE_RT_LIBRARY_VERSION := $(LLVM_SUBMIT_VERSION)
+
+ # Set dylib internal version number to llvmCore submission number.
+ ifdef LLVM_SUBMIT_VERSION
+ LLVMLibsOptions := $(LLVMLibsOptions) -Wl,-current_version \
+ -Wl,$(PROFILE_RT_LIBRARY_VERSION).$(LLVM_SUBMIT_SUBVERSION) \
+ -Wl,-compatibility_version -Wl,1
+ endif
+ # Extra options to override libtool defaults.
+ LLVMLibsOptions := $(LLVMLibsOptions) \
+ -Wl,-dead_strip \
+ -Wl,-seg1addr -Wl,0xE0000000
+
+ # Mac OS X 10.4 and earlier tools do not allow a second -install_name on
+ # command line.
+ DARWIN_VERS := $(shell echo $(TARGET_TRIPLE) | sed 's/.*darwin\([0-9]*\).*/\1/')
+ ifneq ($(DARWIN_VERS),8)
+ LLVMLibsOptions := $(LLVMLibsOptions) \
+ -Wl,-install_name \
+ -Wl,"@executable_path/../lib/lib$(LIBRARYNAME)$(SHLIBEXT)"
+ endif
+endif
diff --git a/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll b/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
deleted file mode 100644
index 50fb222..0000000
--- a/test/Analysis/BasicAA/2004-12-08-BasicAACrash.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -basicaa -licm
-
-%"java/lang/Object" = type { %struct.llvm_java_object_base }
-%"java/lang/StringBuffer" = type { "java/lang/Object", i32, { "java/lang/Object", i32, [0 x i8] }*, i1 }
-%struct.llvm_java_object_base = type opaque
-
-define void @"java/lang/StringBuffer/setLength(I)V"(%struct.llvm_java_object_base*) {
-bc0:
- br i1 false, label %bc40, label %bc65
-
-bc65: ; preds = %bc0, %bc40
- ret void
-
-bc40: ; preds = %bc0, %bc40
- %tmp75 = bitcast %struct.llvm_java_object_base* %0 to %"java/lang/StringBuffer"* ; <"java/lang/StringBuffer"*> [#uses=1]
- %tmp76 = getelementptr %"java/lang/StringBuffer"* %tmp75, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 0, i32* %tmp76
- %tmp381 = bitcast %struct.llvm_java_object_base* %0 to %"java/lang/StringBuffer"* ; <"java/lang/StringBuffer"*> [#uses=1]
- %tmp392 = getelementptr %"java/lang/StringBuffer"* %tmp381, i32 0, i32 1 ; <i32*> [#uses=1]
- %tmp403 = load i32* %tmp392 ; <i32> [#uses=0]
- br i1 false, label %bc40, label %bc65
-}
diff --git a/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll b/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
deleted file mode 100644
index cc84314..0000000
--- a/test/Analysis/BasicAA/2004-12-08-BasicAACrash2.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -basicaa -dse
-
-%"java/lang/Object" = type { %struct.llvm_java_object_base }
-%"java/lang/StringBuffer" = type { "java/lang/Object", i32, { "java/lang/Object", i32, [0 x i8] }*, i1 }
-%struct.llvm_java_object_base = type opaque
-
-define void @"java/lang/StringBuffer/ensureCapacity_unsynchronized(I)V"() {
-bc0:
- %tmp = getelementptr %"java/lang/StringBuffer"* null, i32 0, i32 3 ; <i1*> [#uses=1]
- br i1 false, label %bc16, label %bc7
-
-bc16: ; preds = %bc0
- %tmp91 = getelementptr %"java/lang/StringBuffer"* null, i32 0, i32 2 ; <{ "java/lang/Object", i32, [0 x i8] }**> [#uses=1]
- store { %"java/lang/Object", i32, [0 x i8] }* null, { %"java/lang/Object", i32, [0 x i8] }** %tmp91
- store i1 false, i1* %tmp
- ret void
-
-bc7: ; preds = %bc0
- ret void
-}
diff --git a/test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll b/test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll
deleted file mode 100644
index 4564263..0000000
--- a/test/Analysis/BasicAA/2005-03-09-BrokenBasicAA.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: opt < %s -basicaa -gvn -instcombine |\
-; RUN: llvm-dis | grep {load i32\\* %A}
-
-declare double* @useit(i32*)
-
-define i32 @foo(i32 %Amt) {
- %A = malloc i32, i32 %Amt
- %P = call double* @useit(i32* %A)
-
- %X = load i32* %A
- store double 0.0, double* %P
- %Y = load i32* %A
- %Z = sub i32 %X, %Y
- ret i32 %Z
-}
diff --git a/test/Analysis/BasicAA/args-rets-allocas-loads.ll b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
index c3c4afc..c7b43ec 100644
--- a/test/Analysis/BasicAA/args-rets-allocas-loads.ll
+++ b/test/Analysis/BasicAA/args-rets-allocas-loads.ll
@@ -12,9 +12,6 @@ define void @caller_a(double* %arg_a0,
double* noalias %noalias_arg_a1,
double** %indirect_a0,
double** %indirect_a1) {
- %loaded_a0 = load double** %indirect_a0
- %loaded_a1 = load double** %indirect_a1
-
%escape_alloca_a0 = alloca double
%escape_alloca_a1 = alloca double
%noescape_alloca_a0 = alloca double
@@ -25,6 +22,9 @@ define void @caller_a(double* %arg_a0,
%noalias_ret_a0 = call double* @noalias_returner()
%noalias_ret_a1 = call double* @noalias_returner()
+ %loaded_a0 = load double** %indirect_a0
+ %loaded_a1 = load double** %indirect_a1
+
call void @callee(double* %escape_alloca_a0)
call void @callee(double* %escape_alloca_a1)
call void @nocap_callee(double* %noescape_alloca_a0)
@@ -48,263 +48,264 @@ define void @caller_a(double* %arg_a0,
}
; CHECK: Function: caller_a: 16 pointers, 8 call sites
-; CHECK: MayAlias: double* %arg_a0, double* %arg_a1
-; CHECK: NoAlias: double* %arg_a0, double* %noalias_arg_a0
-; CHECK: NoAlias: double* %arg_a1, double* %noalias_arg_a0
-; CHECK: NoAlias: double* %arg_a0, double* %noalias_arg_a1
-; CHECK: NoAlias: double* %arg_a1, double* %noalias_arg_a1
-; CHECK: NoAlias: double* %noalias_arg_a0, double* %noalias_arg_a1
-; CHECK: MayAlias: double* %arg_a0, double** %indirect_a0
-; CHECK: MayAlias: double* %arg_a1, double** %indirect_a0
-; CHECK: NoAlias: double* %noalias_arg_a0, double** %indirect_a0
-; CHECK: NoAlias: double* %noalias_arg_a1, double** %indirect_a0
-; CHECK: MayAlias: double* %arg_a0, double** %indirect_a1
-; CHECK: MayAlias: double* %arg_a1, double** %indirect_a1
-; CHECK: NoAlias: double* %noalias_arg_a0, double** %indirect_a1
-; CHECK: NoAlias: double* %noalias_arg_a1, double** %indirect_a1
-; CHECK: MayAlias: double** %indirect_a0, double** %indirect_a1
-; CHECK: MayAlias: double* %arg_a0, double* %loaded_a0
-; CHECK: MayAlias: double* %arg_a1, double* %loaded_a0
-; CHECK: NoAlias: double* %loaded_a0, double* %noalias_arg_a0
-; CHECK: NoAlias: double* %loaded_a0, double* %noalias_arg_a1
-; CHECK: MayAlias: double* %loaded_a0, double** %indirect_a0
-; CHECK: MayAlias: double* %loaded_a0, double** %indirect_a1
-; CHECK: MayAlias: double* %arg_a0, double* %loaded_a1
-; CHECK: MayAlias: double* %arg_a1, double* %loaded_a1
-; CHECK: NoAlias: double* %loaded_a1, double* %noalias_arg_a0
-; CHECK: NoAlias: double* %loaded_a1, double* %noalias_arg_a1
-; CHECK: MayAlias: double* %loaded_a1, double** %indirect_a0
-; CHECK: MayAlias: double* %loaded_a1, double** %indirect_a1
-; CHECK: MayAlias: double* %loaded_a0, double* %loaded_a1
-; CHECK: NoAlias: double* %arg_a0, double* %escape_alloca_a0
-; CHECK: NoAlias: double* %arg_a1, double* %escape_alloca_a0
-; CHECK: NoAlias: double* %escape_alloca_a0, double* %noalias_arg_a0
-; CHECK: NoAlias: double* %escape_alloca_a0, double* %noalias_arg_a1
-; CHECK: NoAlias: double* %escape_alloca_a0, double** %indirect_a0
-; CHECK: NoAlias: double* %escape_alloca_a0, double** %indirect_a1
-; CHECK: MayAlias: double* %escape_alloca_a0, double* %loaded_a0
-; CHECK: MayAlias: double* %escape_alloca_a0, double* %loaded_a1
-; CHECK: NoAlias: double* %arg_a0, double* %escape_alloca_a1
-; CHECK: NoAlias: double* %arg_a1, double* %escape_alloca_a1
-; CHECK: NoAlias: double* %escape_alloca_a1, double* %noalias_arg_a0
-; CHECK: NoAlias: double* %escape_alloca_a1, double* %noalias_arg_a1
-; CHECK: NoAlias: double* %escape_alloca_a1, double** %indirect_a0
-; CHECK: NoAlias: double* %escape_alloca_a1, double** %indirect_a1
-; CHECK: MayAlias: double* %escape_alloca_a1, double* %loaded_a0
-; CHECK: MayAlias: double* %escape_alloca_a1, double* %loaded_a1
-; CHECK: NoAlias: double* %escape_alloca_a0, double* %escape_alloca_a1
-; CHECK: NoAlias: double* %arg_a0, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %arg_a1, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %noalias_arg_a0, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %noalias_arg_a1, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %noescape_alloca_a0, double** %indirect_a0
-; CHECK: NoAlias: double* %noescape_alloca_a0, double** %indirect_a1
-; CHECK: NoAlias: double* %loaded_a0, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %loaded_a1, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %escape_alloca_a0, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %escape_alloca_a1, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %arg_a0, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %arg_a1, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %noalias_arg_a0, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %noalias_arg_a1, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %noescape_alloca_a1, double** %indirect_a0
-; CHECK: NoAlias: double* %noescape_alloca_a1, double** %indirect_a1
-; CHECK: NoAlias: double* %loaded_a0, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %loaded_a1, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %escape_alloca_a0, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %escape_alloca_a1, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %noescape_alloca_a0, double* %noescape_alloca_a1
-; CHECK: MayAlias: double* %arg_a0, double* %normal_ret_a0
-; CHECK: MayAlias: double* %arg_a1, double* %normal_ret_a0
-; CHECK: NoAlias: double* %noalias_arg_a0, double* %normal_ret_a0
-; CHECK: NoAlias: double* %noalias_arg_a1, double* %normal_ret_a0
-; CHECK: MayAlias: double* %normal_ret_a0, double** %indirect_a0
-; CHECK: MayAlias: double* %normal_ret_a0, double** %indirect_a1
-; CHECK: MayAlias: double* %loaded_a0, double* %normal_ret_a0
-; CHECK: MayAlias: double* %loaded_a1, double* %normal_ret_a0
-; CHECK: MayAlias: double* %escape_alloca_a0, double* %normal_ret_a0
-; CHECK: MayAlias: double* %escape_alloca_a1, double* %normal_ret_a0
-; CHECK: NoAlias: double* %noescape_alloca_a0, double* %normal_ret_a0
-; CHECK: NoAlias: double* %noescape_alloca_a1, double* %normal_ret_a0
-; CHECK: MayAlias: double* %arg_a0, double* %normal_ret_a1
-; CHECK: MayAlias: double* %arg_a1, double* %normal_ret_a1
-; CHECK: NoAlias: double* %noalias_arg_a0, double* %normal_ret_a1
-; CHECK: NoAlias: double* %noalias_arg_a1, double* %normal_ret_a1
-; CHECK: MayAlias: double* %normal_ret_a1, double** %indirect_a0
-; CHECK: MayAlias: double* %normal_ret_a1, double** %indirect_a1
-; CHECK: MayAlias: double* %loaded_a0, double* %normal_ret_a1
-; CHECK: MayAlias: double* %loaded_a1, double* %normal_ret_a1
-; CHECK: MayAlias: double* %escape_alloca_a0, double* %normal_ret_a1
-; CHECK: MayAlias: double* %escape_alloca_a1, double* %normal_ret_a1
-; CHECK: NoAlias: double* %noescape_alloca_a0, double* %normal_ret_a1
-; CHECK: NoAlias: double* %noescape_alloca_a1, double* %normal_ret_a1
-; CHECK: MayAlias: double* %normal_ret_a0, double* %normal_ret_a1
-; CHECK: NoAlias: double* %arg_a0, double* %noalias_ret_a0
-; CHECK: NoAlias: double* %arg_a1, double* %noalias_ret_a0
-; CHECK: NoAlias: double* %noalias_arg_a0, double* %noalias_ret_a0
-; CHECK: NoAlias: double* %noalias_arg_a1, double* %noalias_ret_a0
-; CHECK: NoAlias: double* %noalias_ret_a0, double** %indirect_a0
-; CHECK: NoAlias: double* %noalias_ret_a0, double** %indirect_a1
-; CHECK: NoAlias: double* %loaded_a0, double* %noalias_ret_a0
-; CHECK: NoAlias: double* %loaded_a1, double* %noalias_ret_a0
-; CHECK: NoAlias: double* %escape_alloca_a0, double* %noalias_ret_a0
-; CHECK: NoAlias: double* %escape_alloca_a1, double* %noalias_ret_a0
-; CHECK: NoAlias: double* %noalias_ret_a0, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %noalias_ret_a0, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %noalias_ret_a0, double* %normal_ret_a0
-; CHECK: NoAlias: double* %noalias_ret_a0, double* %normal_ret_a1
-; CHECK: NoAlias: double* %arg_a0, double* %noalias_ret_a1
-; CHECK: NoAlias: double* %arg_a1, double* %noalias_ret_a1
-; CHECK: NoAlias: double* %noalias_arg_a0, double* %noalias_ret_a1
-; CHECK: NoAlias: double* %noalias_arg_a1, double* %noalias_ret_a1
-; CHECK: NoAlias: double* %noalias_ret_a1, double** %indirect_a0
-; CHECK: NoAlias: double* %noalias_ret_a1, double** %indirect_a1
-; CHECK: NoAlias: double* %loaded_a0, double* %noalias_ret_a1
-; CHECK: NoAlias: double* %loaded_a1, double* %noalias_ret_a1
-; CHECK: NoAlias: double* %escape_alloca_a0, double* %noalias_ret_a1
-; CHECK: NoAlias: double* %escape_alloca_a1, double* %noalias_ret_a1
-; CHECK: NoAlias: double* %noalias_ret_a1, double* %noescape_alloca_a0
-; CHECK: NoAlias: double* %noalias_ret_a1, double* %noescape_alloca_a1
-; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a0
-; CHECK: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a1
-; CHECK: NoAlias: double* %noalias_ret_a0, double* %noalias_ret_a1
-; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner()
-; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
-; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
-; CHECK: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
-; CHECK: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: MayAlias: double* %arg_a0, double* %arg_a1
+; CHECK-NEXT: NoAlias: double* %arg_a0, double* %noalias_arg_a0
+; CHECK-NEXT: NoAlias: double* %arg_a1, double* %noalias_arg_a0
+; CHECK-NEXT: NoAlias: double* %arg_a0, double* %noalias_arg_a1
+; CHECK-NEXT: NoAlias: double* %arg_a1, double* %noalias_arg_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double* %noalias_arg_a1
+; CHECK-NEXT: MayAlias: double* %arg_a0, double** %indirect_a0
+; CHECK-NEXT: MayAlias: double* %arg_a1, double** %indirect_a0
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double** %indirect_a0
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a1, double** %indirect_a0
+; CHECK-NEXT: MayAlias: double* %arg_a0, double** %indirect_a1
+; CHECK-NEXT: MayAlias: double* %arg_a1, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a1, double** %indirect_a1
+; CHECK-NEXT: MayAlias: double** %indirect_a0, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %arg_a0, double* %escape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %arg_a1, double* %escape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double* %noalias_arg_a0
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double* %noalias_arg_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double** %indirect_a0
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %arg_a0, double* %escape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %arg_a1, double* %escape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a1, double* %noalias_arg_a0
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a1, double* %noalias_arg_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a1, double** %indirect_a0
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a1, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double* %escape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %arg_a0, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %arg_a1, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a1, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a0, double** %indirect_a0
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a0, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a1, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %arg_a0, double* %noescape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %arg_a1, double* %noescape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double* %noescape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a1, double* %noescape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a1, double** %indirect_a0
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a1, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double* %noescape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a1, double* %noescape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a0, double* %noescape_alloca_a1
+; CHECK-NEXT: MayAlias: double* %arg_a0, double* %normal_ret_a0
+; CHECK-NEXT: MayAlias: double* %arg_a1, double* %normal_ret_a0
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double* %normal_ret_a0
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a1, double* %normal_ret_a0
+; CHECK-NEXT: MayAlias: double* %normal_ret_a0, double** %indirect_a0
+; CHECK-NEXT: MayAlias: double* %normal_ret_a0, double** %indirect_a1
+; CHECK-NEXT: MayAlias: double* %escape_alloca_a0, double* %normal_ret_a0
+; CHECK-NEXT: MayAlias: double* %escape_alloca_a1, double* %normal_ret_a0
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a0, double* %normal_ret_a0
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a1, double* %normal_ret_a0
+; CHECK-NEXT: MayAlias: double* %arg_a0, double* %normal_ret_a1
+; CHECK-NEXT: MayAlias: double* %arg_a1, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a1, double* %normal_ret_a1
+; CHECK-NEXT: MayAlias: double* %normal_ret_a1, double** %indirect_a0
+; CHECK-NEXT: MayAlias: double* %normal_ret_a1, double** %indirect_a1
+; CHECK-NEXT: MayAlias: double* %escape_alloca_a0, double* %normal_ret_a1
+; CHECK-NEXT: MayAlias: double* %escape_alloca_a1, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a0, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %noescape_alloca_a1, double* %normal_ret_a1
+; CHECK-NEXT: MayAlias: double* %normal_ret_a0, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %arg_a0, double* %noalias_ret_a0
+; CHECK-NEXT: NoAlias: double* %arg_a1, double* %noalias_ret_a0
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double* %noalias_ret_a0
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a1, double* %noalias_ret_a0
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a0, double** %indirect_a0
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a0, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double* %noalias_ret_a0
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a1, double* %noalias_ret_a0
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a0, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a0, double* %noescape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a0, double* %normal_ret_a0
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a0, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %arg_a0, double* %noalias_ret_a1
+; CHECK-NEXT: NoAlias: double* %arg_a1, double* %noalias_ret_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a0, double* %noalias_ret_a1
+; CHECK-NEXT: NoAlias: double* %noalias_arg_a1, double* %noalias_ret_a1
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a1, double** %indirect_a0
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a1, double** %indirect_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a0, double* %noalias_ret_a1
+; CHECK-NEXT: NoAlias: double* %escape_alloca_a1, double* %noalias_ret_a1
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a1, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a1, double* %noescape_alloca_a1
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a0
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a1, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %noalias_ret_a0, double* %noalias_ret_a1
+; CHECK-NEXT: MayAlias: double* %arg_a0, double* %loaded_a0
+; CHECK-NEXT: MayAlias: double* %arg_a1, double* %loaded_a0
+; CHECK-NEXT: NoAlias: double* %loaded_a0, double* %noalias_arg_a0
+; CHECK-NEXT: NoAlias: double* %loaded_a0, double* %noalias_arg_a1
+; CHECK-NEXT: MayAlias: double* %loaded_a0, double** %indirect_a0
+; CHECK-NEXT: MayAlias: double* %loaded_a0, double** %indirect_a1
+; CHECK-NEXT: MayAlias: double* %escape_alloca_a0, double* %loaded_a0
+; CHECK-NEXT: MayAlias: double* %escape_alloca_a1, double* %loaded_a0
+; CHECK-NEXT: NoAlias: double* %loaded_a0, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %loaded_a0, double* %noescape_alloca_a1
+; CHECK-NEXT: MayAlias: double* %loaded_a0, double* %normal_ret_a0
+; CHECK-NEXT: MayAlias: double* %loaded_a0, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %loaded_a0, double* %noalias_ret_a0
+; CHECK-NEXT: NoAlias: double* %loaded_a0, double* %noalias_ret_a1
+; CHECK-NEXT: MayAlias: double* %arg_a0, double* %loaded_a1
+; CHECK-NEXT: MayAlias: double* %arg_a1, double* %loaded_a1
+; CHECK-NEXT: NoAlias: double* %loaded_a1, double* %noalias_arg_a0
+; CHECK-NEXT: NoAlias: double* %loaded_a1, double* %noalias_arg_a1
+; CHECK-NEXT: MayAlias: double* %loaded_a1, double** %indirect_a0
+; CHECK-NEXT: MayAlias: double* %loaded_a1, double** %indirect_a1
+; CHECK-NEXT: MayAlias: double* %escape_alloca_a0, double* %loaded_a1
+; CHECK-NEXT: MayAlias: double* %escape_alloca_a1, double* %loaded_a1
+; CHECK-NEXT: NoAlias: double* %loaded_a1, double* %noescape_alloca_a0
+; CHECK-NEXT: NoAlias: double* %loaded_a1, double* %noescape_alloca_a1
+; CHECK-NEXT: MayAlias: double* %loaded_a1, double* %normal_ret_a0
+; CHECK-NEXT: MayAlias: double* %loaded_a1, double* %normal_ret_a1
+; CHECK-NEXT: NoAlias: double* %loaded_a1, double* %noalias_ret_a0
+; CHECK-NEXT: NoAlias: double* %loaded_a1, double* %noalias_ret_a1
+; CHECK-NEXT: MayAlias: double* %loaded_a0, double* %loaded_a1
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a0 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a0 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a1 <-> %normal_ret_a1 = call double* @normal_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a0 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %noalias_ret_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a0 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a1 <-> %noalias_ret_a1 = call double* @noalias_returner()
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a0 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a1 <-> call void @callee(double* %escape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a0)
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_arg_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double** %indirect_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %escape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noescape_alloca_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %noescape_alloca_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %normal_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: NoModRef: Ptr: double* %noalias_ret_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a0 <-> call void @nocap_callee(double* %noescape_alloca_a1)
+; CHECK-NEXT: Both ModRef: Ptr: double* %loaded_a1 <-> call void @nocap_callee(double* %noescape_alloca_a1)
; CHECK: ===== Alias Analysis Evaluator Report =====
-; CHECK: 120 Total Alias Queries Performed
-; CHECK: 84 no alias responses (70.0%)
-; CHECK: 36 may alias responses (30.0%)
-; CHECK: 0 must alias responses (0.0%)
-; CHECK: Alias Analysis Evaluator Pointer Alias Summary: 70%/30%/0%
-; CHECK: 184 Total ModRef Queries Performed
-; CHECK: 44 no mod/ref responses (23.9%)
-; CHECK: 0 mod responses (0.0%)
-; CHECK: 0 ref responses (0.0%)
-; CHECK: 140 mod & ref responses (76.0%)
-; CHECK: Alias Analysis Evaluator Mod/Ref Summary: 23%/0%/0%/76%
+; CHECK-NEXT: 120 Total Alias Queries Performed
+; CHECK-NEXT: 84 no alias responses (70.0%)
+; CHECK-NEXT: 36 may alias responses (30.0%)
+; CHECK-NEXT: 0 partial alias responses (0.0%)
+; CHECK-NEXT: 0 must alias responses (0.0%)
+; CHECK-NEXT: Alias Analysis Evaluator Pointer Alias Summary: 70%/30%/0%
+; CHECK-NEXT: 184 Total ModRef Queries Performed
+; CHECK-NEXT: 44 no mod/ref responses (23.9%)
+; CHECK-NEXT: 0 mod responses (0.0%)
+; CHECK-NEXT: 0 ref responses (0.0%)
+; CHECK-NEXT: 140 mod & ref responses (76.0%)
+; CHECK-NEXT: Alias Analysis Evaluator Mod/Ref Summary: 23%/0%/0%/76%
diff --git a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
index 062ea59..ebd349a 100644
--- a/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
+++ b/test/Analysis/BasicAA/getmodrefinfo-cs-cs.ll
@@ -15,12 +15,12 @@ define void @test0() {
; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
; CHECK: NoModRef: call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false) <-> call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
@A = external global i8
@B = external global i8
define void @test1() {
- call void @llvm.memset.i64(i8* @A, i8 0, i64 1, i32 1)
- call void @llvm.memset.i64(i8* @B, i8 0, i64 1, i32 1)
+ call void @llvm.memset.p0i8.i64(i8* @A, i8 0, i64 1, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* @B, i8 0, i64 1, i32 1, i1 false)
ret void
}
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index 7a71e3e..233396b 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -1,10 +1,6 @@
; RUN: opt < %s -basicaa -gvn -dse -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
-declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
declare void @llvm.lifetime.end(i64, i8* nocapture)
declare void @external(i32*)
@@ -15,7 +11,7 @@ define i32 @test0(i8* %P) {
store i32 0, i32* %A
- call void @llvm.memset.i32(i8* %P, i8 0, i32 42, i32 1)
+ call void @llvm.memset.p0i8.i32(i8* %P, i8 0, i32 42, i32 1, i1 false)
%B = load i32* %A
ret i32 %B
@@ -24,8 +20,6 @@ define i32 @test0(i8* %P) {
; CHECK: ret i32 0
}
-declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
-
define i8 @test1() {
; CHECK: @test1
%A = alloca i8
@@ -33,7 +27,7 @@ define i8 @test1() {
store i8 2, i8* %B ;; Not written to by memcpy
- call void @llvm.memcpy.i8(i8* %A, i8* %B, i8 -1, i32 0)
+ call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
%C = load i8* %B
ret i8 %C
@@ -44,7 +38,7 @@ define i8 @test2(i8* %P) {
; CHECK: @test2
%P2 = getelementptr i8* %P, i32 127
store i8 1, i8* %P2 ;; Not dead across memset
- call void @llvm.memset.i8(i8* %P, i8 2, i8 127, i32 0)
+ call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
%A = load i8* %P2
ret i8 %A
; CHECK: ret i8 1
@@ -57,7 +51,7 @@ define i8 @test2a(i8* %P) {
;; FIXME: DSE isn't zapping this dead store.
store i8 1, i8* %P2 ;; Dead, clobbered by memset.
- call void @llvm.memset.i8(i8* %P, i8 2, i8 127, i32 0)
+ call void @llvm.memset.p0i8.i8(i8* %P, i8 2, i8 127, i32 0, i1 false)
%A = load i8* %P2
ret i8 %A
; CHECK-NOT: load
@@ -97,7 +91,7 @@ define void @test3a(i8* %P, i8 %X) {
define i32 @test4(i8* %P) {
%tmp = load i32* @G1
- call void @llvm.memset.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1)
+ call void @llvm.memset.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8 0, i32 4000, i32 1, i1 false)
%tmp2 = load i32* @G1
%sub = sub i32 %tmp2, %tmp
ret i32 %sub
@@ -112,7 +106,7 @@ define i32 @test4(i8* %P) {
; write to G1.
define i32 @test5(i8* %P, i32 %Len) {
%tmp = load i32* @G1
- call void @llvm.memcpy.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4000 x i32]* @G2 to i8*), i8* bitcast (i32* @G1 to i8*), i32 %Len, i32 1, i1 false)
%tmp2 = load i32* @G1
%sub = sub i32 %tmp2, %tmp
ret i32 %sub
@@ -134,3 +128,9 @@ define i8 @test6(i8* %p, i8* noalias %a) {
; CHECK-NOT: load
; CHECK: ret
}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
diff --git a/test/Analysis/CallGraph/no-intrinsics.ll b/test/Analysis/CallGraph/no-intrinsics.ll
new file mode 100644
index 0000000..272a559
--- /dev/null
+++ b/test/Analysis/CallGraph/no-intrinsics.ll
@@ -0,0 +1,13 @@
+; RUN: opt < %s -print-callgraph -disable-output |& FileCheck %s
+
+; Check that intrinsics aren't added to the call graph
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define void @f(i8* %out, i8* %in) {
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %in, i32 100, i32 4, i1 false)
+ ret void
+}
+
+; CHECK: Call graph node for function: 'f'
+; CHECK-NOT: calls function 'llvm.memcpy.p0i8.p0i8.i32'
\ No newline at end of file
diff --git a/test/Analysis/GlobalsModRef/indirect-global.ll b/test/Analysis/GlobalsModRef/indirect-global.ll
index 1eab0bc..826f55c 100644
--- a/test/Analysis/GlobalsModRef/indirect-global.ll
+++ b/test/Analysis/GlobalsModRef/indirect-global.ll
@@ -3,8 +3,11 @@
@G = internal global i32* null ; <i32**> [#uses=3]
+
+declare i8* @malloc(i32)
define void @test() {
- %A = malloc i32 ; <i32*> [#uses=1]
+ %a = call i8* @malloc(i32 4)
+ %A = bitcast i8* %a to i32*
store i32* %A, i32** @G
ret void
}
diff --git a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
index 9573aed..ec95141 100644
--- a/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
+++ b/test/Analysis/ScalarEvolution/SolveQuadraticEquation.ll
@@ -7,7 +7,7 @@
define void @test(i32 %N) {
entry:
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
br label %bb3
bb: ; preds = %bb3
diff --git a/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll
index 31b95e1..f61b667 100644
--- a/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-infinite-recursion-1.ll
@@ -7,32 +7,32 @@ target triple = "x86_64-unknown-freebsd8.0"
module asm ".ident\09\22$FreeBSD: head/sys/kern/vfs_subr.c 195285 2009-07-02 14:19:33Z jamie $\22"
module asm ".section set_pcpu, \22aw\22, @progbits"
module asm ".previous"
- type <{ [40 x i8] }> ; type %0
- type <{ %struct.vm_object*, %struct.vm_object** }> ; type %1
- type <{ %struct.vm_object* }> ; type %2
- type <{ %struct.vm_page*, %struct.vm_page** }> ; type %3
- type <{ %struct.pv_entry*, %struct.pv_entry** }> ; type %4
- type <{ %struct.vm_reserv* }> ; type %5
- type <{ %struct.bufobj*, %struct.bufobj** }> ; type %6
- type <{ %struct.proc*, %struct.proc** }> ; type %7
- type <{ %struct.thread*, %struct.thread** }> ; type %8
- type <{ %struct.prison*, %struct.prison** }> ; type %9
- type <{ %struct.prison* }> ; type %10
- type <{ %struct.task* }> ; type %11
- type <{ %struct.osd*, %struct.osd** }> ; type %12
- type <{ %struct.proc* }> ; type %13
- type <{ %struct.ksiginfo*, %struct.ksiginfo** }> ; type %14
- type <{ %struct.pv_chunk*, %struct.pv_chunk** }> ; type %15
- type <{ %struct.pgrp*, %struct.pgrp** }> ; type %16
- type <{ %struct.knote*, %struct.knote** }> ; type %17
- type <{ %struct.ktr_request*, %struct.ktr_request** }> ; type %18
- type <{ %struct.mqueue_notifier* }> ; type %19
- type <{ %struct.turnstile* }> ; type %20
- type <{ %struct.namecache* }> ; type %21
- type <{ %struct.namecache*, %struct.namecache** }> ; type %22
- type <{ %struct.lockf*, %struct.lockf** }> ; type %23
- type <{ %struct.lockf_entry*, %struct.lockf_entry** }> ; type %24
- type <{ %struct.lockf_edge*, %struct.lockf_edge** }> ; type %25
+ %0 = type <{ [40 x i8] }> ; type %0
+ %1 = type <{ %struct.vm_object*, %struct.vm_object** }> ; type %1
+ %2 = type <{ %struct.vm_object* }> ; type %2
+ %3 = type <{ %struct.vm_page*, %struct.vm_page** }> ; type %3
+ %4 = type <{ %struct.pv_entry*, %struct.pv_entry** }> ; type %4
+ %5 = type <{ %struct.vm_reserv* }> ; type %5
+ %6 = type <{ %struct.bufobj*, %struct.bufobj** }> ; type %6
+ %7 = type <{ %struct.proc*, %struct.proc** }> ; type %7
+ %8 = type <{ %struct.thread*, %struct.thread** }> ; type %8
+ %9 = type <{ %struct.prison*, %struct.prison** }> ; type %9
+ %10 = type <{ %struct.prison* }> ; type %10
+ %11 = type <{ %struct.task* }> ; type %11
+ %12 = type <{ %struct.osd*, %struct.osd** }> ; type %12
+ %13 = type <{ %struct.proc* }> ; type %13
+ %14 = type <{ %struct.ksiginfo*, %struct.ksiginfo** }> ; type %14
+ %15 = type <{ %struct.pv_chunk*, %struct.pv_chunk** }> ; type %15
+ %16 = type <{ %struct.pgrp*, %struct.pgrp** }> ; type %16
+ %17 = type <{ %struct.knote*, %struct.knote** }> ; type %17
+ %18 = type <{ %struct.ktr_request*, %struct.ktr_request** }> ; type %18
+ %19 = type <{ %struct.mqueue_notifier* }> ; type %19
+ %20 = type <{ %struct.turnstile* }> ; type %20
+ %21 = type <{ %struct.namecache* }> ; type %21
+ %22 = type <{ %struct.namecache*, %struct.namecache** }> ; type %22
+ %23 = type <{ %struct.lockf*, %struct.lockf** }> ; type %23
+ %24 = type <{ %struct.lockf_entry*, %struct.lockf_entry** }> ; type %24
+ %25 = type <{ %struct.lockf_edge*, %struct.lockf_edge** }> ; type %25
%struct.__siginfo = type <{ i32, i32, i32, i32, i32, i32, i8*, %union.sigval, %0 }>
%struct.__sigset = type <{ [4 x i32] }>
%struct.acl = type <{ i32, i32, [4 x i32], [254 x %struct.acl_entry] }>
diff --git a/test/Analysis/ScalarEvolution/avoid-smax-1.ll b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
index 89e8b98..474d564 100644
--- a/test/Analysis/ScalarEvolution/avoid-smax-1.ll
+++ b/test/Analysis/ScalarEvolution/avoid-smax-1.ll
@@ -173,7 +173,7 @@ bb23: ; preds = %bb24, %bb.nph
%55 = mul i32 %y.21, %w ; <i32> [#uses=1]
%.sum5 = add i32 %55, %.sum3 ; <i32> [#uses=1]
%56 = getelementptr i8* %j, i32 %.sum5 ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32(i8* %56, i8* %54, i32 %w, i32 1)
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %56, i8* %54, i32 %w, i32 1, i1 false)
%57 = add i32 %y.21, 1 ; <i32> [#uses=2]
br label %bb24
@@ -190,7 +190,7 @@ bb26: ; preds = %bb24.bb26_crit_edge, %bb22
%60 = getelementptr i8* %j, i32 %.sum4 ; <i8*> [#uses=1]
%61 = mul i32 %x, %w ; <i32> [#uses=1]
%62 = sdiv i32 %61, 2 ; <i32> [#uses=1]
- tail call void @llvm.memset.i32(i8* %60, i8 -128, i32 %62, i32 1)
+ tail call void @llvm.memset.p0i8.i32(i8* %60, i8 -128, i32 %62, i32 1, i1 false)
ret void
bb29: ; preds = %bb20, %entry
@@ -208,7 +208,7 @@ bb30: ; preds = %bb31, %bb.nph11
%67 = getelementptr i8* %r, i32 %66 ; <i8*> [#uses=1]
%68 = mul i32 %y.310, %w ; <i32> [#uses=1]
%69 = getelementptr i8* %j, i32 %68 ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32(i8* %69, i8* %67, i32 %w, i32 1)
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %69, i8* %67, i32 %w, i32 1, i1 false)
%70 = add i32 %y.310, 1 ; <i32> [#uses=2]
br label %bb31
@@ -224,13 +224,12 @@ bb33: ; preds = %bb31.bb33_crit_edge, %bb29
%73 = getelementptr i8* %j, i32 %72 ; <i8*> [#uses=1]
%74 = mul i32 %x, %w ; <i32> [#uses=1]
%75 = sdiv i32 %74, 2 ; <i32> [#uses=1]
- tail call void @llvm.memset.i32(i8* %73, i8 -128, i32 %75, i32 1)
+ tail call void @llvm.memset.p0i8.i32(i8* %73, i8 -128, i32 %75, i32 1, i1 false)
ret void
return: ; preds = %bb20
ret void
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Analysis/ScalarEvolution/pr3909.ll b/test/Analysis/ScalarEvolution/pr3909.ll
index 10e328d..cf7531d 100644
--- a/test/Analysis/ScalarEvolution/pr3909.ll
+++ b/test/Analysis/ScalarEvolution/pr3909.ll
@@ -2,8 +2,8 @@
; PR 3909
- type { i32, %1* } ; type %0
- type { i32, i8* } ; type %1
+ %0 = type { i32, %1* } ; type %0
+ %1 = type { i32, i8* } ; type %1
define x86_stdcallcc i32 @_Dmain(%0 %unnamed) {
entry:
diff --git a/test/Analysis/ScalarEvolution/trip-count.ll b/test/Analysis/ScalarEvolution/trip-count.ll
index d750d4a..cb4e267 100644
--- a/test/Analysis/ScalarEvolution/trip-count.ll
+++ b/test/Analysis/ScalarEvolution/trip-count.ll
@@ -7,7 +7,6 @@
define void @test(i32 %N) {
entry:
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
br label %bb3
bb: ; preds = %bb3
diff --git a/test/Analysis/ScalarEvolution/trip-count2.ll b/test/Analysis/ScalarEvolution/trip-count2.ll
index 79f3161..e26cbea 100644
--- a/test/Analysis/ScalarEvolution/trip-count2.ll
+++ b/test/Analysis/ScalarEvolution/trip-count2.ll
@@ -7,7 +7,6 @@
define void @test(i32 %N) {
entry:
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
br label %bb3
bb: ; preds = %bb3
diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
index 10b798b..1bf86ae 100644
--- a/test/Analysis/ScalarEvolution/trip-count3.ll
+++ b/test/Analysis/ScalarEvolution/trip-count3.ll
@@ -5,15 +5,14 @@
; dividing by the stride will have a remainder. This could theoretically
; be teaching it how to use a more elaborate trip count computation.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-unknown-linux-gnu"
- %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
- %struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] }
- %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
-@_2E_str = external constant [26 x i8] ; <[26 x i8]*> [#uses=0]
-@stdin = external global %struct.FILE* ; <%struct.FILE**> [#uses=0]
-@_2E_str1 = external constant [3 x i8] ; <[3 x i8]*> [#uses=0]
-@_2E_str12 = external constant [30 x i8] ; <[30 x i8]*> [#uses=0]
+%struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+%struct.SHA_INFO = type { [5 x i32], i32, i32, [16 x i32] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
+
+@_2E_str = external constant [26 x i8]
+@stdin = external global %struct.FILE*
+@_2E_str1 = external constant [3 x i8]
+@_2E_str12 = external constant [30 x i8]
declare void @sha_init(%struct.SHA_INFO* nocapture) nounwind
@@ -25,12 +24,8 @@ declare i32 @printf(i8* nocapture, ...) nounwind
declare void @sha_final(%struct.SHA_INFO* nocapture) nounwind
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
declare void @sha_update(%struct.SHA_INFO* nocapture, i8* nocapture, i32) nounwind
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
declare i64 @fread(i8* noalias nocapture, i64, i64, %struct.FILE* noalias nocapture) nounwind
declare i32 @main(i32, i8** nocapture) nounwind
@@ -43,36 +38,41 @@ declare void @sha_stream(%struct.SHA_INFO* nocapture, %struct.FILE* nocapture) n
define void @sha_stream_bb3_2E_i(%struct.SHA_INFO* %sha_info, i8* %data1, i32, i8** %buffer_addr.0.i.out, i32* %count_addr.0.i.out) nounwind {
newFuncRoot:
- br label %bb3.i
-
-sha_update.exit.exitStub: ; preds = %bb3.i
- store i8* %buffer_addr.0.i, i8** %buffer_addr.0.i.out
- store i32 %count_addr.0.i, i32* %count_addr.0.i.out
- ret void
-
-bb2.i: ; preds = %bb3.i
- %1 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3 ; <[16 x i32]*> [#uses=1]
- %2 = bitcast [16 x i32]* %1 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1) nounwind
- %3 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0 ; <i32*> [#uses=1]
- %4 = bitcast i32* %3 to i8* ; <i8*> [#uses=1]
- br label %codeRepl
-
-codeRepl: ; preds = %bb2.i
- call void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8* %4)
- br label %byte_reverse.exit.i
-
-byte_reverse.exit.i: ; preds = %codeRepl
- call fastcc void @sha_transform(%struct.SHA_INFO* %sha_info) nounwind
- %5 = getelementptr i8* %buffer_addr.0.i, i64 64 ; <i8*> [#uses=1]
- %6 = add i32 %count_addr.0.i, -64 ; <i32> [#uses=1]
- br label %bb3.i
-
-bb3.i: ; preds = %byte_reverse.exit.i, %newFuncRoot
- %buffer_addr.0.i = phi i8* [ %data1, %newFuncRoot ], [ %5, %byte_reverse.exit.i ] ; <i8*> [#uses=3]
- %count_addr.0.i = phi i32 [ %0, %newFuncRoot ], [ %6, %byte_reverse.exit.i ] ; <i32> [#uses=3]
- %7 = icmp sgt i32 %count_addr.0.i, 63 ; <i1> [#uses=1]
- br i1 %7, label %bb2.i, label %sha_update.exit.exitStub
+ br label %bb3.i
+
+sha_update.exit.exitStub: ; preds = %bb3.i
+ store i8* %buffer_addr.0.i, i8** %buffer_addr.0.i.out
+ store i32 %count_addr.0.i, i32* %count_addr.0.i.out
+ ret void
+
+bb2.i: ; preds = %bb3.i
+ %1 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3
+ %2 = bitcast [16 x i32]* %1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %buffer_addr.0.i, i64 64, i32 1, i1 false)
+ %3 = getelementptr %struct.SHA_INFO* %sha_info, i64 0, i32 3, i64 0
+ %4 = bitcast i32* %3 to i8*
+ br label %codeRepl
+
+codeRepl: ; preds = %bb2.i
+ call void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8* %4)
+ br label %byte_reverse.exit.i
+
+byte_reverse.exit.i: ; preds = %codeRepl
+ call fastcc void @sha_transform(%struct.SHA_INFO* %sha_info) nounwind
+ %5 = getelementptr i8* %buffer_addr.0.i, i64 64
+ %6 = add i32 %count_addr.0.i, -64
+ br label %bb3.i
+
+bb3.i: ; preds = %byte_reverse.exit.i, %newFuncRoot
+ %buffer_addr.0.i = phi i8* [ %data1, %newFuncRoot ], [ %5, %byte_reverse.exit.i ]
+ %count_addr.0.i = phi i32 [ %0, %newFuncRoot ], [ %6, %byte_reverse.exit.i ]
+ %7 = icmp sgt i32 %count_addr.0.i, 63
+ br i1 %7, label %bb2.i, label %sha_update.exit.exitStub
}
declare void @sha_stream_bb3_2E_i_bb1_2E_i_2E_i(i8*) nounwind
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
diff --git a/test/Archive/extract.ll b/test/Archive/extract.ll
index 714c5f1..5c0f508 100644
--- a/test/Archive/extract.ll
+++ b/test/Archive/extract.ll
@@ -3,14 +3,14 @@
; This test just makes sure that llvm-ar can extract bytecode members
; from various style archives.
-; RUN: llvm-ar x %p/GNU.a very_long_bytecode_file_name.bc
-; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc
+; RUN: llvm-ar p %p/GNU.a very_long_bytecode_file_name.bc | \
+; RUN: cmp -s %p/very_long_bytecode_file_name.bc -
-; RUN: llvm-ar x %p/MacOSX.a very_long_bytecode_file_name.bc
-; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc
+; RUN: llvm-ar p %p/MacOSX.a very_long_bytecode_file_name.bc | \
+; RUN: cmp -s %p/very_long_bytecode_file_name.bc -
-; RUN: llvm-ar x %p/SVR4.a very_long_bytecode_file_name.bc
-; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc
+; RUN: llvm-ar p %p/SVR4.a very_long_bytecode_file_name.bc | \
+; RUN: cmp -s %p/very_long_bytecode_file_name.bc -
-; RUN: llvm-ar x %p/xpg4.a very_long_bytecode_file_name.bc
-; RUN: cmp -s %p/very_long_bytecode_file_name.bc very_long_bytecode_file_name.bc
+; RUN: llvm-ar p %p/xpg4.a very_long_bytecode_file_name.bc | \
+; RUN: cmp -s %p/very_long_bytecode_file_name.bc -
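The RUN lines above switch from llvm-ar x, which extracts the member into the current directory, to llvm-ar p, which prints the member to standard output, so the bytecode can be piped straight into cmp (reading its second operand from stdin via -) without writing a file into the test tree. The pattern, with Example.a and member.bc as placeholder names:

; RUN: llvm-ar p %p/Example.a member.bc | \
; RUN:   cmp -s %p/member.bc -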
diff --git a/test/Assembler/2002-01-24-BadSymbolTableAssert.ll b/test/Assembler/2002-01-24-BadSymbolTableAssert.ll
deleted file mode 100644
index 7c49e2b..0000000
--- a/test/Assembler/2002-01-24-BadSymbolTableAssert.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-; This testcase failed due to a bad assertion in SymbolTable.cpp, removed in
-; the 1.20 revision. Basically the symbol table assumed that if there was an
-; abstract type in the symbol table, [in this case for the entry %foo of type
-; void(opaque)* ], that there should have also been named types by now. This
-; was obviously not the case here, and this is valid. Assertion disabled.
-
-%bb = type i32
-
-declare void @foo(i32)
diff --git a/test/Assembler/2002-01-24-ValueRefineAbsType.ll b/test/Assembler/2002-01-24-ValueRefineAbsType.ll
deleted file mode 100644
index 6e49674..0000000
--- a/test/Assembler/2002-01-24-ValueRefineAbsType.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-; This testcase used to fail due to a lack of this diff in Value.cpp:
-; diff -r1.16 Value.cpp
-; 11c11
-; < #include "llvm/Type.h"
-; ---
-; > #include "llvm/DerivedTypes.h"
-; 74c74,76
-; < assert(Ty.get() == (const Type*)OldTy &&"Can't refine anything but my type!");
-; ---
-; > assert(Ty.get() == OldTy &&"Can't refine anything but my type!");
-; > if (OldTy == NewTy && !OldTy->isAbstract())
-; > Ty.removeUserFromConcrete();
-;
-; This was causing an assertion failure, due to the "foo" Method object never
-; releasing its reference to the opaque %bb value.
-;
-
-%bb = type i32
-%exception_descriptor = type i32
-
-declare void @foo(i32)
diff --git a/test/Assembler/2002-02-19-TypeParsing.ll b/test/Assembler/2002-02-19-TypeParsing.ll
deleted file mode 100644
index 0df6784..0000000
--- a/test/Assembler/2002-02-19-TypeParsing.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-%Hosp = type { i32, i32, i32, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* }, { \2*, { i32, i32, i32, { [4 x \3], \2, \5, %Hosp, i32, i32 }* }*, \2* } }
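The backslash-number tokens in the deleted type above (\2, \3, \5) are up-references, the old assembler's anonymous spelling for recursive types: \N names the type N nesting levels out from where the token appears. They go away with the rest of the refineable-type machinery this patch removes, so a self-referential type now has to be an identified struct. A sketch of the correspondence, using an illustrative %node type:

; old spelling, now rejected:  %node = type { i32, \2* }
; (\2 climbs past the pointer, level 1, to the struct, level 2)
%node = type { i32, %node* }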
diff --git a/test/Assembler/2002-04-04-PureVirtMethCall.ll b/test/Assembler/2002-04-04-PureVirtMethCall.ll
deleted file mode 100644
index 29aed55..0000000
--- a/test/Assembler/2002-04-04-PureVirtMethCall.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
- type { { \2 *, \4 ** },
- { \2 *, \4 ** }
- }
-
diff --git a/test/Assembler/2002-04-04-PureVirtMethCall2.ll b/test/Assembler/2002-04-04-PureVirtMethCall2.ll
deleted file mode 100644
index a096899..0000000
--- a/test/Assembler/2002-04-04-PureVirtMethCall2.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-%t = type { { \2*, \2 },
- { \2*, \2 }
- }
diff --git a/test/Assembler/2002-04-05-TypeParsing.ll b/test/Assembler/2002-04-05-TypeParsing.ll
deleted file mode 100644
index f725944..0000000
--- a/test/Assembler/2002-04-05-TypeParsing.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
- %Hosp = type { { \2*, { \2, %Hosp }* }, { \2*, { \2, %Hosp }* } }
diff --git a/test/Assembler/2002-05-02-ParseError.ll b/test/Assembler/2002-05-02-ParseError.ll
deleted file mode 100644
index 5a9817c..0000000
--- a/test/Assembler/2002-05-02-ParseError.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-%T = type i32 *
-
-define %T @test() {
- ret %T null
-}
diff --git a/test/Assembler/2002-07-08-HugePerformanceProblem.ll b/test/Assembler/2002-07-08-HugePerformanceProblem.ll
deleted file mode 100644
index 52c90af..0000000
--- a/test/Assembler/2002-07-08-HugePerformanceProblem.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; This file takes about 48 __MINUTES__ to assemble using as. This is WAY too
-; long. The type resolution code needs to be sped up a lot.
-; RUN: llvm-as %s -o /dev/null
- %ALL_INTERSECTIONS_METHOD = type i32 (%OBJECT*, %RAY*, %ISTACK*)*
- %BBOX = type { %BBOX_VECT, %BBOX_VECT }
- %BBOX_TREE = type { i16, i16, %BBOX, %BBOX_TREE** }
- %BBOX_VECT = type [3 x float]
- %BLEND_MAP = type { i16, i16, i16, i32, %BLEND_MAP_ENTRY* }
- %BLEND_MAP_ENTRY = type { float, i8, { %COLOUR, %PIGMENT*, %TNORMAL*, %TEXTURE*, %UV_VECT } }
- %CAMERA = type { %VECTOR, %VECTOR, %VECTOR, %VECTOR, %VECTOR, %VECTOR, double, double, i32, double, double, i32, double, %TNORMAL* }
- %COLOUR = type [5 x float]
- %COPY_METHOD = type i8* (%OBJECT*)*
- %COUNTER = type { i32, i32 }
- %DENSITY_FILE = type { i32, %DENSITY_FILE_DATA* }
- %DENSITY_FILE_DATA = type { i32, i8*, i32, i32, i32, i8*** }
- %DESTROY_METHOD = type void (%OBJECT*)*
- %FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
- %FILE_HANDLE = type { i8*, i32, i32, i32, i32, i8*, %FILE*, i32, i32 (%FILE_HANDLE*, i8*, i32*, i32*, i32, i32)*, void (%FILE_HANDLE*, %COLOUR*, i32)*, i32 (%FILE_HANDLE*, %COLOUR*, i32*)*, void (%IMAGE*, i8*)*, void (%FILE_HANDLE*)* }
- %FINISH = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, %BBOX_VECT, %BBOX_VECT }
- %FOG = type { i32, double, double, double, %COLOUR, %VECTOR, %TURB*, float, %FOG* }
- %FRAME = type { %CAMERA*, i32, i32, i32, %LIGHT_SOURCE*, %OBJECT*, double, double, %COLOUR, %COLOUR, %COLOUR, %IMEDIA*, %FOG*, %RAINBOW*, %SKYSPHERE* }
- %FRAMESEQ = type { i32, double, i32, i32, double, i32, i32, double, i32, double, i32, double, i32, i32 }
- %IMAGE = type { i32, i32, i32, i32, i32, i16, i16, %VECTOR, float, float, i32, i32, i16, %IMAGE_COLOUR*, { %IMAGE_LINE*, i8** } }
- %IMAGE_COLOUR = type { i16, i16, i16, i16, i16 }
- %IMAGE_LINE = type { i8*, i8*, i8*, i8* }
- %IMEDIA = type { i32, i32, i32, i32, i32, double, double, i32, i32, i32, i32, %COLOUR, %COLOUR, %COLOUR, %COLOUR, double, double, double, double*, %PIGMENT*, %IMEDIA* }
- %INSIDE_METHOD = type i32 (double*, %OBJECT*)*
- %INTERIOR = type { i32, i32, float, float, float, float, float, %IMEDIA* }
- %INTERSECTION = type { double, %VECTOR, %VECTOR, %OBJECT*, i32, i32, double, double, i8* }
- %INVERT_METHOD = type void (%OBJECT*)*
- %ISTACK = type { %ISTACK*, %INTERSECTION*, i32 }
- %LIGHT_SOURCE = type { %METHODS*, i32, %OBJECT*, %TEXTURE*, %INTERIOR*, %OBJECT*, %OBJECT*, %BBOX, i32, %OBJECT*, %COLOUR, %VECTOR, %VECTOR, %VECTOR, %VECTOR, %VECTOR, double, double, double, double, double, %LIGHT_SOURCE*, i8, i8, i8, i8, i32, i32, i32, i32, i32, %COLOUR**, %OBJECT*, [6 x %PROJECT_TREE_NODE*] }
- %MATRIX = type [4 x %VECTOR_4D]
- %METHODS = type { %ALL_INTERSECTIONS_METHOD, %INSIDE_METHOD, %NORMAL_METHOD, %COPY_METHOD, %ROTATE_METHOD, %ROTATE_METHOD, %ROTATE_METHOD, %TRANSFORM_METHOD, %DESTROY_METHOD, %DESTROY_METHOD }
- %NORMAL_METHOD = type void (double*, %OBJECT*, %INTERSECTION*)*
- %OBJECT = type { %METHODS*, i32, %OBJECT*, %TEXTURE*, %INTERIOR*, %OBJECT*, %OBJECT*, %BBOX, i32 }
- %Opts = type { i32, i32, i8, i8, i8, i32, [150 x i8], [150 x i8], [150 x i8], [150 x i8], [150 x i8], double, double, i32, i32, double, double, i32, [25 x i8*], i32, i32, i32, double, double, i32, i32, double, double, double, i32, i32, i32, i32, i32, %FRAMESEQ, double, i32, double, double, double, double, double, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [150 x i8], %SHELLDATA*, [150 x i8], i32, i32 }
- %PIGMENT = type { i16, i16, i16, i32, float, float, float, %WARP*, %TPATTERN*, %BLEND_MAP*, { %DENSITY_FILE*, %IMAGE*, %VECTOR, float, i16, i16, i16, { float, %VECTOR }, %complex.float }, %COLOUR }
- %PRIORITY_QUEUE = type { i32, i32, %QELEM* }
- %PROJECT = type { i32, i32, i32, i32 }
- %PROJECT_QUEUE = type { i32, i32, %PROJECT_TREE_NODE** }
- %PROJECT_TREE_NODE = type { i16, %BBOX_TREE*, %PROJECT, i16, %PROJECT_TREE_NODE** }
- %QELEM = type { double, %BBOX_TREE* }
- %RAINBOW = type { double, double, double, double, double, double, double, %VECTOR, %VECTOR, %VECTOR, %PIGMENT*, %RAINBOW* }
- %RAY = type { %VECTOR, %VECTOR, i32, [100 x %INTERIOR*] }
- %RAYINFO = type { %VECTOR, %VECTOR, %VECTORI, %VECTORI }
- %RGB = type [3 x float]
- %ROTATE_METHOD = type void (%OBJECT*, double*, %TRANSFORM*)*
- %SCALE_METHOD = type void (%OBJECT*, double*, %TRANSFORM*)*
- %SHELLDATA = type { i32, i32, [250 x i8] }
- %SKYSPHERE = type { i32, %PIGMENT**, %TRANSFORM* }
- %SNGL_VECT = type [3 x float]
- %TEXTURE = type { i16, i16, i16, i32, float, float, float, %WARP*, %TPATTERN*, %BLEND_MAP*, { %DENSITY_FILE*, %IMAGE*, %VECTOR, float, i16, i16, i16, { float, %VECTOR }, %complex.float }, %TEXTURE*, %PIGMENT*, %TNORMAL*, %FINISH*, %TEXTURE*, i32 }
- %TNORMAL = type { i16, i16, i16, i32, float, float, float, %WARP*, %TPATTERN*, %BLEND_MAP*, { %DENSITY_FILE*, %IMAGE*, %VECTOR, float, i16, i16, i16, { float, %VECTOR }, %complex.float }, float }
- %TPATTERN = type { i16, i16, i16, i32, float, float, float, %WARP*, %TPATTERN*, %BLEND_MAP*, { %DENSITY_FILE*, %IMAGE*, %VECTOR, float, i16, i16, i16, { float, %VECTOR }, %complex.float } }
- %TRANSFORM = type { %MATRIX, %MATRIX }
- %TRANSFORM_METHOD = type void (%OBJECT*, %TRANSFORM*)*
- %TRANSLATE_METHOD = type void (%OBJECT*, double*, %TRANSFORM*)*
- %TURB = type { i16, %WARP*, %VECTOR, i32, float, float }
- %UV_VECT = type [2 x double]
- %VECTOR = type [3 x double]
- %VECTORI = type [3 x i32]
- %VECTOR_4D = type [4 x double]
- %WARP = type { i16, %WARP* }
- %__FILE = type { i32, i8*, i8*, i8, i8, i32, i32, i32 }
- %_h_val = type { [2 x i32], double }
- %complex.float = type { float, float }
diff --git a/test/Assembler/2002-07-14-InternalLossage.ll b/test/Assembler/2002-07-14-InternalLossage.ll
deleted file mode 100644
index f93f1c4..0000000
--- a/test/Assembler/2002-07-14-InternalLossage.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Test to make sure that the 'internal' tag is not lost!
-;
-; RUN: llvm-as < %s | llvm-dis | grep internal
-
-declare void @foo()
-
-define internal void @foo() {
- ret void
-}
diff --git a/test/Assembler/2002-07-25-ParserAssertionFailure.ll b/test/Assembler/2002-07-25-ParserAssertionFailure.ll
deleted file mode 100644
index 3c5c554..0000000
--- a/test/Assembler/2002-07-25-ParserAssertionFailure.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; Make sure we don't get an assertion failure, even though this is a parse
-; error
-; RUN: not llvm-as %s -o /dev/null |& grep {'@foo' defined with}
-
-%ty = type void (i32)
-
-declare %ty* @foo()
-
-define void @test() {
- call %ty* @foo( ) ; <%ty*>:0 [#uses=0]
- ret void
-}
-
diff --git a/test/Assembler/2002-07-25-ReturnPtrFunction.ll b/test/Assembler/2002-07-25-ReturnPtrFunction.ll
index 515d105..6988fad 100644
--- a/test/Assembler/2002-07-25-ReturnPtrFunction.ll
+++ b/test/Assembler/2002-07-25-ReturnPtrFunction.ll
@@ -3,12 +3,10 @@
;
; RUN: llvm-as < %s | llvm-dis | llvm-as
-%ty = type void (i32)
-
-declare %ty* @foo()
+declare void (i32)* @foo()
define void @test() {
- call %ty* ()* @foo( ) ; <%ty*>:1 [#uses=0]
+ call void (i32)* ()* @foo( ) ; <%ty*>:1 [#uses=0]
ret void
}
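This rewrite reflects a rule of the new type system: only struct types can be given names. An alias such as %ty = type void (i32) for a function type is no longer accepted (the same reason 2002-12-15-GlobalResolve.ll below turns %T = type i32 into %T = type {i32}), so the function type is spelled inline. Because @foo returns a pointer to function, the call must still spell out the callee's full signature, sketched here with an illustrative @caller:

declare void (i32)* @foo()

define void @caller() {
; the full "void (i32)* ()*" callee type is required because the result
; type is itself a pointer to function
  call void (i32)* ()* @foo( )
  ret void
}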
diff --git a/test/Assembler/2002-10-15-NameClash.ll b/test/Assembler/2002-10-15-NameClash.ll
deleted file mode 100644
index 89346cb..0000000
--- a/test/Assembler/2002-10-15-NameClash.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-declare i32 @"ArrayRef"([100 x i32] * %Array)
-
-define i32 @"ArrayRef"([100 x i32] * %Array) {
- ret i32 0
-}
diff --git a/test/Assembler/2002-12-15-GlobalResolve.ll b/test/Assembler/2002-12-15-GlobalResolve.ll
index f9ad12e..a873a61 100644
--- a/test/Assembler/2002-12-15-GlobalResolve.ll
+++ b/test/Assembler/2002-12-15-GlobalResolve.ll
@@ -4,4 +4,4 @@
@X1 = external global %T*
@X2 = external global i32*
-%T = type i32
+%T = type {i32}
diff --git a/test/Assembler/2003-04-15-ConstantInitAssertion.ll b/test/Assembler/2003-04-15-ConstantInitAssertion.ll
index e012168..fa6b807 100644
--- a/test/Assembler/2003-04-15-ConstantInitAssertion.ll
+++ b/test/Assembler/2003-04-15-ConstantInitAssertion.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s >/dev/null |& grep {constant expression type mismatch}
+; RUN: not llvm-as < %s >/dev/null |& grep {struct initializer doesn't match struct element type}
; Test the case of a malformed constant initializer
; This should cause an assembler error, not an assertion failure!
constant { i32 } { float 1.0 }
diff --git a/test/Assembler/2003-05-21-MalformedStructCrash.ll b/test/Assembler/2003-05-21-MalformedStructCrash.ll
index 1efb577..8d20e070 100644
--- a/test/Assembler/2003-05-21-MalformedStructCrash.ll
+++ b/test/Assembler/2003-05-21-MalformedStructCrash.ll
@@ -1,4 +1,4 @@
; Found by inspection of the code
-; RUN: not llvm-as < %s > /dev/null |& grep {constant expression type mismatch}
+; RUN: not llvm-as < %s > /dev/null |& grep {initializer with struct type has wrong # elements}
global {} { i32 7, float 1.0, i32 7, i32 8 }
diff --git a/test/Assembler/2003-06-30-RecursiveTypeProblem.ll b/test/Assembler/2003-06-30-RecursiveTypeProblem.ll
deleted file mode 100644
index 5db3114..0000000
--- a/test/Assembler/2003-06-30-RecursiveTypeProblem.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-%MidFnTy = type void (%MidFnTy*)
diff --git a/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll b/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll
deleted file mode 100644
index 5fec05d..0000000
--- a/test/Assembler/2003-10-04-NotMergingGlobalConstants.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-%T = type i32
-@X = global i32* null ; <i32**> [#uses=0]
-@Y = global i32* null ; <i32**> [#uses=0]
-
diff --git a/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll b/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll
deleted file mode 100644
index 93f9a70..0000000
--- a/test/Assembler/2003-12-30-TypeMapInvalidMemory.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'struct.D_Scope'}
-; END.
-
-@d_reduction_0_dparser_gram = global {
- i32 (i8*, i8**, i32, i32, {
- %struct.Grammar*, void (\4, %struct.d_loc_t*, i8**)*, %struct.D_Scope*,
- void (\4)*, { i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\8, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User }* (\4, i32, { i32, %struct.d_loc_t, i8*, i8*,
- %struct.D_Scope*, void (\9, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User }**)*,
- void ({ i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\8, %struct.d_loc_t*, i8**)*,
- %struct.Grammar*, %struct.ParseNode_User }*)*,
- %struct.d_loc_t, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
- i32 }*)*,
- i32 (i8*, i8**, i32, i32, { %struct.Grammar*,
- void (\4, %struct.d_loc_t*, i8**)*, %struct.D_Scope*, void (\4)*, {
- i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\8, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User }* (\4, i32, { i32, %struct.d_loc_t, i8*, i8*,
- %struct.D_Scope*, void (\9, %struct.d_loc_t*, i8**)*,
- %struct.Grammar*, %struct.ParseNode_User }**)*,
- void ({ i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\8, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User }*)*, %struct.d_loc_t, i32, i32, i32, i32,
- i32, i32, i32, i32, i32, i32, i32, i32 }*)** }
-
- { i32 (i8*, i8**, i32, i32, {
- %struct.Grammar*, void (\4, %struct.d_loc_t*, i8**)*,
- %struct.D_Scope*, void (\4)*, {
- i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\8, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User
- }* (\4, i32, { i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\9, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User }**)*,
- void ({ i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\8, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User }*)*,
- %struct.d_loc_t, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32,
- i32, i32 }*)* null,
- i32 (i8*, i8**, i32, i32, {
- %struct.Grammar*, void (\4, %struct.d_loc_t*, i8**)*,
- %struct.D_Scope*, void (\4)*, { i32, %struct.d_loc_t, i8*, i8*,
- %struct.D_Scope*, void (\8, %struct.d_loc_t*, i8**)*,
- %struct.Grammar*, %struct.ParseNode_User }* (\4, i32, { i32,
- %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\9, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User }**)*,
- void ({ i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*,
- void (\8, %struct.d_loc_t*, i8**)*, %struct.Grammar*,
- %struct.ParseNode_User }*)*, %struct.d_loc_t, i32, i32, i32,
- i32, i32, i32, i32, i32, i32, i32, i32, i32 }*)** null
- }
diff --git a/test/Assembler/2004-11-28-InvalidTypeCrash.ll b/test/Assembler/2004-11-28-InvalidTypeCrash.ll
index f9b453b..40648fd 100644
--- a/test/Assembler/2004-11-28-InvalidTypeCrash.ll
+++ b/test/Assembler/2004-11-28-InvalidTypeCrash.ll
@@ -1,4 +1,4 @@
; Test for PR463. This program is erroneous, but should not crash llvm-as.
-; RUN: not llvm-as %s -o /dev/null |& grep {invalid type for null constant}
+; RUN: not llvm-as %s -o /dev/null |& grep {use of undefined type named 'struct.none'}
@.FOO = internal global %struct.none zeroinitializer
diff --git a/test/Assembler/2005-02-09-AsmWriterStoreBug.ll b/test/Assembler/2005-02-09-AsmWriterStoreBug.ll
deleted file mode 100644
index 4ec1796..0000000
--- a/test/Assembler/2005-02-09-AsmWriterStoreBug.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as
-
-; Ensure that the asm writer emits types before both operands of the
-; store, even though they can be the same.
-
-%RecTy = type %RecTy*
-
-define void @foo() {
- %A = malloc %RecTy ; <%RecTy> [#uses=1]
- %B = malloc %RecTy ; <%RecTy> [#uses=1]
- store %RecTy %B, %RecTy %A
- ret void
-}
-
diff --git a/test/Assembler/2006-05-26-VarargsCallEncode.ll b/test/Assembler/2006-05-26-VarargsCallEncode.ll
deleted file mode 100644
index 6dc60c3..0000000
--- a/test/Assembler/2006-05-26-VarargsCallEncode.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis | grep {tail call void.*sret null}
-
-declare void @foo({ }* sret , ...)
-
-define void @bar() {
- tail call void ({ }* sret , ...)* @foo( { }* null sret , i32 0 )
- ret void
-}
diff --git a/test/Assembler/2007-07-30-AutoUpgradeZextSext.ll b/test/Assembler/2007-07-30-AutoUpgradeZextSext.ll
deleted file mode 100644
index ea2db44..0000000
--- a/test/Assembler/2007-07-30-AutoUpgradeZextSext.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; Test that upgrading zext/sext attributes to zeroext and signext
-; works correctly.
-; PR1553
-; RUN: llvm-as < %s > /dev/null
-
-define i32 @bar() {
- %t = call i8 @foo( i8 10 sext ) zext
- %x = zext i8 %t to i32
- ret i32 %x
-}
-
-declare i8 @foo(i8 signext ) zeroext
diff --git a/test/Assembler/2007-11-27-AutoUpgradeAttributes.ll b/test/Assembler/2007-11-27-AutoUpgradeAttributes.ll
deleted file mode 100644
index ee260ea..0000000
--- a/test/Assembler/2007-11-27-AutoUpgradeAttributes.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-as < %s
-
-@FP = weak global i8 (...) signext * null
diff --git a/test/Assembler/2008-02-20-MultipleReturnValue.ll b/test/Assembler/2008-02-20-MultipleReturnValue.ll
deleted file mode 100644
index 32c893a..0000000
--- a/test/Assembler/2008-02-20-MultipleReturnValue.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -verify -S | llvm-as -disable-output
-
-define {i32, i8} @foo(i32 %p) {
- ret i32 1, i8 2
-}
-
-define i8 @f2(i32 %p) {
- %c = call {i32, i8} @foo(i32 %p)
- %d = getresult {i32, i8} %c, 1
- %e = add i8 %d, 1
- ret i8 %e
-}
-
-define i32 @f3(i32 %p) {
- %c = invoke {i32, i8} @foo(i32 %p)
- to label %L unwind label %L2
- L:
- %d = getresult {i32, i8} %c, 0
- ret i32 %d
- L2:
- ret i32 0
-}
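The deleted test relies on syntax that predates first-class aggregates: a multi-value ret (ret i32 1, i8 2) and the getresult instruction. The current equivalents build and take apart the { i32, i8 } value with insertvalue and extractvalue; a sketch with illustrative function names:

define { i32, i8 } @mrv(i32 %p) {
  %a = insertvalue { i32, i8 } undef, i32 1, 0
  %b = insertvalue { i32, i8 } %a, i8 2, 1
  ret { i32, i8 } %b
}

define i8 @use_mrv(i32 %p) {
  %c = call { i32, i8 } @mrv(i32 %p)
  %d = extractvalue { i32, i8 } %c, 1
  %e = add i8 %d, 1
  ret i8 %e
}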
diff --git a/test/Assembler/2008-10-14-NamedTypeOnInteger.ll b/test/Assembler/2008-10-14-NamedTypeOnInteger.ll
deleted file mode 100644
index 009489d..0000000
--- a/test/Assembler/2008-10-14-NamedTypeOnInteger.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis
-; PR2733
-
-%t1 = type i32
-%t2 = type { %t1 }
-@i1 = constant %t2 { %t1 15 }
diff --git a/test/Assembler/2009-02-28-CastOpc.ll b/test/Assembler/2009-02-28-CastOpc.ll
index ee98d41..6035643 100644
--- a/test/Assembler/2009-02-28-CastOpc.ll
+++ b/test/Assembler/2009-02-28-CastOpc.ll
@@ -1,8 +1,7 @@
; RUN: llvm-as < %s | llvm-dis
-type i32
define void @foo() {
- bitcast %0* null to i32*
+ bitcast i32* null to i32*
ret void
}
diff --git a/test/Assembler/AutoUpgradeIntrinsics.ll b/test/Assembler/AutoUpgradeIntrinsics.ll
index 417493f..eb4ac76 100644
--- a/test/Assembler/AutoUpgradeIntrinsics.ll
+++ b/test/Assembler/AutoUpgradeIntrinsics.ll
@@ -1,87 +1,6 @@
; Tests to make sure intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis | not grep {i32 @llvm\\.ct}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN: not grep {llvm\\.part\\.set\\.i\[0-9\]*\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN: not grep {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN: not grep {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN: not grep {llvm\\.x86\\.sse2\\.loadu}
-; RUN: llvm-as < %s | llvm-dis | \
-; RUN: grep {llvm\\.x86\\.mmx\\.ps} | grep {x86_mmx} | count 16
; RUN: llvm-as < %s | llvm-dis | FileCheck %s
-declare i32 @llvm.ctpop.i28(i28 %val)
-declare i32 @llvm.cttz.i29(i29 %val)
-declare i32 @llvm.ctlz.i30(i30 %val)
-
-define i32 @test_ct(i32 %A) {
- %c1 = call i32 @llvm.ctpop.i28(i28 1234)
- %c2 = call i32 @llvm.cttz.i29(i29 2345)
- %c3 = call i32 @llvm.ctlz.i30(i30 3456)
- %r1 = add i32 %c1, %c2
- %r2 = add i32 %r1, %c3
- ret i32 %r2
-}
-
-declare i32 @llvm.part.set.i32.i32.i32(i32 %x, i32 %rep, i32 %hi, i32 %lo)
-declare i16 @llvm.part.set.i16.i16.i16(i16 %x, i16 %rep, i32 %hi, i32 %lo)
-define i32 @test_part_set(i32 %A, i16 %B) {
- %a = call i32 @llvm.part.set.i32.i32.i32(i32 %A, i32 27, i32 8, i32 0)
- %b = call i16 @llvm.part.set.i16.i16.i16(i16 %B, i16 27, i32 8, i32 0)
- %c = zext i16 %b to i32
- %d = add i32 %a, %c
- ret i32 %d
-}
-
-declare i32 @llvm.part.select.i32.i32(i32 %x, i32 %hi, i32 %lo)
-declare i16 @llvm.part.select.i16.i16(i16 %x, i32 %hi, i32 %lo)
-define i32 @test_part_select(i32 %A, i16 %B) {
- %a = call i32 @llvm.part.select.i32.i32(i32 %A, i32 8, i32 0)
- %b = call i16 @llvm.part.select.i16.i16(i16 %B, i32 8, i32 0)
- %c = zext i16 %b to i32
- %d = add i32 %a, %c
- ret i32 %d
-}
-
-declare i32 @llvm.bswap.i32.i32(i32 %x)
-declare i16 @llvm.bswap.i16.i16(i16 %x)
-define i32 @test_bswap(i32 %A, i16 %B) {
- %a = call i32 @llvm.bswap.i32.i32(i32 %A)
- %b = call i16 @llvm.bswap.i16.i16(i16 %B)
- %c = zext i16 %b to i32
- %d = add i32 %a, %c
- ret i32 %d
-}
-
-declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <2 x i32>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <2 x i32>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <2 x i32>) nounwind readnone
-define void @sh16(<4 x i16> %A, <2 x i32> %B) {
- %r1 = call <4 x i16> @llvm.x86.mmx.psra.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0]
- %r2 = call <4 x i16> @llvm.x86.mmx.psll.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0]
- %r3 = call <4 x i16> @llvm.x86.mmx.psrl.w( <4 x i16> %A, <2 x i32> %B ) ; <<4 x i16>> [#uses=0]
- ret void
-}
-
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <2 x i32>) nounwind readnone
-define void @sh32(<2 x i32> %A, <2 x i32> %B) {
- %r1 = call <2 x i32> @llvm.x86.mmx.psra.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0]
- %r2 = call <2 x i32> @llvm.x86.mmx.psll.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0]
- %r3 = call <2 x i32> @llvm.x86.mmx.psrl.d( <2 x i32> %A, <2 x i32> %B ) ; <<2 x i32>> [#uses=0]
- ret void
-}
-
-declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <2 x i32>) nounwind readnone
-define void @sh64(<1 x i64> %A, <2 x i32> %B) {
- %r1 = call <1 x i64> @llvm.x86.mmx.psll.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0]
- %r2 = call <1 x i64> @llvm.x86.mmx.psrl.q( <1 x i64> %A, <2 x i32> %B ) ; <<1 x i64>> [#uses=0]
- ret void
-}
declare <4 x float> @llvm.x86.sse.loadu.ps(i8*) nounwind readnone
declare <16 x i8> @llvm.x86.sse2.loadu.dq(i8*) nounwind readnone
@@ -90,6 +9,10 @@ define void @test_loadu(i8* %a, double* %b) {
%v0 = call <4 x float> @llvm.x86.sse.loadu.ps(i8* %a)
%v1 = call <16 x i8> @llvm.x86.sse2.loadu.dq(i8* %a)
%v2 = call <2 x double> @llvm.x86.sse2.loadu.pd(double* %b)
+
+; CHECK: load i128* {{.*}}, align 1
+; CHECK: load i128* {{.*}}, align 1
+; CHECK: load i128* {{.*}}, align 1
ret void
}
@@ -109,3 +32,11 @@ define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D) {
call void @llvm.x86.sse2.movnt.i(i8* %B, i32 %D)
ret void
}
+
+declare void @llvm.prefetch(i8*, i32, i32) nounwind
+
+define void @p(i8* %ptr) {
+; CHECK: llvm.prefetch(i8* %ptr, i32 0, i32 1, i32 1)
+ tail call void @llvm.prefetch(i8* %ptr, i32 0, i32 1)
+ ret void
+}
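The test appended at the end pins down one more upgrade: @llvm.prefetch grows a fourth operand. The old form took (address, rw, locality); the CHECK line shows the rewritten call with i32 1 appended to select the data cache. The post-upgrade shape, with @touch as an illustrative caller:

declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind

define void @touch(i8* %ptr) {
; rw = 0 (read), locality = 1, cache type = 1 (data), per the CHECK above
  call void @llvm.prefetch(i8* %ptr, i32 0, i32 1, i32 1)
  ret void
}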
diff --git a/test/Assembler/AutoUpgradeMMXIntrinsics.ll b/test/Assembler/AutoUpgradeMMXIntrinsics.ll
deleted file mode 100644
index 54120ff..0000000
--- a/test/Assembler/AutoUpgradeMMXIntrinsics.ll
+++ /dev/null
@@ -1,223 +0,0 @@
-; Tests to make sure MMX intrinsics are automatically upgraded.
-; RUN: llvm-as < %s | llvm-dis -o %t
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<1 x i64\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<2 x i32\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<4 x i16\\\>}
-; RUN: grep {llvm\\.x86\\.mmx} %t | not grep {\\\<8 x i8\\\>}
-; RUN: grep {llvm\\.x86\\.sse\\.pshuf\\.w} %t | not grep i32
-
-; Addition
-declare <8 x i8> @llvm.x86.mmx.padd.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64>, <1 x i64>) nounwind readnone
-declare <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @add(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
- <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) {
- %r1 = call <8 x i8> @llvm.x86.mmx.padd.b(<8 x i8> %A, <8 x i8> %B)
- %r2 = call <4 x i16> @llvm.x86.mmx.padd.w(<4 x i16> %C, <4 x i16> %D)
- %r3 = call <2 x i32> @llvm.x86.mmx.padd.d(<2 x i32> %E, <2 x i32> %F)
- %r4 = call <1 x i64> @llvm.x86.mmx.padd.q(<1 x i64> %G, <1 x i64> %H)
- %r5 = call <8 x i8> @llvm.x86.mmx.padds.b(<8 x i8> %A, <8 x i8> %B)
- %r6 = call <4 x i16> @llvm.x86.mmx.padds.w(<4 x i16> %C, <4 x i16> %D)
- %r7 = call <8 x i8> @llvm.x86.mmx.paddus.b(<8 x i8> %A, <8 x i8> %B)
- %r8 = call <4 x i16> @llvm.x86.mmx.paddus.w(<4 x i16> %C, <4 x i16> %D)
- ret void
-}
-
-; Subtraction
-declare <8 x i8> @llvm.x86.mmx.psub.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64>, <1 x i64>) nounwind readnone
-declare <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @sub(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
- <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H) {
- %r1 = call <8 x i8> @llvm.x86.mmx.psub.b(<8 x i8> %A, <8 x i8> %B)
- %r2 = call <4 x i16> @llvm.x86.mmx.psub.w(<4 x i16> %C, <4 x i16> %D)
- %r3 = call <2 x i32> @llvm.x86.mmx.psub.d(<2 x i32> %E, <2 x i32> %F)
- %r4 = call <1 x i64> @llvm.x86.mmx.psub.q(<1 x i64> %G, <1 x i64> %H)
- %r5 = call <8 x i8> @llvm.x86.mmx.psubs.b(<8 x i8> %A, <8 x i8> %B)
- %r6 = call <4 x i16> @llvm.x86.mmx.psubs.w(<4 x i16> %C, <4 x i16> %D)
- %r7 = call <8 x i8> @llvm.x86.mmx.psubus.b(<8 x i8> %A, <8 x i8> %B)
- %r8 = call <4 x i16> @llvm.x86.mmx.psubus.w(<4 x i16> %C, <4 x i16> %D)
- ret void
-}
-
-; Multiplication
-declare <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16>, <4 x i16>) nounwind readnone
-define void @mul(<4 x i16> %A, <4 x i16> %B) {
- %r1 = call <4 x i16> @llvm.x86.mmx.pmulh.w(<4 x i16> %A, <4 x i16> %B)
- %r2 = call <4 x i16> @llvm.x86.mmx.pmull.w(<4 x i16> %A, <4 x i16> %B)
- %r3 = call <4 x i16> @llvm.x86.mmx.pmulhu.w(<4 x i16> %A, <4 x i16> %B)
- %r4 = call <4 x i16> @llvm.x86.mmx.pmulu.dq(<4 x i16> %A, <4 x i16> %B)
- %r5 = call <2 x i32> @llvm.x86.mmx.pmadd.wd(<4 x i16> %A, <4 x i16> %B)
- ret void
-}
-
-; Bitwise operations
-declare <1 x i64> @llvm.x86.mmx.pand(<1 x i64>, <1 x i64>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.pandn(<1 x i64>, <1 x i64>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.por(<1 x i64>, <1 x i64>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.pxor(<1 x i64>, <1 x i64>) nounwind readnone
-define void @bit(<1 x i64> %A, <1 x i64> %B) {
- %r1 = call <1 x i64> @llvm.x86.mmx.pand(<1 x i64> %A, <1 x i64> %B)
- %r2 = call <1 x i64> @llvm.x86.mmx.pandn(<1 x i64> %A, <1 x i64> %B)
- %r3 = call <1 x i64> @llvm.x86.mmx.por(<1 x i64> %A, <1 x i64> %B)
- %r4 = call <1 x i64> @llvm.x86.mmx.pxor(<1 x i64> %A, <1 x i64> %B)
- ret void
-}
-
-; Averages
-declare <8 x i8> @llvm.x86.mmx.pavg.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @avg(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
- %r1 = call <8 x i8> @llvm.x86.mmx.pavg.b(<8 x i8> %A, <8 x i8> %B)
- %r2 = call <4 x i16> @llvm.x86.mmx.pavg.w(<4 x i16> %C, <4 x i16> %D)
- ret void
-}
-
-; Maximum
-declare <8 x i8> @llvm.x86.mmx.pmaxu.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @max(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
- %r1 = call <8 x i8> @llvm.x86.mmx.pmaxu.b(<8 x i8> %A, <8 x i8> %B)
- %r2 = call <4 x i16> @llvm.x86.mmx.pmaxs.w(<4 x i16> %C, <4 x i16> %D)
- ret void
-}
-
-; Minimum
-declare <8 x i8> @llvm.x86.mmx.pminu.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16>, <4 x i16>) nounwind readnone
-define void @min(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D) {
- %r1 = call <8 x i8> @llvm.x86.mmx.pminu.b(<8 x i8> %A, <8 x i8> %B)
- %r2 = call <4 x i16> @llvm.x86.mmx.pmins.w(<4 x i16> %C, <4 x i16> %D)
- ret void
-}
-
-; Packed sum of absolute differences
-declare <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8>, <8 x i8>) nounwind readnone
-define void @psad(<8 x i8> %A, <8 x i8> %B) {
- %r1 = call <4 x i16> @llvm.x86.mmx.psad.bw(<8 x i8> %A, <8 x i8> %B)
- ret void
-}
-
-; Shift left
-declare <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16>, <1 x i64>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32>, <1 x i64>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64>, <1 x i64>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16>, i32) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32>, i32) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64>, i32) nounwind readnone
-define void @shl(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
- %r1 = call <4 x i16> @llvm.x86.mmx.psll.w(<4 x i16> %A, <1 x i64> %C)
- %r2 = call <2 x i32> @llvm.x86.mmx.psll.d(<2 x i32> %B, <1 x i64> %C)
- %r3 = call <1 x i64> @llvm.x86.mmx.psll.q(<1 x i64> %C, <1 x i64> %C)
- %r4 = call <4 x i16> @llvm.x86.mmx.pslli.w(<4 x i16> %A, i32 %D)
- %r5 = call <2 x i32> @llvm.x86.mmx.pslli.d(<2 x i32> %B, i32 %D)
- %r6 = call <1 x i64> @llvm.x86.mmx.pslli.q(<1 x i64> %C, i32 %D)
- ret void
-}
-
-; Shift right logical
-declare <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16>, <1 x i64>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32>, <1 x i64>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64>, <1 x i64>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16>, i32) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32>, i32) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64>, i32) nounwind readnone
-define void @shr(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
- %r1 = call <4 x i16> @llvm.x86.mmx.psrl.w(<4 x i16> %A, <1 x i64> %C)
- %r2 = call <2 x i32> @llvm.x86.mmx.psrl.d(<2 x i32> %B, <1 x i64> %C)
- %r3 = call <1 x i64> @llvm.x86.mmx.psrl.q(<1 x i64> %C, <1 x i64> %C)
- %r4 = call <4 x i16> @llvm.x86.mmx.psrli.w(<4 x i16> %A, i32 %D)
- %r5 = call <2 x i32> @llvm.x86.mmx.psrli.d(<2 x i32> %B, i32 %D)
- %r6 = call <1 x i64> @llvm.x86.mmx.psrli.q(<1 x i64> %C, i32 %D)
- ret void
-}
-
-; Shift right arithmetic
-declare <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16>, <1 x i64>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32>, <1 x i64>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16>, i32) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32>, i32) nounwind readnone
-define void @sra(<4 x i16> %A, <2 x i32> %B, <1 x i64> %C, i32 %D) {
- %r1 = call <4 x i16> @llvm.x86.mmx.psra.w(<4 x i16> %A, <1 x i64> %C)
- %r2 = call <2 x i32> @llvm.x86.mmx.psra.d(<2 x i32> %B, <1 x i64> %C)
- %r3 = call <4 x i16> @llvm.x86.mmx.psrai.w(<4 x i16> %A, i32 %D)
- %r4 = call <2 x i32> @llvm.x86.mmx.psrai.d(<2 x i32> %B, i32 %D)
- ret void
-}
-
-; Pack/Unpack ops
-declare <8 x i8> @llvm.x86.mmx.packsswb(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32>, <2 x i32>) nounwind readnone
-declare <8 x i8> @llvm.x86.mmx.packuswb(<4 x i16>, <4 x i16>) nounwind readnone
-declare <8 x i8> @llvm.x86.mmx.punpckhbw(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32>, <2 x i32>) nounwind readnone
-declare <8 x i8> @llvm.x86.mmx.punpcklbw(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32>, <2 x i32>) nounwind readnone
-define void @pack_unpack(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
- <2 x i32> %E, <2 x i32> %F) {
- %r1 = call <8 x i8> @llvm.x86.mmx.packsswb(<4 x i16> %C, <4 x i16> %D)
- %r2 = call <4 x i16> @llvm.x86.mmx.packssdw(<2 x i32> %E, <2 x i32> %F)
- %r3 = call <8 x i8> @llvm.x86.mmx.packuswb(<4 x i16> %C, <4 x i16> %D)
- %r4 = call <8 x i8> @llvm.x86.mmx.punpckhbw(<8 x i8> %A, <8 x i8> %B)
- %r5 = call <4 x i16> @llvm.x86.mmx.punpckhwd(<4 x i16> %C, <4 x i16> %D)
- %r6 = call <2 x i32> @llvm.x86.mmx.punpckhdq(<2 x i32> %E, <2 x i32> %F)
- %r7 = call <8 x i8> @llvm.x86.mmx.punpcklbw(<8 x i8> %A, <8 x i8> %B)
- %r8 = call <4 x i16> @llvm.x86.mmx.punpcklwd(<4 x i16> %C, <4 x i16> %D)
- %r9 = call <2 x i32> @llvm.x86.mmx.punpckldq(<2 x i32> %E, <2 x i32> %F)
- ret void
-}
-
-; Integer comparison ops
-declare <8 x i8> @llvm.x86.mmx.pcmpeq.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32>, <2 x i32>) nounwind readnone
-declare <8 x i8> @llvm.x86.mmx.pcmpgt.b(<8 x i8>, <8 x i8>) nounwind readnone
-declare <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16>, <4 x i16>) nounwind readnone
-declare <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32>, <2 x i32>) nounwind readnone
-define void @cmp(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
- <2 x i32> %E, <2 x i32> %F) {
- %r1 = call <8 x i8> @llvm.x86.mmx.pcmpeq.b(<8 x i8> %A, <8 x i8> %B)
- %r2 = call <4 x i16> @llvm.x86.mmx.pcmpeq.w(<4 x i16> %C, <4 x i16> %D)
- %r3 = call <2 x i32> @llvm.x86.mmx.pcmpeq.d(<2 x i32> %E, <2 x i32> %F)
- %r4 = call <8 x i8> @llvm.x86.mmx.pcmpgt.b(<8 x i8> %A, <8 x i8> %B)
- %r5 = call <4 x i16> @llvm.x86.mmx.pcmpgt.w(<4 x i16> %C, <4 x i16> %D)
- %r6 = call <2 x i32> @llvm.x86.mmx.pcmpgt.d(<2 x i32> %E, <2 x i32> %F)
- ret void
-}
-
-; Miscellaneous
-declare void @llvm.x86.mmx.maskmovq(<8 x i8>, <8 x i8>, i32*) nounwind readnone
-declare i32 @llvm.x86.mmx.pmovmskb(<8 x i8>) nounwind readnone
-declare void @llvm.x86.mmx.movnt.dq(i32*, <1 x i64>) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64>, <1 x i64>, i8) nounwind readnone
-declare i32 @llvm.x86.mmx.pextr.w(<1 x i64>, i32) nounwind readnone
-declare <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64>, i32, i32) nounwind readnone
-declare <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16>, i32) nounwind readnone
-define void @misc(<8 x i8> %A, <8 x i8> %B, <4 x i16> %C, <4 x i16> %D,
- <2 x i32> %E, <2 x i32> %F, <1 x i64> %G, <1 x i64> %H,
- i32* %I, i8 %J, i16 %K, i32 %L) {
- call void @llvm.x86.mmx.maskmovq(<8 x i8> %A, <8 x i8> %B, i32* %I)
- %r1 = call i32 @llvm.x86.mmx.pmovmskb(<8 x i8> %A)
- call void @llvm.x86.mmx.movnt.dq(i32* %I, <1 x i64> %G)
- %r2 = call <1 x i64> @llvm.x86.mmx.palignr.b(<1 x i64> %G, <1 x i64> %H, i8 %J)
- %r3 = call i32 @llvm.x86.mmx.pextr.w(<1 x i64> %G, i32 37)
- %r4 = call <1 x i64> @llvm.x86.mmx.pinsr.w(<1 x i64> %G, i32 37, i32 927)
- %r5 = call <4 x i16> @llvm.x86.ssse3.pshuf.w(<4 x i16> %C, i32 37)
- ret void
-}
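The deleted file covered the upgrade of the 1.0-era MMX intrinsics, which operated on ordinary vector types (<8 x i8>, <4 x i16>, <2 x i32>, <1 x i64>), into their current declarations on the opaque x86_mmx type. For reference, the upgraded shape of one of them, padd.b, chosen arbitrarily:

; all operands and results use the opaque MMX register type
declare x86_mmx @llvm.x86.mmx.padd.b(x86_mmx, x86_mmx) nounwind readnone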
diff --git a/test/Assembler/getelementptr.ll b/test/Assembler/getelementptr.ll
index ebef58f..ce6866d 100644
--- a/test/Assembler/getelementptr.ll
+++ b/test/Assembler/getelementptr.ll
@@ -9,13 +9,13 @@
;; Verify that i16 indices work.
@x = external global {i32, i32}
-@y = global i32* getelementptr ({i32, i32}* @x, i16 42, i32 0)
-; CHECK: @y = global i32* getelementptr (%0* @x, i16 42, i32 0)
+@y = global i32* getelementptr ({ i32, i32 }* @x, i16 42, i32 0)
+; CHECK: @y = global i32* getelementptr ({ i32, i32 }* @x, i16 42, i32 0)
; see if i92 indices work too.
define i32 *@test({i32, i32}* %t, i92 %n) {
; CHECK: @test
-; CHECK: %B = getelementptr %0* %t, i92 %n, i32 0
+; CHECK: %B = getelementptr { i32, i32 }* %t, i92 %n, i32 0
%B = getelementptr {i32, i32}* %t, i92 %n, i32 0
ret i32* %B
}
diff --git a/test/Assembler/named-metadata.ll b/test/Assembler/named-metadata.ll
new file mode 100644
index 0000000..db72810
--- /dev/null
+++ b/test/Assembler/named-metadata.ll
@@ -0,0 +1,24 @@
+; RUN: llvm-as < %s | llvm-dis | FileCheck %s
+
+!0 = metadata !{metadata !"zero"}
+!1 = metadata !{metadata !"one"}
+!2 = metadata !{metadata !"two"}
+
+!foo = !{!0, !1, !2}
+; CHECK: !foo = !{!0, !1, !2}
+
+!\23pragma = !{!0, !1, !2}
+; CHECK: !\23pragma = !{!0, !1, !2}
+
+; \31 is the digit '1'. On emission, we escape the first character (to avoid
+; conflicting with anonymous metadata), but not the subsequent ones.
+!\31\31\31 = !{!0, !1, !2}
+; CHECK: !\3111 = !{!0, !1, !2}
+
+!\22name\22 = !{!0, !1, !2}
+; CHECK: !\22name\22 = !{!0, !1, !2}
+
+; \x doesn't mean anything, so we parse it literally but escape the \ into \5C
+; when emitting it, followed by xfoo.
+!\xfoo = !{!0, !1, !2}
+; CHECK: !\5Cxfoo = !{!0, !1, !2}
diff --git a/test/Assembler/private.ll b/test/Assembler/private.ll
deleted file mode 100644
index 3714572..0000000
--- a/test/Assembler/private.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Test to make sure that the 'private' tag is not lost!
-;
-; RUN: llvm-as < %s | llvm-dis | grep private
-
-declare void @foo()
-
-define private void @foo() {
- ret void
-}
diff --git a/test/Bindings/Ocaml/bitreader.ml b/test/Bindings/Ocaml/bitreader.ml
index a3bd91a..e5beccd 100644
--- a/test/Bindings/Ocaml/bitreader.ml
+++ b/test/Bindings/Ocaml/bitreader.ml
@@ -1,6 +1,6 @@
(* RUN: %ocamlopt -warn-error A llvm.cmxa llvm_bitreader.cmxa llvm_bitwriter.cmxa %s -o %t
* RUN: %t %t.bc
- * RUN: llvm-dis < %t.bc | grep caml_int_ty
+ * RUN: llvm-dis < %t.bc
* XFAIL: vg_leak
*)
@@ -15,8 +15,6 @@ let _ =
let fn = Sys.argv.(1) in
let m = Llvm.create_module context "ocaml_test_module" in
- ignore (Llvm.define_type_name "caml_int_ty" (Llvm.i32_type context) m);
-
test (Llvm_bitwriter.write_bitcode_file m fn);
Llvm.dispose_module m;
diff --git a/test/Bindings/Ocaml/bitwriter.ml b/test/Bindings/Ocaml/bitwriter.ml
index 3f55fb9..1388760 100644
--- a/test/Bindings/Ocaml/bitwriter.ml
+++ b/test/Bindings/Ocaml/bitwriter.ml
@@ -1,6 +1,6 @@
(* RUN: %ocamlopt -warn-error A unix.cmxa llvm.cmxa llvm_bitwriter.cmxa %s -o %t
* RUN: %t %t.bc
- * RUN: llvm-dis < %t.bc | grep caml_int_ty
+ * RUN: llvm-dis < %t.bc
* XFAIL: vg_leak
*)
@@ -37,8 +37,6 @@ let temp_bitcode ?unbuffered m =
let _ =
let m = Llvm.create_module context "ocaml_test_module" in
- ignore (Llvm.define_type_name "caml_int_ty" (Llvm.i32_type context) m);
-
test (Llvm_bitwriter.write_bitcode_file m Sys.argv.(1));
let file_buf = read_file Sys.argv.(1) in
diff --git a/test/Bindings/Ocaml/vmcore.ml b/test/Bindings/Ocaml/vmcore.ml
index ceb650e..bff04a1 100644
--- a/test/Bindings/Ocaml/vmcore.ml
+++ b/test/Bindings/Ocaml/vmcore.ml
@@ -80,130 +80,6 @@ let test_target () =
insist (layout = data_layout m)
end
-(*===-- Types -------------------------------------------------------------===*)
-
-let test_types () =
- (* RUN: grep {void_type.*void} < %t.ll
- *)
- group "void";
- insist (define_type_name "void_type" void_type m);
- insist (TypeKind.Void == classify_type void_type);
-
- (* RUN: grep {i1_type.*i1} < %t.ll
- *)
- group "i1";
- insist (define_type_name "i1_type" i1_type m);
- insist (TypeKind.Integer == classify_type i1_type);
-
- (* RUN: grep {i32_type.*i32} < %t.ll
- *)
- group "i32";
- insist (define_type_name "i32_type" i32_type m);
-
- (* RUN: grep {i42_type.*i42} < %t.ll
- *)
- group "i42";
- let ty = integer_type context 42 in
- insist (define_type_name "i42_type" ty m);
-
- (* RUN: grep {float_type.*float} < %t.ll
- *)
- group "float";
- insist (define_type_name "float_type" float_type m);
- insist (TypeKind.Float == classify_type float_type);
-
- (* RUN: grep {double_type.*double} < %t.ll
- *)
- group "double";
- insist (define_type_name "double_type" double_type m);
- insist (TypeKind.Double == classify_type double_type);
-
- (* RUN: grep {function_type.*i32.*i1, double} < %t.ll
- *)
- group "function";
- let ty = function_type i32_type [| i1_type; double_type |] in
- insist (define_type_name "function_type" ty m);
- insist (TypeKind.Function = classify_type ty);
- insist (not (is_var_arg ty));
- insist (i32_type == return_type ty);
- insist (double_type == (param_types ty).(1));
-
- (* RUN: grep {var_arg_type.*\.\.\.} < %t.ll
- *)
- group "var arg function";
- let ty = var_arg_function_type void_type [| i32_type |] in
- insist (define_type_name "var_arg_type" ty m);
- insist (is_var_arg ty);
-
- (* RUN: grep {array_type.*\\\[7 x i8\\\]} < %t.ll
- *)
- group "array";
- let ty = array_type i8_type 7 in
- insist (define_type_name "array_type" ty m);
- insist (7 = array_length ty);
- insist (i8_type == element_type ty);
- insist (TypeKind.Array == classify_type ty);
-
- begin group "pointer";
- (* RUN: grep {pointer_type.*float\*} < %t.ll
- *)
- let ty = pointer_type float_type in
- insist (define_type_name "pointer_type" ty m);
- insist (float_type == element_type ty);
- insist (0 == address_space ty);
- insist (TypeKind.Pointer == classify_type ty)
- end;
-
- begin group "qualified_pointer";
- (* RUN: grep {qualified_pointer_type.*i8.*3.*\*} < %t.ll
- *)
- let ty = qualified_pointer_type i8_type 3 in
- insist (define_type_name "qualified_pointer_type" ty m);
- insist (i8_type == element_type ty);
- insist (3 == address_space ty)
- end;
-
- (* RUN: grep {vector_type.*\<4 x i16\>} < %t.ll
- *)
- group "vector";
- let ty = vector_type i16_type 4 in
- insist (define_type_name "vector_type" ty m);
- insist (i16_type == element_type ty);
- insist (4 = vector_size ty);
-
- (* RUN: grep {opaque_type.*opaque} < %t.ll
- *)
- group "opaque";
- let ty = opaque_type context in
- insist (define_type_name "opaque_type" ty m);
- insist (ty == ty);
- insist (ty <> opaque_type context);
-
- (* RUN: grep -v {delete_type} < %t.ll
- *)
- group "delete";
- let ty = opaque_type context in
- insist (define_type_name "delete_type" ty m);
- delete_type_name "delete_type" m;
-
- (* RUN: grep {type_name.*opaque} < %t.ll
- *)
- group "type_name"; begin
- let ty = opaque_type context in
- insist (define_type_name "type_name" ty m);
- insist ((type_by_name m "type_name") = Some ty)
- end;
-
- (* RUN: grep -v {recursive_type.*recursive_type} < %t.ll
- *)
- group "recursive";
- let ty = opaque_type context in
- let th = handle_to_type ty in
- refine_type ty (pointer_type ty);
- let ty = type_of_handle th in
- insist (define_type_name "recursive_type" ty m);
- insist (ty == element_type ty)
-
(*===-- Constants ---------------------------------------------------------===*)
@@ -1314,7 +1190,6 @@ let test_writer () =
let _ =
suite "target" test_target;
- suite "types" test_types;
suite "constants" test_constants;
suite "global values" test_global_values;
suite "global variables" test_global_variables;
diff --git a/test/Bitcode/AutoUpgradeIntrinsics.ll b/test/Bitcode/AutoUpgradeIntrinsics.ll
deleted file mode 100644
index c3e2e9e..0000000
--- a/test/Bitcode/AutoUpgradeIntrinsics.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; This isn't really an assembly file. It just runs tests on the bitcode to ensure
-; it is auto-upgraded.
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.ct}
-; CHECK-NOT: {llvm\\.part\\.set\\.i\[0-9\]*\\.i\[0-9\]*\\.i\[0-9\]*}
-; CHECK-NOT: {llvm\\.part\\.select\\.i\[0-9\]*\\.i\[0-9\]*}
-; CHECK-NOT: {llvm\\.bswap\\.i\[0-9\]*\\.i\[0-9\]*}
-
diff --git a/test/Bitcode/AutoUpgradeIntrinsics.ll.bc b/test/Bitcode/AutoUpgradeIntrinsics.ll.bc
deleted file mode 100644
index 9de756b..0000000
--- a/test/Bitcode/AutoUpgradeIntrinsics.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/memcpy.ll b/test/Bitcode/memcpy.ll
deleted file mode 100644
index 299eb1e..0000000
--- a/test/Bitcode/memcpy.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as %s -o /dev/null
-
-define void @test(i32* %P, i32* %Q) {
-entry:
- %tmp.1 = bitcast i32* %P to i8* ; <i8*> [#uses=3]
- %tmp.3 = bitcast i32* %Q to i8* ; <i8*> [#uses=4]
- tail call void @llvm.memcpy.i32( i8* %tmp.1, i8* %tmp.3, i32 100000, i32 1 )
- tail call void @llvm.memcpy.i64( i8* %tmp.1, i8* %tmp.3, i64 100000, i32 1 )
- tail call void @llvm.memset.i32( i8* %tmp.3, i8 14, i32 10000, i32 0 )
- tail call void @llvm.memmove.i32( i8* %tmp.1, i8* %tmp.3, i32 123124, i32 1 )
- tail call void @llvm.memmove.i64( i8* %tmp.1, i8* %tmp.3, i64 123124, i32 1 )
- ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memmove.i64(i8*, i8*, i64, i32)
diff --git a/test/Bitcode/metadata-2.ll b/test/Bitcode/metadata-2.ll
index 1a59ce6..dbf46b0 100644
--- a/test/Bitcode/metadata-2.ll
+++ b/test/Bitcode/metadata-2.ll
@@ -1,7 +1,7 @@
; RUN: llvm-as < %s | llvm-dis -o /dev/null
- type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* } ; type %0
- type { i64, %object.ModuleInfo* } ; type %1
- type { i32, void ()* } ; type %2
+ %0 = type { %object.ModuleInfo.__vtbl*, i8*, %"byte[]", %1, %"ClassInfo[]", i32, void ()*, void ()*, void ()*, i8*, void ()* } ; type %0
+ %1 = type { i64, %object.ModuleInfo* } ; type %1
+ %2 = type { i32, void ()* } ; type %2
%"ClassInfo[]" = type { i64, %object.ClassInfo** }
%"Interface[]" = type { i64, %object.Interface* }
%"ModuleInfo[]" = type { i64, %object.ModuleInfo** }
diff --git a/test/Bitcode/neon-intrinsics.ll b/test/Bitcode/neon-intrinsics.ll
deleted file mode 100644
index feb2d74..0000000
--- a/test/Bitcode/neon-intrinsics.ll
+++ /dev/null
@@ -1,206 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-
-; vmovls should be auto-upgraded to sext
-
-; CHECK: vmovls8
-; CHECK-NOT: arm.neon.vmovls.v8i16
-; CHECK: sext <8 x i8>
-
-; CHECK: vmovls16
-; CHECK-NOT: arm.neon.vmovls.v4i32
-; CHECK: sext <4 x i16>
-
-; CHECK: vmovls32
-; CHECK-NOT: arm.neon.vmovls.v2i64
-; CHECK: sext <2 x i32>
-
-; vmovlu should be auto-upgraded to zext
-
-; CHECK: vmovlu8
-; CHECK-NOT: arm.neon.vmovlu.v8i16
-; CHECK: zext <8 x i8>
-
-; CHECK: vmovlu16
-; CHECK-NOT: arm.neon.vmovlu.v4i32
-; CHECK: zext <4 x i16>
-
-; CHECK: vmovlu32
-; CHECK-NOT: arm.neon.vmovlu.v2i64
-; CHECK: zext <2 x i32>
-
-; vaddl/vaddw should be auto-upgraded to add with sext/zext
-
-; CHECK: vaddls16
-; CHECK-NOT: arm.neon.vaddls.v4i32
-; CHECK: sext <4 x i16>
-; CHECK-NEXT: sext <4 x i16>
-; CHECK-NEXT: add <4 x i32>
-
-; CHECK: vaddlu32
-; CHECK-NOT: arm.neon.vaddlu.v2i64
-; CHECK: zext <2 x i32>
-; CHECK-NEXT: zext <2 x i32>
-; CHECK-NEXT: add <2 x i64>
-
-; CHECK: vaddws8
-; CHECK-NOT: arm.neon.vaddws.v8i16
-; CHECK: sext <8 x i8>
-; CHECK-NEXT: add <8 x i16>
-
-; CHECK: vaddwu16
-; CHECK-NOT: arm.neon.vaddwu.v4i32
-; CHECK: zext <4 x i16>
-; CHECK-NEXT: add <4 x i32>
-
-; vsubl/vsubw should be auto-upgraded to subtract with sext/zext
-
-; CHECK: vsubls16
-; CHECK-NOT: arm.neon.vsubls.v4i32
-; CHECK: sext <4 x i16>
-; CHECK-NEXT: sext <4 x i16>
-; CHECK-NEXT: sub <4 x i32>
-
-; CHECK: vsublu32
-; CHECK-NOT: arm.neon.vsublu.v2i64
-; CHECK: zext <2 x i32>
-; CHECK-NEXT: zext <2 x i32>
-; CHECK-NEXT: sub <2 x i64>
-
-; CHECK: vsubws8
-; CHECK-NOT: arm.neon.vsubws.v8i16
-; CHECK: sext <8 x i8>
-; CHECK-NEXT: sub <8 x i16>
-
-; CHECK: vsubwu16
-; CHECK-NOT: arm.neon.vsubwu.v4i32
-; CHECK: zext <4 x i16>
-; CHECK-NEXT: sub <4 x i32>
-
-; vmull* intrinsics will remain intrinsics
-
-; CHECK: vmulls8
-; CHECK: arm.neon.vmulls.v8i16
-
-; CHECK: vmullu16
-; CHECK: arm.neon.vmullu.v4i32
-
-; CHECK: vmullp8
-; CHECK: arm.neon.vmullp.v8i16
-
-; vmlal should be auto-upgraded to multiply/add with sext/zext
-
-; CHECK: vmlals32
-; CHECK-NOT: arm.neon.vmlals.v2i64
-; CHECK: sext <2 x i32>
-; CHECK-NEXT: sext <2 x i32>
-; CHECK-NEXT: mul <2 x i64>
-; CHECK-NEXT: add <2 x i64>
-
-; CHECK: vmlalu8
-; CHECK-NOT: arm.neon.vmlalu.v8i16
-; CHECK: zext <8 x i8>
-; CHECK-NEXT: zext <8 x i8>
-; CHECK-NEXT: mul <8 x i16>
-; CHECK-NEXT: add <8 x i16>
-
-; vmlsl should be auto-upgraded to multiply/sub with sext/zext
-
-; CHECK: vmlsls16
-; CHECK-NOT: arm.neon.vmlsls.v4i32
-; CHECK: sext <4 x i16>
-; CHECK-NEXT: sext <4 x i16>
-; CHECK-NEXT: mul <4 x i32>
-; CHECK-NEXT: sub <4 x i32>
-
-; CHECK: vmlslu32
-; CHECK-NOT: arm.neon.vmlslu.v2i64
-; CHECK: zext <2 x i32>
-; CHECK-NEXT: zext <2 x i32>
-; CHECK-NEXT: mul <2 x i64>
-; CHECK-NEXT: sub <2 x i64>
-
-; vaba should be auto-upgraded to vabd + add
-
-; CHECK: vabas32
-; CHECK-NOT: arm.neon.vabas.v2i32
-; CHECK: arm.neon.vabds.v2i32
-; CHECK-NEXT: add <2 x i32>
-
-; CHECK: vabaQu8
-; CHECK-NOT: arm.neon.vabau.v16i8
-; CHECK: arm.neon.vabdu.v16i8
-; CHECK-NEXT: add <16 x i8>
-
-; vabal should be auto-upgraded to vabd with zext + add
-
-; CHECK: vabals16
-; CHECK-NOT: arm.neon.vabals.v4i32
-; CHECK: arm.neon.vabds.v4i16
-; CHECK-NEXT: zext <4 x i16>
-; CHECK-NEXT: add <4 x i32>
-
-; CHECK: vabalu32
-; CHECK-NOT: arm.neon.vabalu.v2i64
-; CHECK: arm.neon.vabdu.v2i32
-; CHECK-NEXT: zext <2 x i32>
-; CHECK-NEXT: add <2 x i64>
-
-; vabdl should be auto-upgraded to vabd with zext
-
-; CHECK: vabdls8
-; CHECK-NOT: arm.neon.vabdls.v8i16
-; CHECK: arm.neon.vabds.v8i8
-; CHECK-NEXT: zext <8 x i8>
-
-; CHECK: vabdlu16
-; CHECK-NOT: arm.neon.vabdlu.v4i32
-; CHECK: arm.neon.vabdu.v4i16
-; CHECK-NEXT: zext <4 x i16>
-
-; vmovn should be auto-upgraded to trunc
-
-; CHECK: vmovni16
-; CHECK-NOT: arm.neon.vmovn.v8i8
-; CHECK: trunc <8 x i16>
-
-; CHECK: vmovni32
-; CHECK-NOT: arm.neon.vmovn.v4i16
-; CHECK: trunc <4 x i32>
-
-; CHECK: vmovni64
-; CHECK-NOT: arm.neon.vmovn.v2i32
-; CHECK: trunc <2 x i64>
-
-; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1)
-
-; CHECK: vld1i8
-; CHECK: i32 1
-; CHECK: vld2Qi16
-; CHECK: i32 1
-; CHECK: vld3i32
-; CHECK: i32 1
-; CHECK: vld4Qf
-; CHECK: i32 1
-
-; CHECK: vst1i8
-; CHECK: i32 1
-; CHECK: vst2Qi16
-; CHECK: i32 1
-; CHECK: vst3i32
-; CHECK: i32 1
-; CHECK: vst4Qf
-; CHECK: i32 1
-
-; CHECK: vld2laneQi16
-; CHECK: i32 1
-; CHECK: vld3lanei32
-; CHECK: i32 1
-; CHECK: vld4laneQf
-; CHECK: i32 1
-
-; CHECK: vst2laneQi16
-; CHECK: i32 1
-; CHECK: vst3lanei32
-; CHECK: i32 1
-; CHECK: vst4laneQf
-; CHECK: i32 1
diff --git a/test/Bitcode/neon-intrinsics.ll.bc b/test/Bitcode/neon-intrinsics.ll.bc
deleted file mode 100644
index cabc3c9..0000000
--- a/test/Bitcode/neon-intrinsics.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/sse2_loadl_pd.ll b/test/Bitcode/sse2_loadl_pd.ll
deleted file mode 100644
index 6cb0da5..0000000
--- a/test/Bitcode/sse2_loadl_pd.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.loadl.pd}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_loadl_pd.ll.bc b/test/Bitcode/sse2_loadl_pd.ll.bc
deleted file mode 100644
index 402cbe1..0000000
--- a/test/Bitcode/sse2_loadl_pd.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/sse2_movl_dq.ll b/test/Bitcode/sse2_movl_dq.ll
deleted file mode 100644
index 2fc0149..0000000
--- a/test/Bitcode/sse2_movl_dq.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.movl.dq}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_movl_dq.ll.bc b/test/Bitcode/sse2_movl_dq.ll.bc
deleted file mode 100644
index 74d1826..0000000
--- a/test/Bitcode/sse2_movl_dq.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/sse2_movs_d.ll b/test/Bitcode/sse2_movs_d.ll
deleted file mode 100644
index ab82c43..0000000
--- a/test/Bitcode/sse2_movs_d.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.movs.d}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_movs_d.ll.bc b/test/Bitcode/sse2_movs_d.ll.bc
deleted file mode 100644
index 719d529..0000000
--- a/test/Bitcode/sse2_movs_d.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/sse2_punpck_qdq.ll b/test/Bitcode/sse2_punpck_qdq.ll
deleted file mode 100644
index 4c68af5..0000000
--- a/test/Bitcode/sse2_punpck_qdq.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.punpckh.qdq}
-; CHECK-NOT: {i32 @llvm\\.punpckl.qdq}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_punpck_qdq.ll.bc b/test/Bitcode/sse2_punpck_qdq.ll.bc
deleted file mode 100644
index 7c1b7ed..0000000
--- a/test/Bitcode/sse2_punpck_qdq.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/sse2_shuf_pd.ll b/test/Bitcode/sse2_shuf_pd.ll
deleted file mode 100644
index 1ba6a1d..0000000
--- a/test/Bitcode/sse2_shuf_pd.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.shuf.pd}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_shuf_pd.ll.bc b/test/Bitcode/sse2_shuf_pd.ll.bc
deleted file mode 100644
index 832c39e..0000000
--- a/test/Bitcode/sse2_shuf_pd.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/sse2_unpck_pd.ll b/test/Bitcode/sse2_unpck_pd.ll
deleted file mode 100644
index 99b61b6..0000000
--- a/test/Bitcode/sse2_unpck_pd.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.unpckh.pd}
-; CHECK-NOT: {i32 @llvm\\.unpckl.pd}
-; CHECK: shufflevector
diff --git a/test/Bitcode/sse2_unpck_pd.ll.bc b/test/Bitcode/sse2_unpck_pd.ll.bc
deleted file mode 100644
index 4fb829c..0000000
--- a/test/Bitcode/sse2_unpck_pd.ll.bc
+++ /dev/null
Binary files differ
diff --git a/test/Bitcode/sse41_pmulld.ll b/test/Bitcode/sse41_pmulld.ll
deleted file mode 100644
index 752786d..0000000
--- a/test/Bitcode/sse41_pmulld.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llvm-dis < %s.bc | FileCheck %s
-; CHECK-NOT: {i32 @llvm\\.pmulld}
-; CHECK: mul
diff --git a/test/Bitcode/sse41_pmulld.ll.bc b/test/Bitcode/sse41_pmulld.ll.bc
deleted file mode 100644
index bd66f0a..0000000
--- a/test/Bitcode/sse41_pmulld.ll.bc
+++ /dev/null
Binary files differ
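The test/Bitcode deletions above all follow one pattern: a checked-in .ll.bc file produced by an older LLVM release, plus CHECK lines verifying that llvm-dis auto-upgrades its contents on the way back to textual IR. The NEON test checked that the load/store-lane intrinsics gain an explicit alignment operand (the repeated "i32 1" lines); the SSE2/SSE4.1 tests checked that retired intrinsics are rewritten into plain IR (shufflevector, or mul for pmulld). A minimal sketch of the rewrite the pmulld test guarded (the value names are hypothetical, not taken from the deleted bitcode):

; Old bitcode contained a call like this; pmulld is an element-wise
; 32-bit multiply, so the intrinsic carries no extra semantics:
;   %r = call <4 x i32> @llvm.x86.sse41.pmulld(<4 x i32> %a, <4 x i32> %b)
; After auto-upgrade, llvm-dis was expected to print a generic multiply,
; which is what the "CHECK: mul" line matched:
%r = mul <4 x i32> %a, %b
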
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 993b6e2..9fc76a9 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -81,6 +81,12 @@ if(PYTHONINTERP_FOUND)
set(ENABLE_SHARED ${LLVM_SHARED_LIBS_ENABLED})
set(SHLIBPATH_VAR ${SHLIBPATH_VAR})
+ if(LLVM_ENABLE_ASSERTIONS AND NOT MSVC_IDE)
+ set(ENABLE_ASSERTIONS "1")
+ else()
+ set(ENABLE_ASSERTIONS "0")
+ endif()
+
configure_file(
${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in
${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
diff --git a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll b/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
deleted file mode 100644
index 76fa364..0000000
--- a/test/CodeGen/ARM/2007-03-26-RegScavengerAssert.ll
+++ /dev/null
@@ -1,947 +0,0 @@
-; RUN: llc < %s -march=arm
-; PR1266
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-unknown-linux-gnueabi"
- %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32 }
- %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] }
- %struct.VEC_edge = type { i32, i32, [1 x %struct.edge_def*] }
- %struct.VEC_tree = type { i32, i32, [1 x %struct.tree_node*] }
- %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
- %struct._obstack_chunk = type { i8*, %struct._obstack_chunk*, [4 x i8] }
- %struct.addr_diff_vec_flags = type { i8, i8, i8, i8 }
- %struct.arm_stack_offsets = type { i32, i32, i32, i32, i32 }
- %struct.attribute_spec = type { i8*, i32, i32, i8, i8, i8, %struct.tree_node* (%struct.tree_node**, %struct.tree_node*, %struct.tree_node*, i32, i8*)* }
- %struct.basic_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.VEC_edge*, %struct.VEC_edge*, %struct.bitmap_head_def*, %struct.bitmap_head_def*, i8*, %struct.loop*, [2 x %struct.et_node*], %struct.basic_block_def*, %struct.basic_block_def*, %struct.reorder_block_def*, %struct.bb_ann_d*, i64, i32, i32, i32, i32 }
- %struct.bb_ann_d = type { %struct.tree_node*, i8, %struct.edge_prediction* }
- %struct.bitmap_element_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, [4 x i32] }
- %struct.bitmap_head_def = type { %struct.bitmap_element_def*, %struct.bitmap_element_def*, i32, %struct.bitmap_obstack* }
- %struct.bitmap_obstack = type { %struct.bitmap_element_def*, %struct.bitmap_head_def*, %struct.obstack }
- %struct.cgraph_edge = type { %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_edge*, %struct.cgraph_edge*, %struct.cgraph_edge*, %struct.cgraph_edge*, %struct.tree_node*, i8*, i8* }
- %struct.cgraph_global_info = type { %struct.cgraph_node*, i32, i8 }
- %struct.cgraph_local_info = type { i32, i8, i8, i8, i8, i8, i8, i8 }
- %struct.cgraph_node = type { %struct.tree_node*, %struct.cgraph_edge*, %struct.cgraph_edge*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, %struct.cgraph_node*, i8*, %struct.cgraph_local_info, %struct.cgraph_global_info, %struct.cgraph_rtl_info, i32, i8, i8, i8, i8, i8 }
- %struct.cgraph_rtl_info = type { i32, i8, i8 }
- %struct.cl_perfunc_opts = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
- %struct.cselib_val_struct = type opaque
- %struct.dataflow_d = type { %struct.varray_head_tag*, [2 x %struct.tree_node*] }
- %struct.def_operand_ptr = type { %struct.tree_node** }
- %struct.def_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
- %struct.diagnostic_context = type { %struct.pretty_printer*, [8 x i32], i8, i8, i8, void (%struct.diagnostic_context*, %struct.diagnostic_info*)*, void (%struct.diagnostic_context*, %struct.diagnostic_info*)*, void (i8*, i8**)*, %struct.tree_node*, i32, i32 }
- %struct.diagnostic_info = type { %struct.text_info, %struct.location_t, i32 }
- %struct.die_struct = type opaque
- %struct.edge_def = type { %struct.basic_block_def*, %struct.basic_block_def*, %struct.edge_def_insns, i8*, %struct.location_t*, i32, i32, i64, i32 }
- %struct.edge_def_insns = type { %struct.rtx_def* }
- %struct.edge_prediction = type { %struct.edge_prediction*, %struct.edge_def*, i32, i32 }
- %struct.eh_status = type opaque
- %struct.elt_list = type opaque
- %struct.elt_t = type { %struct.tree_node*, %struct.tree_node* }
- %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack*, i32, %struct.location_t, i32, i8*, %struct.rtx_def** }
- %struct.et_node = type opaque
- %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
- %struct.function = type { %struct.eh_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, %struct.initial_value_struct*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i8, i32, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.varray_head_tag*, %struct.temp_slot*, i32, %struct.var_refs_queue*, i32, i32, %struct.rtvec_def*, %struct.tree_node*, i32, i32, i32, %struct.machine_function*, i32, i32, i8, i8, %struct.language_function*, %struct.rtx_def*, i32, i32, i32, i32, %struct.location_t, %struct.varray_head_tag*, %struct.tree_node*, i8, i8, i8 }
- %struct.ggc_root_tab = type { i8*, i32, i32, void (i8*)*, void (i8*)* }
- %struct.gimplify_ctx = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.varray_head_tag*, %struct.htab*, i32, i8, i8 }
- %struct.gimplify_init_ctor_preeval_data = type { %struct.tree_node*, i32 }
- %struct.ht_identifier = type { i8*, i32, i32 }
- %struct.htab = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i32, i32, i32, i32, i32, i8* (i32, i32)*, void (i8*)*, i8*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i32 }
- %struct.initial_value_struct = type opaque
- %struct.lang_decl = type opaque
- %struct.lang_hooks = type { i8*, i32, i32 (i32)*, i32 (i32, i8**)*, void (%struct.diagnostic_context*)*, i32 (i32, i8*, i32)*, i8 (i8*, i32) zeroext *, i8 (i8**) zeroext *, i8 () zeroext *, void ()*, void ()*, void (i32)*, void ()*, i64 (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, %struct.rtx_def* (%struct.tree_node*, %struct.rtx_def*, i32, i32, %struct.rtx_def**)*, i32 (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, i32 (%struct.rtx_def*, %struct.tree_node*)*, void (%struct.tree_node*)*, i8 (%struct.tree_node*) zeroext *, %struct.tree_node* (%struct.tree_node*)*, void (%struct.tree_node*)*, void (%struct.tree_node*)*, i8 () zeroext *, i8, i8, void ()*, void (%struct.FILE*, %struct.tree_node*, i32)*, void (%struct.FILE*, %struct.tree_node*, i32)*, void (%struct.FILE*, %struct.tree_node*, i32)*, void (%struct.FILE*, %struct.tree_node*, i32)*, i8* (%struct.tree_node*, i32)*, i32 (%struct.tree_node*, %struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, void (%struct.diagnostic_context*, i8*)*, %struct.tree_node* (%struct.tree_node*)*, i64 (i64)*, %struct.attribute_spec*, %struct.attribute_spec*, %struct.attribute_spec*, i32 (%struct.tree_node*)*, %struct.lang_hooks_for_functions, %struct.lang_hooks_for_tree_inlining, %struct.lang_hooks_for_callgraph, %struct.lang_hooks_for_tree_dump, %struct.lang_hooks_for_decls, %struct.lang_hooks_for_types, i32 (%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)*, %struct.tree_node* (%struct.tree_node*, %struct.tree_node*)*, %struct.tree_node* (i8*, %struct.tree_node*, i32, i32, i8*, %struct.tree_node*)* }
- %struct.lang_hooks_for_callgraph = type { %struct.tree_node* (%struct.tree_node**, i32*, %struct.tree_node*)*, void (%struct.tree_node*)* }
- %struct.lang_hooks_for_decls = type { i32 ()*, void (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, %struct.tree_node* ()*, i8 (%struct.tree_node*) zeroext *, void ()*, void (%struct.tree_node*)*, i8 (%struct.tree_node*) zeroext *, i8* (%struct.tree_node*)* }
- %struct.lang_hooks_for_functions = type { void (%struct.function*)*, void (%struct.function*)*, void (%struct.function*)*, void (%struct.function*)*, i8 (%struct.tree_node*) zeroext * }
- %struct.lang_hooks_for_tree_dump = type { i8 (i8*, %struct.tree_node*) zeroext *, i32 (%struct.tree_node*)* }
- %struct.lang_hooks_for_tree_inlining = type { %struct.tree_node* (%struct.tree_node**, i32*, %struct.tree_node* (%struct.tree_node**, i32*, i8*)*, i8*, %struct.pointer_set_t*)*, i32 (%struct.tree_node**)*, i32 (%struct.tree_node*)*, %struct.tree_node* (i8*, %struct.tree_node*)*, i32 (%struct.tree_node*, %struct.tree_node*)*, i32 (%struct.tree_node*)*, i8 (%struct.tree_node*, %struct.tree_node*) zeroext *, i32 (%struct.tree_node*)*, void (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32)* }
- %struct.lang_hooks_for_types = type { %struct.tree_node* (i32)*, %struct.tree_node* (i32, i32)*, %struct.tree_node* (i32, i32)*, %struct.tree_node* (%struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, %struct.tree_node* (i32, %struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, void (%struct.tree_node*, i8*)*, void (%struct.tree_node*, %struct.tree_node*)*, %struct.tree_node* (%struct.tree_node*)*, i8 }
- %struct.lang_type = type opaque
- %struct.language_function = type opaque
- %struct.location_t = type { i8*, i32 }
- %struct.loop = type opaque
- %struct.machine_function = type { %struct.rtx_def*, i32, i32, i32, %struct.arm_stack_offsets, i32, i32, i32, [14 x %struct.rtx_def*] }
- %struct.mem_attrs = type { i64, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32 }
- %struct.obstack = type { i32, %struct._obstack_chunk*, i8*, i8*, i8*, i32, i32, %struct._obstack_chunk* (i8*, i32)*, void (i8*, %struct._obstack_chunk*)*, i8*, i8 }
- %struct.output_buffer = type { %struct.obstack, %struct.FILE*, i32, [128 x i8] }
- %struct.phi_arg_d = type { %struct.tree_node*, i8 }
- %struct.pointer_set_t = type opaque
- %struct.pretty_printer = type { %struct.output_buffer*, i8*, i32, i32, i32, i32, i32, i8 (%struct.pretty_printer*, %struct.text_info*) zeroext *, i8, i8 }
- %struct.ptr_info_def = type { i8, %struct.bitmap_head_def*, %struct.tree_node* }
- %struct.real_value = type { i8, [3 x i8], [5 x i32] }
- %struct.reg_attrs = type { %struct.tree_node*, i64 }
- %struct.reg_info_def = type opaque
- %struct.reorder_block_def = type { %struct.rtx_def*, %struct.rtx_def*, %struct.basic_block_def*, %struct.basic_block_def*, %struct.basic_block_def*, i32, i32, i32 }
- %struct.rtunion = type { i32 }
- %struct.rtvec_def = type { i32, [1 x %struct.rtx_def*] }
- %struct.rtx_def = type { i16, i8, i8, %struct.u }
- %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.sequence_stack* }
- %struct.stmt_ann_d = type { %struct.tree_ann_common_d, i8, %struct.basic_block_def*, %struct.stmt_operands_d, %struct.dataflow_d*, %struct.bitmap_head_def*, i32 }
- %struct.stmt_operands_d = type { %struct.def_optype_d*, %struct.def_optype_d*, %struct.v_may_def_optype_d*, %struct.vuse_optype_d*, %struct.v_may_def_optype_d* }
- %struct.temp_slot = type opaque
- %struct.text_info = type { i8*, i8**, i32 }
- %struct.tree_ann_common_d = type { i32, i8*, %struct.tree_node* }
- %struct.tree_ann_d = type { %struct.stmt_ann_d }
- %struct.tree_binfo = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.VEC_tree }
- %struct.tree_block = type { %struct.tree_common, i8, [3 x i8], %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node* }
- %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, %struct.tree_ann_d*, i8, i8, i8, i8, i8 }
- %struct.tree_complex = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* }
- %struct.tree_decl = type { %struct.tree_common, %struct.location_t, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, i8, i8, i32, %struct.tree_decl_u1, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.tree_decl_u2, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
- %struct.tree_decl_u1 = type { i64 }
- %struct.tree_decl_u1_a = type { i32 }
- %struct.tree_decl_u2 = type { %struct.function* }
- %struct.tree_exp = type { %struct.tree_common, %struct.location_t*, i32, %struct.tree_node*, [1 x %struct.tree_node*] }
- %struct.tree_identifier = type { %struct.tree_common, %struct.ht_identifier }
- %struct.tree_int_cst = type { %struct.tree_common, %struct.tree_int_cst_lowhi }
- %struct.tree_int_cst_lowhi = type { i64, i64 }
- %struct.tree_list = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node* }
- %struct.tree_node = type { %struct.tree_decl }
- %struct.tree_phi_node = type { %struct.tree_common, %struct.tree_node*, i32, i32, i32, %struct.basic_block_def*, %struct.dataflow_d*, [1 x %struct.phi_arg_d] }
- %struct.tree_real_cst = type { %struct.tree_common, %struct.real_value* }
- %struct.tree_ssa_name = type { %struct.tree_common, %struct.tree_node*, i32, %struct.ptr_info_def*, %struct.tree_node*, i8* }
- %struct.tree_statement_list = type { %struct.tree_common, %struct.tree_statement_list_node*, %struct.tree_statement_list_node* }
- %struct.tree_statement_list_node = type { %struct.tree_statement_list_node*, %struct.tree_statement_list_node*, %struct.tree_node* }
- %struct.tree_stmt_iterator = type { %struct.tree_statement_list_node*, %struct.tree_node* }
- %struct.tree_string = type { %struct.tree_common, i32, [1 x i8] }
- %struct.tree_type = type { %struct.tree_common, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i32, i16, i8, i8, i32, %struct.tree_node*, %struct.tree_node*, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_type* }
- %struct.tree_type_symtab = type { i32 }
- %struct.tree_value_handle = type { %struct.tree_common, %struct.value_set*, i32 }
- %struct.tree_vec = type { %struct.tree_common, i32, [1 x %struct.tree_node*] }
- %struct.tree_vector = type { %struct.tree_common, %struct.tree_node* }
- %struct.u = type { [1 x i64] }
- %struct.use_operand_ptr = type { %struct.tree_node** }
- %struct.use_optype_d = type { i32, [1 x %struct.def_operand_ptr] }
- %struct.v_def_use_operand_type_t = type { %struct.tree_node*, %struct.tree_node* }
- %struct.v_may_def_optype_d = type { i32, [1 x %struct.elt_t] }
- %struct.v_must_def_optype_d = type { i32, [1 x %struct.elt_t] }
- %struct.value_set = type opaque
- %struct.var_ann_d = type { %struct.tree_ann_common_d, i8, i8, %struct.tree_node*, %struct.varray_head_tag*, i32, i32, i32, %struct.tree_node*, %struct.tree_node* }
- %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
- %struct.varasm_status = type opaque
- %struct.varray_data = type { [1 x i64] }
- %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.u }
- %struct.vuse_optype_d = type { i32, [1 x %struct.tree_node*] }
-@gt_pch_rs_gt_gimplify_h = external global [2 x %struct.ggc_root_tab] ; <[2 x %struct.ggc_root_tab]*> [#uses=0]
-@tmp_var_id_num = external global i32 ; <i32*> [#uses=0]
-@gt_ggc_r_gt_gimplify_h = external global [1 x %struct.ggc_root_tab] ; <[1 x %struct.ggc_root_tab]*> [#uses=0]
-@__FUNCTION__.19956 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
-@str = external global [42 x i8] ; <[42 x i8]*> [#uses=1]
-@__FUNCTION__.19974 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
-@gimplify_ctxp = external global %struct.gimplify_ctx* ; <%struct.gimplify_ctx**> [#uses=0]
-@cl_pf_opts = external global %struct.cl_perfunc_opts ; <%struct.cl_perfunc_opts*> [#uses=0]
-@__FUNCTION__.20030 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
-@__FUNCTION__.20099 = external global [24 x i8] ; <[24 x i8]*> [#uses=0]
-@global_trees = external global [47 x %struct.tree_node*] ; <[47 x %struct.tree_node*]*> [#uses=0]
-@tree_code_type = external global [0 x i32] ; <[0 x i32]*> [#uses=2]
-@current_function_decl = external global %struct.tree_node* ; <%struct.tree_node**> [#uses=0]
-@str1 = external global [2 x i8] ; <[2 x i8]*> [#uses=0]
-@str2 = external global [7 x i8] ; <[7 x i8]*> [#uses=0]
-@__FUNCTION__.20151 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@__FUNCTION__.20221 = external global [9 x i8] ; <[9 x i8]*> [#uses=0]
-@tree_code_length = external global [0 x i8] ; <[0 x i8]*> [#uses=0]
-@__FUNCTION__.20435 = external global [17 x i8] ; <[17 x i8]*> [#uses=0]
-@__FUNCTION__.20496 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@cfun = external global %struct.function* ; <%struct.function**> [#uses=0]
-@__FUNCTION__.20194 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
-@__FUNCTION__.19987 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.20532 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.20583 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@__FUNCTION__.20606 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
-@__FUNCTION__.20644 = external global [17 x i8] ; <[17 x i8]*> [#uses=0]
-@__FUNCTION__.20681 = external global [13 x i8] ; <[13 x i8]*> [#uses=0]
-@__FUNCTION__.20700 = external global [13 x i8] ; <[13 x i8]*> [#uses=0]
-@__FUNCTION__.21426 = external global [20 x i8] ; <[20 x i8]*> [#uses=0]
-@__FUNCTION__.21471 = external global [17 x i8] ; <[17 x i8]*> [#uses=0]
-@__FUNCTION__.21962 = external global [27 x i8] ; <[27 x i8]*> [#uses=0]
-@__FUNCTION__.22992 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.23735 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
-@lang_hooks = external global %struct.lang_hooks ; <%struct.lang_hooks*> [#uses=0]
-@__FUNCTION__.27383 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
-@__FUNCTION__.20776 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.10672 = external global [9 x i8] ; <[9 x i8]*> [#uses=0]
-@str3 = external global [47 x i8] ; <[47 x i8]*> [#uses=0]
-@str4 = external global [7 x i8] ; <[7 x i8]*> [#uses=0]
-@__FUNCTION__.20065 = external global [25 x i8] ; <[25 x i8]*> [#uses=0]
-@__FUNCTION__.23256 = external global [16 x i8] ; <[16 x i8]*> [#uses=0]
-@__FUNCTION__.23393 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@__FUNCTION__.20043 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.20729 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
-@__FUNCTION__.20563 = external global [24 x i8] ; <[24 x i8]*> [#uses=0]
-@__FUNCTION__.10663 = external global [10 x i8] ; <[10 x i8]*> [#uses=0]
-@__FUNCTION__.20367 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.20342 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
-@input_location = external global %struct.location_t ; <%struct.location_t*> [#uses=0]
-@__FUNCTION__.24510 = external global [27 x i8] ; <[27 x i8]*> [#uses=0]
-@__FUNCTION__.25097 = external global [25 x i8] ; <[25 x i8]*> [#uses=0]
-@__FUNCTION__.24705 = external global [26 x i8] ; <[26 x i8]*> [#uses=0]
-@str5 = external global [2 x i8] ; <[2 x i8]*> [#uses=0]
-@__FUNCTION__.25136 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.24450 = external global [31 x i8] ; <[31 x i8]*> [#uses=0]
-@implicit_built_in_decls = external global [471 x %struct.tree_node*] ; <[471 x %struct.tree_node*]*> [#uses=0]
-@__FUNCTION__.24398 = external global [31 x i8] ; <[31 x i8]*> [#uses=0]
-@__FUNCTION__.26156 = external global [14 x i8] ; <[14 x i8]*> [#uses=1]
-@unknown_location = external global %struct.location_t ; <%struct.location_t*> [#uses=0]
-@__FUNCTION__.23038 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@str6 = external global [43 x i8] ; <[43 x i8]*> [#uses=0]
-@__FUNCTION__.25476 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@__FUNCTION__.22136 = external global [20 x i8] ; <[20 x i8]*> [#uses=1]
-@__FUNCTION__.21997 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
-@__FUNCTION__.21247 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@built_in_decls = external global [471 x %struct.tree_node*] ; <[471 x %struct.tree_node*]*> [#uses=0]
-@__FUNCTION__.21924 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@__FUNCTION__.21861 = external global [25 x i8] ; <[25 x i8]*> [#uses=0]
-@global_dc = external global %struct.diagnostic_context* ; <%struct.diagnostic_context**> [#uses=0]
-@__FUNCTION__.25246 = external global [32 x i8] ; <[32 x i8]*> [#uses=0]
-@str7 = external global [4 x i8] ; <[4 x i8]*> [#uses=0]
-@stderr = external global %struct.FILE* ; <%struct.FILE**> [#uses=0]
-@str8 = external global [24 x i8] ; <[24 x i8]*> [#uses=0]
-@str9 = external global [22 x i8] ; <[22 x i8]*> [#uses=0]
-@__FUNCTION__.27653 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.27322 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.27139 = external global [20 x i8] ; <[20 x i8]*> [#uses=0]
-@__FUNCTION__.22462 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
-@str10 = external global [6 x i8] ; <[6 x i8]*> [#uses=0]
-@__FUNCTION__.25389 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@__FUNCTION__.25650 = external global [18 x i8] ; <[18 x i8]*> [#uses=0]
-@str11 = external global [32 x i8] ; <[32 x i8]*> [#uses=0]
-@str12 = external global [3 x i8] ; <[3 x i8]*> [#uses=0]
-@str13 = external global [44 x i8] ; <[44 x i8]*> [#uses=0]
-@__FUNCTION__.27444 = external global [14 x i8] ; <[14 x i8]*> [#uses=0]
-@timevar_enable = external global i8 ; <i8*> [#uses=0]
-@__FUNCTION__.27533 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
-@flag_instrument_function_entry_exit = external global i32 ; <i32*> [#uses=0]
-@__FUNCTION__.25331 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
-@__FUNCTION__.20965 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@str14 = external global [12 x i8] ; <[12 x i8]*> [#uses=0]
-@__FUNCTION__.26053 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@__FUNCTION__.26004 = external global [20 x i8] ; <[20 x i8]*> [#uses=0]
-@str15 = external global [8 x i8] ; <[8 x i8]*> [#uses=0]
-@__FUNCTION__.21584 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-@str16 = external global [12 x i8] ; <[12 x i8]*> [#uses=0]
-@__FUNCTION__.25903 = external global [28 x i8] ; <[28 x i8]*> [#uses=0]
-@__FUNCTION__.22930 = external global [23 x i8] ; <[23 x i8]*> [#uses=0]
-@__FUNCTION__.23832 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@str17 = external global [6 x i8] ; <[6 x i8]*> [#uses=0]
-@__FUNCTION__.24620 = external global [24 x i8] ; <[24 x i8]*> [#uses=0]
-@__FUNCTION__.24582 = external global [30 x i8] ; <[30 x i8]*> [#uses=0]
-@__FUNCTION__.21382 = external global [19 x i8] ; <[19 x i8]*> [#uses=0]
-@__FUNCTION__.21117 = external global [21 x i8] ; <[21 x i8]*> [#uses=0]
-
-
-declare void @push_gimplify_context()
-
-declare i32 @gimple_tree_hash(i8*)
-
-declare i32 @iterative_hash_expr(%struct.tree_node*, i32)
-
-declare i32 @gimple_tree_eq(i8*, i8*)
-
-declare i32 @operand_equal_p(%struct.tree_node*, %struct.tree_node*, i32)
-
-declare void @fancy_abort(i8*, i32, i8*)
-
-declare i8* @xcalloc(i32, i32)
-
-declare %struct.htab* @htab_create(i32, i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*)
-
-declare void @free(i8*)
-
-declare void @gimple_push_bind_expr(%struct.tree_node*)
-
-declare void @gimple_pop_bind_expr()
-
-declare %struct.tree_node* @gimple_current_bind_expr()
-
-declare fastcc void @gimple_push_condition()
-
-declare %struct.tree_node* @create_artificial_label()
-
-declare %struct.tree_node* @build_decl_stat(i32, %struct.tree_node*, %struct.tree_node*)
-
-declare void @tree_class_check_failed(%struct.tree_node*, i32, i8*, i32, i8*)
-
-declare %struct.tree_node* @create_tmp_var_name(i8*)
-
-declare i32 @strlen(i8*)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare i32 @sprintf(i8*, i8*, ...)
-
-declare %struct.tree_node* @get_identifier(i8*)
-
-declare %struct.tree_node* @create_tmp_var_raw(%struct.tree_node*, i8*)
-
-declare %struct.tree_node* @build_qualified_type(%struct.tree_node*, i32)
-
-declare i8* @get_name(%struct.tree_node*)
-
-declare void @tree_operand_check_failed(i32, i32, i8*, i32, i8*)
-
-declare void @tree_check_failed(%struct.tree_node*, i8*, i32, i8*, ...)
-
-declare void @declare_tmp_vars(%struct.tree_node*, %struct.tree_node*)
-
-declare %struct.tree_node* @nreverse(%struct.tree_node*)
-
-declare void @gimple_add_tmp_var(%struct.tree_node*)
-
-declare void @record_vars(%struct.tree_node*)
-
-declare %struct.tree_node* @create_tmp_var(%struct.tree_node*, i8*)
-
-declare void @pop_gimplify_context(%struct.tree_node*)
-
-declare void @htab_delete(%struct.htab*)
-
-declare fastcc void @annotate_one_with_locus(%struct.tree_node*, i32, i32)
-
-declare void @annotate_with_locus(%struct.tree_node*, i32, i32)
-
-declare %struct.tree_node* @mostly_copy_tree_r(%struct.tree_node**, i32*, i8*)
-
-declare %struct.tree_node* @copy_tree_r(%struct.tree_node**, i32*, i8*)
-
-declare %struct.tree_node* @mark_decls_volatile_r(%struct.tree_node**, i32*, i8*)
-
-declare %struct.tree_node* @copy_if_shared_r(%struct.tree_node**, i32*, i8*)
-
-declare %struct.tree_node* @walk_tree(%struct.tree_node**, %struct.tree_node* (%struct.tree_node**, i32*, i8*)*, i8*, %struct.pointer_set_t*)
-
-declare %struct.tree_node* @unmark_visited_r(%struct.tree_node**, i32*, i8*)
-
-declare fastcc void @unshare_body(%struct.tree_node**, %struct.tree_node*)
-
-declare %struct.cgraph_node* @cgraph_node(%struct.tree_node*)
-
-declare fastcc void @unvisit_body(%struct.tree_node**, %struct.tree_node*)
-
-declare void @unshare_all_trees(%struct.tree_node*)
-
-declare %struct.tree_node* @unshare_expr(%struct.tree_node*)
-
-declare %struct.tree_node* @build_and_jump(%struct.tree_node**)
-
-declare %struct.tree_node* @build1_stat(i32, %struct.tree_node*, %struct.tree_node*)
-
-declare i32 @compare_case_labels(i8*, i8*)
-
-declare i32 @tree_int_cst_compare(%struct.tree_node*, %struct.tree_node*)
-
-declare void @sort_case_labels(%struct.tree_node*)
-
-declare void @tree_vec_elt_check_failed(i32, i32, i8*, i32, i8*)
-
-declare void @qsort(i8*, i32, i32, i32 (i8*, i8*)*)
-
-declare %struct.tree_node* @force_labels_r(%struct.tree_node**, i32*, i8*)
-
-declare fastcc void @canonicalize_component_ref(%struct.tree_node**)
-
-declare %struct.tree_node* @get_unwidened(%struct.tree_node*, %struct.tree_node*)
-
-declare fastcc void @maybe_with_size_expr(%struct.tree_node**)
-
-declare %struct.tree_node* @substitute_placeholder_in_expr(%struct.tree_node*, %struct.tree_node*)
-
-declare %struct.tree_node* @build2_stat(i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
-
-declare fastcc %struct.tree_node* @gimple_boolify(%struct.tree_node*)
-
-declare %struct.tree_node* @convert(%struct.tree_node*, %struct.tree_node*)
-
-declare %struct.tree_node* @gimplify_init_ctor_preeval_1(%struct.tree_node**, i32*, i8*)
-
-declare i64 @get_alias_set(%struct.tree_node*)
-
-declare i32 @alias_sets_conflict_p(i64, i64)
-
-declare fastcc i8 @cpt_same_type(%struct.tree_node*, %struct.tree_node*) zeroext
-
-declare %struct.tree_node* @check_pointer_types_r(%struct.tree_node**, i32*, i8*)
-
-declare %struct.tree_node* @voidify_wrapper_expr(%struct.tree_node*, %struct.tree_node*)
-
-declare i32 @integer_zerop(%struct.tree_node*)
-
-declare fastcc void @append_to_statement_list_1(%struct.tree_node*, %struct.tree_node**)
-
-declare %struct.tree_node* @alloc_stmt_list()
-
-declare void @tsi_link_after(%struct.tree_stmt_iterator*, %struct.tree_node*, i32)
-
-declare void @append_to_statement_list_force(%struct.tree_node*, %struct.tree_node**)
-
-declare void @append_to_statement_list(%struct.tree_node*, %struct.tree_node**)
-
-declare fastcc %struct.tree_node* @shortcut_cond_r(%struct.tree_node*, %struct.tree_node**, %struct.tree_node**)
-
-declare %struct.tree_node* @build3_stat(i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
-
-declare fastcc %struct.tree_node* @shortcut_cond_expr(%struct.tree_node*)
-
-declare %struct.tree_node* @expr_last(%struct.tree_node*)
-
-declare i8 @block_may_fallthru(%struct.tree_node*) zeroext
-
-declare fastcc void @gimple_pop_condition(%struct.tree_node**)
-
-declare %struct.tree_node* @gimple_build_eh_filter(%struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
-
-declare void @annotate_all_with_locus(%struct.tree_node**, i32, i32)
-
-declare fastcc %struct.tree_node* @internal_get_tmp_var(%struct.tree_node*, %struct.tree_node**, %struct.tree_node**, i8 zeroext )
-
-define i32 @gimplify_expr(%struct.tree_node** %expr_p, %struct.tree_node** %pre_p, %struct.tree_node** %post_p, i8 (%struct.tree_node*) zeroext * %gimple_test_f, i32 %fallback) {
-entry:
- %internal_post = alloca %struct.tree_node*, align 4 ; <%struct.tree_node**> [#uses=2]
- %pre_p_addr.0 = select i1 false, %struct.tree_node** null, %struct.tree_node** %pre_p ; <%struct.tree_node**> [#uses=7]
- %post_p_addr.0 = select i1 false, %struct.tree_node** %internal_post, %struct.tree_node** %post_p ; <%struct.tree_node**> [#uses=7]
- br i1 false, label %bb277, label %bb191
-
-bb191: ; preds = %entry
- ret i32 0
-
-bb277: ; preds = %entry
- %tmp283 = call i32 null( %struct.tree_node** %expr_p, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0 ) ; <i32> [#uses=1]
- switch i32 %tmp283, label %bb7478 [
- i32 0, label %cond_next289
- i32 -1, label %cond_next298
- ]
-
-cond_next289: ; preds = %bb277
- ret i32 0
-
-cond_next298: ; preds = %bb277
- switch i32 0, label %bb7444 [
- i32 24, label %bb7463
- i32 25, label %bb7463
- i32 26, label %bb7463
- i32 27, label %bb7463
- i32 28, label %bb7463
- i32 33, label %bb4503
- i32 39, label %bb397
- i32 40, label %bb5650
- i32 41, label %bb4339
- i32 42, label %bb4350
- i32 43, label %bb4350
- i32 44, label %bb319
- i32 45, label %bb397
- i32 46, label %bb6124
- i32 47, label %bb7463
- i32 49, label %bb5524
- i32 50, label %bb1283
- i32 51, label %bb1289
- i32 52, label %bb1289
- i32 53, label %bb5969
- i32 54, label %bb408
- i32 56, label %bb5079
- i32 57, label %bb428
- i32 59, label %bb5965
- i32 74, label %bb4275
- i32 75, label %bb4275
- i32 76, label %bb4275
- i32 77, label %bb4275
- i32 91, label %bb1296
- i32 92, label %bb1296
- i32 96, label %bb1322
- i32 112, label %bb2548
- i32 113, label %bb2548
- i32 115, label %bb397
- i32 116, label %bb5645
- i32 117, label %bb1504
- i32 121, label %bb397
- i32 122, label %bb397
- i32 123, label %bb313
- i32 124, label %bb313
- i32 125, label %bb313
- i32 126, label %bb313
- i32 127, label %bb2141
- i32 128, label %cond_next5873
- i32 129, label %cond_next5873
- i32 130, label %bb4536
- i32 131, label %bb5300
- i32 132, label %bb5170
- i32 133, label %bb5519
- i32 134, label %bb5091
- i32 135, label %bb5083
- i32 136, label %bb5087
- i32 137, label %bb5382
- i32 139, label %bb7463
- i32 140, label %bb7463
- i32 142, label %bb5974
- i32 143, label %bb6049
- i32 147, label %bb6296
- i32 151, label %cond_next6474
- ]
-
-bb313: ; preds = %cond_next298, %cond_next298, %cond_next298, %cond_next298
- ret i32 0
-
-bb319: ; preds = %cond_next298
- ret i32 0
-
-bb397: ; preds = %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298
- ret i32 0
-
-bb408: ; preds = %cond_next298
- %tmp413 = call fastcc i32 @gimplify_cond_expr( %struct.tree_node** %expr_p, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0, %struct.tree_node* null, i32 %fallback ) ; <i32> [#uses=0]
- ret i32 0
-
-bb428: ; preds = %cond_next298
- ret i32 0
-
-bb1283: ; preds = %cond_next298
- ret i32 0
-
-bb1289: ; preds = %cond_next298, %cond_next298
- ret i32 0
-
-bb1296: ; preds = %cond_next298, %cond_next298
- ret i32 0
-
-bb1322: ; preds = %cond_next298
- ret i32 0
-
-bb1504: ; preds = %cond_next298
- ret i32 0
-
-bb2141: ; preds = %cond_next298
- ret i32 0
-
-bb2548: ; preds = %cond_next298, %cond_next298
- %tmp2554 = load %struct.tree_node** %expr_p ; <%struct.tree_node*> [#uses=2]
- %tmp2562 = and i32 0, 255 ; <i32> [#uses=1]
- %tmp2569 = add i8 0, -4 ; <i8> [#uses=1]
- icmp ugt i8 %tmp2569, 5 ; <i1>:0 [#uses=2]
- %tmp2587 = load i8* null ; <i8> [#uses=1]
- icmp eq i8 %tmp2587, 0 ; <i1>:1 [#uses=2]
- %tmp2607 = load %struct.tree_node** null ; <%struct.tree_node*> [#uses=2]
- br i1 false, label %bb2754, label %cond_next2617
-
-cond_next2617: ; preds = %bb2548
- ret i32 0
-
-bb2754: ; preds = %bb2548
- br i1 %0, label %cond_true2780, label %cond_next2783
-
-cond_true2780: ; preds = %bb2754
- call void @tree_class_check_failed( %struct.tree_node* %tmp2554, i32 9, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
- unreachable
-
-cond_next2783: ; preds = %bb2754
- %tmp2825 = and i32 0, 255 ; <i32> [#uses=1]
- %tmp2829 = load i32* null ; <i32> [#uses=1]
- %tmp28292830 = trunc i32 %tmp2829 to i8 ; <i8> [#uses=1]
- %tmp2832 = add i8 %tmp28292830, -4 ; <i8> [#uses=1]
- icmp ugt i8 %tmp2832, 5 ; <i1>:2 [#uses=1]
- icmp eq i8 0, 0 ; <i1>:3 [#uses=1]
- %tmp28652866 = bitcast %struct.tree_node* %tmp2607 to %struct.tree_exp* ; <%struct.tree_exp*> [#uses=1]
- %tmp2868 = getelementptr %struct.tree_exp* %tmp28652866, i32 0, i32 4, i32 0 ; <%struct.tree_node**> [#uses=1]
- %tmp2870 = load %struct.tree_node** %tmp2868 ; <%struct.tree_node*> [#uses=1]
- br i1 %1, label %cond_true2915, label %cond_next2927
-
-cond_true2915: ; preds = %cond_next2783
- unreachable
-
-cond_next2927: ; preds = %cond_next2783
- %tmp2938 = load %struct.tree_node** null ; <%struct.tree_node*> [#uses=1]
- %tmp2944 = load i32* null ; <i32> [#uses=1]
- %tmp2946 = and i32 %tmp2944, 255 ; <i32> [#uses=1]
- %tmp2949 = getelementptr [0 x i32]* @tree_code_type, i32 0, i32 %tmp2946 ; <i32*> [#uses=1]
- %tmp2950 = load i32* %tmp2949 ; <i32> [#uses=1]
- icmp eq i32 %tmp2950, 2 ; <i1>:4 [#uses=1]
- br i1 %4, label %cond_next2954, label %cond_true2951
-
-cond_true2951: ; preds = %cond_next2927
- call void @tree_class_check_failed( %struct.tree_node* %tmp2938, i32 2, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
- unreachable
-
-cond_next2954: ; preds = %cond_next2927
- br i1 %0, label %cond_true2991, label %cond_next2994
-
-cond_true2991: ; preds = %cond_next2954
- unreachable
-
-cond_next2994: ; preds = %cond_next2954
- br i1 %1, label %cond_true3009, label %cond_next3021
-
-cond_true3009: ; preds = %cond_next2994
- call void @tree_operand_check_failed( i32 0, i32 %tmp2562, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
- unreachable
-
-cond_next3021: ; preds = %cond_next2994
- br i1 %2, label %cond_true3044, label %cond_next3047
-
-cond_true3044: ; preds = %cond_next3021
- call void @tree_class_check_failed( %struct.tree_node* %tmp2607, i32 9, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
- unreachable
-
-cond_next3047: ; preds = %cond_next3021
- br i1 %3, label %cond_true3062, label %cond_next3074
-
-cond_true3062: ; preds = %cond_next3047
- call void @tree_operand_check_failed( i32 0, i32 %tmp2825, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 1415, i8* getelementptr ([20 x i8]* @__FUNCTION__.22136, i32 0, i32 0) )
- unreachable
-
-cond_next3074: ; preds = %cond_next3047
- %tmp3084 = getelementptr %struct.tree_node* %tmp2870, i32 0, i32 0, i32 0, i32 1 ; <%struct.tree_node**> [#uses=1]
- %tmp3085 = load %struct.tree_node** %tmp3084 ; <%struct.tree_node*> [#uses=1]
- %tmp31043105 = bitcast %struct.tree_node* %tmp3085 to %struct.tree_type* ; <%struct.tree_type*> [#uses=1]
- %tmp3106 = getelementptr %struct.tree_type* %tmp31043105, i32 0, i32 6 ; <i16*> [#uses=1]
- %tmp31063107 = bitcast i16* %tmp3106 to i32* ; <i32*> [#uses=1]
- %tmp3108 = load i32* %tmp31063107 ; <i32> [#uses=1]
- xor i32 %tmp3108, 0 ; <i32>:5 [#uses=1]
- %tmp81008368 = and i32 %5, 65024 ; <i32> [#uses=1]
- icmp eq i32 %tmp81008368, 0 ; <i1>:6 [#uses=1]
- br i1 %6, label %cond_next3113, label %bb3351
-
-cond_next3113: ; preds = %cond_next3074
- ret i32 0
-
-bb3351: ; preds = %cond_next3074
- %tmp3354 = call i8 @tree_ssa_useless_type_conversion( %struct.tree_node* %tmp2554 ) zeroext ; <i8> [#uses=1]
- icmp eq i8 %tmp3354, 0 ; <i1>:7 [#uses=1]
- %tmp3424 = load i32* null ; <i32> [#uses=1]
- br i1 %7, label %cond_next3417, label %cond_true3356
-
-cond_true3356: ; preds = %bb3351
- ret i32 0
-
-cond_next3417: ; preds = %bb3351
- br i1 false, label %cond_true3429, label %cond_next4266
-
-cond_true3429: ; preds = %cond_next3417
- %tmp3443 = and i32 %tmp3424, 255 ; <i32> [#uses=0]
- ret i32 0
-
-cond_next4266: ; preds = %cond_next3417
- %tmp4268 = load %struct.tree_node** %expr_p ; <%struct.tree_node*> [#uses=1]
- icmp eq %struct.tree_node* %tmp4268, null ; <i1>:8 [#uses=1]
- br i1 %8, label %bb4275, label %bb7463
-
-bb4275: ; preds = %cond_next4266, %cond_next298, %cond_next298, %cond_next298, %cond_next298
- %tmp4289 = and i32 0, 255 ; <i32> [#uses=2]
- %tmp4292 = getelementptr [0 x i32]* @tree_code_type, i32 0, i32 %tmp4289 ; <i32*> [#uses=1]
- %tmp4293 = load i32* %tmp4292 ; <i32> [#uses=1]
- %tmp42934294 = trunc i32 %tmp4293 to i8 ; <i8> [#uses=1]
- %tmp4296 = add i8 %tmp42934294, -4 ; <i8> [#uses=1]
- icmp ugt i8 %tmp4296, 5 ; <i1>:9 [#uses=1]
- br i1 %9, label %cond_true4297, label %cond_next4300
-
-cond_true4297: ; preds = %bb4275
- unreachable
-
-cond_next4300: ; preds = %bb4275
- %tmp4314 = load i8* null ; <i8> [#uses=1]
- icmp eq i8 %tmp4314, 0 ; <i1>:10 [#uses=1]
- br i1 %10, label %cond_true4315, label %cond_next4327
-
-cond_true4315: ; preds = %cond_next4300
- call void @tree_operand_check_failed( i32 0, i32 %tmp4289, i8* getelementptr ([42 x i8]* @str, i32 0, i32 0), i32 3997, i8* getelementptr ([14 x i8]* @__FUNCTION__.26156, i32 0, i32 0) )
- unreachable
-
-cond_next4327: ; preds = %cond_next4300
- %tmp4336 = call i32 @gimplify_expr( %struct.tree_node** null, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0, i8 (%struct.tree_node*) zeroext * @is_gimple_val, i32 1 ) ; <i32> [#uses=0]
- ret i32 0
-
-bb4339: ; preds = %cond_next298
- ret i32 0
-
-bb4350: ; preds = %cond_next298, %cond_next298
- ret i32 0
-
-bb4503: ; preds = %cond_next298
- ret i32 0
-
-bb4536: ; preds = %cond_next298
- ret i32 0
-
-bb5079: ; preds = %cond_next298
- ret i32 0
-
-bb5083: ; preds = %cond_next298
- ret i32 0
-
-bb5087: ; preds = %cond_next298
- ret i32 0
-
-bb5091: ; preds = %cond_next298
- ret i32 0
-
-bb5170: ; preds = %cond_next298
- ret i32 0
-
-bb5300: ; preds = %cond_next298
- ret i32 0
-
-bb5382: ; preds = %cond_next298
- ret i32 0
-
-bb5519: ; preds = %cond_next298
- ret i32 0
-
-bb5524: ; preds = %cond_next298
- ret i32 0
-
-bb5645: ; preds = %cond_next298
- ret i32 0
-
-bb5650: ; preds = %cond_next298
- ret i32 0
-
-cond_next5873: ; preds = %cond_next298, %cond_next298
- ret i32 0
-
-bb5965: ; preds = %cond_next298
- %tmp5968 = call fastcc i32 @gimplify_cleanup_point_expr( %struct.tree_node** %expr_p, %struct.tree_node** %pre_p_addr.0 ) ; <i32> [#uses=0]
- ret i32 0
-
-bb5969: ; preds = %cond_next298
- %tmp5973 = call fastcc i32 @gimplify_target_expr( %struct.tree_node** %expr_p, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0 ) ; <i32> [#uses=0]
- ret i32 0
-
-bb5974: ; preds = %cond_next298
- ret i32 0
-
-bb6049: ; preds = %cond_next298
- ret i32 0
-
-bb6124: ; preds = %cond_next298
- ret i32 0
-
-bb6296: ; preds = %cond_next298
- ret i32 0
-
-cond_next6474: ; preds = %cond_next298
- icmp eq %struct.tree_node** %internal_post, %post_p_addr.0 ; <i1>:11 [#uses=1]
- %iftmp.381.0 = select i1 %11, %struct.tree_node** null, %struct.tree_node** %post_p_addr.0 ; <%struct.tree_node**> [#uses=1]
- %tmp6490 = call i32 @gimplify_expr( %struct.tree_node** null, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %iftmp.381.0, i8 (%struct.tree_node*) zeroext * %gimple_test_f, i32 %fallback ) ; <i32> [#uses=0]
- %tmp6551 = call i32 @gimplify_expr( %struct.tree_node** null, %struct.tree_node** %pre_p_addr.0, %struct.tree_node** %post_p_addr.0, i8 (%struct.tree_node*) zeroext * @is_gimple_val, i32 1 ) ; <i32> [#uses=0]
- ret i32 0
-
-bb7444: ; preds = %cond_next298
- ret i32 0
-
-bb7463: ; preds = %cond_next4266, %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298, %cond_next298
- ret i32 0
-
-bb7478: ; preds = %bb277
- ret i32 0
-}
-
-declare i8 @is_gimple_formal_tmp_rhs(%struct.tree_node*) zeroext
-
-declare void @gimplify_and_add(%struct.tree_node*, %struct.tree_node**)
-
-declare %struct.tree_node* @get_initialized_tmp_var(%struct.tree_node*, %struct.tree_node**, %struct.tree_node**)
-
-declare %struct.tree_node* @get_formal_tmp_var(%struct.tree_node*, %struct.tree_node**)
-
-declare fastcc void @gimplify_init_ctor_preeval(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, %struct.gimplify_init_ctor_preeval_data*)
-
-declare i8 @type_contains_placeholder_p(%struct.tree_node*) zeroext
-
-declare i8 @is_gimple_mem_rhs(%struct.tree_node*) zeroext
-
-declare fastcc i32 @gimplify_modify_expr_rhs(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, %struct.tree_node**, %struct.tree_node**, i8 zeroext )
-
-declare %struct.tree_node* @fold_indirect_ref(%struct.tree_node*)
-
-declare fastcc i32 @gimplify_compound_expr(%struct.tree_node**, %struct.tree_node**, i8 zeroext )
-
-declare i8 @is_gimple_lvalue(%struct.tree_node*) zeroext
-
-declare void @categorize_ctor_elements(%struct.tree_node*, i64*, i64*, i64*, i8*)
-
-declare void @lhd_set_decl_assembler_name(%struct.tree_node*)
-
-declare i64 @int_size_in_bytes(%struct.tree_node*)
-
-declare i32 @can_move_by_pieces(i64, i32)
-
-declare i64 @count_type_elements(%struct.tree_node*)
-
-declare void @gimplify_stmt(%struct.tree_node**)
-
-declare %struct.tree_node* @get_base_address(%struct.tree_node*)
-
-declare fastcc void @gimplify_init_ctor_eval(%struct.tree_node*, %struct.tree_node*, %struct.tree_node**, i8 zeroext )
-
-declare %struct.tree_node* @build_complex(%struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
-
-declare i8 (%struct.tree_node*) zeroext * @rhs_predicate_for(%struct.tree_node*)
-
-declare %struct.tree_node* @build_vector(%struct.tree_node*, %struct.tree_node*)
-
-declare i8 @is_gimple_val(%struct.tree_node*) zeroext
-
-declare i8 @is_gimple_reg_type(%struct.tree_node*) zeroext
-
-declare fastcc i32 @gimplify_cond_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, %struct.tree_node*, i32)
-
-declare fastcc i32 @gimplify_modify_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, i8 zeroext )
-
-declare %struct.tree_node* @tree_cons_stat(%struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
-
-declare %struct.tree_node* @build_fold_addr_expr(%struct.tree_node*)
-
-declare %struct.tree_node* @build_function_call_expr(%struct.tree_node*, %struct.tree_node*)
-
-declare i8 @is_gimple_addressable(%struct.tree_node*) zeroext
-
-declare i8 @is_gimple_reg(%struct.tree_node*) zeroext
-
-declare %struct.tree_node* @make_ssa_name(%struct.tree_node*, %struct.tree_node*)
-
-declare i8 @tree_ssa_useless_type_conversion(%struct.tree_node*) zeroext
-
-declare fastcc i32 @gimplify_self_mod_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, i8 zeroext )
-
-declare fastcc i32 @gimplify_compound_lval(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**, i32)
-
-declare %struct.tree_node* @get_callee_fndecl(%struct.tree_node*)
-
-declare %struct.tree_node* @fold_builtin(%struct.tree_node*, i8 zeroext )
-
-declare void @error(i8*, ...)
-
-declare %struct.tree_node* @build_empty_stmt()
-
-declare i8 @fold_builtin_next_arg(%struct.tree_node*) zeroext
-
-declare fastcc i32 @gimplify_arg(%struct.tree_node**, %struct.tree_node**)
-
-declare i8 @is_gimple_call_addr(%struct.tree_node*) zeroext
-
-declare i32 @call_expr_flags(%struct.tree_node*)
-
-declare void @recalculate_side_effects(%struct.tree_node*)
-
-declare %struct.tree_node* @fold_convert(%struct.tree_node*, %struct.tree_node*)
-
-declare void @recompute_tree_invarant_for_addr_expr(%struct.tree_node*)
-
-declare i32 @gimplify_va_arg_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)
-
-declare %struct.tree_node* @size_int_kind(i64, i32)
-
-declare %struct.tree_node* @size_binop(i32, %struct.tree_node*, %struct.tree_node*)
-
-declare %struct.tree_node* @build4_stat(i32, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*)
-
-declare void @gimplify_type_sizes(%struct.tree_node*, %struct.tree_node**)
-
-declare void @gimplify_one_sizepos(%struct.tree_node**, %struct.tree_node**)
-
-declare %struct.tree_node* @build_pointer_type(%struct.tree_node*)
-
-declare %struct.tree_node* @build_fold_indirect_ref(%struct.tree_node*)
-
-declare fastcc i32 @gimplify_bind_expr(%struct.tree_node**, %struct.tree_node*, %struct.tree_node**)
-
-declare fastcc void @gimplify_loop_expr(%struct.tree_node**, %struct.tree_node**)
-
-declare fastcc i32 @gimplify_switch_expr(%struct.tree_node**, %struct.tree_node**)
-
-declare %struct.tree_node* @decl_function_context(%struct.tree_node*)
-
-declare %struct.varray_head_tag* @varray_grow(%struct.varray_head_tag*, i32)
-
-declare fastcc void @gimplify_return_expr(%struct.tree_node*, %struct.tree_node**)
-
-declare fastcc i32 @gimplify_save_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)
-
-declare fastcc i32 @gimplify_asm_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)
-
-declare void @gimplify_to_stmt_list(%struct.tree_node**)
-
-declare fastcc i32 @gimplify_cleanup_point_expr(%struct.tree_node**, %struct.tree_node**)
-
-declare fastcc i32 @gimplify_target_expr(%struct.tree_node**, %struct.tree_node**, %struct.tree_node**)
-
-declare void @tsi_delink(%struct.tree_stmt_iterator*)
-
-declare void @tsi_link_before(%struct.tree_stmt_iterator*, %struct.tree_node*, i32)
-
-declare i8 @is_gimple_stmt(%struct.tree_node*) zeroext
-
-declare void @print_generic_expr(%struct.FILE*, %struct.tree_node*, i32)
-
-declare void @debug_tree(%struct.tree_node*)
-
-declare void @internal_error(i8*, ...)
-
-declare %struct.tree_node* @force_gimple_operand(%struct.tree_node*, %struct.tree_node**, i8 zeroext , %struct.tree_node*)
-
-declare i8 @is_gimple_reg_rhs(%struct.tree_node*) zeroext
-
-declare void @add_referenced_tmp_var(%struct.tree_node*)
-
-declare i8 @contains_placeholder_p(%struct.tree_node*) zeroext
-
-declare %struct.varray_head_tag* @varray_init(i32, i32, i8*)
-
-declare i32 @handled_component_p(%struct.tree_node*)
-
-declare void @varray_check_failed(%struct.varray_head_tag*, i32, i8*, i32, i8*)
-
-declare %struct.tree_node* @array_ref_low_bound(%struct.tree_node*)
-
-declare i8 @is_gimple_min_invariant(%struct.tree_node*) zeroext
-
-declare i8 @is_gimple_formal_tmp_reg(%struct.tree_node*) zeroext
-
-declare %struct.tree_node* @array_ref_element_size(%struct.tree_node*)
-
-declare %struct.tree_node* @component_ref_field_offset(%struct.tree_node*)
-
-declare i8 @is_gimple_min_lval(%struct.tree_node*) zeroext
-
-declare void @varray_underflow(%struct.varray_head_tag*, i8*, i32, i8*)
-
-declare i32 @list_length(%struct.tree_node*)
-
-declare i8 @parse_output_constraint(i8**, i32, i32, i32, i8*, i8*, i8*) zeroext
-
-declare i8* @xstrdup(i8*)
-
-declare %struct.tree_node* @build_string(i32, i8*)
-
-declare i8* @strchr(i8*, i32)
-
-declare %struct.tree_node* @build_tree_list_stat(%struct.tree_node*, %struct.tree_node*)
-
-declare %struct.tree_node* @chainon(%struct.tree_node*, %struct.tree_node*)
-
-declare i8 @parse_input_constraint(i8**, i32, i32, i32, i32, i8**, i8*, i8*) zeroext
-
-declare i8 @is_gimple_asm_val(%struct.tree_node*) zeroext
-
-declare void @gimplify_body(%struct.tree_node**, %struct.tree_node*, i8 zeroext )
-
-declare void @timevar_push_1(i32)
-
-declare %struct.tree_node* @gimplify_parameters()
-
-declare %struct.tree_node* @expr_only(%struct.tree_node*)
-
-declare void @timevar_pop_1(i32)
-
-declare void @gimplify_function_tree(%struct.tree_node*)
-
-declare void @allocate_struct_function(%struct.tree_node*)
-
-declare %struct.tree_node* @make_tree_vec_stat(i32)
-
-declare %struct.tree_node* @tsi_split_statement_list_after(%struct.tree_stmt_iterator*)
-
-declare i8 @is_gimple_condexpr(%struct.tree_node*) zeroext
-
-declare %struct.tree_node* @invert_truthvalue(%struct.tree_node*)
-
-declare i8 @initializer_zerop(%struct.tree_node*) zeroext
-
-declare i32 @simple_cst_equal(%struct.tree_node*, %struct.tree_node*)
-
-declare i32 @aggregate_value_p(%struct.tree_node*, %struct.tree_node*)
-
-declare i32 @fwrite(i8*, i32, i32, %struct.FILE*)
diff --git a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
index c73b679..25ac52e 100644
--- a/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
+++ b/test/CodeGen/ARM/2007-05-03-BadPostIndexedLd.ll
@@ -2,7 +2,7 @@
%struct.Connection = type { i32, [10 x i8], i32 }
%struct.IntChunk = type { %struct.cppobjtype, i32, i32*, i32 }
- %struct.Point = type { i8*, %struct.cppobjtype, i16 (%struct.Point*) signext *, i16 (%struct.Point*) signext *, double (%struct.Point*)*, double (%struct.Point*)* }
+ %struct.Point = type { i8*, %struct.cppobjtype, i16 (%struct.Point*) *, i16 (%struct.Point*) *, double (%struct.Point*)*, double (%struct.Point*)* }
%struct.RefPoint = type { %struct.Point*, %struct.cppobjtype }
%struct.ShortArray = type { %struct.cppobjtype, i32, i16* }
%struct.TestObj = type { i8*, %struct.cppobjtype, i8, [32 x i8], i8*, i8**, i16, i16, i32, i32, i32, i32, float, double, %struct.cppobjtype, i32, i16*, i16**, i8**, i32, %struct.XyPoint, [3 x %struct.Connection], %struct.Point*, %struct.XyPoint*, i32, i8*, i8*, i16*, %struct.ShortArray, %struct.IntChunk, %struct.cppobjtype, %struct.cppobjtype, %struct.RefPoint, i32, %struct.cppobjtype, %struct.cppobjtype }
diff --git a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll b/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
deleted file mode 100644
index 26864f1..0000000
--- a/test/CodeGen/ARM/2007-05-07-jumptoentry.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s | not grep 1_0
-; This used to create an extra branch to 'entry', LBB1_0.
-
-; ModuleID = 'bug.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-apple-darwin8"
- %struct.HexxagonMove = type { i8, i8, i32 }
- %struct.HexxagonMoveList = type { i32, %struct.HexxagonMove* }
-
-define void @_ZN16HexxagonMoveList8sortListEv(%struct.HexxagonMoveList* %this) {
-entry:
- %tmp51 = getelementptr %struct.HexxagonMoveList* %this, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp2 = getelementptr %struct.HexxagonMoveList* %this, i32 0, i32 1 ; <%struct.HexxagonMove**> [#uses=2]
- br label %bb49
-
-bb1: ; preds = %bb49
- %tmp3 = load %struct.HexxagonMove** %tmp2 ; <%struct.HexxagonMove*> [#uses=5]
- %tmp6 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 2 ; <i32*> [#uses=1]
- %tmp7 = load i32* %tmp6 ; <i32> [#uses=2]
- %tmp12 = add i32 %i.1, 1 ; <i32> [#uses=7]
- %tmp14 = getelementptr %struct.HexxagonMove* %tmp3, i32 %tmp12, i32 2 ; <i32*> [#uses=1]
- %tmp15 = load i32* %tmp14 ; <i32> [#uses=1]
- %tmp16 = icmp slt i32 %tmp7, %tmp15 ; <i1> [#uses=1]
- br i1 %tmp16, label %cond_true, label %bb49
-
-cond_true: ; preds = %bb1
- %tmp23.0 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 0 ; <i8*> [#uses=2]
- %tmp67 = load i8* %tmp23.0 ; <i8> [#uses=1]
- %tmp23.1 = getelementptr %struct.HexxagonMove* %tmp3, i32 %i.1, i32 1 ; <i8*> [#uses=1]
- %tmp68 = load i8* %tmp23.1 ; <i8> [#uses=1]
- %tmp3638 = getelementptr %struct.HexxagonMove* %tmp3, i32 %tmp12, i32 0 ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32( i8* %tmp23.0, i8* %tmp3638, i32 8, i32 4 )
- %tmp41 = load %struct.HexxagonMove** %tmp2 ; <%struct.HexxagonMove*> [#uses=3]
- %tmp44.0 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 0 ; <i8*> [#uses=1]
- store i8 %tmp67, i8* %tmp44.0
- %tmp44.1 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 1 ; <i8*> [#uses=1]
- store i8 %tmp68, i8* %tmp44.1
- %tmp44.2 = getelementptr %struct.HexxagonMove* %tmp41, i32 %tmp12, i32 2 ; <i32*> [#uses=1]
- store i32 %tmp7, i32* %tmp44.2
- br label %bb49
-
-bb49: ; preds = %bb59, %cond_true, %bb1, %entry
- %i.1 = phi i32 [ 0, %entry ], [ %tmp12, %bb1 ], [ %tmp12, %cond_true ], [ 0, %bb59 ] ; <i32> [#uses=5]
- %move.2 = phi i32 [ 0, %entry ], [ 1, %cond_true ], [ %move.2, %bb1 ], [ 0, %bb59 ] ; <i32> [#uses=2]
- %tmp52 = load i32* %tmp51 ; <i32> [#uses=1]
- %tmp53 = add i32 %tmp52, -1 ; <i32> [#uses=1]
- %tmp55 = icmp sgt i32 %tmp53, %i.1 ; <i1> [#uses=1]
- br i1 %tmp55, label %bb1, label %bb59
-
-bb59: ; preds = %bb49
- %tmp61 = icmp eq i32 %move.2, 0 ; <i1> [#uses=1]
- br i1 %tmp61, label %return, label %bb49
-
-return: ; preds = %bb59
- ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
index 52937c1..55cea3a 100644
--- a/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
+++ b/test/CodeGen/ARM/2007-05-07-tailmerge-1.ll
@@ -12,7 +12,6 @@ entry:
%i_addr = alloca i32 ; <i32*> [#uses=2]
%q_addr = alloca i32 ; <i32*> [#uses=2]
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %i, i32* %i_addr
store i32 %q, i32* %q_addr
%tmp = load i32* %i_addr ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
index c925fa8..4894116 100644
--- a/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
+++ b/test/CodeGen/ARM/2007-05-09-tailmerge-2.ll
@@ -12,7 +12,6 @@ entry:
%i_addr = alloca i32 ; <i32*> [#uses=2]
%q_addr = alloca i32 ; <i32*> [#uses=2]
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %i, i32* %i_addr
store i32 %q, i32* %q_addr
%tmp = load i32* %i_addr ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
index 9df5af5..acbab8a 100644
--- a/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/ARM/2007-05-22-tailmerge-3.ll
@@ -14,7 +14,6 @@ entry:
%i_addr = alloca i32 ; <i32*> [#uses=2]
%q_addr = alloca i32 ; <i32*> [#uses=2]
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %i, i32* %i_addr
store i32 %q, i32* %q_addr
%tmp = load i32* %i_addr ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll b/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
deleted file mode 100644
index 7ba2a19..0000000
--- a/test/CodeGen/ARM/2007-05-31-RegScavengerInfiniteLoop.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; RUN: llc < %s
-; PR1424
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "arm-unknown-linux-gnueabi"
- %struct.AVClass = type { i8*, i8* (i8*)*, %struct.AVOption* }
- %struct.AVCodec = type { i8*, i32, i32, i32, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32, i8*)*, i32 (%struct.AVCodecContext*)*, i32 (%struct.AVCodecContext*, i8*, i32*, i8*, i32)*, i32, %struct.AVCodec*, void (%struct.AVCodecContext*)*, %struct.AVRational*, i32* }
- %struct.AVCodecContext = type { %struct.AVClass*, i32, i32, i32, i32, i32, i8*, i32, %struct.AVRational, i32, i32, i32, i32, i32, void (%struct.AVCodecContext*, %struct.AVFrame*, i32*, i32, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, float, float, i32, i32, i32, i32, float, i32, i32, i32, %struct.AVCodec*, i8*, i32, i32, void (%struct.AVCodecContext*, i8*, i32, i32)*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, [32 x i8], i32, i32, i32, i32, i32, i32, i32, float, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, void (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i8*, i8*, float, float, i32, %struct.RcOverride*, i32, i8*, i32, i32, i32, float, float, float, float, i32, float, float, float, float, float, i32, i32, i32, i32*, i32, i32, i32, i32, %struct.AVRational, %struct.AVFrame*, i32, i32, [4 x i64], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32*)*, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i16*, i16*, i32, i32, i32, i32, %struct.AVPaletteControl*, i32, i32 (%struct.AVCodecContext*, %struct.AVFrame*)*, i32, i32, i32, i32, i32, i32, i32, i32 (%struct.AVCodecContext*, i32 (%struct.AVCodecContext*, i8*)*, i8**, i32*, i32)*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64 }
- %struct.AVEvalExpr = type opaque
- %struct.AVFrame = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*] }
- %struct.AVOption = type opaque
- %struct.AVPaletteControl = type { i32, [256 x i32] }
- %struct.AVPanScan = type { i32, i32, i32, [3 x [2 x i16]] }
- %struct.AVRational = type { i32, i32 }
- %struct.BlockNode = type { i16, i16, i8, [3 x i8], i8, i8 }
-	%struct.DSPContext = type { void (i16*, i8*, i32)*, void (i16*, i8*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i16*, i8*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, i32 (i16*)*, void (i8*, i8*, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, void (i16*)*, i32 (i8*, i32)*, i32 (i8*, i32)*, [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], [5 x i32 (i8*, i8*, i8*, i32, i32)*], i32 (i8*, i16*, i32)*, [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [4 x [4 x void (i8*, i8*, i32, i32)*]], [2 x void (i8*, i8*, i8*, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [11 x void (i8*, i8*, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], [8 x void (i8*, i8*, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [3 x void (i8*, i8*, i32, i32, i32, i32)*], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [4 x [16 x void (i8*, i8*, i32)*]], [10 x void (i8*, i32, i32, i32, i32)*], [10 x void (i8*, i8*, i32, i32, i32, i32, i32)*], [2 x [16 x void (i8*, i8*, i32)*]], [2 x [16 x void (i8*, i8*, i32)*]], void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i32, i32, i32, i32, i32, i32)*, void (i8*, i16*, i32)*, [2 x [4 x i32 (i8*, i8*, i8*, i32, i32)*]], void (i8*, i8*, i32)*, void (i8*, i8*, i8*, i32)*, void (i8*, i8*, i8*, i32, i32*, i32*)*, void (i32*, i32*, i32)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32, i8*)*, void (i8*, i32, i32, i32)*, void (i8*, i32, i32, i32)*, void ([4 x [4 x i16]]*, i8*, [40 x i8]*, [40 x [2 x i16]]*, i32, i32, i32, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32, i32)*, void (i8*, i32)*, void (float*, float*, i32)*, void (float*, float*, i32)*, void (float*, float*, float*, i32)*, void (float*, float*, float*, float*, i32, i32, i32)*, void (i16*, float*, i32)*, void (i16*)*, void (i16*)*, void (i16*)*, void (i8*, i32, i16*)*, void (i8*, i32, i16*)*, [64 x i8], i32, i32 (i16*, i16*, i16*, i32)*, void (i16*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void (i8*, i16*, i32)*, void ([4 x i16]*)*, void (i32*, i32*, i32*, i32*, i32*, i32*, i32)*, void (i32*, i32)*, void (i8*, i32, i8**, i32, i32, i32, i32, i32, %struct.slice_buffer*, i32, i8*)*, void (i8*, i32, i32)*, [4 x void (i8*, i32, i8*, i32, i32, i32)*], void (i16*)*, void (i16*, i32)*, void (i16*, i32)*, void (i16*, i32)*, void (i8*, i32)*, void (i8*, i32)*, [16 x void (i8*, i8*, i32, i32)*] }
- %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
- %struct.GetBitContext = type { i8*, i8*, i32*, i32, i32, i32, i32 }
- %struct.MJpegContext = type opaque
- %struct.MotionEstContext = type { %struct.AVCodecContext*, i32, [4 x [2 x i32]], [4 x [2 x i32]], i8*, i8*, [2 x i8*], i8*, i32, i32*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x [4 x i8*]], [4 x [4 x i8*]], i32, i32, i32, i32, i32, [4 x void (i8*, i8*, i32, i32)*]*, [4 x void (i8*, i8*, i32, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [16 x void (i8*, i8*, i32)*]*, [4097 x i8]*, i8*, i32 (%struct.MpegEncContext*, i32*, i32*, i32, i32, i32, i32, i32)* }
- %struct.MpegEncContext = type { %struct.AVCodecContext*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i64, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Picture*, %struct.Picture**, %struct.Picture**, i32, i32, [8 x %struct.MpegEncContext*], %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture, %struct.Picture*, %struct.Picture*, %struct.Picture*, [3 x i8*], [3 x i32], i16*, [3 x i16*], [20 x i16], i32, i32, i8*, i8*, i8*, i8*, i8*, [16 x i16]*, [3 x [16 x i16]*], i32, i8*, i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, i32, i32*, i32, i32, i32, i32, i32, i32, i32, [5 x i32], i32, i32, i32, i32, %struct.DSPContext, i32, i32, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x i16]*, [2 x [2 x [2 x i16]*]], [2 x [2 x [2 x [2 x i16]*]]], [2 x i8*], [2 x [2 x i8*]], i32, i32, i32, [2 x [4 x [2 x i32]]], [2 x [2 x i32]], [2 x [2 x [2 x i32]]], i8*, [2 x [64 x i16]], %struct.MotionEstContext, i32, i32, i32, i32, i32, i32, i16*, [6 x i32], [6 x i32], [3 x i8*], i32*, [64 x i16], [64 x i16], [64 x i16], [64 x i16], i32, i32, i32, i32, i32, i8*, i8*, i8*, i8*, i8*, i8*, [8 x i32], [64 x i32]*, [64 x i32]*, [2 x [64 x i16]]*, [2 x [64 x i16]]*, [12 x i32], %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, %struct.ScanTable, [64 x i32]*, [2 x i32], [64 x i16]*, i8*, i64, i64, i32, i32, %struct.RateControlContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, i32, %struct.GetBitContext, i32, i32, i32, %struct.ParseContext, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i16, i16, i16, i16, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [2 x i32]], [2 x [2 x i32]], [2 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.PutBitContext, %struct.PutBitContext, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, [3 x i32], %struct.MJpegContext*, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x [65 x [65 x [2 x i32]]]]*, i32, i32, %struct.GetBitContext, i32, i32, i32, i8*, i32, [2 x [2 x i32]], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i32], i32, i32, i32, i32, i8*, i32, [12 x i16*], [64 x i16]*, [8 x [64 x i16]]*, i32 (%struct.MpegEncContext*, [64 x i16]*)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, void (%struct.MpegEncContext*, i16*, i32, i32)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, i32 (%struct.MpegEncContext*, i16*, i32, i32, i32*)*, void (%struct.MpegEncContext*, i16*)* }
- %struct.ParseContext = type { i8*, i32, i32, i32, i32, i32, i32, i32 }
- %struct.Picture = type { [4 x i8*], [4 x i32], [4 x i8*], i32, i32, i64, i32, i32, i32, i32, i32, i8*, i32, i8*, [2 x [2 x i16]*], i32*, i8, i8*, [4 x i64], i32, i32, i32, i32, i32, %struct.AVPanScan*, i32, i32, i16*, [2 x i8*], [3 x i8*], [2 x [2 x i16]*], i32*, [2 x i32], i32, i32, i32, i32, [2 x [16 x i32]], [2 x i32], i32, i32, i16*, i16*, i8*, i32*, i32 }
- %struct.Plane = type { i32, i32, [8 x [4 x %struct.SubBand]] }
- %struct.Predictor = type { double, double, double }
- %struct.PutBitContext = type { i32, i32, i8*, i8*, i8* }
- %struct.RangeCoder = type { i32, i32, i32, i32, [256 x i8], [256 x i8], i8*, i8*, i8* }
- %struct.RateControlContext = type { %struct.FILE*, i32, %struct.RateControlEntry*, double, [5 x %struct.Predictor], double, double, double, double, double, [5 x double], i32, i32, [5 x i64], [5 x i64], [5 x i64], [5 x i64], [5 x i32], i32, i8*, float, i32, %struct.AVEvalExpr* }
- %struct.RateControlEntry = type { i32, float, i32, i32, i32, i32, i32, i64, i32, float, i32, i32, i32, i32, i32, i32 }
- %struct.RcOverride = type { i32, i32, i32, float }
- %struct.ScanTable = type { i8*, [64 x i8], [64 x i8] }
- %struct.SnowContext = type { %struct.AVCodecContext*, %struct.RangeCoder, %struct.DSPContext, %struct.AVFrame, %struct.AVFrame, %struct.AVFrame, [8 x %struct.AVFrame], %struct.AVFrame, [32 x i8], [4224 x i8], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [8 x [2 x i16]*], [8 x i32*], i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [4 x %struct.Plane], %struct.BlockNode*, [1024 x i32], i32, %struct.slice_buffer, %struct.MpegEncContext }
- %struct.SubBand = type { i32, i32, i32, i32, i32, i32*, i32, i32, i32, %struct.x_and_coeff*, %struct.SubBand*, [519 x [32 x i8]] }
- %struct._IO_marker = type { %struct._IO_marker*, %struct.FILE*, i32 }
- %struct.slice_buffer = type { i32**, i32**, i32, i32, i32, i32, i32* }
- %struct.x_and_coeff = type { i16, i16 }
-
-define fastcc void @iterative_me(%struct.SnowContext* %s) {
-entry:
- %state = alloca [4224 x i8], align 8 ; <[4224 x i8]*> [#uses=0]
- %best_rd4233 = alloca i32, align 4 ; <i32*> [#uses=0]
- %tmp21 = getelementptr %struct.SnowContext* %s, i32 0, i32 36 ; <i32*> [#uses=2]
- br label %bb4198
-
-bb79: ; preds = %bb4189.preheader
- br i1 false, label %cond_next239, label %cond_true
-
-cond_true: ; preds = %bb79
- ret void
-
-cond_next239: ; preds = %bb79
- %tmp286 = alloca i8, i32 0 ; <i8*> [#uses=0]
- ret void
-
-bb4198: ; preds = %bb4189.preheader, %entry
- br i1 false, label %bb4189.preheader, label %bb4204
-
-bb4189.preheader: ; preds = %bb4198
- br i1 false, label %bb79, label %bb4198
-
-bb4204: ; preds = %bb4198
- br i1 false, label %bb4221, label %cond_next4213
-
-cond_next4213: ; preds = %bb4204
- ret void
-
-bb4221: ; preds = %bb4204
- br i1 false, label %bb5242.preheader, label %UnifiedReturnBlock
-
-bb5242.preheader: ; preds = %bb4221
- br label %bb5242
-
-bb4231: ; preds = %bb5233
- %tmp4254.sum = add i32 0, 1 ; <i32> [#uses=2]
- br i1 false, label %bb4559, label %cond_next4622
-
-bb4559: ; preds = %bb4231
- ret void
-
-cond_next4622: ; preds = %bb4231
- %tmp4637 = load i16* null ; <i16> [#uses=1]
- %tmp46374638 = sext i16 %tmp4637 to i32 ; <i32> [#uses=1]
- %tmp4642 = load i16* null ; <i16> [#uses=1]
- %tmp46424643 = sext i16 %tmp4642 to i32 ; <i32> [#uses=1]
- %tmp4648 = load i16* null ; <i16> [#uses=1]
- %tmp46484649 = sext i16 %tmp4648 to i32 ; <i32> [#uses=1]
- %tmp4653 = getelementptr %struct.BlockNode* null, i32 %tmp4254.sum, i32 0 ; <i16*> [#uses=1]
- %tmp4654 = load i16* %tmp4653 ; <i16> [#uses=1]
- %tmp46544655 = sext i16 %tmp4654 to i32 ; <i32> [#uses=1]
- %tmp4644 = add i32 %tmp46374638, 2 ; <i32> [#uses=1]
- %tmp4650 = add i32 %tmp4644, %tmp46424643 ; <i32> [#uses=1]
- %tmp4656 = add i32 %tmp4650, %tmp46484649 ; <i32> [#uses=1]
- %tmp4657 = add i32 %tmp4656, %tmp46544655 ; <i32> [#uses=2]
- %tmp4658 = ashr i32 %tmp4657, 2 ; <i32> [#uses=1]
- %tmp4662 = load i16* null ; <i16> [#uses=1]
- %tmp46624663 = sext i16 %tmp4662 to i32 ; <i32> [#uses=1]
- %tmp4672 = getelementptr %struct.BlockNode* null, i32 0, i32 1 ; <i16*> [#uses=1]
- %tmp4673 = load i16* %tmp4672 ; <i16> [#uses=1]
- %tmp46734674 = sext i16 %tmp4673 to i32 ; <i32> [#uses=1]
- %tmp4678 = getelementptr %struct.BlockNode* null, i32 %tmp4254.sum, i32 1 ; <i16*> [#uses=1]
- %tmp4679 = load i16* %tmp4678 ; <i16> [#uses=1]
- %tmp46794680 = sext i16 %tmp4679 to i32 ; <i32> [#uses=1]
- %tmp4669 = add i32 %tmp46624663, 2 ; <i32> [#uses=1]
- %tmp4675 = add i32 %tmp4669, 0 ; <i32> [#uses=1]
- %tmp4681 = add i32 %tmp4675, %tmp46734674 ; <i32> [#uses=1]
- %tmp4682 = add i32 %tmp4681, %tmp46794680 ; <i32> [#uses=2]
- %tmp4683 = ashr i32 %tmp4682, 2 ; <i32> [#uses=1]
- %tmp4703 = load i32* %tmp21 ; <i32> [#uses=1]
- %tmp4707 = shl i32 %tmp4703, 0 ; <i32> [#uses=4]
- %tmp4710 = load %struct.BlockNode** null ; <%struct.BlockNode*> [#uses=6]
- %tmp4713 = mul i32 %tmp4707, %mb_y.4 ; <i32> [#uses=1]
- %tmp4715 = add i32 %tmp4713, %mb_x.7 ; <i32> [#uses=7]
- store i8 0, i8* null
- store i8 0, i8* null
- %tmp47594761 = bitcast %struct.BlockNode* null to i8* ; <i8*> [#uses=2]
- call void @llvm.memcpy.i32( i8* null, i8* %tmp47594761, i32 10, i32 0 )
- %tmp4716.sum5775 = add i32 %tmp4715, 1 ; <i32> [#uses=1]
- %tmp4764 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4716.sum5775 ; <%struct.BlockNode*> [#uses=1]
- %tmp47644766 = bitcast %struct.BlockNode* %tmp4764 to i8* ; <i8*> [#uses=1]
- %tmp4716.sum5774 = add i32 %tmp4715, %tmp4707 ; <i32> [#uses=0]
- %tmp47704772 = bitcast %struct.BlockNode* null to i8* ; <i8*> [#uses=1]
- %tmp4774 = add i32 %tmp4707, 1 ; <i32> [#uses=1]
- %tmp4716.sum5773 = add i32 %tmp4774, %tmp4715 ; <i32> [#uses=1]
- %tmp4777 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4716.sum5773 ; <%struct.BlockNode*> [#uses=1]
- %tmp47774779 = bitcast %struct.BlockNode* %tmp4777 to i8* ; <i8*> [#uses=1]
- %tmp4781 = icmp slt i32 %mb_x.7, 0 ; <i1> [#uses=1]
- %tmp4788 = or i1 %tmp4781, %tmp4784 ; <i1> [#uses=2]
- br i1 %tmp4788, label %cond_true4791, label %cond_next4794
-
-cond_true4791: ; preds = %cond_next4622
- unreachable
-
-cond_next4794: ; preds = %cond_next4622
- %tmp4797 = icmp slt i32 %mb_x.7, %tmp4707 ; <i1> [#uses=1]
- br i1 %tmp4797, label %cond_next4803, label %cond_true4800
-
-cond_true4800: ; preds = %cond_next4794
- unreachable
-
-cond_next4803: ; preds = %cond_next4794
- %tmp4825 = ashr i32 %tmp4657, 12 ; <i32> [#uses=1]
- shl i32 %tmp4682, 4 ; <i32>:0 [#uses=1]
- %tmp4828 = and i32 %0, -64 ; <i32> [#uses=1]
- %tmp4831 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 2 ; <i8*> [#uses=0]
- %tmp4826 = add i32 %tmp4828, %tmp4825 ; <i32> [#uses=1]
- %tmp4829 = add i32 %tmp4826, 0 ; <i32> [#uses=1]
- %tmp4835 = add i32 %tmp4829, 0 ; <i32> [#uses=1]
- store i32 %tmp4835, i32* null
- %tmp48534854 = trunc i32 %tmp4658 to i16 ; <i16> [#uses=1]
- %tmp4856 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 0 ; <i16*> [#uses=1]
- store i16 %tmp48534854, i16* %tmp4856
- %tmp48574858 = trunc i32 %tmp4683 to i16 ; <i16> [#uses=1]
- %tmp4860 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 1 ; <i16*> [#uses=1]
- store i16 %tmp48574858, i16* %tmp4860
- %tmp4866 = getelementptr %struct.BlockNode* %tmp4710, i32 %tmp4715, i32 4 ; <i8*> [#uses=0]
- br i1 false, label %bb4933, label %cond_false4906
-
-cond_false4906: ; preds = %cond_next4803
- call void @llvm.memcpy.i32( i8* %tmp47594761, i8* null, i32 10, i32 0 )
- call void @llvm.memcpy.i32( i8* %tmp47644766, i8* null, i32 10, i32 0 )
- call void @llvm.memcpy.i32( i8* %tmp47704772, i8* null, i32 10, i32 0 )
- call void @llvm.memcpy.i32( i8* %tmp47774779, i8* null, i32 10, i32 0 )
- br label %bb5215
-
-bb4933: ; preds = %bb5215, %cond_next4803
- br i1 false, label %cond_true4944, label %bb5215
-
-cond_true4944: ; preds = %bb4933
- %tmp4982 = load i32* %tmp21 ; <i32> [#uses=1]
- %tmp4986 = shl i32 %tmp4982, 0 ; <i32> [#uses=2]
- %tmp4992 = mul i32 %tmp4986, %mb_y.4 ; <i32> [#uses=1]
- %tmp4994 = add i32 %tmp4992, %mb_x.7 ; <i32> [#uses=5]
- %tmp4995.sum5765 = add i32 %tmp4994, 1 ; <i32> [#uses=1]
- %tmp5043 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5765 ; <%struct.BlockNode*> [#uses=1]
- %tmp50435045 = bitcast %struct.BlockNode* %tmp5043 to i8* ; <i8*> [#uses=2]
- call void @llvm.memcpy.i32( i8* null, i8* %tmp50435045, i32 10, i32 0 )
- %tmp4995.sum5764 = add i32 %tmp4994, %tmp4986 ; <i32> [#uses=1]
- %tmp5049 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5764 ; <%struct.BlockNode*> [#uses=1]
- %tmp50495051 = bitcast %struct.BlockNode* %tmp5049 to i8* ; <i8*> [#uses=2]
- call void @llvm.memcpy.i32( i8* null, i8* %tmp50495051, i32 10, i32 0 )
- %tmp4995.sum5763 = add i32 0, %tmp4994 ; <i32> [#uses=1]
- %tmp5056 = getelementptr %struct.BlockNode* null, i32 %tmp4995.sum5763 ; <%struct.BlockNode*> [#uses=1]
- %tmp50565058 = bitcast %struct.BlockNode* %tmp5056 to i8* ; <i8*> [#uses=1]
- br i1 %tmp4788, label %cond_true5070, label %cond_next5073
-
-cond_true5070: ; preds = %cond_true4944
- unreachable
-
-cond_next5073: ; preds = %cond_true4944
- %tmp5139 = getelementptr %struct.BlockNode* null, i32 %tmp4994, i32 1 ; <i16*> [#uses=0]
- %tmp5145 = getelementptr %struct.BlockNode* null, i32 %tmp4994, i32 4 ; <i8*> [#uses=0]
- call void @llvm.memcpy.i32( i8* %tmp50435045, i8* null, i32 10, i32 0 )
- call void @llvm.memcpy.i32( i8* %tmp50495051, i8* null, i32 10, i32 0 )
- call void @llvm.memcpy.i32( i8* %tmp50565058, i8* null, i32 10, i32 0 )
- br label %bb5215
-
-bb5215: ; preds = %cond_next5073, %bb4933, %cond_false4906
- %i4232.3 = phi i32 [ 0, %cond_false4906 ], [ 0, %cond_next5073 ], [ 0, %bb4933 ] ; <i32> [#uses=1]
- %tmp5217 = icmp slt i32 %i4232.3, 4 ; <i1> [#uses=1]
- br i1 %tmp5217, label %bb4933, label %bb5220
-
-bb5220: ; preds = %bb5215
- br i1 false, label %bb5230, label %cond_true5226
-
-cond_true5226: ; preds = %bb5220
- ret void
-
-bb5230: ; preds = %bb5220
- %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
- br label %bb5233
-
-bb5233: ; preds = %bb5233.preheader, %bb5230
- %indvar = phi i32 [ 0, %bb5233.preheader ], [ %indvar.next, %bb5230 ] ; <i32> [#uses=2]
- %mb_x.7 = shl i32 %indvar, 1 ; <i32> [#uses=4]
- br i1 false, label %bb4231, label %bb5239
-
-bb5239: ; preds = %bb5233
- %indvar.next37882 = add i32 %indvar37881, 1 ; <i32> [#uses=1]
- br label %bb5242
-
-bb5242: ; preds = %bb5239, %bb5242.preheader
- %indvar37881 = phi i32 [ 0, %bb5242.preheader ], [ %indvar.next37882, %bb5239 ] ; <i32> [#uses=2]
- %mb_y.4 = shl i32 %indvar37881, 1 ; <i32> [#uses=3]
- br i1 false, label %bb5233.preheader, label %bb5248
-
-bb5233.preheader: ; preds = %bb5242
- %tmp4784 = icmp slt i32 %mb_y.4, 0 ; <i1> [#uses=1]
- br label %bb5233
-
-bb5248: ; preds = %bb5242
- ret void
-
-UnifiedReturnBlock: ; preds = %bb4221
- ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
index 234c7b6..6b39a76 100644
--- a/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-04-ScavengerAssert.ll
@@ -46,7 +46,8 @@ bb17.i: ; preds = %cond_next119.i
cond_true53.i: ; preds = %bb17.i
ret { i16, %struct.rnode* }* null
cond_false99.i: ; preds = %bb17.i
- %tmp106.i = malloc %struct.ch_set ; <%struct.ch_set*> [#uses=1]
+ %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64), i64 2) to i32))
+ %tmp106.i = bitcast i8* %malloccall to %struct.ch_set*
br i1 false, label %bb126.i, label %cond_next119.i
cond_next119.i: ; preds = %cond_false99.i, %bb42
%curr_ptr.0.reg2mem.0.i = phi %struct.ch_set* [ %tmp106.i, %cond_false99.i ], [ null, %bb42 ] ; <%struct.ch_set*> [#uses=2]
@@ -58,3 +59,5 @@ bb126.i: ; preds = %cond_next119.i, %cond_false99.i
bb78: ; preds = %entry
ret { i16, %struct.rnode* }* null
}
+
+declare noalias i8* @malloc(i32)
diff --git a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
index 77418be..c9a8a67 100644
--- a/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
+++ b/test/CodeGen/ARM/2008-04-10-ScavengerAssert.ll
@@ -188,11 +188,6 @@ bb231: ; preds = %bb226
ret void
}
- %struct.III_psy_xmin = type { [22 x double], [13 x [3 x double]] }
- %struct.III_scalefac_t = type { [22 x i32], [13 x [3 x i32]] }
- %struct.gr_info = type { i32, i32, i32, i32, i32, i32, i32, i32, [3 x i32], [3 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, [4 x i32] }
- %struct.lame_global_flags = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, float, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, i32, i32, i32, float, float, float, float, i32, i32, i32, i32, i32, i32, i32, i32 }
-
define fastcc void @outer_loop2(%struct.lame_global_flags* %gfp, double* %xr, i32 %targ_bits, double* %best_noise, %struct.III_psy_xmin* %l3_xmin, i32* %l3_enc, %struct.III_scalefac_t* %scalefac, %struct.gr_info* %cod_info, i32 %ch) {
entry:
%cod_info.20128.1 = getelementptr %struct.gr_info* %cod_info, i32 0, i32 20, i32 1 ; <i32*> [#uses=1]
diff --git a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll b/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
deleted file mode 100644
index 5f9d9ae..0000000
--- a/test/CodeGen/ARM/2008-09-14-CoalescerBug.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin
-
-@"\01LC1" = external constant [288 x i8] ; <[288 x i8]*> [#uses=1]
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
- br label %bb.i
-
-bb.i: ; preds = %bb.i, %entry
- %i.01.i = phi i32 [ 0, %entry ], [ %indvar.next52, %bb.i ] ; <i32> [#uses=1]
- %indvar.next52 = add i32 %i.01.i, 1 ; <i32> [#uses=2]
- %exitcond53 = icmp eq i32 %indvar.next52, 15 ; <i1> [#uses=1]
- br i1 %exitcond53, label %bb.i33.loopexit, label %bb.i
-
-bb.i33.loopexit: ; preds = %bb.i
- %0 = malloc [347 x i8] ; <[347 x i8]*> [#uses=2]
- %.sub = getelementptr [347 x i8]* %0, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %.sub, i8* getelementptr ([288 x i8]* @"\01LC1", i32 0, i32 0), i32 287, i32 1 ) nounwind
- br label %bb.i28
-
-bb.i28: ; preds = %bb.i28, %bb.i33.loopexit
- br i1 false, label %repeat_fasta.exit, label %bb.i28
-
-repeat_fasta.exit: ; preds = %bb.i28
- free [347 x i8]* %0
- unreachable
-}
diff --git a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
index 0ec17ae..377bbd2 100644
--- a/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
+++ b/test/CodeGen/ARM/2009-03-07-SpillerBug.ll
@@ -1,16 +1,15 @@
; RUN: llc < %s -mtriple=armv6-apple-darwin9 -mattr=+vfp2
; rdar://6653182
- %struct.ggBRDF = type { i32 (...)** }
- %struct.ggPoint2 = type { [2 x double] }
- %struct.ggPoint3 = type { [3 x double] }
- %struct.ggSpectrum = type { [8 x float] }
- %struct.ggSphere = type { %struct.ggPoint3, double }
- %struct.mrDiffuseAreaSphereLuminaire = type { %struct.mrSphere, %struct.ggSpectrum }
- %struct.mrDiffuseCosineSphereLuminaire = type { %struct.mrDiffuseAreaSphereLuminaire }
- %struct.mrSphere = type { %struct.ggBRDF, %struct.ggSphere }
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+%struct.ggBRDF = type { i32 (...)** }
+%struct.ggPoint2 = type { [2 x double] }
+%struct.ggPoint3 = type { [3 x double] }
+%struct.ggSpectrum = type { [8 x float] }
+%struct.ggSphere = type { %struct.ggPoint3, double }
+%struct.mrDiffuseAreaSphereLuminaire = type { %struct.mrSphere, %struct.ggSpectrum }
+%struct.mrDiffuseCosineSphereLuminaire = type { %struct.mrDiffuseAreaSphereLuminaire }
+%struct.mrSphere = type { %struct.ggBRDF, %struct.ggSphere }
declare double @llvm.sqrt.f64(double) nounwind readonly
@@ -20,59 +19,61 @@ declare double @acos(double) nounwind readonly
define i32 @_ZNK34mrDiffuseSolidAngleSphereLuminaire18selectVisiblePointERK8ggPoint3RK9ggVector3RK8ggPoint2dRS0_Rd(%struct.mrDiffuseCosineSphereLuminaire* nocapture %this, %struct.ggPoint3* nocapture %x, %struct.ggPoint3* nocapture %unnamed_arg, %struct.ggPoint2* nocapture %uv, double %unnamed_arg2, %struct.ggPoint3* nocapture %on_light, double* nocapture %invProb) nounwind {
entry:
- %0 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind ; <double> [#uses=4]
- %1 = fcmp ult double 0.000000e+00, %0 ; <i1> [#uses=1]
- br i1 %1, label %bb3, label %bb7
+ %0 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind
+ %1 = fcmp ult double 0.000000e+00, %0
+ br i1 %1, label %bb3, label %bb7
-bb3: ; preds = %entry
- %2 = fdiv double 1.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %3 = fmul double 0.000000e+00, %2 ; <double> [#uses=2]
- %4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind ; <double> [#uses=1]
- %5 = fdiv double 1.000000e+00, %4 ; <double> [#uses=2]
- %6 = fmul double %3, %5 ; <double> [#uses=2]
- %7 = fmul double 0.000000e+00, %5 ; <double> [#uses=2]
- %8 = fmul double %3, %7 ; <double> [#uses=1]
- %9 = fsub double %8, 0.000000e+00 ; <double> [#uses=1]
- %10 = fmul double 0.000000e+00, %6 ; <double> [#uses=1]
- %11 = fsub double 0.000000e+00, %10 ; <double> [#uses=1]
- %12 = fsub double -0.000000e+00, %11 ; <double> [#uses=1]
- %13 = fmul double %0, %0 ; <double> [#uses=2]
- %14 = fsub double %13, 0.000000e+00 ; <double> [#uses=1]
- %15 = call double @llvm.sqrt.f64(double %14) ; <double> [#uses=1]
- %16 = fmul double 0.000000e+00, %15 ; <double> [#uses=1]
- %17 = fdiv double %16, %0 ; <double> [#uses=1]
- %18 = fadd double 0.000000e+00, %17 ; <double> [#uses=1]
- %19 = call double @acos(double %18) nounwind readonly ; <double> [#uses=1]
- %20 = load double* null, align 4 ; <double> [#uses=1]
- %21 = fmul double %20, 0x401921FB54442D18 ; <double> [#uses=1]
- %22 = call double @sin(double %19) nounwind readonly ; <double> [#uses=2]
- %23 = fmul double %22, 0.000000e+00 ; <double> [#uses=2]
- %24 = fmul double %6, %23 ; <double> [#uses=1]
- %25 = fmul double %7, %23 ; <double> [#uses=1]
- %26 = call double @sin(double %21) nounwind readonly ; <double> [#uses=1]
- %27 = fmul double %22, %26 ; <double> [#uses=2]
- %28 = fmul double %9, %27 ; <double> [#uses=1]
- %29 = fmul double %27, %12 ; <double> [#uses=1]
- %30 = fadd double %24, %28 ; <double> [#uses=1]
- %31 = fadd double 0.000000e+00, %29 ; <double> [#uses=1]
- %32 = fadd double %25, 0.000000e+00 ; <double> [#uses=1]
- %33 = fadd double %30, 0.000000e+00 ; <double> [#uses=1]
- %34 = fadd double %31, 0.000000e+00 ; <double> [#uses=1]
- %35 = fadd double %32, 0.000000e+00 ; <double> [#uses=1]
- %36 = bitcast %struct.ggPoint3* %x to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32(i8* null, i8* %36, i32 24, i32 4) nounwind
- store double %33, double* null, align 8
- br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i
+bb3: ; preds = %entry
+ %2 = fdiv double 1.000000e+00, 0.000000e+00
+ %3 = fmul double 0.000000e+00, %2
+ %4 = call double @llvm.sqrt.f64(double 0.000000e+00) nounwind
+ %5 = fdiv double 1.000000e+00, %4
+ %6 = fmul double %3, %5
+ %7 = fmul double 0.000000e+00, %5
+ %8 = fmul double %3, %7
+ %9 = fsub double %8, 0.000000e+00
+ %10 = fmul double 0.000000e+00, %6
+ %11 = fsub double 0.000000e+00, %10
+ %12 = fsub double -0.000000e+00, %11
+ %13 = fmul double %0, %0
+ %14 = fsub double %13, 0.000000e+00
+ %15 = call double @llvm.sqrt.f64(double %14)
+ %16 = fmul double 0.000000e+00, %15
+ %17 = fdiv double %16, %0
+ %18 = fadd double 0.000000e+00, %17
+ %19 = call double @acos(double %18) nounwind readonly
+ %20 = load double* null, align 4
+ %21 = fmul double %20, 0x401921FB54442D18
+ %22 = call double @sin(double %19) nounwind readonly
+ %23 = fmul double %22, 0.000000e+00
+ %24 = fmul double %6, %23
+ %25 = fmul double %7, %23
+ %26 = call double @sin(double %21) nounwind readonly
+ %27 = fmul double %22, %26
+ %28 = fmul double %9, %27
+ %29 = fmul double %27, %12
+ %30 = fadd double %24, %28
+ %31 = fadd double 0.000000e+00, %29
+ %32 = fadd double %25, 0.000000e+00
+ %33 = fadd double %30, 0.000000e+00
+ %34 = fadd double %31, 0.000000e+00
+ %35 = fadd double %32, 0.000000e+00
+ %36 = bitcast %struct.ggPoint3* %x to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* null, i8* %36, i32 24, i32 4, i1 false)
+ store double %33, double* null, align 8
+ br i1 false, label %_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit, label %bb5.i.i.i
-bb5.i.i.i: ; preds = %bb3
- unreachable
+bb5.i.i.i: ; preds = %bb3
+ unreachable
-_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit: ; preds = %bb3
- %37 = fsub double %13, 0.000000e+00 ; <double> [#uses=0]
- %38 = fsub double -0.000000e+00, %34 ; <double> [#uses=0]
- %39 = fsub double -0.000000e+00, %35 ; <double> [#uses=0]
- ret i32 1
+_Z20ggRaySphereIntersectRK6ggRay3RK8ggSphereddRd.exit: ; preds = %bb3
+ %37 = fsub double %13, 0.000000e+00
+ %38 = fsub double -0.000000e+00, %34
+ %39 = fsub double -0.000000e+00, %35
+ ret i32 1
-bb7: ; preds = %entry
- ret i32 0
+bb7: ; preds = %entry
+ ret i32 0
}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll b/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
deleted file mode 100644
index 27888d7..0000000
--- a/test/CodeGen/ARM/2009-06-12-RegScavengerAssert.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; RUN: llc < %s -mtriple=armv6-apple-darwin
-
- type { i32, i32, %struct.D_Sym**, [3 x %struct.D_Sym*] } ; type %0
- type { i32, %struct.D_Reduction** } ; type %1
- type { i32, %struct.D_RightEpsilonHint* } ; type %2
- type { i32, %struct.D_ErrorRecoveryHint* } ; type %3
- type { i32, i32, %struct.D_Reduction**, [3 x %struct.D_Reduction*] } ; type %4
- %struct.D_ErrorRecoveryHint = type { i16, i16, i8* }
- %struct.D_ParseNode = type { i32, %struct.d_loc_t, i8*, i8*, %struct.D_Scope*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i8*, i8* }
- %struct.D_Parser = type { i8*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, %struct.D_Scope*, void (%struct.D_Parser*)*, %struct.D_ParseNode* (%struct.D_Parser*, i32, %struct.D_ParseNode**)*, void (%struct.D_ParseNode*)*, %struct.d_loc_t, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
- %struct.D_ParserTables = type { i32, %struct.D_State*, i16*, i32, i32, %struct.D_Symbol*, void (%struct.D_Parser*, %struct.d_loc_t*, i8**)*, i32, %struct.D_Pass*, i32 }
- %struct.D_Pass = type { i8*, i32, i32, i32 }
- %struct.D_Reduction = type { i16, i16, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)*, i16, i16, i32, i32, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)** }
- %struct.D_RightEpsilonHint = type { i16, i16, %struct.D_Reduction* }
- %struct.D_Scope = type { i8, %struct.D_Sym*, %struct.D_SymHash*, %struct.D_Sym*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope*, %struct.D_Scope* }
- %struct.D_Shift = type { i16, i8, i8, i32, i32, i32 (i8*, i8**, i32, i32, %struct.D_Parser*)* }
- %struct.D_State = type { i8*, i32, %1, %2, %3, %struct.D_Shift**, i32 (i8**, i32*, i32*, i16*, i32*, i8*, i32*)*, i8*, i8, i8, i8, i8*, %struct.D_Shift***, i32 }
- %struct.D_Sym = type { i8*, i32, i32, %struct.D_Sym*, %struct.D_Sym*, i32 }
- %struct.D_SymHash = type { i32, i32, %0 }
- %struct.D_Symbol = type { i32, i8*, i32 }
- %struct.PNode = type { i32, i32, i32, i32, %struct.D_Reduction*, %struct.D_Shift*, i32, %struct.VecPNode, i32, i8, i8, %struct.PNode*, %struct.PNode*, %struct.PNode*, %struct.PNode*, i8*, i8*, %struct.D_Scope*, i8*, %struct.D_ParseNode }
- %struct.PNodeHash = type { %struct.PNode**, i32, i32, i32, %struct.PNode* }
- %struct.Parser = type { %struct.D_Parser, i8*, i8*, %struct.D_ParserTables*, i32, i32, i32, i32, i32, i32, i32, %struct.PNodeHash, %struct.SNodeHash, %struct.Reduction*, %struct.Shift*, %struct.D_Scope*, %struct.SNode*, i32, %struct.Reduction*, %struct.Shift*, i32, %struct.PNode*, %struct.SNode*, %struct.ZNode*, %4, %struct.ShiftResult*, %struct.D_Shift, %struct.Parser*, i8* }
- %struct.Reduction = type { %struct.ZNode*, %struct.SNode*, %struct.D_Reduction*, %struct.SNode*, i32, %struct.Reduction* }
- %struct.SNode = type { %struct.D_State*, %struct.D_Scope*, i8*, %struct.d_loc_t, i32, %struct.PNode*, %struct.VecZNode, i32, %struct.SNode*, %struct.SNode* }
- %struct.SNodeHash = type { %struct.SNode**, i32, i32, i32, %struct.SNode*, %struct.SNode* }
- %struct.Shift = type { %struct.SNode*, %struct.Shift* }
- %struct.ShiftResult = type { %struct.D_Shift*, %struct.d_loc_t }
- %struct.VecPNode = type { i32, i32, %struct.PNode**, [3 x %struct.PNode*] }
- %struct.VecSNode = type { i32, i32, %struct.SNode**, [3 x %struct.SNode*] }
- %struct.VecZNode = type { i32, i32, %struct.ZNode**, [3 x %struct.ZNode*] }
- %struct.ZNode = type { %struct.PNode*, %struct.VecSNode }
- %struct.d_loc_t = type { i8*, i8*, i32, i32, i32 }
-
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
-define fastcc i32 @exhaustive_parse(%struct.Parser* %p, i32 %state) nounwind {
-entry:
- store i8* undef, i8** undef, align 4
- %0 = getelementptr %struct.Parser* %p, i32 0, i32 0, i32 6 ; <%struct.d_loc_t*> [#uses=1]
- %1 = bitcast %struct.d_loc_t* %0 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32(i8* undef, i8* %1, i32 20, i32 4)
- br label %bb10
-
-bb10: ; preds = %bb30, %bb29, %bb26, %entry
- br i1 undef, label %bb18, label %bb20
-
-bb18: ; preds = %bb10
- br i1 undef, label %bb20, label %bb19
-
-bb19: ; preds = %bb18
- br label %bb20
-
-bb20: ; preds = %bb19, %bb18, %bb10
- br i1 undef, label %bb21, label %bb22
-
-bb21: ; preds = %bb20
- unreachable
-
-bb22: ; preds = %bb20
- br i1 undef, label %bb24, label %bb26
-
-bb24: ; preds = %bb22
- unreachable
-
-bb26: ; preds = %bb22
- br i1 undef, label %bb10, label %bb29
-
-bb29: ; preds = %bb26
- br i1 undef, label %bb10, label %bb30
-
-bb30: ; preds = %bb29
- br i1 undef, label %bb31, label %bb10
-
-bb31: ; preds = %bb30
- unreachable
-}
diff --git a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
index 90a4a42..382038e 100644
--- a/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
+++ b/test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
@@ -14,7 +14,8 @@ entry:
br i1 %p, label %bb8, label %bb1
bb1: ; preds = %entry
- %0 = malloc %struct.Village ; <%struct.Village*> [#uses=3]
+ %malloccall = tail call i8* @malloc(i32 ptrtoint (%struct.Village* getelementptr (%struct.Village* null, i32 1) to i32))
+ %0 = bitcast i8* %malloccall to %struct.Village*
%exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
%.c = fptosi double %exp2 to i32 ; <i32> [#uses=1]
store i32 %.c, i32* null
@@ -29,3 +30,4 @@ bb8: ; preds = %entry
}
declare double @ldexp(double, i32)
+declare noalias i8* @malloc(i32)
diff --git a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
index 397eba4..8bde748 100644
--- a/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
+++ b/test/CodeGen/ARM/2009-08-31-LSDA-Name.ll
@@ -1,32 +1,35 @@
; RUN: llc < %s -mtriple=arm-apple-darwin9 -march=arm | FileCheck %s
+; CHECK: L_LSDA_0:
+
+
%struct.A = type { i32* }
define void @"\01-[MyFunction Name:]"() {
entry:
- %save_filt.1 = alloca i32 ; <i32*> [#uses=2]
- %save_eptr.0 = alloca i8* ; <i8**> [#uses=2]
- %a = alloca %struct.A ; <%struct.A*> [#uses=3]
- %eh_exception = alloca i8* ; <i8**> [#uses=5]
- %eh_selector = alloca i32 ; <i32*> [#uses=3]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- call void @_ZN1AC1Ev(%struct.A* %a)
- invoke void @_Z3barv()
+ %save_filt.1 = alloca i32
+ %save_eptr.0 = alloca i8*
+ %a = alloca %struct.A
+ %eh_exception = alloca i8*
+ %eh_selector = alloca i32
+ %"alloca point" = bitcast i32 0 to i32
+ call void @_ZN1AC1Ev(%struct.A* %a)
+ invoke void @_Z3barv()
to label %invcont unwind label %lpad
invcont: ; preds = %entry
- call void @_ZN1AD1Ev(%struct.A* %a) nounwind
+ call void @_ZN1AD1Ev(%struct.A* %a) nounwind
br label %return
bb: ; preds = %ppad
- %eh_select = load i32* %eh_selector ; <i32> [#uses=1]
+ %eh_select = load i32* %eh_selector
store i32 %eh_select, i32* %save_filt.1, align 4
- %eh_value = load i8** %eh_exception ; <i8*> [#uses=1]
+ %eh_value = load i8** %eh_exception
store i8* %eh_value, i8** %save_eptr.0, align 4
- call void @_ZN1AD1Ev(%struct.A* %a) nounwind
- %0 = load i8** %save_eptr.0, align 4 ; <i8*> [#uses=1]
+ call void @_ZN1AD1Ev(%struct.A* %a) nounwind
+ %0 = load i8** %save_eptr.0, align 4
store i8* %0, i8** %eh_exception, align 4
- %1 = load i32* %save_filt.1, align 4 ; <i32> [#uses=1]
+ %1 = load i32* %save_filt.1, align 4
store i32 %1, i32* %eh_selector, align 4
br label %Unwind
@@ -34,10 +37,10 @@ return: ; preds = %invcont
ret void
lpad: ; preds = %entry
- %eh_ptr = call i8* @llvm.eh.exception() ; <i8*> [#uses=1]
+ %eh_ptr = call i8* @llvm.eh.exception()
store i8* %eh_ptr, i8** %eh_exception
- %eh_ptr1 = load i8** %eh_exception ; <i8*> [#uses=1]
- %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0) ; <i32> [#uses=1]
+ %eh_ptr1 = load i8** %eh_exception
+ %eh_select2 = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr1, i8* bitcast (i32 (...)* @__gxx_personality_sj0 to i8*), i32 0)
store i32 %eh_select2, i32* %eh_selector
br label %ppad
@@ -45,20 +48,20 @@ ppad: ; preds = %lpad
br label %bb
Unwind: ; preds = %bb
- %eh_ptr3 = load i8** %eh_exception ; <i8*> [#uses=1]
- call void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
+ %eh_ptr3 = load i8** %eh_exception
+ call void @_Unwind_SjLj_Resume(i8* %eh_ptr3)
unreachable
}
define linkonce_odr void @_ZN1AC1Ev(%struct.A* %this) {
entry:
- %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %this_addr = alloca %struct.A*
+ %"alloca point" = bitcast i32 0 to i32
store %struct.A* %this, %struct.A** %this_addr
- %0 = call i8* @_Znwm(i32 4) ; <i8*> [#uses=1]
- %1 = bitcast i8* %0 to i32* ; <i32*> [#uses=1]
- %2 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1]
- %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0 ; <i32**> [#uses=1]
+ %0 = call i8* @_Znwm(i32 4)
+ %1 = bitcast i8* %0 to i32*
+ %2 = load %struct.A** %this_addr, align 4
+ %3 = getelementptr inbounds %struct.A* %2, i32 0, i32 0
store i32* %1, i32** %3, align 4
br label %return
@@ -70,14 +73,14 @@ declare i8* @_Znwm(i32)
define linkonce_odr void @_ZN1AD1Ev(%struct.A* %this) nounwind {
entry:
- %this_addr = alloca %struct.A* ; <%struct.A**> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
+ %this_addr = alloca %struct.A*
+ %"alloca point" = bitcast i32 0 to i32
store %struct.A* %this, %struct.A** %this_addr
- %0 = load %struct.A** %this_addr, align 4 ; <%struct.A*> [#uses=1]
- %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0 ; <i32**> [#uses=1]
- %2 = load i32** %1, align 4 ; <i32*> [#uses=1]
- %3 = bitcast i32* %2 to i8* ; <i8*> [#uses=1]
- call void @_ZdlPv(i8* %3) nounwind
+ %0 = load %struct.A** %this_addr, align 4
+ %1 = getelementptr inbounds %struct.A* %0, i32 0, i32 0
+ %2 = load i32** %1, align 4
+ %3 = bitcast i32* %2 to i8*
+ call void @_ZdlPv(i8* %3) nounwind
br label %bb
bb: ; preds = %entry
@@ -86,17 +89,16 @@ bb: ; preds = %entry
return: ; preds = %bb
ret void
}
-;CHECK: L_LSDA_0:
declare void @_ZdlPv(i8*) nounwind
declare void @_Z3barv()
-declare i8* @llvm.eh.exception() nounwind
+declare i8* @llvm.eh.exception() nounwind readonly
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
declare i32 @__gxx_personality_sj0(...)
diff --git a/test/CodeGen/ARM/2009-10-30.ll b/test/CodeGen/ARM/2009-10-30.ll
index 87d1a8b..e46ab1e 100644
--- a/test/CodeGen/ARM/2009-10-30.ll
+++ b/test/CodeGen/ARM/2009-10-30.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | FileCheck %s
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi | FileCheck %s
; This test checks that the address of the varg arguments is correctly
; computed when there are 5 or more regular arguments.
diff --git a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
index 34f7519..a18a830 100644
--- a/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
+++ b/test/CodeGen/ARM/2009-11-01-NeonMoves.ll
@@ -19,7 +19,7 @@ entry:
%5 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %3, <2 x float> %4) nounwind ; <<2 x float>> [#uses=2]
%6 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %5, <2 x float> %5) nounwind ; <<2 x float>> [#uses=2]
%7 = shufflevector <2 x float> %6, <2 x float> %6, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=2]
-;CHECK: vmov
+;CHECK: vorr
%8 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %7) nounwind ; <<4 x float>> [#uses=3]
%9 = fmul <4 x float> %8, %8 ; <<4 x float>> [#uses=1]
%10 = call <4 x float> @llvm.arm.neon.vrsqrts.v4f32(<4 x float> %9, <4 x float> %7) nounwind ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/2010-08-04-StackVariable.ll b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
index 25d38ed..6aeaa26 100644
--- a/test/CodeGen/ARM/2010-08-04-StackVariable.ll
+++ b/test/CodeGen/ARM/2010-08-04-StackVariable.ll
@@ -31,7 +31,7 @@ return: ; preds = %bb2
ret i32 %.0, !dbg !29
}
-define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 {
+define linkonce_odr void @_ZN4SValC1Ev(%struct.SVal* %this) nounwind ssp align 2 {
entry:
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
call void @llvm.dbg.value(metadata !{%struct.SVal* %this}, i64 0, metadata !31), !dbg !34
diff --git a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
index ee443fe..99db637 100644
--- a/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
+++ b/test/CodeGen/ARM/2010-10-19-mc-elf-objheader.ll
@@ -1,7 +1,7 @@
; RUN: llc %s -mtriple=arm-linux-gnueabi -filetype=obj -o - | \
; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=BASIC %s
; RUN: llc %s -mtriple=armv7-linux-gnueabi -march=arm -mcpu=cortex-a8 \
-; RUN: -mattr=-neon -mattr=+vfp2 \
+; RUN: -mattr=-neon,-vfp3,+vfp2 \
; RUN: -arm-reserve-r9 -filetype=obj -o - | \
; RUN: elf-dump --dump-section-data | FileCheck -check-prefix=CORTEXA8 %s
diff --git a/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
new file mode 100644
index 0000000..7baacfe
--- /dev/null
+++ b/test/CodeGen/ARM/2011-06-16-TailCallByVal.ll
@@ -0,0 +1,20 @@
+; RUN: llc < %s -arm-tail-calls=1 | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+%struct.A = type <{ i16, i16, i32, i16, i16, i32, i16, [8 x %struct.B], [418 x i8], %struct.C }>
+%struct.B = type <{ i32, i16, i16 }>
+%struct.C = type { i16, i32, i16, i16 }
+
+; CHECK: f
+; CHECK: push {r1, r2, r3}
+; CHECK: add sp, #12
+; CHECK: b.w _puts
+
+define void @f(i8* %s, %struct.A* nocapture byval %a) nounwind optsize {
+entry:
+ %puts = tail call i32 @puts(i8* %s)
+ ret void
+}
+
+declare i32 @puts(i8* nocapture) nounwind
diff --git a/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
new file mode 100644
index 0000000..1b5b8a9
--- /dev/null
+++ b/test/CodeGen/ARM/2011-06-29-MergeGlobalsAlign.ll
@@ -0,0 +1,12 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 | FileCheck %s
+; CHECK: .zerofill __DATA,__bss,__MergedGlobals,16,2
+
+%struct.config = type { i16, i16, i16, i16 }
+
+@prev = external global [0 x i16]
+@max_lazy_match = internal unnamed_addr global i32 0, align 4
+@read_buf = external global i32 (i8*, i32)*
+@window = external global [0 x i8]
+@lookahead = internal unnamed_addr global i32 0, align 4
+@eofile.b = internal unnamed_addr global i1 false
+@ins_h = internal unnamed_addr global i32 0, align 4
diff --git a/test/CodeGen/ARM/2011-07-10-GlobalMergeBug.ll b/test/CodeGen/ARM/2011-07-10-GlobalMergeBug.ll
new file mode 100644
index 0000000..2970cd2
--- /dev/null
+++ b/test/CodeGen/ARM/2011-07-10-GlobalMergeBug.ll
@@ -0,0 +1,8 @@
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+; CHECK-NOT: MergedGlobals
+
+@a = internal unnamed_addr global i1 false
+@b = internal global [64 x i8] zeroinitializer, align 64
diff --git a/test/CodeGen/ARM/arm-modifier.ll b/test/CodeGen/ARM/arm-modifier.ll
index 0a7bb6c..396de37 100644
--- a/test/CodeGen/ARM/arm-modifier.ll
+++ b/test/CodeGen/ARM/arm-modifier.ll
@@ -46,9 +46,9 @@ ret void
define void @f3() nounwind {
entry:
; CHECK: f3
-; CHECK: stm r{{[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}}
-; CHECK: adds lr, [[REG1]]
-; CHECK: ldm r{{[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}}
+; CHECK: stm {{lr|r[0-9]+}}, {[[REG1:(r[0-9]+)]], r{{[0-9]+}}}
+; CHECK: adds {{lr|r[0-9]+}}, [[REG1]]
+; CHECK: ldm {{lr|r[0-9]+}}, {r{{[0-9]+}}, r{{[0-9]+}}}
%tmp = load i64* @f3_var, align 4
%tmp1 = load i64* @f3_var2, align 4
%0 = call i64 asm sideeffect "stm ${0:m}, ${1:M}\0A\09adds $3, $1\0A\09", "=*m,=r,1,r"(i64** @f3_ptr, i64 %tmp, i64 %tmp1) nounwind
diff --git a/test/CodeGen/ARM/armv4.ll b/test/CodeGen/ARM/armv4.ll
index ef722de..6b213d5 100644
--- a/test/CodeGen/ARM/armv4.ll
+++ b/test/CodeGen/ARM/armv4.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-unknown-eabi | FileCheck %s -check-prefix=THUMB
-; RUN: llc < %s -mtriple=arm-unknown-eabi -mcpu=strongarm | FileCheck %s -check-prefix=ARM
-; RUN: llc < %s -mtriple=arm-unknown-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=THUMB
-; RUN: llc < %s -mtriple=arm-unknown-eabi -mattr=+v6 | FileCheck %s -check-prefix=THUMB
+; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB
+; RUN: llc < %s -mtriple=armv4-unknown-eabi -mcpu=strongarm | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=armv7-unknown-eabi -mcpu=cortex-a8 | FileCheck %s -check-prefix=THUMB
+; RUN: llc < %s -mtriple=armv6-unknown-eabi | FileCheck %s -check-prefix=THUMB
; RUN: llc < %s -mtriple=armv4-unknown-eabi | FileCheck %s -check-prefix=ARM
; RUN: llc < %s -mtriple=armv4t-unknown-eabi | FileCheck %s -check-prefix=THUMB
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
index c94b096..84f3813 100644
--- a/test/CodeGen/ARM/bfi.ll
+++ b/test/CodeGen/ARM/bfi.ll
@@ -61,3 +61,16 @@ entry:
%3 = or i32 %2, %0
ret i32 %3
}
+
+; rdar://9609030
+define i32 @f6(i32 %a, i32 %b) nounwind readnone {
+entry:
+; CHECK: f6:
+; CHECK-NOT: bic
+; CHECK: bfi r0, r1, #8, #9
+ %and = and i32 %a, -130817
+ %and2 = shl i32 %b, 8
+ %shl = and i32 %and2, 130816
+ %or = or i32 %shl, %and
+ ret i32 %or
+}
diff --git a/test/CodeGen/ARM/bfx.ll b/test/CodeGen/ARM/bfx.ll
index fcca191..519c135 100644
--- a/test/CodeGen/ARM/bfx.ll
+++ b/test/CodeGen/ARM/bfx.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s
define i32 @sbfx1(i32 %a) {
; CHECK: sbfx1
diff --git a/test/CodeGen/ARM/call-tc.ll b/test/CodeGen/ARM/call-tc.ll
index c460f7a..e01750b 100644
--- a/test/CodeGen/ARM/call-tc.ll
+++ b/test/CodeGen/ARM/call-tc.ll
@@ -15,11 +15,11 @@ define void @t1() {
define void @t2() {
; CHECKV6: t2:
-; CHECKV6: bx r0 @ TAILCALL
+; CHECKV6: bx r0
; CHECKT2D: t2:
; CHECKT2D: ldr
; CHECKT2D-NEXT: ldr
-; CHECKT2D-NEXT: bx r0 @ TAILCALL
+; CHECKT2D-NEXT: bx r0
%tmp = load i32 ()** @t ; <i32 ()*> [#uses=1]
%tmp.upgrd.2 = tail call i32 %tmp( ) ; <i32> [#uses=0]
ret void
@@ -27,11 +27,11 @@ define void @t2() {
define void @t3() {
; CHECKV6: t3:
-; CHECKV6: b _t2 @ TAILCALL
+; CHECKV6: b _t2
; CHECKELF: t3:
-; CHECKELF: b t2(PLT) @ TAILCALL
+; CHECKELF: b t2(PLT)
; CHECKT2D: t3:
-; CHECKT2D: b.w _t2 @ TAILCALL
+; CHECKT2D: b.w _t2
tail call void @t2( ) ; <i32> [#uses=0]
ret void
@@ -41,9 +41,9 @@ define void @t3() {
define double @t4(double %a) nounwind readonly ssp {
entry:
; CHECKV6: t4:
-; CHECKV6: b _sin @ TAILCALL
+; CHECKV6: b _sin
; CHECKELF: t4:
-; CHECKELF: b sin(PLT) @ TAILCALL
+; CHECKELF: b sin(PLT)
%0 = tail call double @sin(double %a) nounwind readonly ; <double> [#uses=1]
ret double %0
}
@@ -51,9 +51,9 @@ entry:
define float @t5(float %a) nounwind readonly ssp {
entry:
; CHECKV6: t5:
-; CHECKV6: b _sinf @ TAILCALL
+; CHECKV6: b _sinf
; CHECKELF: t5:
-; CHECKELF: b sinf(PLT) @ TAILCALL
+; CHECKELF: b sinf(PLT)
%0 = tail call float @sinf(float %a) nounwind readonly ; <float> [#uses=1]
ret float %0
}
@@ -65,9 +65,9 @@ declare double @sin(double) nounwind readonly
define i32 @t6(i32 %a, i32 %b) nounwind readnone {
entry:
; CHECKV6: t6:
-; CHECKV6: b ___divsi3 @ TAILCALL
+; CHECKV6: b ___divsi3
; CHECKELF: t6:
-; CHECKELF: b __aeabi_idiv(PLT) @ TAILCALL
+; CHECKELF: b __aeabi_idiv(PLT)
%0 = sdiv i32 %a, %b
ret i32 %0
}
diff --git a/test/CodeGen/ARM/call.ll b/test/CodeGen/ARM/call.ll
index c020b6f..0f9543f 100644
--- a/test/CodeGen/ARM/call.ll
+++ b/test/CodeGen/ARM/call.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=CHECKV4
+; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s -check-prefix=CHECKV4
; RUN: llc < %s -march=arm -mattr=+v5t | FileCheck %s -check-prefix=CHECKV5
-; RUN: llc < %s -march=arm -mtriple=arm-linux-gnueabi\
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi\
; RUN: -relocation-model=pic | FileCheck %s -check-prefix=CHECKELF
@t = weak global i32 ()* null ; <i32 ()**> [#uses=1]
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
index 9b90408..06b459e 100644
--- a/test/CodeGen/ARM/carry.ll
+++ b/test/CodeGen/ARM/carry.ll
@@ -24,7 +24,6 @@ entry:
define i64 @f3(i32 %al, i32 %bl) {
; CHECK: f3:
; CHECK: adds r
-; CHECK: adcs r
; CHECK: adc r
entry:
; unsigned wide add
diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll
index 7893df7..f4c1b5a 100644
--- a/test/CodeGen/ARM/constants.ll
+++ b/test/CodeGen/ARM/constants.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -disable-cgp-branch-opts | FileCheck %s
+; RUN: llc < %s -mtriple=armv4t-unknown-linux-gnueabi -disable-cgp-branch-opts | FileCheck %s
define i32 @f1() {
; CHECK: f1
@@ -14,33 +14,46 @@ define i32 @f2() {
define i32 @f3() {
; CHECK: f3
-; CHECK: mov r0, #1, #24
+; CHECK: mov r0, #256
ret i32 256
}
define i32 @f4() {
; CHECK: f4
-; CHECK: orr{{.*}}#1, #24
+; CHECK: orr{{.*}}#256
ret i32 257
}
define i32 @f5() {
; CHECK: f5
-; CHECK: mov r0, #255, #2
+; CHECK: mov r0, #-1073741761
ret i32 -1073741761
}
define i32 @f6() {
; CHECK: f6
-; CHECK: mov r0, #63, #28
+; CHECK: mov r0, #1008
ret i32 1008
}
define void @f7(i32 %a) {
; CHECK: f7
-; CHECK: cmp r0, #1, #16
+; CHECK: cmp r0, #65536
%b = icmp ugt i32 %a, 65536
br i1 %b, label %r, label %r
r:
ret void
}
+
+%t1 = type { <3 x float>, <3 x float> }
+
+@const1 = global %t1 { <3 x float> zeroinitializer,
+ <3 x float> <float 1.000000e+00,
+ float 2.000000e+00,
+ float 3.000000e+00> }, align 16
+; CHECK: const1
+; CHECK: .zero 16
+; CHECK: float 1.0
+; CHECK: float 2.0
+; CHECK: float 3.0
+; CHECK: .zero 4
diff --git a/test/CodeGen/ARM/debug-info-blocks.ll b/test/CodeGen/ARM/debug-info-blocks.ll
new file mode 100644
index 0000000..519c40e
--- /dev/null
+++ b/test/CodeGen/ARM/debug-info-blocks.ll
@@ -0,0 +1,245 @@
+; RUN: llc -O0 < %s | FileCheck %s
+; CHECK: @DEBUG_VALUE: mydata <- [sp+#8]+#0
+; Radar 9331779
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-macosx10.7.0"
+
+%0 = type opaque
+%1 = type { [4 x i32] }
+%2 = type <{ i8*, i32, i32, i8*, %struct.Re*, i8*, %3*, %struct.my_struct* }>
+%3 = type opaque
+%struct.CP = type { float, float }
+%struct.CR = type { %struct.CP, %struct.CP }
+%struct.Re = type { i32, i32 }
+%struct.__block_byref_mydata = type { i8*, %struct.__block_byref_mydata*, i32, i32, i8*, i8*, %0* }
+%struct.my_struct = type opaque
+
+@"\01L_OBJC_SELECTOR_REFERENCES_13" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"OBJC_IVAR_$_MyWork._bounds" = external hidden global i32, section "__DATA, __objc_const", align 4
+@"OBJC_IVAR_$_MyWork._data" = external hidden global i32, section "__DATA, __objc_const", align 4
+@"\01L_OBJC_SELECTOR_REFERENCES_222" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+declare void @llvm.dbg.declare(metadata, metadata) nounwind readnone
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+define hidden void @foobar_func_block_invoke_0(i8* %.block_descriptor, %0* %loadedMydata, [4 x i32] %bounds.coerce0, [4 x i32] %data.coerce0) ssp {
+ %1 = alloca %0*, align 4
+ %bounds = alloca %struct.CR, align 4
+ %data = alloca %struct.CR, align 4
+ call void @llvm.dbg.value(metadata !{i8* %.block_descriptor}, i64 0, metadata !27), !dbg !129
+ store %0* %loadedMydata, %0** %1, align 4
+ call void @llvm.dbg.declare(metadata !{%0** %1}, metadata !130), !dbg !131
+ %2 = bitcast %struct.CR* %bounds to %1*
+ %3 = getelementptr %1* %2, i32 0, i32 0
+ store [4 x i32] %bounds.coerce0, [4 x i32]* %3
+ call void @llvm.dbg.declare(metadata !{%struct.CR* %bounds}, metadata !132), !dbg !133
+ %4 = bitcast %struct.CR* %data to %1*
+ %5 = getelementptr %1* %4, i32 0, i32 0
+ store [4 x i32] %data.coerce0, [4 x i32]* %5
+ call void @llvm.dbg.declare(metadata !{%struct.CR* %data}, metadata !134), !dbg !135
+ %6 = bitcast i8* %.block_descriptor to %2*
+ %7 = getelementptr inbounds %2* %6, i32 0, i32 6
+ call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !136), !dbg !137
+ call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !138), !dbg !137
+ call void @llvm.dbg.declare(metadata !{%2* %6}, metadata !139), !dbg !140
+ %8 = load %0** %1, align 4, !dbg !141
+ %9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_13", !dbg !141
+ %10 = bitcast %0* %8 to i8*, !dbg !141
+ %11 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %10, i8* %9), !dbg !141
+ %12 = bitcast i8* %11 to %0*, !dbg !141
+ %13 = getelementptr inbounds %2* %6, i32 0, i32 5, !dbg !141
+ %14 = load i8** %13, !dbg !141
+ %15 = bitcast i8* %14 to %struct.__block_byref_mydata*, !dbg !141
+ %16 = getelementptr inbounds %struct.__block_byref_mydata* %15, i32 0, i32 1, !dbg !141
+ %17 = load %struct.__block_byref_mydata** %16, !dbg !141
+ %18 = getelementptr inbounds %struct.__block_byref_mydata* %17, i32 0, i32 6, !dbg !141
+ store %0* %12, %0** %18, align 4, !dbg !141
+ %19 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !143
+ %20 = load %3** %19, align 4, !dbg !143
+ %21 = load i32* @"OBJC_IVAR_$_MyWork._data", !dbg !143
+ %22 = bitcast %3* %20 to i8*, !dbg !143
+ %23 = getelementptr inbounds i8* %22, i32 %21, !dbg !143
+ %24 = bitcast i8* %23 to %struct.CR*, !dbg !143
+ %25 = bitcast %struct.CR* %24 to i8*, !dbg !143
+ %26 = bitcast %struct.CR* %data to i8*, !dbg !143
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %25, i8* %26, i32 16, i32 4, i1 false), !dbg !143
+ %27 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !144
+ %28 = load %3** %27, align 4, !dbg !144
+ %29 = load i32* @"OBJC_IVAR_$_MyWork._bounds", !dbg !144
+ %30 = bitcast %3* %28 to i8*, !dbg !144
+ %31 = getelementptr inbounds i8* %30, i32 %29, !dbg !144
+ %32 = bitcast i8* %31 to %struct.CR*, !dbg !144
+ %33 = bitcast %struct.CR* %32 to i8*, !dbg !144
+ %34 = bitcast %struct.CR* %bounds to i8*, !dbg !144
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %33, i8* %34, i32 16, i32 4, i1 false), !dbg !144
+ %35 = getelementptr inbounds %2* %6, i32 0, i32 6, !dbg !145
+ %36 = load %3** %35, align 4, !dbg !145
+ %37 = getelementptr inbounds %2* %6, i32 0, i32 5, !dbg !145
+ %38 = load i8** %37, !dbg !145
+ %39 = bitcast i8* %38 to %struct.__block_byref_mydata*, !dbg !145
+ %40 = getelementptr inbounds %struct.__block_byref_mydata* %39, i32 0, i32 1, !dbg !145
+ %41 = load %struct.__block_byref_mydata** %40, !dbg !145
+ %42 = getelementptr inbounds %struct.__block_byref_mydata* %41, i32 0, i32 6, !dbg !145
+ %43 = load %0** %42, align 4, !dbg !145
+ %44 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_222", !dbg !145
+ %45 = bitcast %3* %36 to i8*, !dbg !145
+ call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %0*)*)(i8* %45, i8* %44, %0* %43), !dbg !145
+ ret void, !dbg !146
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.dbg.enum = !{!1, !1, !5, !5, !9, !14, !19, !19, !14, !14, !14, !19, !19, !19}
+!llvm.dbg.sp = !{!23}
+
+!0 = metadata !{i32 589841, i32 0, i32 16, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !"Apple clang version 2.1", i1 true, i1 false, metadata !"", i32 2} ; [ DW_TAG_compile_unit ]
+!1 = metadata !{i32 589828, metadata !0, metadata !"", metadata !2, i32 248, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !3, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!2 = metadata !{i32 589865, metadata !"header.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!3 = metadata !{metadata !4}
+!4 = metadata !{i32 589864, metadata !"Ver1", i64 0} ; [ DW_TAG_enumerator ]
+!5 = metadata !{i32 589828, metadata !0, metadata !"Mode", metadata !6, i32 79, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !7, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!6 = metadata !{i32 589865, metadata !"header2.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!7 = metadata !{metadata !8}
+!8 = metadata !{i32 589864, metadata !"One", i64 0} ; [ DW_TAG_enumerator ]
+!9 = metadata !{i32 589828, metadata !0, metadata !"", metadata !10, i32 15, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !11, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!10 = metadata !{i32 589865, metadata !"header3.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!11 = metadata !{metadata !12, metadata !13}
+!12 = metadata !{i32 589864, metadata !"Unknown", i64 0} ; [ DW_TAG_enumerator ]
+!13 = metadata !{i32 589864, metadata !"Known", i64 1} ; [ DW_TAG_enumerator ]
+!14 = metadata !{i32 589828, metadata !0, metadata !"", metadata !15, i32 20, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !16, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!15 = metadata !{i32 589865, metadata !"Private.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!16 = metadata !{metadata !17, metadata !18}
+!17 = metadata !{i32 589864, metadata !"Single", i64 0} ; [ DW_TAG_enumerator ]
+!18 = metadata !{i32 589864, metadata !"Double", i64 1} ; [ DW_TAG_enumerator ]
+!19 = metadata !{i32 589828, metadata !0, metadata !"", metadata !20, i32 14, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !21, i32 0, i32 0} ; [ DW_TAG_enumeration_type ]
+!20 = metadata !{i32 589865, metadata !"header4.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!21 = metadata !{metadata !22}
+!22 = metadata !{i32 589864, metadata !"Eleven", i64 0} ; [ DW_TAG_enumerator ]
+!23 = metadata !{i32 589870, i32 0, metadata !24, metadata !"foobar_func_block_invoke_0", metadata !"foobar_func_block_invoke_0", metadata !"", metadata !24, i32 609, metadata !25, i1 true, i1 true, i32 0, i32 0, i32 0, i32 256, i1 false, void (i8*, %0*, [4 x i32], [4 x i32])* @foobar_func_block_invoke_0, null, null} ; [ DW_TAG_subprogram ]
+!24 = metadata !{i32 589865, metadata !"MyLibrary.m", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!25 = metadata !{i32 589845, metadata !24, metadata !"", metadata !24, i32 0, i64 0, i64 0, i32 0, i32 0, i32 0, metadata !26, i32 0, i32 0} ; [ DW_TAG_subroutine_type ]
+!26 = metadata !{null}
+!27 = metadata !{i32 590081, metadata !23, metadata !".block_descriptor", metadata !24, i32 16777825, metadata !28, i32 64} ; [ DW_TAG_arg_variable ]
+!28 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !29} ; [ DW_TAG_pointer_type ]
+!29 = metadata !{i32 589843, metadata !24, metadata !"__block_literal_14", metadata !24, i32 609, i64 256, i64 32, i32 0, i32 0, i32 0, metadata !30, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!30 = metadata !{metadata !31, metadata !33, metadata !35, metadata !36, metadata !37, metadata !48, metadata !89, metadata !124}
+!31 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 609, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
+!32 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!33 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 609, i64 32, i64 32, i64 32, i32 0, metadata !34} ; [ DW_TAG_member ]
+!34 = metadata !{i32 589860, metadata !0, metadata !"int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!35 = metadata !{i32 589837, metadata !24, metadata !"__reserved", metadata !24, i32 609, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
+!36 = metadata !{i32 589837, metadata !24, metadata !"__FuncPtr", metadata !24, i32 609, i64 32, i64 32, i64 96, i32 0, metadata !32} ; [ DW_TAG_member ]
+!37 = metadata !{i32 589837, metadata !24, metadata !"__descriptor", metadata !24, i32 609, i64 32, i64 32, i64 128, i32 0, metadata !38} ; [ DW_TAG_member ]
+!38 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !39} ; [ DW_TAG_pointer_type ]
+!39 = metadata !{i32 589843, metadata !0, metadata !"__block_descriptor_withcopydispose", metadata !40, i32 307, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !41, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!40 = metadata !{i32 589865, metadata !"MyLibrary.i", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!41 = metadata !{metadata !42, metadata !44, metadata !45, metadata !47}
+!42 = metadata !{i32 589837, metadata !40, metadata !"reserved", metadata !40, i32 307, i64 32, i64 32, i64 0, i32 0, metadata !43} ; [ DW_TAG_member ]
+!43 = metadata !{i32 589860, metadata !0, metadata !"long unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!44 = metadata !{i32 589837, metadata !40, metadata !"Size", metadata !40, i32 307, i64 32, i64 32, i64 32, i32 0, metadata !43} ; [ DW_TAG_member ]
+!45 = metadata !{i32 589837, metadata !40, metadata !"CopyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 64, i32 0, metadata !46} ; [ DW_TAG_member ]
+!46 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_pointer_type ]
+!47 = metadata !{i32 589837, metadata !40, metadata !"DestroyFuncPtr", metadata !40, i32 307, i64 32, i64 32, i64 96, i32 0, metadata !46} ; [ DW_TAG_member ]
+!48 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 609, i64 32, i64 32, i64 160, i32 0, metadata !49} ; [ DW_TAG_member ]
+!49 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 0, i64 0, i32 0, metadata !50} ; [ DW_TAG_pointer_type ]
+!50 = metadata !{i32 589843, metadata !24, metadata !"", metadata !24, i32 0, i64 224, i64 0, i32 0, i32 16, i32 0, metadata !51, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!51 = metadata !{metadata !52, metadata !53, metadata !54, metadata !55, metadata !56, metadata !57, metadata !58}
+!52 = metadata !{i32 589837, metadata !24, metadata !"__isa", metadata !24, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !32} ; [ DW_TAG_member ]
+!53 = metadata !{i32 589837, metadata !24, metadata !"__forwarding", metadata !24, i32 0, i64 32, i64 32, i64 32, i32 0, metadata !32} ; [ DW_TAG_member ]
+!54 = metadata !{i32 589837, metadata !24, metadata !"__flags", metadata !24, i32 0, i64 32, i64 32, i64 64, i32 0, metadata !34} ; [ DW_TAG_member ]
+!55 = metadata !{i32 589837, metadata !24, metadata !"__size", metadata !24, i32 0, i64 32, i64 32, i64 96, i32 0, metadata !34} ; [ DW_TAG_member ]
+!56 = metadata !{i32 589837, metadata !24, metadata !"__copy_helper", metadata !24, i32 0, i64 32, i64 32, i64 128, i32 0, metadata !32} ; [ DW_TAG_member ]
+!57 = metadata !{i32 589837, metadata !24, metadata !"__destroy_helper", metadata !24, i32 0, i64 32, i64 32, i64 160, i32 0, metadata !32} ; [ DW_TAG_member ]
+!58 = metadata !{i32 589837, metadata !24, metadata !"mydata", metadata !24, i32 0, i64 32, i64 32, i64 192, i32 0, metadata !59} ; [ DW_TAG_member ]
+!59 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !60} ; [ DW_TAG_pointer_type ]
+!60 = metadata !{i32 589843, metadata !24, metadata !"UIMydata", metadata !61, i32 26, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !62, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!61 = metadata !{i32 589865, metadata !"header11.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!62 = metadata !{metadata !63, metadata !71, metadata !75, metadata !79}
+!63 = metadata !{i32 589852, metadata !60, null, metadata !61, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!64 = metadata !{i32 589843, metadata !40, metadata !"NSO", metadata !65, i32 66, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !66, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!65 = metadata !{i32 589865, metadata !"NSO.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!66 = metadata !{metadata !67}
+!67 = metadata !{i32 589837, metadata !65, metadata !"isa", metadata !65, i32 67, i64 32, i64 32, i64 0, i32 2, metadata !68, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!68 = metadata !{i32 589846, metadata !0, metadata !"Class", metadata !40, i32 197, i64 0, i64 0, i64 0, i32 0, metadata !69} ; [ DW_TAG_typedef ]
+!69 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !70} ; [ DW_TAG_pointer_type ]
+!70 = metadata !{i32 589843, metadata !0, metadata !"objc_class", metadata !40, i32 0, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!71 = metadata !{i32 589837, metadata !61, metadata !"_mydataRef", metadata !61, i32 28, i64 32, i64 32, i64 32, i32 0, metadata !72, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!72 = metadata !{i32 589846, metadata !0, metadata !"CFTypeRef", metadata !24, i32 313, i64 0, i64 0, i64 0, i32 0, metadata !73} ; [ DW_TAG_typedef ]
+!73 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !74} ; [ DW_TAG_pointer_type ]
+!74 = metadata !{i32 589862, metadata !0, metadata !"", null, i32 0, i64 0, i64 0, i64 0, i32 0, null} ; [ DW_TAG_const_type ]
+!75 = metadata !{i32 589837, metadata !61, metadata !"_scale", metadata !61, i32 29, i64 32, i64 32, i64 64, i32 0, metadata !76, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!76 = metadata !{i32 589846, metadata !0, metadata !"Float", metadata !77, i32 89, i64 0, i64 0, i64 0, i32 0, metadata !78} ; [ DW_TAG_typedef ]
+!77 = metadata !{i32 589865, metadata !"header12.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!78 = metadata !{i32 589860, metadata !0, metadata !"float", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 4} ; [ DW_TAG_base_type ]
+!79 = metadata !{i32 589837, metadata !61, metadata !"_mydataFlags", metadata !61, i32 37, i64 8, i64 8, i64 96, i32 0, metadata !80, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!80 = metadata !{i32 589843, metadata !0, metadata !"", metadata !61, i32 30, i64 8, i64 8, i32 0, i32 0, i32 0, metadata !81, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!81 = metadata !{metadata !82, metadata !84, metadata !85, metadata !86, metadata !87, metadata !88}
+!82 = metadata !{i32 589837, metadata !61, metadata !"named", metadata !61, i32 31, i64 1, i64 32, i64 0, i32 0, metadata !83} ; [ DW_TAG_member ]
+!83 = metadata !{i32 589860, metadata !0, metadata !"unsigned int", null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!84 = metadata !{i32 589837, metadata !61, metadata !"mydataO", metadata !61, i32 32, i64 3, i64 32, i64 1, i32 0, metadata !83} ; [ DW_TAG_member ]
+!85 = metadata !{i32 589837, metadata !61, metadata !"cached", metadata !61, i32 33, i64 1, i64 32, i64 4, i32 0, metadata !83} ; [ DW_TAG_member ]
+!86 = metadata !{i32 589837, metadata !61, metadata !"hasBeenCached", metadata !61, i32 34, i64 1, i64 32, i64 5, i32 0, metadata !83} ; [ DW_TAG_member ]
+!87 = metadata !{i32 589837, metadata !61, metadata !"hasPattern", metadata !61, i32 35, i64 1, i64 32, i64 6, i32 0, metadata !83} ; [ DW_TAG_member ]
+!88 = metadata !{i32 589837, metadata !61, metadata !"isCIMydata", metadata !61, i32 36, i64 1, i64 32, i64 7, i32 0, metadata !83} ; [ DW_TAG_member ]
+!89 = metadata !{i32 589837, metadata !24, metadata !"self", metadata !24, i32 609, i64 32, i64 32, i64 192, i32 0, metadata !90} ; [ DW_TAG_member ]
+!90 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !91} ; [ DW_TAG_pointer_type ]
+!91 = metadata !{i32 589843, metadata !40, metadata !"MyWork", metadata !24, i32 36, i64 384, i64 32, i32 0, i32 0, i32 0, metadata !92, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!92 = metadata !{metadata !93, metadata !98, metadata !101, metadata !107, metadata !123}
+!93 = metadata !{i32 589852, metadata !91, null, metadata !24, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !94} ; [ DW_TAG_inheritance ]
+!94 = metadata !{i32 589843, metadata !40, metadata !"twork", metadata !95, i32 43, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !96, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!95 = metadata !{i32 589865, metadata !"header13.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!96 = metadata !{metadata !97}
+!97 = metadata !{i32 589852, metadata !94, null, metadata !95, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!98 = metadata !{i32 589837, metadata !24, metadata !"_itemID", metadata !24, i32 38, i64 64, i64 32, i64 32, i32 1, metadata !99, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!99 = metadata !{i32 589846, metadata !0, metadata !"uint64_t", metadata !40, i32 55, i64 0, i64 0, i64 0, i32 0, metadata !100} ; [ DW_TAG_typedef ]
+!100 = metadata !{i32 589860, metadata !0, metadata !"long long unsigned int", null, i32 0, i64 64, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!101 = metadata !{i32 589837, metadata !24, metadata !"_library", metadata !24, i32 39, i64 32, i64 32, i64 96, i32 1, metadata !102, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!102 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !103} ; [ DW_TAG_pointer_type ]
+!103 = metadata !{i32 589843, metadata !40, metadata !"MyLibrary2", metadata !104, i32 22, i64 32, i64 32, i32 0, i32 0, i32 0, metadata !105, i32 16, i32 0} ; [ DW_TAG_structure_type ]
+!104 = metadata !{i32 589865, metadata !"header14.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!105 = metadata !{metadata !106}
+!106 = metadata !{i32 589852, metadata !103, null, metadata !104, i32 0, i64 0, i64 0, i64 0, i32 0, metadata !64} ; [ DW_TAG_inheritance ]
+!107 = metadata !{i32 589837, metadata !24, metadata !"_bounds", metadata !24, i32 40, i64 128, i64 32, i64 128, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!108 = metadata !{i32 589846, metadata !0, metadata !"CR", metadata !40, i32 33, i64 0, i64 0, i64 0, i32 0, metadata !109} ; [ DW_TAG_typedef ]
+!109 = metadata !{i32 589843, metadata !0, metadata !"CR", metadata !77, i32 29, i64 128, i64 32, i32 0, i32 0, i32 0, metadata !110, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!110 = metadata !{metadata !111, metadata !117}
+!111 = metadata !{i32 589837, metadata !77, metadata !"origin", metadata !77, i32 30, i64 64, i64 32, i64 0, i32 0, metadata !112} ; [ DW_TAG_member ]
+!112 = metadata !{i32 589846, metadata !0, metadata !"CP", metadata !77, i32 17, i64 0, i64 0, i64 0, i32 0, metadata !113} ; [ DW_TAG_typedef ]
+!113 = metadata !{i32 589843, metadata !0, metadata !"CP", metadata !77, i32 13, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !114, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!114 = metadata !{metadata !115, metadata !116}
+!115 = metadata !{i32 589837, metadata !77, metadata !"x", metadata !77, i32 14, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
+!116 = metadata !{i32 589837, metadata !77, metadata !"y", metadata !77, i32 15, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
+!117 = metadata !{i32 589837, metadata !77, metadata !"size", metadata !77, i32 31, i64 64, i64 32, i64 64, i32 0, metadata !118} ; [ DW_TAG_member ]
+!118 = metadata !{i32 589846, metadata !0, metadata !"Size", metadata !77, i32 25, i64 0, i64 0, i64 0, i32 0, metadata !119} ; [ DW_TAG_typedef ]
+!119 = metadata !{i32 589843, metadata !0, metadata !"Size", metadata !77, i32 21, i64 64, i64 32, i32 0, i32 0, i32 0, metadata !120, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!120 = metadata !{metadata !121, metadata !122}
+!121 = metadata !{i32 589837, metadata !77, metadata !"width", metadata !77, i32 22, i64 32, i64 32, i64 0, i32 0, metadata !76} ; [ DW_TAG_member ]
+!122 = metadata !{i32 589837, metadata !77, metadata !"height", metadata !77, i32 23, i64 32, i64 32, i64 32, i32 0, metadata !76} ; [ DW_TAG_member ]
+!123 = metadata !{i32 589837, metadata !24, metadata !"_data", metadata !24, i32 40, i64 128, i64 32, i64 256, i32 1, metadata !108, metadata !"", metadata !"", metadata !"", i32 0} ; [ DW_TAG_member ]
+!124 = metadata !{i32 589837, metadata !24, metadata !"semi", metadata !24, i32 609, i64 32, i64 32, i64 224, i32 0, metadata !125} ; [ DW_TAG_member ]
+!125 = metadata !{i32 589846, metadata !0, metadata !"d_t", metadata !24, i32 35, i64 0, i64 0, i64 0, i32 0, metadata !126} ; [ DW_TAG_typedef ]
+!126 = metadata !{i32 589839, metadata !0, metadata !"", null, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !127} ; [ DW_TAG_pointer_type ]
+!127 = metadata !{i32 589843, metadata !0, metadata !"my_struct", metadata !128, i32 49, i64 0, i64 0, i32 0, i32 4, i32 0, null, i32 0, i32 0} ; [ DW_TAG_structure_type ]
+!128 = metadata !{i32 589865, metadata !"header15.h", metadata !"/Volumes/Sandbox/llvm", metadata !0} ; [ DW_TAG_file_type ]
+!129 = metadata !{i32 609, i32 144, metadata !23, null}
+!130 = metadata !{i32 590081, metadata !23, metadata !"loadedMydata", metadata !24, i32 33555041, metadata !59, i32 0} ; [ DW_TAG_arg_variable ]
+!131 = metadata !{i32 609, i32 155, metadata !23, null}
+!132 = metadata !{i32 590081, metadata !23, metadata !"bounds", metadata !24, i32 50332257, metadata !108, i32 0} ; [ DW_TAG_arg_variable ]
+!133 = metadata !{i32 609, i32 175, metadata !23, null}
+!134 = metadata !{i32 590081, metadata !23, metadata !"data", metadata !24, i32 67109473, metadata !108, i32 0} ; [ DW_TAG_arg_variable ]
+!135 = metadata !{i32 609, i32 190, metadata !23, null}
+!136 = metadata !{i32 590080, metadata !23, metadata !"mydata", metadata !24, i32 604, metadata !50, i32 0, i64 1, i64 20, i64 2, i64 1, i64 4, i64 2, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
+!137 = metadata !{i32 604, i32 49, metadata !23, null}
+!138 = metadata !{i32 590080, metadata !23, metadata !"self", metadata !40, i32 604, metadata !90, i32 0, i64 1, i64 24} ; [ DW_TAG_auto_variable ]
+!139 = metadata !{i32 590080, metadata !23, metadata !"semi", metadata !24, i32 607, metadata !125, i32 0, i64 1, i64 28} ; [ DW_TAG_auto_variable ]
+!140 = metadata !{i32 607, i32 30, metadata !23, null}
+!141 = metadata !{i32 610, i32 17, metadata !142, null}
+!142 = metadata !{i32 589835, metadata !23, i32 609, i32 200, metadata !24, i32 94} ; [ DW_TAG_lexical_block ]
+!143 = metadata !{i32 611, i32 17, metadata !142, null}
+!144 = metadata !{i32 612, i32 17, metadata !142, null}
+!145 = metadata !{i32 613, i32 17, metadata !142, null}
+!146 = metadata !{i32 615, i32 13, metadata !142, null}
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index 92e2d13..de2820e 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,48 +1,48 @@
; RUN: llc < %s -march=arm
- %struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
- %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+%struct.comment = type { i8**, i32*, i32, i8* }
+%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
+
+@str215 = external global [2 x i8]
define void @t1(%struct.state* %v) {
- %tmp6 = load i32* null
- %tmp8 = alloca float, i32 %tmp6
- store i32 1, i32* null
- br i1 false, label %bb123.preheader, label %return
-
-bb123.preheader:
- br i1 false, label %bb43, label %return
-
-bb43:
- call fastcc void @f1( float* %tmp8, float* null, i32 0 )
- %tmp70 = load i32* null
- %tmp85 = getelementptr float* %tmp8, i32 0
- call fastcc void @f2( float* null, float* null, float* %tmp85, i32 %tmp70 )
- ret void
-
-return:
- ret void
+ %tmp6 = load i32* null
+ %tmp8 = alloca float, i32 %tmp6
+ store i32 1, i32* null
+ br i1 false, label %bb123.preheader, label %return
+
+bb123.preheader: ; preds = %0
+ br i1 false, label %bb43, label %return
+
+bb43: ; preds = %bb123.preheader
+ call fastcc void @f1(float* %tmp8, float* null, i32 0)
+ %tmp70 = load i32* null
+ %tmp85 = getelementptr float* %tmp8, i32 0
+ call fastcc void @f2(float* null, float* null, float* %tmp85, i32 %tmp70)
+ ret void
+
+return: ; preds = %bb123.preheader, %0
+ ret void
}
declare fastcc void @f1(float*, float*, i32)
declare fastcc void @f2(float*, float*, float*, i32)
- %struct.comment = type { i8**, i32*, i32, i8* }
-@str215 = external global [2 x i8]
-
define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
- %tmp1 = call i32 @strlen( i8* %tag )
- %tmp3 = call i32 @strlen( i8* %contents )
- %tmp4 = add i32 %tmp1, 2
- %tmp5 = add i32 %tmp4, %tmp3
- %tmp6 = alloca i8, i32 %tmp5
- %tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
- %tmp6.len = call i32 @strlen( i8* %tmp6 )
- %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
- call void @llvm.memcpy.i32( i8* %tmp6.indexed, i8* getelementptr ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1 )
- %tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
- call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
- ret void
+ %tmp1 = call i32 @strlen(i8* %tag)
+ %tmp3 = call i32 @strlen(i8* %contents)
+ %tmp4 = add i32 %tmp1, 2
+ %tmp5 = add i32 %tmp4, %tmp3
+ %tmp6 = alloca i8, i32 %tmp5
+ %tmp9 = call i8* @strcpy(i8* %tmp6, i8* %tag)
+ %tmp6.len = call i32 @strlen(i8* %tmp6)
+ %tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
+ %tmp15 = call i8* @strcat(i8* %tmp6, i8* %contents)
+ call fastcc void @comment_add(%struct.comment* %vc, i8* %tmp6)
+ ret void
}
declare i32 @strlen(i8*)
@@ -51,6 +51,6 @@ declare i8* @strcat(i8*, i8*)
declare fastcc void @comment_add(%struct.comment*, i8*)
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
declare i8* @strcpy(i8*, i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/fast-isel-static.ll b/test/CodeGen/ARM/fast-isel-static.ll
index 2d79674..a86e325 100644
--- a/test/CodeGen/ARM/fast-isel-static.ll
+++ b/test/CodeGen/ARM/fast-isel-static.ll
@@ -23,7 +23,7 @@ entry:
%z = alloca float, align 4
store float 0.000000e+00, float* %ztot, align 4
store float 1.000000e+00, float* %z, align 4
-; CHECK-LONG: blx r2
+; CHECK-LONG: blx r
; CHECK-NORM: bl _myadd
call void @myadd(float* %ztot, float* %z)
ret i32 0
diff --git a/test/CodeGen/ARM/fast-isel.ll b/test/CodeGen/ARM/fast-isel.ll
index 499c97f..eb0c5c8 100644
--- a/test/CodeGen/ARM/fast-isel.ll
+++ b/test/CodeGen/ARM/fast-isel.ll
@@ -43,7 +43,7 @@ b1:
br label %b2
; THUMB: add.w {{.*}} #4096
-; ARM: add {{.*}} #1, #20
+; ARM: add {{.*}} #4096
b2:
%b = add i32 %tmp, 4095
diff --git a/test/CodeGen/ARM/fold-const.ll b/test/CodeGen/ARM/fold-const.ll
new file mode 100644
index 0000000..227e4e8
--- /dev/null
+++ b/test/CodeGen/ARM/fold-const.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s
+
+define i32 @f(i32 %a) nounwind readnone optsize ssp {
+entry:
+ %conv = zext i32 %a to i64
+ %tmp1 = tail call i64 @llvm.ctlz.i64(i64 %conv)
+; CHECK: clz
+; CHECK-NOT: adds
+ %cast = trunc i64 %tmp1 to i32
+ %sub = sub nsw i32 63, %cast
+ ret i32 %sub
+}
+
+declare i64 @llvm.ctlz.i64(i64) nounwind readnone
diff --git a/test/CodeGen/ARM/fp.ll b/test/CodeGen/ARM/fp.ll
index 8ef45f2..ac023d1 100644
--- a/test/CodeGen/ARM/fp.ll
+++ b/test/CodeGen/ARM/fp.ll
@@ -42,7 +42,7 @@ entry:
define double @h(double* %v) {
;CHECK: h:
-;CHECK: vldr.64
+;CHECK: vldr.64
;CHECK-NEXT: vmov
entry:
%tmp = load double* %v ; <double> [#uses=1]
@@ -51,7 +51,7 @@ entry:
define float @h2() {
;CHECK: h2:
-;CHECK: mov r0, #254, #10
+;CHECK: mov r0, #1065353216
entry:
ret float 1.000000e+00
}
diff --git a/test/CodeGen/ARM/globals.ll b/test/CodeGen/ARM/globals.ll
index ccb1428..5e7e3f2 100644
--- a/test/CodeGen/ARM/globals.ll
+++ b/test/CodeGen/ARM/globals.ll
@@ -1,7 +1,7 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=DarwinStatic
-; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DarwinDynamic
-; RUN: llc < %s -mtriple=arm-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=DarwinPIC
-; RUN: llc < %s -mtriple=arm-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LinuxPIC
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=static | FileCheck %s -check-prefix=DarwinStatic
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=dynamic-no-pic | FileCheck %s -check-prefix=DarwinDynamic
+; RUN: llc < %s -mtriple=armv6-apple-darwin -relocation-model=pic | FileCheck %s -check-prefix=DarwinPIC
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi -relocation-model=pic | FileCheck %s -check-prefix=LinuxPIC
@G = external global i32
diff --git a/test/CodeGen/ARM/hello.ll b/test/CodeGen/ARM/hello.ll
index bfed7a6..9f46ae0 100644
--- a/test/CodeGen/ARM/hello.ll
+++ b/test/CodeGen/ARM/hello.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=arm
-; RUN: llc < %s -mtriple=arm-linux-gnueabi | grep mov | count 1
-; RUN: llc < %s -mtriple=arm-linux-gnu --disable-fp-elim | \
+; RUN: llc < %s -mtriple=armv6-linux-gnueabi | grep mov | count 1
+; RUN: llc < %s -mtriple=armv6-linux-gnu --disable-fp-elim | \
; RUN: grep mov | count 2
-; RUN: llc < %s -mtriple=arm-apple-darwin | grep mov | count 2
+; RUN: llc < %s -mtriple=armv6-apple-darwin | grep mov | count 2
@str = internal constant [12 x i8] c"Hello World\00"
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index 63808b2..c01c041 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
;; Integer absolute value, should produce something as good as: ARM:
;; add r3, r0, r0, asr #31
diff --git a/test/CodeGen/ARM/ifcvt1.ll b/test/CodeGen/ARM/ifcvt1.ll
index e6aa044..b073a05 100644
--- a/test/CodeGen/ARM/ifcvt1.ll
+++ b/test/CodeGen/ARM/ifcvt1.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -march=arm | grep bx | count 1
+; RUN: llc < %s -march=arm -mattr=+v4t
+; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 1
define i32 @t1(i32 %a, i32 %b) {
%tmp2 = icmp eq i32 %a, 0
diff --git a/test/CodeGen/ARM/ifcvt2.ll b/test/CodeGen/ARM/ifcvt2.ll
index 7b9d0cf..1bca10a 100644
--- a/test/CodeGen/ARM/ifcvt2.ll
+++ b/test/CodeGen/ARM/ifcvt2.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+v4t | FileCheck %s
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK: t1:
diff --git a/test/CodeGen/ARM/ifcvt3.ll b/test/CodeGen/ARM/ifcvt3.ll
index f7ebac6..3e2c578 100644
--- a/test/CodeGen/ARM/ifcvt3.ll
+++ b/test/CodeGen/ARM/ifcvt3.ll
@@ -1,6 +1,6 @@
-; RUN: llc < %s -march=arm
-; RUN: llc < %s -march=arm | grep cmpne | count 1
-; RUN: llc < %s -march=arm | grep bx | count 2
+; RUN: llc < %s -march=arm -mattr=+v4t
+; RUN: llc < %s -march=arm -mattr=+v4t | grep cmpne | count 1
+; RUN: llc < %s -march=arm -mattr=+v4t | grep bx | count 2
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
switch i32 %c, label %cond_next [
diff --git a/test/CodeGen/ARM/indirectbr.ll b/test/CodeGen/ARM/indirectbr.ll
index f0ab9dd..25a0f93 100644
--- a/test/CodeGen/ARM/indirectbr.ll
+++ b/test/CodeGen/ARM/indirectbr.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -relocation-model=pic -mtriple=arm-apple-darwin | FileCheck %s -check-prefix=ARM
-; RUN: llc < %s -relocation-model=pic -mtriple=thumb-apple-darwin | FileCheck %s -check-prefix=THUMB
+; RUN: llc < %s -relocation-model=pic -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -relocation-model=pic -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=THUMB
; RUN: llc < %s -relocation-model=static -mtriple=thumbv7-apple-darwin | FileCheck %s -check-prefix=THUMB2
@nextaddr = global i8* null ; <i8**> [#uses=2]
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
index 58687b9..853585d 100644
--- a/test/CodeGen/ARM/inlineasm3.ll
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+; RUN: llc < %s -march=arm -mattr=+neon,+v6t2 | FileCheck %s
; Radar 7449043
%struct.int32x4_t = type { <4 x i32> }
@@ -58,3 +58,43 @@ entry:
call void asm sideeffect "flds s15, $0 \0A", "*^Uvm,~{s15}"(float* @k.2126) nounwind
ret i32 0
}
+
+; Radar 9307836 & 9119939
+
+define float @t6(float %y) nounwind {
+entry:
+; CHECK: t6
+; CHECK: flds s15, s0
+ %0 = tail call float asm "flds s15, $0", "=x"() nounwind
+ ret float %0
+}
+
+; Radar 9307836 & 9119939
+
+define double @t7(double %y) nounwind {
+entry:
+; CHECK: t7
+; CHECK: flds s15, d0
+ %0 = tail call double asm "flds s15, $0", "=x"() nounwind
+ ret double %0
+}
+
+; Radar 9307836 & 9119939
+
+define float @t8(float %y) nounwind {
+entry:
+; CHECK: t8
+; CHECK: flds s15, s0
+ %0 = tail call float asm "flds s15, $0", "=t"() nounwind
+ ret float %0
+}
+
+; Radar 9307836 & 9119939
+
+define i32 @t9(i32 %r0) nounwind {
+entry:
+; CHECK: t9
+; CHECK: movw r0, #27182
+ %0 = tail call i32 asm "movw $0, $1", "=r,j"(i32 27182) nounwind
+ ret i32 %0
+}
diff --git a/test/CodeGen/ARM/jumptable-label.ll b/test/CodeGen/ARM/jumptable-label.ll
new file mode 100644
index 0000000..49d6986
--- /dev/null
+++ b/test/CodeGen/ARM/jumptable-label.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple thumbv6-apple-macosx10.6.0 | FileCheck %s
+
+; Test that we print the label of a bb that is only used in a jump table.
+
+; CHECK: .long LBB0_2
+; CHECK: LBB0_2:
+
+define i32 @calculate() {
+entry:
+ switch i32 undef, label %return [
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb6
+ i32 3, label %sw.bb13
+ i32 4, label %sw.bb20
+ ]
+
+sw.bb: ; preds = %entry
+ br label %return
+
+sw.bb6: ; preds = %entry
+ br label %return
+
+sw.bb13: ; preds = %entry
+ br label %return
+
+sw.bb20: ; preds = %entry
+ %div = sdiv i32 undef, undef
+ br label %return
+
+return: ; preds = %sw.bb20, %sw.bb13, %sw.bb6, %sw.bb, %entry
+ %retval.0 = phi i32 [ %div, %sw.bb20 ], [ undef, %sw.bb13 ], [ undef, %sw.bb6 ], [ undef, %sw.bb ], [ 0, %entry ]
+ ret i32 %retval.0
+}
diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll
index a3abdb6..f071b89 100644
--- a/test/CodeGen/ARM/ldr_frame.ll
+++ b/test/CodeGen/ARM/ldr_frame.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm | not grep mov
+; RUN: llc < %s -march=arm -mattr=+v4t | not grep mov
define i32 @f1() {
%buf = alloca [32 x i32], align 4
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index e401dca..0f1c7be 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -14,14 +14,14 @@ entry:
define i64 @f3() {
; CHECK: f3:
-; CHECK: mvn r0, #2, #2
+; CHECK: mvn r0, #-2147483648
entry:
ret i64 2147483647
}
define i64 @f4() {
; CHECK: f4:
-; CHECK: mov r0, #2, #2
+; CHECK: mov r0, #-2147483648
entry:
ret i64 2147483648
}
@@ -29,7 +29,7 @@ entry:
define i64 @f5() {
; CHECK: f5:
; CHECK: mvn r0, #0
-; CHECK: mvn r1, #2, #2
+; CHECK: mvn r1, #-2147483648
entry:
ret i64 9223372036854775807
}
diff --git a/test/CodeGen/ARM/lsr-unfolded-offset.ll b/test/CodeGen/ARM/lsr-unfolded-offset.ll
index e3e6eae..61b25bb 100644
--- a/test/CodeGen/ARM/lsr-unfolded-offset.ll
+++ b/test/CodeGen/ARM/lsr-unfolded-offset.ll
@@ -4,12 +4,13 @@
; register pressure and therefore spilling. There is more room for improvement
; here.
-; CHECK: sub sp, #{{32|24}}
+; CHECK: sub sp, #{{32|28|24}}
-; CHECK: ldr r{{.*}}, [sp, #4]
-; CHECK-NEXT: ldr r{{.*}}, [sp, #16]
-; CHECK-NEXT: ldr r{{.*}}, [sp, #12]
-; CHECK-NEXT: adds
+; CHECK: %for.inc
+; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, #
+; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, #
+; CHECK: ldr{{(.w)?}} r{{.*}}, [sp, #
+; CHECK: add
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
target triple = "thumbv7-apple-macosx10.7.0"
diff --git a/test/CodeGen/ARM/memcpy-inline.ll b/test/CodeGen/ARM/memcpy-inline.ll
index 5bae037..30b9f59 100644
--- a/test/CodeGen/ARM/memcpy-inline.ll
+++ b/test/CodeGen/ARM/memcpy-inline.ll
@@ -6,13 +6,14 @@
; CHECK: ldrb
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
+
@src = external global %struct.x
@dst = external global %struct.x
define i32 @t() {
entry:
- call void @llvm.memcpy.i32( i8* getelementptr (%struct.x* @dst, i32 0, i32 0), i8* getelementptr (%struct.x* @src, i32 0, i32 0), i32 11, i32 8 )
- ret i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%struct.x* @dst, i32 0, i32 0), i8* getelementptr inbounds (%struct.x* @src, i32 0, i32 0), i32 11, i32 8, i1 false)
+ ret i32 0
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/memfunc.ll b/test/CodeGen/ARM/memfunc.ll
index 032129d..aeda022 100644
--- a/test/CodeGen/ARM/memfunc.ll
+++ b/test/CodeGen/ARM/memfunc.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=arm -o - | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios -o - | FileCheck %s
; RUN: llc < %s -mtriple=arm-none-eabi -o - | FileCheck --check-prefix=EABI %s
@from = common global [500 x i32] zeroinitializer, align 4
@@ -9,24 +9,21 @@ entry:
; CHECK: memmove
; EABI: __aeabi_memmove
- call void @llvm.memmove.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
; CHECK: memcpy
; EABI: __aeabi_memcpy
- call void @llvm.memcpy.i32( i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0 )
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8* bitcast ([500 x i32]* @to to i8*), i32 500, i32 0, i1 false)
; EABI memset swaps arguments
; CHECK: mov r1, #0
; CHECK: memset
; EABI: mov r2, #0
; EABI: __aeabi_memset
- call void @llvm.memset.i32( i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0 )
+ call void @llvm.memset.p0i8.i32(i8* bitcast ([500 x i32]* @from to i8*), i8 0, i32 500, i32 0, i1 false)
unreachable
}
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/ARM/phi.ll b/test/CodeGen/ARM/phi.ll
index 29e17c0..dc1a95b 100644
--- a/test/CodeGen/ARM/phi.ll
+++ b/test/CodeGen/ARM/phi.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=arm < %s | FileCheck %s
+; RUN: llc -march=arm -mattr=+v4t < %s | FileCheck %s
; <rdar://problem/8686347>
define i32 @test1(i1 %a, i32* %b) {
@@ -20,4 +20,4 @@ end:
%r = load i32* %gep
; CHECK-NEXT: bx lr
ret i32 %r
-}
\ No newline at end of file
+}
diff --git a/test/CodeGen/ARM/prefetch.ll b/test/CodeGen/ARM/prefetch.ll
index 95f082a..9c8ff2b 100644
--- a/test/CodeGen/ARM/prefetch.ll
+++ b/test/CodeGen/ARM/prefetch.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=thumb -mattr=-thumb2 | not grep pld
-; RUN: llc < %s -march=thumb -mattr=+v7a | FileCheck %s -check-prefix=THUMB2
-; RUN: llc < %s -march=arm -mattr=+v7a | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -march=thumb -mattr=+v7 | FileCheck %s -check-prefix=THUMB2
+; RUN: llc < %s -march=arm -mattr=+v7 | FileCheck %s -check-prefix=ARM
; RUN: llc < %s -march=arm -mcpu=cortex-a9-mp | FileCheck %s -check-prefix=ARM-MP
; rdar://8601536
@@ -17,8 +17,8 @@ entry:
; THUMB2: t1:
; THUMB2-NOT: pldw [r0]
; THUMB2: pld [r0]
- tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
ret void
}
@@ -30,7 +30,7 @@ entry:
; THUMB2: t2:
; THUMB2: pld [r0, #1023]
%tmp = getelementptr i8* %ptr, i32 1023
- tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3 )
+ tail call void @llvm.prefetch( i8* %tmp, i32 0, i32 3, i32 1 )
ret void
}
@@ -45,7 +45,7 @@ entry:
%tmp1 = lshr i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
- tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+ tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 )
ret void
}
@@ -59,8 +59,19 @@ entry:
%tmp1 = shl i32 %offset, 2
%tmp2 = add i32 %base, %tmp1
%tmp3 = inttoptr i32 %tmp2 to i8*
- tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3 )
+ tail call void @llvm.prefetch( i8* %tmp3, i32 0, i32 3, i32 1 )
ret void
}
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
+
+define void @t5(i8* %ptr) nounwind {
+entry:
+; ARM: t5:
+; ARM: pli [r0]
+
+; THUMB2: t5:
+; THUMB2: pli [r0]
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 0 )
+ ret void
+}
diff --git a/test/CodeGen/ARM/private.ll b/test/CodeGen/ARM/private.ll
index fba56b4..f93ffe7 100644
--- a/test/CodeGen/ARM/private.ll
+++ b/test/CodeGen/ARM/private.ll
@@ -6,8 +6,6 @@
; RUN: grep .Lbaz: %t
; RUN: grep long.*\.Lbaz %t
-declare void @foo()
-
define private void @foo() {
ret void
}
diff --git a/test/CodeGen/ARM/reg_sequence.ll b/test/CodeGen/ARM/reg_sequence.ll
index d350937..3a19211 100644
--- a/test/CodeGen/ARM/reg_sequence.ll
+++ b/test/CodeGen/ARM/reg_sequence.ll
@@ -124,7 +124,7 @@ return1:
return2:
; CHECK: %return2
; CHECK: vadd.i32
-; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}}
; CHECK-NOT: vmov
; CHECK: vst2.32 {d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}}
%tmp100 = extractvalue %struct.__neon_int32x4x2_t %tmp2, 0 ; <<4 x i32>> [#uses=1]
@@ -139,7 +139,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
; CHECK: t5:
; CHECK: vldmia
; How can FileCheck match Q and D registers? We need a lisp interpreter.
-; CHECK: vmov {{q[0-9]+}}, {{q[0-9]+}}
+; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
; CHECK-NOT: vmov
; CHECK: vld2.16 {d{{[0-9]+}}[1], d{{[0-9]+}}[1]}, [r0]
; CHECK-NOT: vmov
@@ -156,7 +156,7 @@ define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
define <8 x i8> @t6(i8* %A, <8 x i8>* %B) nounwind {
; CHECK: t6:
; CHECK: vldr.64
-; CHECK: vmov d[[D0:[0-9]+]], d[[D1:[0-9]+]]
+; CHECK: vorr d[[D0:[0-9]+]], d[[D1:[0-9]+]]
; CHECK-NEXT: vld2.8 {d[[D1]][1], d[[D0]][1]}
%tmp1 = load <8 x i8>* %B ; <<8 x i8>> [#uses=2]
%tmp2 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2lane.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 1, i32 1) ; <%struct.__neon_int8x8x2_t> [#uses=2]
@@ -172,7 +172,7 @@ entry:
; CHECK: vld2.32
; CHECK: vst2.32
; CHECK: vld1.32 {d{{[0-9]+}}, d{{[0-9]+}}},
-; CHECK: vmov q[[Q0:[0-9]+]], q[[Q1:[0-9]+]]
+; CHECK: vorr q[[Q0:[0-9]+]], q[[Q1:[0-9]+]], q[[Q1:[0-9]+]]
; CHECK-NOT: vmov
; CHECK: vuzp.32 q[[Q1]], q[[Q0]]
; CHECK: vst1.32
@@ -272,8 +272,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
define arm_aapcs_vfpcc i32 @t10() nounwind {
entry:
; CHECK: t10:
-; CHECK: vmul.f32 q8, q8, d0[0]
; CHECK: vmov.i32 q[[Q0:[0-9]+]], #0x3F000000
+; CHECK: vmul.f32 q8, q8, d0[0]
; CHECK: vadd.f32 q8, q8, q8
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
diff --git a/test/CodeGen/ARM/rev.ll b/test/CodeGen/ARM/rev.ll
index 4170ff3..ea44c28 100644
--- a/test/CodeGen/ARM/rev.ll
+++ b/test/CodeGen/ARM/rev.ll
@@ -54,3 +54,73 @@ entry:
%conv8 = ashr exact i32 %sext, 16
ret i32 %conv8
}
+
+; rdar://9609059
+define i32 @test5(i32 %i) nounwind readnone {
+entry:
+; CHECK: test5
+; CHECK: revsh r0, r0
+ %shl = shl i32 %i, 24
+ %shr = ashr exact i32 %shl, 16
+ %shr23 = lshr i32 %i, 8
+ %and = and i32 %shr23, 255
+ %or = or i32 %shr, %and
+ ret i32 %or
+}
+
+; rdar://9609108
+define i32 @test6(i32 %x) nounwind readnone {
+entry:
+; CHECK: test6
+; CHECK: rev16 r0, r0
+ %and = shl i32 %x, 8
+ %shl = and i32 %and, 65280
+ %and2 = lshr i32 %x, 8
+ %shr11 = and i32 %and2, 255
+ %shr5 = and i32 %and2, 16711680
+ %shl9 = and i32 %and, -16777216
+ %or = or i32 %shr5, %shl9
+ %or6 = or i32 %or, %shr11
+ %or10 = or i32 %or6, %shl
+ ret i32 %or10
+}
+
+; rdar://9164521
+define i32 @test7(i32 %a) nounwind readnone {
+entry:
+; CHECK: test7
+; CHECK: rev r0, r0
+; CHECK: lsr r0, r0, #16
+ %and = lshr i32 %a, 8
+ %shr3 = and i32 %and, 255
+ %and2 = shl i32 %a, 8
+ %shl = and i32 %and2, 65280
+ %or = or i32 %shr3, %shl
+ ret i32 %or
+}
+
+define i32 @test8(i32 %a) nounwind readnone {
+entry:
+; CHECK: test8
+; CHECK: revsh r0, r0
+ %and = lshr i32 %a, 8
+ %shr4 = and i32 %and, 255
+ %and2 = shl i32 %a, 8
+ %or = or i32 %shr4, %and2
+ %sext = shl i32 %or, 16
+ %conv3 = ashr exact i32 %sext, 16
+ ret i32 %conv3
+}
+
+define zeroext i16 @test9(i16 zeroext %v) nounwind readnone {
+entry:
+; CHECK: test9
+; CHECK: rev r0, r0
+; CHECK: lsr r0, r0, #16
+ %conv = zext i16 %v to i32
+ %shr4 = lshr i32 %conv, 8
+ %shl = shl nuw nsw i32 %conv, 8
+ %or = or i32 %shr4, %shl
+ %conv3 = trunc i32 %or to i16
+ ret i16 %conv3
+}
diff --git a/test/CodeGen/ARM/section.ll b/test/CodeGen/ARM/section.ll
index 7a566d4..2762056 100644
--- a/test/CodeGen/ARM/section.ll
+++ b/test/CodeGen/ARM/section.ll
@@ -1,7 +1,6 @@
-; RUN: llc < %s -mtriple=arm-linux | \
-; RUN: grep {__DTOR_END__:}
-; RUN: llc < %s -mtriple=arm-linux | \
-; RUN: grep {\\.section.\\.dtors,"aw",.progbits}
+; RUN: llc < %s -mtriple=arm-linux | FileCheck %s
+; CHECK: .section .dtors,"aw",%progbits
+; CHECK: __DTOR_END__:
@__DTOR_END__ = internal global [1 x i32] zeroinitializer, section ".dtors" ; <[1 x i32]*> [#uses=0]
diff --git a/test/CodeGen/ARM/select-imm.ll b/test/CodeGen/ARM/select-imm.ll
index 43f8a66..f43dde5 100644
--- a/test/CodeGen/ARM/select-imm.ll
+++ b/test/CodeGen/ARM/select-imm.ll
@@ -6,7 +6,7 @@ define i32 @t1(i32 %c) nounwind readnone {
entry:
; ARM: t1:
; ARM: mov [[R1:r[0-9]+]], #101
-; ARM: orr [[R1b:r[0-9]+]], [[R1]], #1, #24
+; ARM: orr [[R1b:r[0-9]+]], [[R1]], #256
; ARM: movgt r0, #123
; ARMT2: t1:
@@ -27,7 +27,7 @@ entry:
; ARM: t2:
; ARM: mov r0, #123
; ARM: movgt r0, #101
-; ARM: orrgt r0, r0, #1, #24
+; ARM: orrgt r0, r0, #256
; ARMT2: t2:
; ARMT2: mov r0, #123
@@ -76,3 +76,39 @@ entry:
%1 = select i1 %0, i32 4283826005, i32 %x
ret i32 %1
}
+
+; rdar://9758317
+define i32 @t5(i32 %a) nounwind {
+entry:
+; ARM: t5:
+; ARM-NOT: mov
+; ARM: cmp r0, #1
+; ARM-NOT: mov
+; ARM: movne r0, #0
+
+; THUMB2: t5:
+; THUMB2-NOT: mov
+; THUMB2: cmp r0, #1
+; THUMB2: it ne
+; THUMB2: movne r0, #0
+ %cmp = icmp eq i32 %a, 1
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
+
+define i32 @t6(i32 %a) nounwind {
+entry:
+; ARM: t6:
+; ARM-NOT: mov
+; ARM: cmp r0, #0
+; ARM: movne r0, #1
+
+; THUMB2: t6:
+; THUMB2-NOT: mov
+; THUMB2: cmp r0, #0
+; THUMB2: it ne
+; THUMB2: movne r0, #1
+ %tobool = icmp ne i32 %a, 0
+ %lnot.ext = zext i1 %tobool to i32
+ ret i32 %lnot.ext
+}
diff --git a/test/CodeGen/ARM/select_xform.ll b/test/CodeGen/ARM/select_xform.ll
index 4211797..8a3133a 100644
--- a/test/CodeGen/ARM/select_xform.ll
+++ b/test/CodeGen/ARM/select_xform.ll
@@ -4,7 +4,7 @@
define i32 @t1(i32 %a, i32 %b, i32 %c) nounwind {
; ARM: t1:
-; ARM: sub r0, r1, #6, #2
+; ARM: sub r0, r1, #-2147483647
; ARM: movgt r0, r1
; T2: t1:
diff --git a/test/CodeGen/ARM/sub.ll b/test/CodeGen/ARM/sub.ll
index 555b18e..06ea703 100644
--- a/test/CodeGen/ARM/sub.ll
+++ b/test/CodeGen/ARM/sub.ll
@@ -12,7 +12,7 @@ define i64 @f1(i64 %a) {
; 66846720 = 0x03fc0000
define i64 @f2(i64 %a) {
; CHECK: f2
-; CHECK: subs r0, r0, #255, #14
+; CHECK: subs r0, r0, #66846720
; CHECK: sbc r1, r1, #0
%tmp = sub i64 %a, 66846720
ret i64 %tmp
diff --git a/test/CodeGen/ARM/sxt_rot.ll b/test/CodeGen/ARM/sxt_rot.ll
index 4752f17..355fee3 100644
--- a/test/CodeGen/ARM/sxt_rot.ll
+++ b/test/CodeGen/ARM/sxt_rot.ll
@@ -10,7 +10,7 @@ define i32 @test0(i8 %A) {
ret i32 %B
}
-define i8 @test1(i32 %A) signext {
+define signext i8 @test1(i32 %A) {
%B = lshr i32 %A, 8
%C = shl i32 %A, 24
%D = or i32 %B, %C
@@ -18,7 +18,7 @@ define i8 @test1(i32 %A) signext {
ret i8 %E
}
-define i32 @test2(i32 %A, i32 %X) signext {
+define signext i32 @test2(i32 %A, i32 %X) {
%B = lshr i32 %A, 8
%C = shl i32 %A, 24
%D = or i32 %B, %C
diff --git a/test/CodeGen/ARM/truncstore-dag-combine.ll b/test/CodeGen/ARM/truncstore-dag-combine.ll
index 2da08b6..5665440 100644
--- a/test/CodeGen/ARM/truncstore-dag-combine.ll
+++ b/test/CodeGen/ARM/truncstore-dag-combine.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -march=arm | not grep orr
-; RUN: llc < %s -march=arm | not grep mov
+; RUN: llc < %s -march=arm -mattr=+v4t | not grep orr
+; RUN: llc < %s -march=arm -mattr=+v4t | not grep mov
define void @bar(i8* %P, i16* %Q) {
entry:
diff --git a/test/CodeGen/ARM/uxt_rot.ll b/test/CodeGen/ARM/uxt_rot.ll
index 6307795..628c079 100644
--- a/test/CodeGen/ARM/uxt_rot.ll
+++ b/test/CodeGen/ARM/uxt_rot.ll
@@ -2,19 +2,19 @@
; RUN: llc < %s -march=arm -mattr=+v6 | grep uxtab | count 1
; RUN: llc < %s -march=arm -mattr=+v6 | grep uxth | count 1
-define i8 @test1(i32 %A.u) zeroext {
+define zeroext i8 @test1(i32 %A.u) {
%B.u = trunc i32 %A.u to i8
ret i8 %B.u
}
-define i32 @test2(i32 %A.u, i32 %B.u) zeroext {
+define zeroext i32 @test2(i32 %A.u, i32 %B.u) {
%C.u = trunc i32 %B.u to i8
%D.u = zext i8 %C.u to i32
%E.u = add i32 %A.u, %D.u
ret i32 %E.u
}
-define i32 @test3(i32 %A.u) zeroext {
+define zeroext i32 @test3(i32 %A.u) {
%B.u = lshr i32 %A.u, 8
%C.u = shl i32 %A.u, 24
%D.u = or i32 %B.u, %C.u
diff --git a/test/CodeGen/ARM/vargs_align.ll b/test/CodeGen/ARM/vargs_align.ll
index e4ef9e3..e390cf0 100644
--- a/test/CodeGen/ARM/vargs_align.ll
+++ b/test/CodeGen/ARM/vargs_align.ll
@@ -6,7 +6,6 @@ entry:
%a_addr = alloca i32 ; <i32*> [#uses=1]
%retval = alloca i32, align 4 ; <i32*> [#uses=2]
%tmp = alloca i32, align 4 ; <i32*> [#uses=2]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %a, i32* %a_addr
store i32 0, i32* %tmp
%tmp1 = load i32* %tmp ; <i32> [#uses=1]
diff --git a/test/CodeGen/ARM/vcvt_combine.ll b/test/CodeGen/ARM/vcvt_combine.ll
new file mode 100644
index 0000000..3009e50
--- /dev/null
+++ b/test/CodeGen/ARM/vcvt_combine.ll
@@ -0,0 +1,99 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
+
+@in = global float 0x400921FA00000000, align 4
+
+; Test signed conversion.
+; CHECK: t0
+; CHECK-NOT: vmul
+define void @t0() nounwind {
+entry:
+ %tmp = load float* @in, align 4, !tbaa !0
+ %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
+ %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
+ %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+ tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind
+ ret void
+}
+
+declare void @foo_int32x2_t(<2 x i32>)
+
+; Test unsigned conversion.
+; CHECK: t1
+; CHECK-NOT: vmul
+define void @t1() nounwind {
+entry:
+ %tmp = load float* @in, align 4, !tbaa !0
+ %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
+ %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
+ %mul.i = fmul <2 x float> %vecinit2.i, <float 8.000000e+00, float 8.000000e+00>
+ %vcvt.i = fptoui <2 x float> %mul.i to <2 x i32>
+ tail call void @foo_uint32x2_t(<2 x i32> %vcvt.i) nounwind
+ ret void
+}
+
+declare void @foo_uint32x2_t(<2 x i32>)
+
+; Test that should not fold due to a non-power-of-2 constant.
+; CHECK: t2
+; CHECK: vmul
+define void @t2() nounwind {
+entry:
+ %tmp = load float* @in, align 4, !tbaa !0
+ %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
+ %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
+ %mul.i = fmul <2 x float> %vecinit2.i, <float 0x401B333340000000, float 0x401B333340000000>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+ tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind
+ ret void
+}
+
+; Test that should not fold because the power of 2 is out of range.
+; CHECK: t3
+; CHECK: vmul
+define void @t3() nounwind {
+entry:
+ %tmp = load float* @in, align 4, !tbaa !0
+ %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
+ %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
+ %mul.i = fmul <2 x float> %vecinit2.i, <float 0x4200000000000000, float 0x4200000000000000>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+ tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind
+ ret void
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK: t4
+; CHECK-NOT: vmul
+define void @t4() nounwind {
+entry:
+ %tmp = load float* @in, align 4, !tbaa !0
+ %vecinit.i = insertelement <2 x float> undef, float %tmp, i32 0
+ %vecinit2.i = insertelement <2 x float> %vecinit.i, float %tmp, i32 1
+ %mul.i = fmul <2 x float> %vecinit2.i, <float 0x41F0000000000000, float 0x41F0000000000000>
+ %vcvt.i = fptosi <2 x float> %mul.i to <2 x i32>
+ tail call void @foo_int32x2_t(<2 x i32> %vcvt.i) nounwind
+ ret void
+}
+
+; Test quadword.
+; CHECK: t5
+; CHECK-NOT: vmul
+define void @t5() nounwind {
+entry:
+ %tmp = load float* @in, align 4, !tbaa !0
+ %vecinit.i = insertelement <4 x float> undef, float %tmp, i32 0
+ %vecinit2.i = insertelement <4 x float> %vecinit.i, float %tmp, i32 1
+ %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %tmp, i32 2
+ %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %tmp, i32 3
+ %mul.i = fmul <4 x float> %vecinit6.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+ %vcvt.i = fptosi <4 x float> %mul.i to <4 x i32>
+ tail call void @foo_int32x4_t(<4 x i32> %vcvt.i) nounwind
+ ret void
+}
+
+declare void @foo_int32x4_t(<4 x i32>)
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
diff --git a/test/CodeGen/ARM/vdiv_combine.ll b/test/CodeGen/ARM/vdiv_combine.ll
new file mode 100644
index 0000000..1387393
--- /dev/null
+++ b/test/CodeGen/ARM/vdiv_combine.ll
@@ -0,0 +1,102 @@
+; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
+
+@in = global float 0x400921FA00000000, align 4
+@iin = global i32 -1023, align 4
+@uin = global i32 1023, align 4
+
+declare void @foo_int32x4_t(<4 x i32>)
+
+; Test signed conversion.
+; CHECK: t1
+; CHECK-NOT: vdiv
+define void @t1() nounwind {
+entry:
+ %tmp = load i32* @iin, align 4, !tbaa !3
+ %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
+ %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
+ %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00>
+ tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind
+ ret void
+}
+
+declare void @foo_float32x2_t(<2 x float>)
+
+; Test unsigned conversion.
+; CHECK: t2
+; CHECK-NOT: vdiv
+define void @t2() nounwind {
+entry:
+ %tmp = load i32* @uin, align 4, !tbaa !3
+ %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
+ %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
+ %vcvt.i = uitofp <2 x i32> %vecinit2.i to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00>
+ tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind
+ ret void
+}
+
+; Test that should not fold due to a non-power-of-2 constant.
+; CHECK: t3
+; CHECK: vdiv
+define void @t3() nounwind {
+entry:
+ %tmp = load i32* @iin, align 4, !tbaa !3
+ %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
+ %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
+ %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 0x401B333340000000, float 0x401B333340000000>
+ tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind
+ ret void
+}
+
+; Test that should not fold because the power of 2 is out of range.
+; CHECK: t4
+; CHECK: vdiv
+define void @t4() nounwind {
+entry:
+ %tmp = load i32* @iin, align 4, !tbaa !3
+ %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
+ %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
+ %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 0x4200000000000000, float 0x4200000000000000>
+ tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind
+ ret void
+}
+
+; Test case where const is max power of 2 (i.e., 2^32).
+; CHECK: t5
+; CHECK-NOT: vdiv
+define void @t5() nounwind {
+entry:
+ %tmp = load i32* @iin, align 4, !tbaa !3
+ %vecinit.i = insertelement <2 x i32> undef, i32 %tmp, i32 0
+ %vecinit2.i = insertelement <2 x i32> %vecinit.i, i32 %tmp, i32 1
+ %vcvt.i = sitofp <2 x i32> %vecinit2.i to <2 x float>
+ %div.i = fdiv <2 x float> %vcvt.i, <float 0x41F0000000000000, float 0x41F0000000000000>
+ tail call void @foo_float32x2_t(<2 x float> %div.i) nounwind
+ ret void
+}
+
+; Test quadword.
+; CHECK: t6
+; CHECK-NOT: vdiv
+define void @t6() nounwind {
+entry:
+ %tmp = load i32* @iin, align 4, !tbaa !3
+ %vecinit.i = insertelement <4 x i32> undef, i32 %tmp, i32 0
+ %vecinit2.i = insertelement <4 x i32> %vecinit.i, i32 %tmp, i32 1
+ %vecinit4.i = insertelement <4 x i32> %vecinit2.i, i32 %tmp, i32 2
+ %vecinit6.i = insertelement <4 x i32> %vecinit4.i, i32 %tmp, i32 3
+ %vcvt.i = sitofp <4 x i32> %vecinit6.i to <4 x float>
+ %div.i = fdiv <4 x float> %vcvt.i, <float 8.000000e+00, float 8.000000e+00, float 8.000000e+00, float 8.000000e+00>
+ tail call void @foo_float32x4_t(<4 x float> %div.i) nounwind
+ ret void
+}
+
+declare void @foo_float32x4_t(<4 x float>)
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
+!3 = metadata !{metadata !"int", metadata !1}
diff --git a/test/CodeGen/ARM/vmul.ll b/test/CodeGen/ARM/vmul.ll
index 1fd6581..1780d6e 100644
--- a/test/CodeGen/ARM/vmul.ll
+++ b/test/CodeGen/ARM/vmul.ll
@@ -439,9 +439,9 @@ define <2 x i64> @vmull_extvec_u32(<2 x i32> %arg) nounwind {
}
; rdar://9197392
-define void @distribue(i16* %dst, i8* %src, i32 %mul) nounwind {
+define void @distribute(i16* %dst, i8* %src, i32 %mul) nounwind {
entry:
-; CHECK: distribue:
+; CHECK: distribute:
; CHECK: vmull.u8 [[REG1:(q[0-9]+)]], d{{.*}}, [[REG2:(d[0-9]+)]]
; CHECK: vmlal.u8 [[REG1]], d{{.*}}, [[REG2]]
%0 = trunc i32 %mul to i8
@@ -471,9 +471,9 @@ declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
%struct.uint8x8_t = type { <8 x i8> }
-define void @distribue2(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind {
+define void @distribute2(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind {
entry:
-; CHECK: distribue2
+; CHECK: distribute2
; CHECK-NOT: vadd.i8
; CHECK: vmul.i8
; CHECK: vmla.i8
@@ -492,3 +492,25 @@ entry:
store <8 x i8> %10, <8 x i8>* %11, align 8
ret void
}
+
+define void @distribute2_commutative(%struct.uint8x8_t* nocapture %dst, i8* %src, i32 %mul) nounwind {
+entry:
+; CHECK: distribute2_commutative
+; CHECK-NOT: vadd.i8
+; CHECK: vmul.i8
+; CHECK: vmla.i8
+ %0 = trunc i32 %mul to i8
+ %1 = insertelement <8 x i8> undef, i8 %0, i32 0
+ %2 = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> zeroinitializer
+ %3 = tail call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %src, i32 1)
+ %4 = bitcast <16 x i8> %3 to <2 x double>
+ %5 = extractelement <2 x double> %4, i32 1
+ %6 = bitcast double %5 to <8 x i8>
+ %7 = extractelement <2 x double> %4, i32 0
+ %8 = bitcast double %7 to <8 x i8>
+ %9 = add <8 x i8> %6, %8
+ %10 = mul <8 x i8> %2, %9
+ %11 = getelementptr inbounds %struct.uint8x8_t* %dst, i32 0, i32 0
+ store <8 x i8> %10, <8 x i8>* %11, align 8
+ ret void
+}
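All three distribute tests verify the same algebraic rewrite: a multiply of a sum, x * (y + z), is distributed into x*y + x*z so the backend can select vmul followed by vmla instead of a separate vadd. A reduced sketch of the input pattern (hypothetical function, not part of the patch):

define <8 x i8> @distribute_sketch(<8 x i8> %x, <8 x i8> %y, <8 x i8> %z) nounwind {
entry:
  %sum = add <8 x i8> %y, %z        ; should fold away (no vadd.i8)
  %prod = mul <8 x i8> %x, %sum     ; selects vmul.i8 + vmla.i8
  ret <8 x i8> %prod
}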
diff --git a/test/CodeGen/ARM/vpadd.ll b/test/CodeGen/ARM/vpadd.ll
index 2125573..1ba68f5 100644
--- a/test/CodeGen/ARM/vpadd.ll
+++ b/test/CodeGen/ARM/vpadd.ll
@@ -138,6 +138,20 @@ define <2 x i64> @vpaddlQu32(<4 x i32>* %A) nounwind {
ret <2 x i64> %tmp2
}
+; Test the AddCombine optimization that generates a vpaddl.s8
+define void @addCombineToVPADDL() nounwind ssp {
+; CHECK: vpaddl.s8
+ %cbcr = alloca <16 x i8>, align 16
+ %X = alloca <8 x i8>, align 8
+ %tmp = load <16 x i8>* %cbcr
+ %tmp1 = shufflevector <16 x i8> %tmp, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+ %tmp2 = load <16 x i8>* %cbcr
+ %tmp3 = shufflevector <16 x i8> %tmp2, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+ %add = add <8 x i8> %tmp3, %tmp1
+ store <8 x i8> %add, <8 x i8>* %X, align 8
+ ret void
+}
+
declare <4 x i16> @llvm.arm.neon.vpaddls.v4i16.v8i8(<8 x i8>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vpaddls.v2i32.v4i16(<4 x i16>) nounwind readnone
declare <1 x i64> @llvm.arm.neon.vpaddls.v1i64.v2i32(<2 x i32>) nounwind readnone
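vpaddl performs a pairwise long add: adjacent lanes are sign- (or zero-) extended and summed, halving the lane count. The new test spells that computation out in generic IR: one shuffle selects the even lanes, another the odd lanes, and the two halves are added; the combine recognizes this and selects vpaddl.s8. An assumed equivalent using the intrinsic directly (the type suffixes follow the naming scheme of the declares above):

define <8 x i16> @vpaddl_sketch(<16 x i8> %in) nounwind {
  %sum = call <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8> %in)
  ret <8 x i16> %sum
}
declare <8 x i16> @llvm.arm.neon.vpaddls.v8i16.v16i8(<16 x i8>) nounwind readnone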
diff --git a/test/CodeGen/ARM/vqdmul.ll b/test/CodeGen/ARM/vqdmul.ll
index 8dcc7f7..08e7d2b 100644
--- a/test/CodeGen/ARM/vqdmul.ll
+++ b/test/CodeGen/ARM/vqdmul.ll
@@ -152,7 +152,6 @@ entry:
ret <2 x i32> %1
}
-declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
declare <4 x i16> @llvm.arm.neon.vqrdmulh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
declare <2 x i32> @llvm.arm.neon.vqrdmulh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
diff --git a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll b/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
deleted file mode 100644
index 87d9928..0000000
--- a/test/CodeGen/Alpha/2005-07-12-TwoMallocCalls.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; There should be exactly two calls here (memset and malloc), no more.
-; RUN: llc < %s -march=alpha | grep jsr | count 2
-
-%typedef.bc_struct = type opaque
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-
-define i1 @l12_l94_bc_divide_endif_2E_3_2E_ce(i32* %tmp.71.reload, i32 %scale2.1.3, i32 %extra.0, %typedef.bc_struct* %n1, %typedef.bc_struct* %n2, i32* %tmp.92.reload, i32 %tmp.94.reload, i32* %tmp.98.reload, i32 %tmp.100.reload, i8** %tmp.112.out, i32* %tmp.157.out, i8** %tmp.158.out) {
-newFuncRoot:
- %tmp.120 = add i32 %extra.0, 2 ; <i32> [#uses=1]
- %tmp.122 = add i32 %tmp.120, %tmp.94.reload ; <i32> [#uses=1]
- %tmp.123 = add i32 %tmp.122, %tmp.100.reload ; <i32> [#uses=2]
- %tmp.112 = malloc i8, i32 %tmp.123 ; <i8*> [#uses=1]
- %tmp.137 = zext i32 %tmp.123 to i64 ; <i64> [#uses=1]
- tail call void @llvm.memset.i64( i8* %tmp.112, i8 0, i64 %tmp.137, i32 0 )
- ret i1 true
-}
-
diff --git a/test/CodeGen/Alpha/add.ll b/test/CodeGen/Alpha/add.ll
index cd883f6..8a92695 100644
--- a/test/CodeGen/Alpha/add.ll
+++ b/test/CodeGen/Alpha/add.ll
@@ -17,19 +17,19 @@
; RUN: grep {s8subq} %t.s | count 2
-define i32 @al(i32 signext %x.s, i32 signext %y.s) signext {
+define signext i32 @al(i32 signext %x.s, i32 signext %y.s) {
entry:
%tmp.3.s = add i32 %y.s, %x.s ; <i32> [#uses=1]
ret i32 %tmp.3.s
}
-define i32 @ali(i32 signext %x.s) signext {
+define signext i32 @ali(i32 signext %x.s) {
entry:
%tmp.3.s = add i32 100, %x.s ; <i32> [#uses=1]
ret i32 %tmp.3.s
}
-define i64 @aq(i64 signext %x.s, i64 signext %y.s) signext {
+define signext i64 @aq(i64 signext %x.s, i64 signext %y.s) {
entry:
%tmp.3.s = add i64 %y.s, %x.s ; <i64> [#uses=1]
ret i64 %tmp.3.s
@@ -41,13 +41,13 @@ entry:
ret i64 %tmp.3.s
}
-define i32 @sl(i32 signext %x.s, i32 signext %y.s) signext {
+define signext i32 @sl(i32 signext %x.s, i32 signext %y.s) {
entry:
%tmp.3.s = sub i32 %y.s, %x.s ; <i32> [#uses=1]
ret i32 %tmp.3.s
}
-define i32 @sli(i32 signext %x.s) signext {
+define signext i32 @sli(i32 signext %x.s) {
entry:
%tmp.3.s = sub i32 %x.s, 100 ; <i32> [#uses=1]
ret i32 %tmp.3.s
@@ -65,14 +65,14 @@ entry:
ret i64 %tmp.3.s
}
-define i32 @a4l(i32 signext %x.s, i32 signext %y.s) signext {
+define signext i32 @a4l(i32 signext %x.s, i32 signext %y.s) {
entry:
%tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1]
%tmp.3.s = add i32 %tmp.1.s, %x.s ; <i32> [#uses=1]
ret i32 %tmp.3.s
}
-define i32 @a8l(i32 signext %x.s, i32 signext %y.s) signext {
+define signext i32 @a8l(i32 signext %x.s, i32 signext %y.s) {
entry:
%tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1]
%tmp.3.s = add i32 %tmp.1.s, %x.s ; <i32> [#uses=1]
@@ -93,14 +93,14 @@ entry:
ret i64 %tmp.3.s
}
-define i32 @a4li(i32 signext %y.s) signext {
+define signext i32 @a4li(i32 signext %y.s) {
entry:
%tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1]
%tmp.3.s = add i32 100, %tmp.1.s ; <i32> [#uses=1]
ret i32 %tmp.3.s
}
-define i32 @a8li(i32 signext %y.s) signext {
+define signext i32 @a8li(i32 signext %y.s) {
entry:
%tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1]
%tmp.3.s = add i32 100, %tmp.1.s ; <i32> [#uses=1]
@@ -121,14 +121,14 @@ entry:
ret i64 %tmp.3.s
}
-define i32 @s4l(i32 signext %x.s, i32 signext %y.s) signext {
+define signext i32 @s4l(i32 signext %x.s, i32 signext %y.s) {
entry:
%tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1]
%tmp.3.s = sub i32 %tmp.1.s, %x.s ; <i32> [#uses=1]
ret i32 %tmp.3.s
}
-define i32 @s8l(i32 signext %x.s, i32 signext %y.s) signext {
+define signext i32 @s8l(i32 signext %x.s, i32 signext %y.s) {
entry:
%tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1]
%tmp.3.s = sub i32 %tmp.1.s, %x.s ; <i32> [#uses=1]
@@ -149,14 +149,14 @@ entry:
ret i64 %tmp.3.s
}
-define i32 @s4li(i32 signext %y.s) signext {
+define signext i32 @s4li(i32 signext %y.s) {
entry:
%tmp.1.s = shl i32 %y.s, 2 ; <i32> [#uses=1]
%tmp.3.s = sub i32 %tmp.1.s, 100 ; <i32> [#uses=1]
ret i32 %tmp.3.s
}
-define i32 @s8li(i32 signext %y.s) signext {
+define signext i32 @s8li(i32 signext %y.s) {
entry:
%tmp.1.s = shl i32 %y.s, 3 ; <i32> [#uses=1]
%tmp.3.s = sub i32 %tmp.1.s, 100 ; <i32> [#uses=1]
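The rewrites in this file (and the similar CellSPU changes later in the patch) are mechanical updates for the new attribute placement: return-value attributes such as signext and zeroext are written before the return type rather than after the parameter list. Schematically (hypothetical function name):

; old form:  define i32 @f(i32 signext %x) signext { ... }
; new form:  define signext i32 @f(i32 signext %x) { ... }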
diff --git a/test/CodeGen/Alpha/i32_sub_1.ll b/test/CodeGen/Alpha/i32_sub_1.ll
index ffeafbd..35b1d08 100644
--- a/test/CodeGen/Alpha/i32_sub_1.ll
+++ b/test/CodeGen/Alpha/i32_sub_1.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=alpha | grep -i {subl \$16,1,\$0}
-define i32 @foo(i32 signext %x) signext {
+define signext i32 @foo(i32 signext %x) {
entry:
%tmp.1 = add i32 %x, -1 ; <int> [#uses=1]
ret i32 %tmp.1
diff --git a/test/CodeGen/Alpha/private.ll b/test/CodeGen/Alpha/private.ll
index 26076e0..f8d3094 100644
--- a/test/CodeGen/Alpha/private.ll
+++ b/test/CodeGen/Alpha/private.ll
@@ -6,8 +6,6 @@
; RUN: grep \\\$baz: %t
; RUN: grep ldah.*\\\$baz %t
-declare void @foo()
-
define private void @foo() {
ret void
}
diff --git a/test/CodeGen/Alpha/zapnot.ll b/test/CodeGen/Alpha/zapnot.ll
index d00984a..a47035e 100644
--- a/test/CodeGen/Alpha/zapnot.ll
+++ b/test/CodeGen/Alpha/zapnot.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -march=alpha | grep zapnot
-define i16 @foo(i64 %y) zeroext {
+define zeroext i16 @foo(i64 %y) {
entry:
%tmp.1 = trunc i64 %y to i16 ; <ushort> [#uses=1]
ret i16 %tmp.1
diff --git a/test/CodeGen/Blackfin/add-overflow.ll b/test/CodeGen/Blackfin/add-overflow.ll
index e982e43..8dcf3f8 100644
--- a/test/CodeGen/Blackfin/add-overflow.ll
+++ b/test/CodeGen/Blackfin/add-overflow.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
- type { i24, i1 } ; type %0
+ %0 = type { i24, i1 } ; type %0
define i1 @func2(i24 zeroext %v1, i24 zeroext %v2) nounwind {
entry:
diff --git a/test/CodeGen/Blackfin/burg.ll b/test/CodeGen/Blackfin/burg.ll
deleted file mode 100644
index 8cc3713..0000000
--- a/test/CodeGen/Blackfin/burg.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=bfin -verify-machineinstrs > %t
-
- %IntList = type %struct.intlist*
- %ReadFn = type i32 ()*
- %YYSTYPE = type { %IntList }
- %struct.intlist = type { i32, %IntList }
-@yyval = external global %YYSTYPE ; <%YYSTYPE*> [#uses=1]
-
-define i32 @yyparse() {
-bb0:
- %reg254 = load i16* null ; <i16> [#uses=1]
- %reg254-idxcast = sext i16 %reg254 to i64 ; <i64> [#uses=1]
- %reg254-idxcast-scale = mul i64 %reg254-idxcast, -1 ; <i64> [#uses=1]
- %reg254-idxcast-scale-offset = add i64 %reg254-idxcast-scale, 1 ; <i64> [#uses=1]
- %reg261.idx1 = getelementptr %YYSTYPE* null, i64 %reg254-idxcast-scale-offset, i32 0 ; <%IntList*> [#uses=1]
- %reg261 = load %IntList* %reg261.idx1 ; <%IntList> [#uses=1]
- store %IntList %reg261, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- unreachable
-}
diff --git a/test/CodeGen/Blackfin/many-args.ll b/test/CodeGen/Blackfin/many-args.ll
index 8c52874..2df32ca 100644
--- a/test/CodeGen/Blackfin/many-args.ll
+++ b/test/CodeGen/Blackfin/many-args.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=bfin -verify-machineinstrs
- type { i32, float, float, float, float, float, float, float, float, float, float } ; type %0
+ %0 = type { i32, float, float, float, float, float, float, float, float, float, float } ; type %0
%struct..s_segment_inf = type { float, i32, i16, i16, float, float, i32, float, float }
define i32 @main(i32 %argc.1, i8** %argv.1) {
diff --git a/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll b/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
deleted file mode 100644
index 3b2085c..0000000
--- a/test/CodeGen/CBackend/2002-08-20-RecursiveTypes.ll
+++ /dev/null
@@ -1,3 +0,0 @@
-; RUN: llc < %s -march=c
-
-@MyIntList = external global { \2*, i32 }
diff --git a/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll b/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
deleted file mode 100644
index 2563d8c..0000000
--- a/test/CodeGen/CBackend/2002-10-15-OpaqueTypeProblem.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s -march=c
-
-%MPI_Comm = type %struct.Comm*
-%struct.Comm = type opaque
-@thing = global %MPI_Comm* null ; <%MPI_Comm**> [#uses=0]
-
diff --git a/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll b/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
deleted file mode 100644
index 54e0aa6..0000000
--- a/test/CodeGen/CBackend/2002-10-30-FunctionPointerAlloca.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: llc < %s -march=c
-
- %BitField = type i32
- %tokenptr = type i32*
-
-define void @test() {
- %pmf1 = alloca %tokenptr (%tokenptr, i8*)* ; <%tokenptr (%tokenptr, i8*)**> [#uses=0]
- ret void
-}
-
diff --git a/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll b/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
deleted file mode 100644
index 1c5f506..0000000
--- a/test/CodeGen/CBackend/2005-03-08-RecursiveTypeCrash.ll
+++ /dev/null
@@ -1,5 +0,0 @@
-; RUN: llc < %s -march=c
-
- %JNIEnv = type %struct.JNINa*
- %struct.JNINa = type { i8*, i8*, i8*, void (%JNIEnv*)* }
-
diff --git a/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll b/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
deleted file mode 100644
index 8a5f253..0000000
--- a/test/CodeGen/CBackend/2007-01-15-NamedArrayType.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; PR918
-; RUN: llc < %s -march=c | not grep {l_structtype_s l_fixarray_array3}
-
-%structtype_s = type { i32 }
-%fixarray_array3 = type [3 x %structtype_s]
-
-define i32 @witness(%fixarray_array3* %p) {
- %q = getelementptr %fixarray_array3* %p, i32 0, i32 0, i32 0
- %v = load i32* %q
- ret i32 %v
-}
diff --git a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll b/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
deleted file mode 100644
index 8db3167..0000000
--- a/test/CodeGen/CBackend/2008-05-21-MRV-InlineAsm.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -march=c
-
-declare {i32, i32} @foo()
-
-define i32 @test() {
- %A = call {i32, i32} @foo()
- %B = getresult {i32, i32} %A, 0
- %C = getresult {i32, i32} %A, 1
- %D = add i32 %B, %C
- ret i32 %D
-}
-
-define i32 @test2() {
- %A = call {i32, i32} asm sideeffect "...", "={cx},={di},~{dirflag},~{fpsr},~{flags},~{memory}"()
- %B = getresult {i32, i32} %A, 0
- %C = getresult {i32, i32} %A, 1
- %D = add i32 %B, %C
- ret i32 %D
-}
diff --git a/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll b/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
new file mode 100644
index 0000000..0ae480d
--- /dev/null
+++ b/test/CodeGen/CBackend/2011-06-08-addWithOverflow.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -march=c
+; Check that uadd and sadd with overflow are handled by the C Backend.
+
+%0 = type { i32, i1 } ; type %0
+
+define i1 @func1(i32 zeroext %v1, i32 zeroext %v2) nounwind {
+entry:
+ %t = call %0 @llvm.uadd.with.overflow.i32(i32 %v1, i32 %v2) ; <%0> [#uses=1]
+ %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1]
+ br i1 %obit, label %carry, label %normal
+
+normal: ; preds = %entry
+ ret i1 true
+
+carry: ; preds = %entry
+ ret i1 false
+}
+
+define i1 @func2(i32 signext %v1, i32 signext %v2) nounwind {
+entry:
+ %t = call %0 @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) ; <%0> [#uses=1]
+ %obit = extractvalue %0 %t, 1 ; <i1> [#uses=1]
+ br i1 %obit, label %carry, label %normal
+
+normal: ; preds = %entry
+ ret i1 true
+
+carry: ; preds = %entry
+ ret i1 false
+}
+
+declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
+
+declare %0 @llvm.uadd.with.overflow.i32(i32, i32) nounwind
+
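Both overflow intrinsics return a pair of type %0: field 0 is the (wrapped) arithmetic result and field 1 is the overflow bit; the tests above branch only on field 1. A sketch that consumes both fields, reusing the %0 pair type declared above (assumed example, not part of the patch):

define i32 @sat_add_sketch(i32 %a, i32 %b) nounwind {
entry:
  %t = call %0 @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
  %sum = extractvalue %0 %t, 0             ; wrapped sum
  %obit = extractvalue %0 %t, 1            ; overflow flag
  %res = select i1 %obit, i32 -1, i32 %sum ; saturate to UINT_MAX on overflow
  ret i32 %res
}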
diff --git a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll b/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
index 054a3ca..054a3ca 100644
--- a/test/CodeGen/CBackend/2008-06-04-IndirectMem.ll
+++ b/test/CodeGen/CBackend/X86/2008-06-04-IndirectMem.ll
diff --git a/test/CodeGen/CBackend/X86/dg.exp b/test/CodeGen/CBackend/X86/dg.exp
new file mode 100644
index 0000000..833bcc5
--- /dev/null
+++ b/test/CodeGen/CBackend/X86/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target X86] && [llvm_gcc_supports c] } {
+ RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp,s}]]
+}
diff --git a/test/CodeGen/CPP/llvm2cpp.ll b/test/CodeGen/CPP/llvm2cpp.ll
deleted file mode 100644
index d0ba0cf..0000000
--- a/test/CodeGen/CPP/llvm2cpp.ll
+++ /dev/null
@@ -1,756 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis > /dev/null
-; RUN: llc < %s -march=cpp -cppgen=program -o -
-
-@X = global i32 4, align 16 ; <i32*> [#uses=0]
-
-define i32* @test1012() align 32 {
- %X = alloca i32, align 4 ; <i32*> [#uses=1]
- %Y = alloca i32, i32 42, align 16 ; <i32*> [#uses=0]
- %Z = alloca i32 ; <i32*> [#uses=0]
- ret i32* %X
-}
-
-define i32* @test1013() {
- %X = malloc i32, align 4 ; <i32*> [#uses=1]
- %Y = malloc i32, i32 42, align 16 ; <i32*> [#uses=0]
- %Z = malloc i32 ; <i32*> [#uses=0]
- ret i32* %X
-}
-
-define void @void(i32, i32) {
- add i32 0, 0 ; <i32>:3 [#uses=2]
- sub i32 0, 4 ; <i32>:4 [#uses=2]
- br label %5
-
-; <label>:5 ; preds = %5, %2
- add i32 %0, %1 ; <i32>:6 [#uses=2]
- sub i32 %6, %4 ; <i32>:7 [#uses=1]
- icmp sle i32 %7, %3 ; <i1>:8 [#uses=1]
- br i1 %8, label %9, label %5
-
-; <label>:9 ; preds = %5
- add i32 %0, %1 ; <i32>:10 [#uses=0]
- sub i32 %6, %4 ; <i32>:11 [#uses=1]
- icmp sle i32 %11, %3 ; <i1>:12 [#uses=0]
- ret void
-}
-
-define i32 @zarro() {
-Startup:
- ret i32 0
-}
-
-define fastcc void @foo() {
- ret void
-}
-
-define coldcc void @bar() {
- call fastcc void @foo( )
- ret void
-}
-
-define void @structret({ i8 }* sret %P) {
- call void @structret( { i8 }* %P sret )
- ret void
-}
-
-define void @foo4() {
- ret void
-}
-
-define coldcc void @bar2() {
- call fastcc void @foo( )
- ret void
-}
-
-define cc42 void @bar3() {
- invoke fastcc void @foo( )
- to label %Ok unwind label %U
-
-Ok: ; preds = %0
- ret void
-
-U: ; preds = %0
- unwind
-}
-
-define void @bar4() {
- call cc42 void @bar( )
- invoke cc42 void @bar3( )
- to label %Ok unwind label %U
-
-Ok: ; preds = %0
- ret void
-
-U: ; preds = %0
- unwind
-}
-; ModuleID = 'calltest.ll'
- %FunTy = type i32 (i32)
-
-define i32 @test1000(i32 %i0) {
- ret i32 %i0
-}
-
-define void @invoke(%FunTy* %x) {
- %foo = call i32 %x( i32 123 ) ; <i32> [#uses=0]
- %foo2 = tail call i32 %x( i32 123 ) ; <i32> [#uses=0]
- ret void
-}
-
-define i32 @main(i32 %argc) {
- %retval = call i32 @test1000( i32 %argc ) ; <i32> [#uses=2]
- %two = add i32 %retval, %retval ; <i32> [#uses=1]
- %retval2 = invoke i32 @test1000( i32 %argc )
- to label %Next unwind label %Error ; <i32> [#uses=1]
-
-Next: ; preds = %0
- %two2 = add i32 %two, %retval2 ; <i32> [#uses=1]
- call void @invoke( %FunTy* @test1000 )
- ret i32 %two2
-
-Error: ; preds = %0
- ret i32 -1
-}
-; ModuleID = 'casttest.ll'
-
-define i16 @FunFunc(i64 %x, i8 %z) {
-bb0:
- %cast110 = sext i8 %z to i16 ; <i16> [#uses=1]
- %cast10 = trunc i64 %x to i16 ; <i16> [#uses=1]
- %reg109 = add i16 %cast110, %cast10 ; <i16> [#uses=1]
- ret i16 %reg109
-}
-; ModuleID = 'cfgstructures.ll'
-
-define void @irreducible(i1 %cond) {
- br i1 %cond, label %X, label %Y
-
-X: ; preds = %Y, %0
- br label %Y
-
-Y: ; preds = %X, %0
- br label %X
-}
-
-define void @sharedheader(i1 %cond) {
- br label %A
-
-A: ; preds = %Y, %X, %0
- br i1 %cond, label %X, label %Y
-
-X: ; preds = %A
- br label %A
-
-Y: ; preds = %A
- br label %A
-}
-
-define void @nested(i1 %cond1, i1 %cond2, i1 %cond3) {
- br label %Loop1
-
-Loop1: ; preds = %L2Exit, %0
- br label %Loop2
-
-Loop2: ; preds = %L3Exit, %Loop1
- br label %Loop3
-
-Loop3: ; preds = %Loop3, %Loop2
- br i1 %cond3, label %Loop3, label %L3Exit
-
-L3Exit: ; preds = %Loop3
- br i1 %cond2, label %Loop2, label %L2Exit
-
-L2Exit: ; preds = %L3Exit
- br i1 %cond1, label %Loop1, label %L1Exit
-
-L1Exit: ; preds = %L2Exit
- ret void
-}
-; ModuleID = 'constexpr.ll'
- %SAType = type { i32, { [2 x float], i64 } }
- %SType = type { i32, { float, { i8 } }, i64 }
-global i64 1 ; <i64*>:0 [#uses=0]
-global i64 74514 ; <i64*>:1 [#uses=0]
-@t2 = global i32* @t1 ; <i32**> [#uses=0]
-@t3 = global i32* @t1 ; <i32**> [#uses=2]
-@t1 = global i32 4 ; <i32*> [#uses=2]
-@t4 = global i32** @t3 ; <i32***> [#uses=1]
-@t5 = global i32** @t3 ; <i32***> [#uses=0]
-@t6 = global i32*** @t4 ; <i32****> [#uses=0]
-@t7 = global float* inttoptr (i32 12345678 to float*) ; <float**> [#uses=0]
-@t9 = global i32 8 ; <i32*> [#uses=0]
-global i32* bitcast (float* @4 to i32*) ; <i32**>:2 [#uses=0]
-global float* @4 ; <float**>:3 [#uses=0]
-global float 0.000000e+00 ; <float*>:4 [#uses=2]
-@array = constant [2 x i32] [ i32 12, i32 52 ] ; <[2 x i32]*> [#uses=1]
-@arrayPtr = global i32* getelementptr ([2 x i32]* @array, i64 0, i64 0) ; <i32**> [#uses=1]
-@arrayPtr5 = global i32** getelementptr (i32** @arrayPtr, i64 5) ; <i32***> [#uses=0]
-@somestr = constant [11 x i8] c"hello world" ; <[11 x i8]*> [#uses=2]
-@char5 = global i8* getelementptr ([11 x i8]* @somestr, i64 0, i64 5) ; <i8**> [#uses=0]
-@char8a = global i32* bitcast (i8* getelementptr ([11 x i8]* @somestr, i64 0, i64 8) to i32*) ; <i32**> [#uses=0]
-@char8b = global i8* getelementptr ([11 x i8]* @somestr, i64 0, i64 8) ; <i8**> [#uses=0]
-@S1 = global %SType* null ; <%SType**> [#uses=1]
-@S2c = constant %SType {
- i32 1,
- { float, { i8 } } { float 2.000000e+00, { i8 } { i8 3 } },
- i64 4 } ; <%SType*> [#uses=3]
-@S3c = constant %SAType { i32 1, { [2 x float], i64 } { [2 x float] [ float 2.000000e+00, float 3.000000e+00 ], i64 4 } } ; <%SAType*> [#uses=1]
-@S1ptr = global %SType** @S1 ; <%SType***> [#uses=0]
-@S2 = global %SType* @S2c ; <%SType**> [#uses=0]
-@S3 = global %SAType* @S3c ; <%SAType**> [#uses=0]
-@S1fld1a = global float* getelementptr (%SType* @S2c, i64 0, i32 1, i32 0) ; <float**> [#uses=0]
-@S1fld1b = global float* getelementptr (%SType* @S2c, i64 0, i32 1, i32 0) ; <float**> [#uses=1]
-@S1fld1bptr = global float** @S1fld1b ; <float***> [#uses=0]
-@S2fld3 = global i8* getelementptr (%SType* @S2c, i64 0, i32 1, i32 1, i32 0) ; <i8**> [#uses=0]
-
-; ModuleID = 'constpointer.ll'
-@cpt3 = global i32* @cpt1 ; <i32**> [#uses=1]
-@cpt1 = global i32 4 ; <i32*> [#uses=2]
-@cpt4 = global i32** @cpt3 ; <i32***> [#uses=0]
-@cpt2 = global i32* @cpt1 ; <i32**> [#uses=0]
-global float* @7 ; <float**>:0 [#uses=0]
-global float* @7 ; <float**>:1 [#uses=0]
-global float 0.000000e+00 ; <float*>:2 [#uses=3]
-global float* @7 ; <float**>:3 [#uses=0]
-@fptr = global void ()* @f ; <void ()**> [#uses=0]
-@sptr1 = global [11 x i8]* @somestr ; <[11 x i8]**> [#uses=0]
-@somestr2 = constant [11 x i8] c"hello world" ; <[11 x i8]*> [#uses=2]
-@sptr2 = global [11 x i8]* @somestr2 ; <[11 x i8]**> [#uses=0]
-
-declare void @f()
-; ModuleID = 'escaped_label.ll'
-
-define i32 @foo3() {
- br label "foo`~!@#$%^&*()-_=+{}[]\\|;:',<.>/?"
-
-"foo`~!@#$%^&*()-_=+{}[]\\|;:',<.>/?": ; preds = %0
- ret i32 17
-}
-; ModuleID = 'float.ll'
-@F1 = global float 4.000000e+00 ; <float*> [#uses=0]
-@D1 = global double 4.000000e+00 ; <double*> [#uses=0]
-; ModuleID = 'fold-fpcast.ll'
-
-define i32 @test1() {
- ret i32 1080872141
-}
-
-define float @test1002() {
- ret float 0x36E1000000000000
-}
-
-define i64 @test3() {
- ret i64 4614256656431372362
-}
-
-define double @test4() {
- ret double 2.075076e-322
-}
-; ModuleID = 'forwardreftest.ll'
- %myfn = type float (i32, double, i32, i16)
- %myty = type i32
- %thisfuncty = type i32 (i32)*
-
-declare void @F(%thisfuncty, %thisfuncty, %thisfuncty)
-
-define i32 @zarro2(i32 %Func) {
-Startup:
- add i32 0, 10 ; <i32>:0 [#uses=0]
- ret i32 0
-}
-
-define i32 @test1004(i32) {
- call void @F( %thisfuncty @zarro2, %thisfuncty @test1004, %thisfuncty @foozball )
- ret i32 0
-}
-
-define i32 @foozball(i32) {
- ret i32 0
-}
-
-; ModuleID = 'globalredefinition.ll'
-@A = global i32* @B ; <i32**> [#uses=0]
-@B = global i32 7 ; <i32*> [#uses=1]
-
-define void @test12312() {
- ret void
-}
-; ModuleID = 'global_section.ll'
-@GlobSec = global i32 4, section "foo", align 16
-
-define void @test1005() section "bar" {
- ret void
-}
-
-; ModuleID = 'globalvars.ll'
-@MyVar = external global i32 ; <i32*> [#uses=1]
-@MyIntList = external global { \2*, i32 } ; <{ \2*, i32 }*> [#uses=1]
-external global i32 ; <i32*>:0 [#uses=0]
-@AConst = constant i32 123 ; <i32*> [#uses=0]
-@AString = constant [4 x i8] c"test" ; <[4 x i8]*> [#uses=0]
-@ZeroInit = global { [100 x i32], [40 x float] } zeroinitializer ; <{ [100 x i32], [40 x float] }*> [#uses=0]
-
-define i32 @foo10015(i32 %blah) {
- store i32 5, i32* @MyVar
- %idx = getelementptr { \2*, i32 }* @MyIntList, i64 0, i32 1 ; <i32*> [#uses=1]
- store i32 12, i32* %idx
- ret i32 %blah
-}
-; ModuleID = 'indirectcall2.ll'
-
-define i64 @test1006(i64 %X) {
- ret i64 %X
-}
-
-define i64 @fib(i64 %n) {
-; <label>:0
- %T = icmp ult i64 %n, 2 ; <i1> [#uses=1]
- br i1 %T, label %BaseCase, label %RecurseCase
-
-RecurseCase: ; preds = %0
- %result = call i64 @test1006( i64 %n ) ; <i64> [#uses=0]
- br label %BaseCase
-
-BaseCase: ; preds = %RecurseCase, %0
- %X = phi i64 [ 1, %0 ], [ 2, %RecurseCase ] ; <i64> [#uses=1]
- ret i64 %X
-}
-; ModuleID = 'indirectcall.ll'
-
-declare i32 @atoi(i8*)
-
-define i64 @fibonacc(i64 %n) {
- icmp ult i64 %n, 2 ; <i1>:1 [#uses=1]
- br i1 %1, label %BaseCase, label %RecurseCase
-
-BaseCase: ; preds = %0
- ret i64 1
-
-RecurseCase: ; preds = %0
- %n2 = sub i64 %n, 2 ; <i64> [#uses=1]
- %n1 = sub i64 %n, 1 ; <i64> [#uses=1]
- %f2 = call i64 @fibonacc( i64 %n2 ) ; <i64> [#uses=1]
- %f1 = call i64 @fibonacc( i64 %n1 ) ; <i64> [#uses=1]
- %result = add i64 %f2, %f1 ; <i64> [#uses=1]
- ret i64 %result
-}
-
-define i64 @realmain(i32 %argc, i8** %argv) {
-; <label>:0
- icmp eq i32 %argc, 2 ; <i1>:1 [#uses=1]
- br i1 %1, label %HasArg, label %Continue
-
-HasArg: ; preds = %0
- %n1 = add i32 1, 1 ; <i32> [#uses=1]
- br label %Continue
-
-Continue: ; preds = %HasArg, %0
- %n = phi i32 [ %n1, %HasArg ], [ 1, %0 ] ; <i32> [#uses=1]
- %N = sext i32 %n to i64 ; <i64> [#uses=1]
- %F = call i64 @fib( i64 %N ) ; <i64> [#uses=1]
- ret i64 %F
-}
-
-define i64 @trampoline(i64 %n, i64 (i64)* %fibfunc) {
- %F = call i64 %fibfunc( i64 %n ) ; <i64> [#uses=1]
- ret i64 %F
-}
-
-define i32 @main2() {
- %Result = call i64 @trampoline( i64 10, i64 (i64)* @fib ) ; <i64> [#uses=1]
- %Result.upgrd.1 = trunc i64 %Result to i32 ; <i32> [#uses=1]
- ret i32 %Result.upgrd.1
-}
-; ModuleID = 'inlineasm.ll'
-module asm "this is an inline asm block"
-module asm "this is another inline asm block"
-
-define i32 @test1007() {
- %X = call i32 asm "tricky here $0, $1", "=r,r"( i32 4 ) ; <i32> [#uses=1]
- call void asm sideeffect "eieio", ""( )
- ret i32 %X
-}
-; ModuleID = 'instructions.ll'
-
-define i32 @test_extractelement(<4 x i32> %V) {
- %R = extractelement <4 x i32> %V, i32 1 ; <i32> [#uses=1]
- ret i32 %R
-}
-
-define <4 x i32> @test_insertelement(<4 x i32> %V) {
- %R = insertelement <4 x i32> %V, i32 0, i32 0 ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %R
-}
-
-define <4 x i32> @test_shufflevector_u(<4 x i32> %V) {
- %R = shufflevector <4 x i32> %V, <4 x i32> %V, <4 x i32> < i32 1, i32 undef, i32 7, i32 2 > ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %R
-}
-
-define <4 x float> @test_shufflevector_f(<4 x float> %V) {
- %R = shufflevector <4 x float> %V, <4 x float> undef, <4 x i32> < i32 1, i32 undef, i32 7, i32 2 > ; <<4 x float>> [#uses=1]
- ret <4 x float> %R
-}
-; ModuleID = 'intrinsics.ll'
-
-declare i1 @llvm.isunordered.f32(float, float)
-
-declare i1 @llvm.isunordered.f64(double, double)
-
-declare void @llvm.prefetch(i8*, i32, i32)
-
-declare float @llvm.sqrt.f32(float)
-
-declare double @llvm.sqrt.f64(double)
-
-define void @libm() {
- fcmp uno float 1.000000e+00, 2.000000e+00 ; <i1>:1 [#uses=0]
- fcmp uno double 3.000000e+00, 4.000000e+00 ; <i1>:2 [#uses=0]
- call void @llvm.prefetch( i8* null, i32 1, i32 3 )
- call float @llvm.sqrt.f32( float 5.000000e+00 ) ; <float>:3 [#uses=0]
- call double @llvm.sqrt.f64( double 6.000000e+00 ) ; <double>:4 [#uses=0]
- call i8 @llvm.ctpop.i8( i8 10 ) ; <i32>:5 [#uses=1]
- call i16 @llvm.ctpop.i16( i16 11 ) ; <i32>:7 [#uses=1]
- call i32 @llvm.ctpop.i32( i32 12 ) ; <i32>:9 [#uses=1]
- call i64 @llvm.ctpop.i64( i64 13 ) ; <i32>:11 [#uses=1]
- call i8 @llvm.ctlz.i8( i8 14 ) ; <i32>:13 [#uses=1]
- call i16 @llvm.ctlz.i16( i16 15 ) ; <i32>:15 [#uses=1]
- call i32 @llvm.ctlz.i32( i32 16 ) ; <i32>:17 [#uses=1]
- call i64 @llvm.ctlz.i64( i64 17 ) ; <i32>:19 [#uses=1]
- call i8 @llvm.cttz.i8( i8 18 ) ; <i32>:21 [#uses=1]
- call i16 @llvm.cttz.i16( i16 19 ) ; <i32>:23 [#uses=1]
- call i32 @llvm.cttz.i32( i32 20 ) ; <i32>:25 [#uses=1]
- call i64 @llvm.cttz.i64( i64 21 ) ; <i32>:27 [#uses=1]
- ret void
-}
-
-declare i8 @llvm.ctpop.i8(i8)
-
-declare i16 @llvm.ctpop.i16(i16)
-
-declare i32 @llvm.ctpop.i32(i32)
-
-declare i64 @llvm.ctpop.i64(i64)
-
-declare i8 @llvm.ctlz.i8(i8)
-
-declare i16 @llvm.ctlz.i16(i16)
-
-declare i32 @llvm.ctlz.i32(i32)
-
-declare i64 @llvm.ctlz.i64(i64)
-
-declare i8 @llvm.cttz.i8(i8)
-
-declare i16 @llvm.cttz.i16(i16)
-
-declare i32 @llvm.cttz.i32(i32)
-
-declare i64 @llvm.cttz.i64(i64)
-
-; ModuleID = 'packed.ll'
-@foo1 = external global <4 x float> ; <<4 x float>*> [#uses=2]
-@foo102 = external global <2 x i32> ; <<2 x i32>*> [#uses=2]
-
-define void @main3() {
- store <4 x float> < float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 >, <4 x float>* @foo1
- store <2 x i32> < i32 4, i32 4 >, <2 x i32>* @foo102
- %l1 = load <4 x float>* @foo1 ; <<4 x float>> [#uses=0]
- %l2 = load <2 x i32>* @foo102 ; <<2 x i32>> [#uses=0]
- ret void
-}
-
-; ModuleID = 'properties.ll'
-target datalayout = "e-p:32:32"
-target triple = "proc-vend-sys"
-deplibs = [ "m", "c" ]
-; ModuleID = 'prototype.ll'
-
-declare i32 @bar1017(i32 %in)
-
-define i32 @foo1016(i32 %blah) {
- %xx = call i32 @bar1017( i32 %blah ) ; <i32> [#uses=1]
- ret i32 %xx
-}
-
-; ModuleID = 'recursivetype.ll'
- %list = type { %list*, i32 }
-
-declare i8* @malloc(i32)
-
-define void @InsertIntoListTail(%list** %L, i32 %Data) {
-bb1:
- %reg116 = load %list** %L ; <%list*> [#uses=1]
- %cast1004 = inttoptr i64 0 to %list* ; <%list*> [#uses=1]
- %cond1000 = icmp eq %list* %reg116, %cast1004 ; <i1> [#uses=1]
- br i1 %cond1000, label %bb3, label %bb2
-
-bb2: ; preds = %bb2, %bb1
- %reg117 = phi %list** [ %reg118, %bb2 ], [ %L, %bb1 ] ; <%list**> [#uses=1]
- %cast1010 = bitcast %list** %reg117 to %list*** ; <%list***> [#uses=1]
- %reg118 = load %list*** %cast1010 ; <%list**> [#uses=3]
- %reg109 = load %list** %reg118 ; <%list*> [#uses=1]
- %cast1005 = inttoptr i64 0 to %list* ; <%list*> [#uses=1]
- %cond1001 = icmp ne %list* %reg109, %cast1005 ; <i1> [#uses=1]
- br i1 %cond1001, label %bb2, label %bb3
-
-bb3: ; preds = %bb2, %bb1
- %reg119 = phi %list** [ %reg118, %bb2 ], [ %L, %bb1 ] ; <%list**> [#uses=1]
- %cast1006 = bitcast %list** %reg119 to i8** ; <i8**> [#uses=1]
- %reg111 = call i8* @malloc( i32 16 ) ; <i8*> [#uses=3]
- store i8* %reg111, i8** %cast1006
- %reg111.upgrd.1 = ptrtoint i8* %reg111 to i64 ; <i64> [#uses=1]
- %reg1002 = add i64 %reg111.upgrd.1, 8 ; <i64> [#uses=1]
- %reg1002.upgrd.2 = inttoptr i64 %reg1002 to i8* ; <i8*> [#uses=1]
- %cast1008 = bitcast i8* %reg1002.upgrd.2 to i32* ; <i32*> [#uses=1]
- store i32 %Data, i32* %cast1008
- %cast1003 = inttoptr i64 0 to i64* ; <i64*> [#uses=1]
- %cast1009 = bitcast i8* %reg111 to i64** ; <i64**> [#uses=1]
- store i64* %cast1003, i64** %cast1009
- ret void
-}
-
-define %list* @FindData(%list* %L, i32 %Data) {
-bb1:
- br label %bb2
-
-bb2: ; preds = %bb6, %bb1
- %reg115 = phi %list* [ %reg116, %bb6 ], [ %L, %bb1 ] ; <%list*> [#uses=4]
- %cast1014 = inttoptr i64 0 to %list* ; <%list*> [#uses=1]
- %cond1011 = icmp ne %list* %reg115, %cast1014 ; <i1> [#uses=1]
- br i1 %cond1011, label %bb4, label %bb3
-
-bb3: ; preds = %bb2
- ret %list* null
-
-bb4: ; preds = %bb2
- %idx = getelementptr %list* %reg115, i64 0, i32 1 ; <i32*> [#uses=1]
- %reg111 = load i32* %idx ; <i32> [#uses=1]
- %cond1013 = icmp ne i32 %reg111, %Data ; <i1> [#uses=1]
- br i1 %cond1013, label %bb6, label %bb5
-
-bb5: ; preds = %bb4
- ret %list* %reg115
-
-bb6: ; preds = %bb4
- %idx2 = getelementptr %list* %reg115, i64 0, i32 0 ; <%list**> [#uses=1]
- %reg116 = load %list** %idx2 ; <%list*> [#uses=1]
- br label %bb2
-}
-; ModuleID = 'simplecalltest.ll'
- %FunTy = type i32 (i32)
-
-define void @invoke1019(%FunTy* %x) {
- %foo = call i32 %x( i32 123 ) ; <i32> [#uses=0]
- ret void
-}
-
-define i32 @main4(i32 %argc, i8** %argv, i8** %envp) {
- %retval = call i32 @test1008( i32 %argc ) ; <i32> [#uses=2]
- %two = add i32 %retval, %retval ; <i32> [#uses=1]
- %retval2 = call i32 @test1008( i32 %argc ) ; <i32> [#uses=1]
- %two2 = add i32 %two, %retval2 ; <i32> [#uses=1]
- call void @invoke1019( %FunTy* @test1008 )
- ret i32 %two2
-}
-
-define i32 @test1008(i32 %i0) {
- ret i32 %i0
-}
-; ModuleID = 'smallest.ll'
-; ModuleID = 'small.ll'
- %x = type i32
-
-define i32 @foo1020(i32 %in) {
-label:
- ret i32 2
-}
-; ModuleID = 'testalloca.ll'
- %inners = type { float, { i8 } }
- %struct = type { i32, %inners, i64 }
-
-define i32 @testfunction(i32 %i0, i32 %j0) {
- alloca i8, i32 5 ; <i8*>:1 [#uses=0]
- %ptr = alloca i32 ; <i32*> [#uses=2]
- store i32 3, i32* %ptr
- %val = load i32* %ptr ; <i32> [#uses=0]
- %sptr = alloca %struct ; <%struct*> [#uses=2]
- %nsptr = getelementptr %struct* %sptr, i64 0, i32 1 ; <%inners*> [#uses=1]
- %ubsptr = getelementptr %inners* %nsptr, i64 0, i32 1 ; <{ i8 }*> [#uses=1]
- %idx = getelementptr { i8 }* %ubsptr, i64 0, i32 0 ; <i8*> [#uses=1]
- store i8 4, i8* %idx
- %fptr = getelementptr %struct* %sptr, i64 0, i32 1, i32 0 ; <float*> [#uses=1]
- store float 4.000000e+00, float* %fptr
- ret i32 3
-}
-; ModuleID = 'testconstants.ll'
-@somestr3 = constant [11 x i8] c"hello world"
-@array99 = constant [2 x i32] [ i32 12, i32 52 ]
-constant { i32, i32 } { i32 4, i32 3 } ; <{ i32, i32 }*>:0 [#uses=0]
-
-define [2 x i32]* @testfunction99(i32 %i0, i32 %j0) {
- ret [2 x i32]* @array
-}
-
-define i8* @otherfunc(i32, double) {
- %somestr = getelementptr [11 x i8]* @somestr3, i64 0, i64 0 ; <i8*> [#uses=1]
- ret i8* %somestr
-}
-
-define i8* @yetanotherfunc(i32, double) {
- ret i8* null
-}
-
-define i32 @negativeUnsigned() {
- ret i32 -1
-}
-
-define i32 @largeSigned() {
- ret i32 -394967296
-}
-; ModuleID = 'testlogical.ll'
-
-define i32 @simpleAdd(i32 %i0, i32 %j0) {
- %t1 = xor i32 %i0, %j0 ; <i32> [#uses=1]
- %t2 = or i32 %i0, %j0 ; <i32> [#uses=1]
- %t3 = and i32 %t1, %t2 ; <i32> [#uses=1]
- ret i32 %t3
-}
-; ModuleID = 'testmemory.ll'
- %complexty = type { i32, { [4 x i8*], float }, double }
- %struct = type { i32, { float, { i8 } }, i64 }
-
-define i32 @main6() {
- call i32 @testfunction98( i64 0, i64 1 )
- ret i32 0
-}
-
-define i32 @testfunction98(i64 %i0, i64 %j0) {
- %array0 = malloc [4 x i8] ; <[4 x i8]*> [#uses=2]
- %size = add i32 2, 2 ; <i32> [#uses=1]
- %array1 = malloc i8, i32 4 ; <i8*> [#uses=1]
- %array2 = malloc i8, i32 %size ; <i8*> [#uses=1]
- %idx = getelementptr [4 x i8]* %array0, i64 0, i64 2 ; <i8*> [#uses=1]
- store i8 123, i8* %idx
- free [4 x i8]* %array0
- free i8* %array1
- free i8* %array2
- %aa = alloca %complexty, i32 5 ; <%complexty*> [#uses=1]
- %idx2 = getelementptr %complexty* %aa, i64 %i0, i32 1, i32 0, i64 %j0 ; <i8**> [#uses=1]
- store i8* null, i8** %idx2
- %ptr = alloca i32 ; <i32*> [#uses=2]
- store i32 3, i32* %ptr
- %val = load i32* %ptr ; <i32> [#uses=0]
- %sptr = alloca %struct ; <%struct*> [#uses=1]
- %ubsptr = getelementptr %struct* %sptr, i64 0, i32 1, i32 1 ; <{ i8 }*> [#uses=1]
- %idx3 = getelementptr { i8 }* %ubsptr, i64 0, i32 0 ; <i8*> [#uses=1]
- store i8 4, i8* %idx3
- ret i32 3
-}
-; ModuleID = 'testswitch.ll'
- %int = type i32
-
-define i32 @squared(i32 %i0) {
- switch i32 %i0, label %Default [
- i32 1, label %Case1
- i32 2, label %Case2
- i32 4, label %Case4
- ]
-
-Default: ; preds = %0
- ret i32 -1
-
-Case1: ; preds = %0
- ret i32 1
-
-Case2: ; preds = %0
- ret i32 4
-
-Case4: ; preds = %0
- ret i32 16
-}
-; ModuleID = 'testvarargs.ll'
-
-declare i32 @printf(i8*, ...)
-
-define i32 @testvarar() {
- call i32 (i8*, ...)* @printf( i8* null, i32 12, i8 42 ) ; <i32>:1 [#uses=1]
- ret i32 %1
-}
-; ModuleID = 'undefined.ll'
-@X2 = global i32 undef ; <i32*> [#uses=0]
-
-declare i32 @atoi(i8*)
-
-define i32 @test1009() {
- ret i32 undef
-}
-
-define i32 @test1003() {
- %X = add i32 undef, 1 ; <i32> [#uses=1]
- ret i32 %X
-}
-; ModuleID = 'unreachable.ll'
-
-declare void @bar()
-
-define i32 @foo1021() {
- unreachable
-}
-
-define double @xyz() {
- call void @bar( )
- unreachable
-}
-; ModuleID = 'varargs.ll'
-
-declare void @llvm.va_start(i8* %ap)
-
-declare void @llvm.va_copy(i8* %aq, i8* %ap)
-
-declare void @llvm.va_end(i8* %ap)
-
-define i32 @test1010(i32 %X, ...) {
- %ap = alloca i8* ; <i8**> [#uses=4]
- %va.upgrd.1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
- call void @llvm.va_start( i8* %va.upgrd.1 )
- %tmp = va_arg i8** %ap, i32 ; <i32> [#uses=1]
- %aq = alloca i8* ; <i8**> [#uses=2]
- %va0.upgrd.2 = bitcast i8** %aq to i8* ; <i8*> [#uses=1]
- %va1.upgrd.3 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
- call void @llvm.va_copy( i8* %va0.upgrd.2, i8* %va1.upgrd.3 )
- %va.upgrd.4 = bitcast i8** %aq to i8* ; <i8*> [#uses=1]
- call void @llvm.va_end( i8* %va.upgrd.4 )
- %va.upgrd.5 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
- call void @llvm.va_end( i8* %va.upgrd.5 )
- ret i32 %tmp
-}
-; ModuleID = 'varargs_new.ll'
-
-declare void @llvm.va_start(i8*)
-
-declare void @llvm.va_copy(i8*, i8*)
-
-declare void @llvm.va_end(i8*)
-
-define i32 @test1011(i32 %X, ...) {
- %ap = alloca i8* ; <i8**> [#uses=4]
- %aq = alloca i8* ; <i8**> [#uses=2]
- %va.upgrd.1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
- call void @llvm.va_start( i8* %va.upgrd.1 )
- %tmp = va_arg i8** %ap, i32 ; <i32> [#uses=1]
- %apv = load i8** %ap ; <i8*> [#uses=1]
- %va0.upgrd.2 = bitcast i8** %aq to i8* ; <i8*> [#uses=1]
- %va1.upgrd.3 = bitcast i8* %apv to i8* ; <i8*> [#uses=1]
- call void @llvm.va_copy( i8* %va0.upgrd.2, i8* %va1.upgrd.3 )
- %va.upgrd.4 = bitcast i8** %aq to i8* ; <i8*> [#uses=1]
- call void @llvm.va_end( i8* %va.upgrd.4 )
- %va.upgrd.5 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
- call void @llvm.va_end( i8* %va.upgrd.5 )
- ret i32 %tmp
-}
-; ModuleID = 'weirdnames.ll'
- "&^ " = type { i32 }
-@"%.*+ foo" = global "&^ " { i32 5 } ; <"&^ "*> [#uses=0]
-@"0" = global float 0.000000e+00 ; <float*> [#uses=0]
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index 139e97b..72478a1 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -201,12 +201,12 @@ define <4 x i32> @andi_v4i32_4(<4 x i32> %in) {
ret <4 x i32> %tmp2
}
-define i32 @andi_u32(i32 zeroext %in) zeroext {
+define zeroext i32 @andi_u32(i32 zeroext %in) {
%tmp37 = and i32 %in, 37
ret i32 %tmp37
}
-define i32 @andi_i32(i32 signext %in) signext {
+define signext i32 @andi_i32(i32 signext %in) {
%tmp38 = and i32 %in, 37
ret i32 %tmp38
}
@@ -241,12 +241,12 @@ define <8 x i16> @andhi_v8i16_4(<8 x i16> %in) {
ret <8 x i16> %tmp2
}
-define i16 @andhi_u16(i16 zeroext %in) zeroext {
+define zeroext i16 @andhi_u16(i16 zeroext %in) {
%tmp37 = and i16 %in, 37 ; <i16> [#uses=1]
ret i16 %tmp37
}
-define i16 @andhi_i16(i16 signext %in) signext {
+define signext i16 @andhi_i16(i16 signext %in) {
%tmp38 = and i16 %in, 37 ; <i16> [#uses=1]
ret i16 %tmp38
}
@@ -260,13 +260,13 @@ define <16 x i8> @and_v16i8(<16 x i8> %in) {
ret <16 x i8> %tmp2
}
-define i8 @and_u8(i8 zeroext %in) zeroext {
+define zeroext i8 @and_u8(i8 zeroext %in) {
; ANDBI generated:
%tmp37 = and i8 %in, 37
ret i8 %tmp37
}
-define i8 @and_sext8(i8 signext %in) signext {
+define signext i8 @and_sext8(i8 signext %in) {
; ANDBI generated
%tmp38 = and i8 %in, 37
ret i8 %tmp38
diff --git a/test/CodeGen/CellSPU/eqv.ll b/test/CodeGen/CellSPU/eqv.ll
index 22c8c3b..7967681 100644
--- a/test/CodeGen/CellSPU/eqv.ll
+++ b/test/CodeGen/CellSPU/eqv.ll
@@ -79,7 +79,7 @@ define i32 @equiv_i32_5(i32 %arg1, i32 %arg2) {
ret i32 %C
}
-define i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) signext {
+define signext i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) {
%A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
%B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
%Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
@@ -87,7 +87,7 @@ define i16 @equiv_i16_1(i16 signext %arg1, i16 signext %arg2) signext {
ret i16 %C
}
-define i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) signext {
+define signext i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) {
%B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
%Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
%A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
@@ -95,7 +95,7 @@ define i16 @equiv_i16_2(i16 signext %arg1, i16 signext %arg2) signext {
ret i16 %C
}
-define i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) signext {
+define signext i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) {
%B = or i16 %arg1, %arg2 ; <i16> [#uses=1]
%A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
%Bnot = xor i16 %B, -1 ; <i16> [#uses=1]
@@ -103,7 +103,7 @@ define i16 @equiv_i16_3(i16 signext %arg1, i16 signext %arg2) signext {
ret i16 %C
}
-define i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) signext {
+define signext i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) {
%A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
%B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
%Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
@@ -111,7 +111,7 @@ define i8 @equiv_i8_1(i8 signext %arg1, i8 signext %arg2) signext {
ret i8 %C
}
-define i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) signext {
+define signext i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) {
%B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
%Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
%A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
@@ -119,7 +119,7 @@ define i8 @equiv_i8_2(i8 signext %arg1, i8 signext %arg2) signext {
ret i8 %C
}
-define i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) signext {
+define signext i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) {
%B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
%A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
%Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
@@ -127,7 +127,7 @@ define i8 @equiv_i8_3(i8 signext %arg1, i8 signext %arg2) signext {
ret i8 %C
}
-define i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+define zeroext i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) {
%A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
%B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
%Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
@@ -135,7 +135,7 @@ define i8 @equiv_u8_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
ret i8 %C
}
-define i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+define zeroext i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) {
%B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
%Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
%A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
@@ -143,7 +143,7 @@ define i8 @equiv_u8_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
ret i8 %C
}
-define i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+define zeroext i8 @equiv_u8_3(i8 zeroext %arg1, i8 zeroext %arg2) {
%B = or i8 %arg1, %arg2 ; <i8> [#uses=1]
%A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
%Bnot = xor i8 %B, -1 ; <i8> [#uses=1]
diff --git a/test/CodeGen/CellSPU/mul-with-overflow.ll b/test/CodeGen/CellSPU/mul-with-overflow.ll
index d15da12..c04e69e 100644
--- a/test/CodeGen/CellSPU/mul-with-overflow.ll
+++ b/test/CodeGen/CellSPU/mul-with-overflow.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -march=cellspu
declare {i16, i1} @llvm.smul.with.overflow.i16(i16 %a, i16 %b)
-define i1 @a(i16 %x) zeroext nounwind {
+define zeroext i1 @a(i16 %x) nounwind {
%res = call {i16, i1} @llvm.smul.with.overflow.i16(i16 %x, i16 3)
%obil = extractvalue {i16, i1} %res, 1
ret i1 %obil
}
declare {i16, i1} @llvm.umul.with.overflow.i16(i16 %a, i16 %b)
-define i1 @b(i16 %x) zeroext nounwind {
+define zeroext i1 @b(i16 %x) nounwind {
%res = call {i16, i1} @llvm.umul.with.overflow.i16(i16 %x, i16 3)
%obil = extractvalue {i16, i1} %res, 1
ret i1 %obil
diff --git a/test/CodeGen/CellSPU/nand.ll b/test/CodeGen/CellSPU/nand.ll
index e141923..b770cad 100644
--- a/test/CodeGen/CellSPU/nand.ll
+++ b/test/CodeGen/CellSPU/nand.ll
@@ -60,49 +60,49 @@ define i32 @nand_i32_2(i32 %arg1, i32 %arg2) {
ret i32 %B
}
-define i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) signext {
+define signext i16 @nand_i16_1(i16 signext %arg1, i16 signext %arg2) {
%A = and i16 %arg2, %arg1 ; <i16> [#uses=1]
%B = xor i16 %A, -1 ; <i16> [#uses=1]
ret i16 %B
}
-define i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) signext {
+define signext i16 @nand_i16_2(i16 signext %arg1, i16 signext %arg2) {
%A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
%B = xor i16 %A, -1 ; <i16> [#uses=1]
ret i16 %B
}
-define i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
+define zeroext i16 @nand_i16u_1(i16 zeroext %arg1, i16 zeroext %arg2) {
%A = and i16 %arg2, %arg1 ; <i16> [#uses=1]
%B = xor i16 %A, -1 ; <i16> [#uses=1]
ret i16 %B
}
-define i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
+define zeroext i16 @nand_i16u_2(i16 zeroext %arg1, i16 zeroext %arg2) {
%A = and i16 %arg1, %arg2 ; <i16> [#uses=1]
%B = xor i16 %A, -1 ; <i16> [#uses=1]
ret i16 %B
}
-define i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+define zeroext i8 @nand_i8u_1(i8 zeroext %arg1, i8 zeroext %arg2) {
%A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
%B = xor i8 %A, -1 ; <i8> [#uses=1]
ret i8 %B
}
-define i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) zeroext {
+define zeroext i8 @nand_i8u_2(i8 zeroext %arg1, i8 zeroext %arg2) {
%A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
%B = xor i8 %A, -1 ; <i8> [#uses=1]
ret i8 %B
}
-define i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) signext {
+define signext i8 @nand_i8_1(i8 signext %arg1, i8 signext %arg2) {
%A = and i8 %arg2, %arg1 ; <i8> [#uses=1]
%B = xor i8 %A, -1 ; <i8> [#uses=1]
ret i8 %B
}
-define i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) signext {
+define signext i8 @nand_i8_2(i8 signext %arg1, i8 signext %arg2) {
%A = and i8 %arg1, %arg2 ; <i8> [#uses=1]
%B = xor i8 %A, -1 ; <i8> [#uses=1]
ret i8 %B
diff --git a/test/CodeGen/CellSPU/or_ops.ll b/test/CodeGen/CellSPU/or_ops.ll
index 8aa1e99..46349b9 100644
--- a/test/CodeGen/CellSPU/or_ops.ll
+++ b/test/CodeGen/CellSPU/or_ops.ll
@@ -200,12 +200,12 @@ define <4 x i32> @ori_v4i32_4(<4 x i32> %in) {
ret <4 x i32> %tmp2
}
-define i32 @ori_u32(i32 zeroext %in) zeroext {
+define zeroext i32 @ori_u32(i32 zeroext %in) {
%tmp37 = or i32 %in, 37 ; <i32> [#uses=1]
ret i32 %tmp37
}
-define i32 @ori_i32(i32 signext %in) signext {
+define signext i32 @ori_i32(i32 signext %in) {
%tmp38 = or i32 %in, 37 ; <i32> [#uses=1]
ret i32 %tmp38
}
@@ -235,12 +235,12 @@ define <8 x i16> @orhi_v8i16_4(<8 x i16> %in) {
ret <8 x i16> %tmp2
}
-define i16 @orhi_u16(i16 zeroext %in) zeroext {
+define zeroext i16 @orhi_u16(i16 zeroext %in) {
%tmp37 = or i16 %in, 37 ; <i16> [#uses=1]
ret i16 %tmp37
}
-define i16 @orhi_i16(i16 signext %in) signext {
+define signext i16 @orhi_i16(i16 signext %in) {
%tmp38 = or i16 %in, 37 ; <i16> [#uses=1]
ret i16 %tmp38
}
@@ -253,12 +253,12 @@ define <16 x i8> @orbi_v16i8(<16 x i8> %in) {
ret <16 x i8> %tmp2
}
-define i8 @orbi_u8(i8 zeroext %in) zeroext {
+define zeroext i8 @orbi_u8(i8 zeroext %in) {
%tmp37 = or i8 %in, 37 ; <i8> [#uses=1]
ret i8 %tmp37
}
-define i8 @orbi_i8(i8 signext %in) signext {
+define signext i8 @orbi_i8(i8 signext %in) {
%tmp38 = or i8 %in, 37 ; <i8> [#uses=1]
ret i8 %tmp38
}
diff --git a/test/CodeGen/CellSPU/private.ll b/test/CodeGen/CellSPU/private.ll
index 56f72e7..1d933ad 100644
--- a/test/CodeGen/CellSPU/private.ll
+++ b/test/CodeGen/CellSPU/private.ll
@@ -6,9 +6,6 @@
; RUN: grep .Lbaz: %t
; RUN: grep ila.*\.Lbaz %t
-
-declare void @foo()
-
define private void @foo() {
ret void
}
diff --git a/test/CodeGen/CellSPU/shift_ops.ll b/test/CodeGen/CellSPU/shift_ops.ll
index c4a5abd..3252c77 100644
--- a/test/CodeGen/CellSPU/shift_ops.ll
+++ b/test/CodeGen/CellSPU/shift_ops.ll
@@ -33,22 +33,22 @@ define i16 @shlh_i16_2(i16 %arg1, i16 %arg2) {
ret i16 %A
}
-define i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) signext {
+define signext i16 @shlh_i16_3(i16 signext %arg1, i16 signext %arg2) {
%A = shl i16 %arg1, %arg2
ret i16 %A
}
-define i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) signext {
+define signext i16 @shlh_i16_4(i16 signext %arg1, i16 signext %arg2) {
%A = shl i16 %arg2, %arg1
ret i16 %A
}
-define i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
+define zeroext i16 @shlh_i16_5(i16 zeroext %arg1, i16 zeroext %arg2) {
%A = shl i16 %arg1, %arg2
ret i16 %A
}
-define i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) zeroext {
+define zeroext i16 @shlh_i16_6(i16 zeroext %arg1, i16 zeroext %arg2) {
%A = shl i16 %arg2, %arg1
ret i16 %A
}
@@ -76,46 +76,46 @@ define i16 @shlhi_i16_4(i16 %arg1) {
ret i16 %A
}
-define i16 @shlhi_i16_5(i16 signext %arg1) signext {
+define signext i16 @shlhi_i16_5(i16 signext %arg1) {
%A = shl i16 %arg1, 12
ret i16 %A
}
; Should not generate anything other than the return, arg1 << 0 = arg1
-define i16 @shlhi_i16_6(i16 signext %arg1) signext {
+define signext i16 @shlhi_i16_6(i16 signext %arg1) {
%A = shl i16 %arg1, 0
ret i16 %A
}
-define i16 @shlhi_i16_7(i16 signext %arg1) signext {
+define signext i16 @shlhi_i16_7(i16 signext %arg1) {
%A = shl i16 16383, %arg1
ret i16 %A
}
; Should generate 0, 0 << arg1 = 0
-define i16 @shlhi_i16_8(i16 signext %arg1) signext {
+define signext i16 @shlhi_i16_8(i16 signext %arg1) {
%A = shl i16 0, %arg1
ret i16 %A
}
-define i16 @shlhi_i16_9(i16 zeroext %arg1) zeroext {
+define zeroext i16 @shlhi_i16_9(i16 zeroext %arg1) {
%A = shl i16 %arg1, 12
ret i16 %A
}
; Should not generate anything other than the return, arg1 << 0 = arg1
-define i16 @shlhi_i16_10(i16 zeroext %arg1) zeroext {
+define zeroext i16 @shlhi_i16_10(i16 zeroext %arg1) {
%A = shl i16 %arg1, 0
ret i16 %A
}
-define i16 @shlhi_i16_11(i16 zeroext %arg1) zeroext {
+define zeroext i16 @shlhi_i16_11(i16 zeroext %arg1) {
%A = shl i16 16383, %arg1
ret i16 %A
}
; Should generate 0, 0 << arg1 = 0
-define i16 @shlhi_i16_12(i16 zeroext %arg1) zeroext {
+define zeroext i16 @shlhi_i16_12(i16 zeroext %arg1) {
%A = shl i16 0, %arg1
ret i16 %A
}
@@ -133,22 +133,22 @@ define i32 @shl_i32_2(i32 %arg1, i32 %arg2) {
ret i32 %A
}
-define i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) signext {
+define signext i32 @shl_i32_3(i32 signext %arg1, i32 signext %arg2) {
%A = shl i32 %arg1, %arg2
ret i32 %A
}
-define i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) signext {
+define signext i32 @shl_i32_4(i32 signext %arg1, i32 signext %arg2) {
%A = shl i32 %arg2, %arg1
ret i32 %A
}
-define i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) zeroext {
+define zeroext i32 @shl_i32_5(i32 zeroext %arg1, i32 zeroext %arg2) {
%A = shl i32 %arg1, %arg2
ret i32 %A
}
-define i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) zeroext {
+define zeroext i32 @shl_i32_6(i32 zeroext %arg1, i32 zeroext %arg2) {
%A = shl i32 %arg2, %arg1
ret i32 %A
}
@@ -176,46 +176,46 @@ define i32 @shli_i32_4(i32 %arg1) {
ret i32 %A
}
-define i32 @shli_i32_5(i32 signext %arg1) signext {
+define signext i32 @shli_i32_5(i32 signext %arg1) {
%A = shl i32 %arg1, 12
ret i32 %A
}
; Should not generate anything other than the return, arg1 << 0 = arg1
-define i32 @shli_i32_6(i32 signext %arg1) signext {
+define signext i32 @shli_i32_6(i32 signext %arg1) {
%A = shl i32 %arg1, 0
ret i32 %A
}
-define i32 @shli_i32_7(i32 signext %arg1) signext {
+define signext i32 @shli_i32_7(i32 signext %arg1) {
%A = shl i32 16383, %arg1
ret i32 %A
}
; Should generate 0, 0 << arg1 = 0
-define i32 @shli_i32_8(i32 signext %arg1) signext {
+define signext i32 @shli_i32_8(i32 signext %arg1) {
%A = shl i32 0, %arg1
ret i32 %A
}
-define i32 @shli_i32_9(i32 zeroext %arg1) zeroext {
+define zeroext i32 @shli_i32_9(i32 zeroext %arg1) {
%A = shl i32 %arg1, 12
ret i32 %A
}
; Should not generate anything other than the return, arg1 << 0 = arg1
-define i32 @shli_i32_10(i32 zeroext %arg1) zeroext {
+define zeroext i32 @shli_i32_10(i32 zeroext %arg1) {
%A = shl i32 %arg1, 0
ret i32 %A
}
-define i32 @shli_i32_11(i32 zeroext %arg1) zeroext {
+define zeroext i32 @shli_i32_11(i32 zeroext %arg1) {
%A = shl i32 16383, %arg1
ret i32 %A
}
; Should generate 0, 0 << arg1 = 0
-define i32 @shli_i32_12(i32 zeroext %arg1) zeroext {
+define zeroext i32 @shli_i32_12(i32 zeroext %arg1) {
%A = shl i32 0, %arg1
ret i32 %A
}
diff --git a/test/CodeGen/CellSPU/struct_1.ll b/test/CodeGen/CellSPU/struct_1.ll
index 8ee7d93..adbb5ef 100644
--- a/test/CodeGen/CellSPU/struct_1.ll
+++ b/test/CodeGen/CellSPU/struct_1.ll
@@ -47,19 +47,19 @@ target triple = "spu"
; struct hackstate state = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
@state = global %struct.hackstate zeroinitializer, align 16
-define i8 @get_hackstate_c1() zeroext nounwind {
+define zeroext i8 @get_hackstate_c1() nounwind {
entry:
%tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 0), align 16
ret i8 %tmp2
}
-define i8 @get_hackstate_c2() zeroext nounwind {
+define zeroext i8 @get_hackstate_c2() nounwind {
entry:
%tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 1), align 16
ret i8 %tmp2
}
-define i8 @get_hackstate_c3() zeroext nounwind {
+define zeroext i8 @get_hackstate_c3() nounwind {
entry:
%tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 2), align 16
ret i8 %tmp2
@@ -71,19 +71,19 @@ entry:
ret i32 %tmp2
}
-define i16 @get_hackstate_s1() signext nounwind {
+define signext i16 @get_hackstate_s1() nounwind {
entry:
%tmp2 = load i16* getelementptr (%struct.hackstate* @state, i32 0, i32 4), align 16
ret i16 %tmp2
}
-define i8 @get_hackstate_c6() zeroext nounwind {
+define zeroext i8 @get_hackstate_c6() nounwind {
entry:
%tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 8), align 16
ret i8 %tmp2
}
-define i8 @get_hackstate_c7() zeroext nounwind {
+define zeroext i8 @get_hackstate_c7() nounwind {
entry:
%tmp2 = load i8* getelementptr (%struct.hackstate* @state, i32 0, i32 9), align 16
ret i8 %tmp2
diff --git a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll b/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
deleted file mode 100644
index 733202c..0000000
--- a/test/CodeGen/Generic/2005-07-12-memcpy-i64-length.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: llc < %s
-; Test that llvm.memcpy works with a i64 length operand on all targets.
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-define void @l12_l94_bc_divide_endif_2E_3_2E_ce() {
-newFuncRoot:
- tail call void @llvm.memcpy.i64( i8* null, i8* null, i64 0, i32 1 )
- unreachable
-}
-
diff --git a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
index 568b88f..2a2cf6c 100644
--- a/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
+++ b/test/CodeGen/Generic/2007-04-30-LandingPadBranchFolding.ll
@@ -1,13 +1,13 @@
; RUN: llc < %s
; PR1228
- "struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
- "struct.std::locale" = type { "struct.std::locale::_Impl"* }
- "struct.std::locale::_Impl" = type { i32, "struct.std::locale::facet"**, i32, "struct.std::locale::facet"**, i8** }
- "struct.std::locale::facet" = type { i32 (...)**, i32 }
- "struct.std::string" = type { "struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
+ %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" = type { i8* }
+ %"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
+ %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
+ %"struct.std::locale::facet" = type { i32 (...)**, i32 }
+ %"struct.std::string" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Alloc_hider" }
-define void @_ZNKSt6locale4nameEv("struct.std::string"* %agg.result) {
+define void @_ZNKSt6locale4nameEv(%"struct.std::string"* %agg.result) {
entry:
%tmp105 = icmp eq i8* null, null ; <i1> [#uses=1]
br i1 %tmp105, label %cond_true, label %cond_true222
diff --git a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll b/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
deleted file mode 100644
index 3090857..0000000
--- a/test/CodeGen/Generic/2007-11-21-UndeadIllegalNode.ll
+++ /dev/null
@@ -1,157 +0,0 @@
-; RUN: llc < %s -o -
-
- %struct.RETURN = type { i32, i32 }
- %struct.ada__finalization__controlled = type { %struct.system__finalization_root__root_controlled }
- %struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
- %struct.ada__strings__unbounded__string_access = type { i8*, %struct.RETURN* }
- %struct.ada__strings__unbounded__unbounded_string = type { %struct.ada__finalization__controlled, %struct.ada__strings__unbounded__string_access, i32 }
- %struct.ada__tags__dispatch_table = type { [1 x i32] }
- %struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* }
- %struct.system__finalization_root__root_controlled = type { %struct.ada__streams__root_stream_type, %struct.system__finalization_root__root_controlled*, %struct.system__finalization_root__root_controlled* }
- %struct.system__standard_library__exception_data = type { i8, i8, i32, i32, %struct.system__standard_library__exception_data*, i32, void ()* }
-@C.495.7639 = internal constant %struct.RETURN { i32 1, i32 16 } ; <%struct.RETURN*> [#uses=1]
-@ada__strings__index_error = external global %struct.exception ; <%struct.exception*> [#uses=1]
-@.str5 = internal constant [16 x i8] c"a-strunb.adb:690" ; <[16 x i8]*> [#uses=1]
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @ada__strings__unbounded__realloc_for_chunk(%struct.ada__strings__unbounded__unbounded_string*, i32)
-
-declare void @__gnat_raise_exception(%struct.system__standard_library__exception_data*, i64)
-
-define void @ada__strings__unbounded__insert__2(%struct.ada__strings__unbounded__unbounded_string* %source, i32 %before, i64 %new_item.0.0) {
-entry:
- %tmp24636 = lshr i64 %new_item.0.0, 32 ; <i64> [#uses=1]
- %tmp24637 = trunc i64 %tmp24636 to i32 ; <i32> [#uses=1]
- %tmp24638 = inttoptr i32 %tmp24637 to %struct.RETURN* ; <%struct.RETURN*> [#uses=2]
- %tmp25 = getelementptr %struct.RETURN* %tmp24638, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp26 = load i32* %tmp25, align 4 ; <i32> [#uses=1]
- %tmp29 = getelementptr %struct.RETURN* %tmp24638, i32 0, i32 1 ; <i32*> [#uses=1]
- %tmp30 = load i32* %tmp29, align 4 ; <i32> [#uses=1]
- %tmp63 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 1, i32 1 ; <%struct.RETURN**> [#uses=5]
- %tmp64 = load %struct.RETURN** %tmp63, align 4 ; <%struct.RETURN*> [#uses=1]
- %tmp65 = getelementptr %struct.RETURN* %tmp64, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp66 = load i32* %tmp65, align 4 ; <i32> [#uses=1]
- %tmp67 = icmp sgt i32 %tmp66, %before ; <i1> [#uses=1]
- br i1 %tmp67, label %bb77, label %bb
-
-bb: ; preds = %entry
- %tmp71 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 2 ; <i32*> [#uses=4]
- %tmp72 = load i32* %tmp71, align 4 ; <i32> [#uses=1]
- %tmp73 = add i32 %tmp72, 1 ; <i32> [#uses=1]
- %tmp74 = icmp slt i32 %tmp73, %before ; <i1> [#uses=1]
- br i1 %tmp74, label %bb77, label %bb84
-
-bb77: ; preds = %bb, %entry
- tail call void @__gnat_raise_exception( %struct.system__standard_library__exception_data* bitcast (%struct.exception* @ada__strings__index_error to %struct.system__standard_library__exception_data*), i64 or (i64 zext (i32 ptrtoint ([16 x i8]* @.str5 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.RETURN* @C.495.7639 to i32) to i64), i64 32)) )
- unreachable
-
-bb84: ; preds = %bb
- %tmp93 = sub i32 %tmp30, %tmp26 ; <i32> [#uses=2]
- %tmp9394 = sext i32 %tmp93 to i36 ; <i36> [#uses=1]
- %tmp95 = shl i36 %tmp9394, 3 ; <i36> [#uses=1]
- %tmp96 = add i36 %tmp95, 8 ; <i36> [#uses=2]
- %tmp97 = icmp sgt i36 %tmp96, -1 ; <i1> [#uses=1]
- %tmp100 = select i1 %tmp97, i36 %tmp96, i36 0 ; <i36> [#uses=2]
- %tmp101 = icmp slt i36 %tmp100, 17179869177 ; <i1> [#uses=1]
- %tmp100.cast = trunc i36 %tmp100 to i32 ; <i32> [#uses=1]
- %min102 = select i1 %tmp101, i32 %tmp100.cast, i32 -8 ; <i32> [#uses=1]
- tail call void @ada__strings__unbounded__realloc_for_chunk( %struct.ada__strings__unbounded__unbounded_string* %source, i32 %min102 )
- %tmp148 = load i32* %tmp71, align 4 ; <i32> [#uses=4]
- %tmp152 = add i32 %tmp93, 1 ; <i32> [#uses=2]
- %tmp153 = icmp sgt i32 %tmp152, -1 ; <i1> [#uses=1]
- %max154 = select i1 %tmp153, i32 %tmp152, i32 0 ; <i32> [#uses=5]
- %tmp155 = add i32 %tmp148, %max154 ; <i32> [#uses=5]
- %tmp315 = getelementptr %struct.ada__strings__unbounded__unbounded_string* %source, i32 0, i32 1, i32 0 ; <i8**> [#uses=4]
- %tmp328 = load %struct.RETURN** %tmp63, align 4 ; <%struct.RETURN*> [#uses=1]
- %tmp329 = getelementptr %struct.RETURN* %tmp328, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp330 = load i32* %tmp329, align 4 ; <i32> [#uses=4]
- %tmp324 = add i32 %max154, %before ; <i32> [#uses=3]
- %tmp331 = sub i32 %tmp324, %tmp330 ; <i32> [#uses=1]
- %tmp349 = sub i32 %before, %tmp330 ; <i32> [#uses=1]
- %tmp356 = icmp sgt i32 %tmp331, %tmp349 ; <i1> [#uses=1]
- %tmp431 = icmp sgt i32 %tmp324, %tmp155 ; <i1> [#uses=2]
- br i1 %tmp356, label %bb420, label %bb359
-
-bb359: ; preds = %bb84
- br i1 %tmp431, label %bb481, label %bb382
-
-bb382: ; preds = %bb382, %bb359
- %indvar = phi i32 [ 0, %bb359 ], [ %indvar.next, %bb382 ] ; <i32> [#uses=2]
- %max379.pn = phi i32 [ %max154, %bb359 ], [ %L492b.0, %bb382 ] ; <i32> [#uses=1]
- %before.pn = phi i32 [ %before, %bb359 ], [ 1, %bb382 ] ; <i32> [#uses=1]
- %L492b.0 = add i32 %before.pn, %max379.pn ; <i32> [#uses=3]
- %tmp386 = load %struct.RETURN** %tmp63, align 4 ; <%struct.RETURN*> [#uses=1]
- %tmp387 = getelementptr %struct.RETURN* %tmp386, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp388 = load i32* %tmp387, align 4 ; <i32> [#uses=2]
- %tmp392 = load i8** %tmp315, align 4 ; <i8*> [#uses=2]
- %R493b.0 = add i32 %indvar, %before ; <i32> [#uses=1]
- %tmp405 = sub i32 %R493b.0, %tmp388 ; <i32> [#uses=1]
- %tmp406 = getelementptr i8* %tmp392, i32 %tmp405 ; <i8*> [#uses=1]
- %tmp407 = load i8* %tmp406, align 1 ; <i8> [#uses=1]
- %tmp408 = sub i32 %L492b.0, %tmp388 ; <i32> [#uses=1]
- %tmp409 = getelementptr i8* %tmp392, i32 %tmp408 ; <i8*> [#uses=1]
- store i8 %tmp407, i8* %tmp409, align 1
- %tmp414 = icmp eq i32 %L492b.0, %tmp155 ; <i1> [#uses=1]
- %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
- br i1 %tmp414, label %bb481, label %bb382
-
-bb420: ; preds = %bb84
- br i1 %tmp431, label %bb481, label %bb436.preheader
-
-bb436.preheader: ; preds = %bb420
- %tmp4468 = load i8** %tmp315, align 4 ; <i8*> [#uses=2]
- %tmp4599 = sub i32 %tmp148, %tmp330 ; <i32> [#uses=1]
- %tmp46010 = getelementptr i8* %tmp4468, i32 %tmp4599 ; <i8*> [#uses=1]
- %tmp46111 = load i8* %tmp46010, align 1 ; <i8> [#uses=1]
- %tmp46212 = sub i32 %tmp155, %tmp330 ; <i32> [#uses=1]
- %tmp46313 = getelementptr i8* %tmp4468, i32 %tmp46212 ; <i8*> [#uses=1]
- store i8 %tmp46111, i8* %tmp46313, align 1
- %exitcond14 = icmp eq i32 %tmp155, %tmp324 ; <i1> [#uses=1]
- br i1 %exitcond14, label %bb481, label %bb.nph
-
-bb.nph: ; preds = %bb436.preheader
- %tmp5 = sub i32 %tmp148, %before ; <i32> [#uses=1]
- br label %bb478
-
-bb478: ; preds = %bb478, %bb.nph
- %indvar6422 = phi i32 [ 0, %bb.nph ], [ %indvar.next643, %bb478 ] ; <i32> [#uses=1]
- %indvar.next643 = add i32 %indvar6422, 1 ; <i32> [#uses=4]
- %L490b.0 = sub i32 %tmp155, %indvar.next643 ; <i32> [#uses=1]
- %R491b.0 = sub i32 %tmp148, %indvar.next643 ; <i32> [#uses=1]
- %tmp440 = load %struct.RETURN** %tmp63, align 4 ; <%struct.RETURN*> [#uses=1]
- %tmp441 = getelementptr %struct.RETURN* %tmp440, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp442 = load i32* %tmp441, align 4 ; <i32> [#uses=2]
- %tmp446 = load i8** %tmp315, align 4 ; <i8*> [#uses=2]
- %tmp459 = sub i32 %R491b.0, %tmp442 ; <i32> [#uses=1]
- %tmp460 = getelementptr i8* %tmp446, i32 %tmp459 ; <i8*> [#uses=1]
- %tmp461 = load i8* %tmp460, align 1 ; <i8> [#uses=1]
- %tmp462 = sub i32 %L490b.0, %tmp442 ; <i32> [#uses=1]
- %tmp463 = getelementptr i8* %tmp446, i32 %tmp462 ; <i8*> [#uses=1]
- store i8 %tmp461, i8* %tmp463, align 1
- %exitcond = icmp eq i32 %indvar.next643, %tmp5 ; <i1> [#uses=1]
- br i1 %exitcond, label %bb481, label %bb478
-
-bb481: ; preds = %bb478, %bb436.preheader, %bb420, %bb382, %bb359
- %tmp577 = add i32 %before, -1 ; <i32> [#uses=3]
- %tmp578 = add i32 %max154, %tmp577 ; <i32> [#uses=2]
- %tmp581 = icmp sge i32 %tmp578, %tmp577 ; <i1> [#uses=1]
- %max582 = select i1 %tmp581, i32 %tmp578, i32 %tmp577 ; <i32> [#uses=1]
- %tmp584 = sub i32 %max582, %before ; <i32> [#uses=1]
- %tmp585 = add i32 %tmp584, 1 ; <i32> [#uses=2]
- %tmp586 = icmp sgt i32 %tmp585, -1 ; <i1> [#uses=1]
- %max587 = select i1 %tmp586, i32 %tmp585, i32 0 ; <i32> [#uses=1]
- %tmp591 = load %struct.RETURN** %tmp63, align 4 ; <%struct.RETURN*> [#uses=1]
- %tmp592 = getelementptr %struct.RETURN* %tmp591, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp593 = load i32* %tmp592, align 4 ; <i32> [#uses=1]
- %tmp597 = load i8** %tmp315, align 4 ; <i8*> [#uses=1]
- %tmp600621 = trunc i64 %new_item.0.0 to i32 ; <i32> [#uses=1]
- %tmp600622 = inttoptr i32 %tmp600621 to i8* ; <i8*> [#uses=1]
- %tmp601 = sub i32 %before, %tmp593 ; <i32> [#uses=1]
- %tmp602 = getelementptr i8* %tmp597, i32 %tmp601 ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32( i8* %tmp602, i8* %tmp600622, i32 %max587, i32 1 )
- %tmp606 = load i32* %tmp71, align 4 ; <i32> [#uses=1]
- %tmp613 = add i32 %tmp606, %max154 ; <i32> [#uses=1]
- store i32 %tmp613, i32* %tmp71, align 4
- ret void
-}
diff --git a/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll b/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
new file mode 100644
index 0000000..cd446d5
--- /dev/null
+++ b/test/CodeGen/Generic/2011-07-07-ScheduleDAGCrash.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s
+; This caused ScheduleDAG to crash in EmitPhysRegCopy when searching
+; the uses of a copy to a physical register without ignoring non-data
+; dependence, PR10220.
+
+define void @f(i256* nocapture %a, i256* nocapture %b, i256* nocapture %cc, i256* nocapture %dd) nounwind uwtable noinline ssp {
+entry:
+ %c = load i256* %cc
+ %d = load i256* %dd
+ %add = add nsw i256 %c, %d
+ store i256 %add, i256* %a, align 8
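+ ; The huge constant below is 2^200, so the 'or' can only affect the upper bits of %c.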
+ %or = or i256 %c, 1606938044258990275541962092341162602522202993782792835301376
+ %add6 = add nsw i256 %or, %d
+ store i256 %add6, i256* %b, align 8
+ ret void
+}
diff --git a/test/CodeGen/Generic/BurgBadRegAlloc.ll b/test/CodeGen/Generic/BurgBadRegAlloc.ll
deleted file mode 100644
index 99d856a..0000000
--- a/test/CodeGen/Generic/BurgBadRegAlloc.ll
+++ /dev/null
@@ -1,829 +0,0 @@
-; RUN: llc < %s
-
-;; Register allocation is doing a very poor job on this routine from yyparse
-;; in Burg:
-;; -- at least two long-lived values are being allocated to %o? registers
-;; -- even worse, those registers are being saved and restored repeatedly
-;; at function calls, even though there are no intervening uses.
-;; -- outgoing args of some function calls have to be swapped, causing
-;; another write/read from stack to do the exchange (use -dregalloc=y).
-;;
-%Arity = type %struct.arity*
- %Binding = type %struct.binding*
- %DeltaCost = type [4 x i16]
- %Dimension = type %struct.dimension*
- %Index_Map = type { i32, %Item_Set* }
- %IntList = type %struct.intlist*
- %Item = type { %DeltaCost, %Rule }
- %ItemArray = type %Item*
- %Item_Set = type %struct.item_set*
- %List = type %struct.list*
- %Mapping = type %struct.mapping*
- %NonTerminal = type %struct.nonterminal*
- %Operator = type %struct.operator*
- %Pattern = type %struct.pattern*
- %PatternAST = type %struct.patternAST*
- %Plank = type %struct.plank*
- %PlankMap = type %struct.plankMap*
- %ReadFn = type i32 ()*
- %Rule = type %struct.rule*
- %RuleAST = type %struct.ruleAST*
- %StateMap = type %struct.stateMap*
- %StrTableElement = type %struct.strTableElement*
- %Symbol = type %struct.symbol*
- %Table = type %struct.table*
- %YYSTYPE = type { %IntList }
- %struct.arity = type { i32, %List }
- %struct.binding = type { i8*, i32 }
- %struct.dimension = type { i16*, %Index_Map, %Mapping, i32, %PlankMap }
- %struct.index_map = type { i32, %Item_Set* }
- %struct.intlist = type { i32, %IntList }
- %struct.item = type { %DeltaCost, %Rule }
- %struct.item_set = type { i32, i32, %Operator, [2 x %Item_Set], %Item_Set, i16*, %ItemArray, %ItemArray }
- %struct.list = type { i8*, %List }
- %struct.mapping = type { %List*, i32, i32, i32, %Item_Set* }
- %struct.nonterminal = type { i8*, i32, i32, i32, %PlankMap, %Rule }
- %struct.operator = type { i8*, i32, i32, i32, i32, i32, %Table }
- %struct.pattern = type { %NonTerminal, %Operator, [2 x %NonTerminal] }
- %struct.patternAST = type { %Symbol, i8*, %List }
- %struct.plank = type { i8*, %List, i32 }
- %struct.plankMap = type { %List, i32, %StateMap }
- %struct.rule = type { %DeltaCost, i32, i32, i32, %NonTerminal, %Pattern, i32 }
- %struct.ruleAST = type { i8*, %PatternAST, i32, %IntList, %Rule, %StrTableElement, %StrTableElement }
- %struct.stateMap = type { i8*, %Plank, i32, i16* }
- %struct.strTableElement = type { i8*, %IntList, i8* }
- %struct.symbol = type { i8*, i32, { %Operator } }
- %struct.table = type { %Operator, %List, i16*, [2 x %Dimension], %Item_Set* }
-@yylval = external global %YYSTYPE ; <%YYSTYPE*> [#uses=1]
-@yylhs = external global [25 x i16] ; <[25 x i16]*> [#uses=1]
-@yylen = external global [25 x i16] ; <[25 x i16]*> [#uses=1]
-@yydefred = external global [43 x i16] ; <[43 x i16]*> [#uses=1]
-@yydgoto = external global [12 x i16] ; <[12 x i16]*> [#uses=1]
-@yysindex = external global [43 x i16] ; <[43 x i16]*> [#uses=2]
-@yyrindex = external global [43 x i16] ; <[43 x i16]*> [#uses=1]
-@yygindex = external global [12 x i16] ; <[12 x i16]*> [#uses=1]
-@yytable = external global [263 x i16] ; <[263 x i16]*> [#uses=4]
-@yycheck = external global [263 x i16] ; <[263 x i16]*> [#uses=4]
-@yynerrs = external global i32 ; <i32*> [#uses=3]
-@yyerrflag = external global i32 ; <i32*> [#uses=6]
-@yychar = external global i32 ; <i32*> [#uses=15]
-@yyssp = external global i16* ; <i16**> [#uses=15]
-@yyvsp = external global %YYSTYPE* ; <%YYSTYPE**> [#uses=30]
-@yyval = external global %YYSTYPE ; <%YYSTYPE*> [#uses=1]
-@yyss = external global i16* ; <i16**> [#uses=3]
-@yysslim = external global i16* ; <i16**> [#uses=3]
-@yyvs = external global %YYSTYPE* ; <%YYSTYPE**> [#uses=1]
-@.LC01 = external global [13 x i8] ; <[13 x i8]*> [#uses=1]
-@.LC1 = external global [20 x i8] ; <[20 x i8]*> [#uses=1]
-
-define i32 @yyparse() {
-bb0:
- store i32 0, i32* @yynerrs
- store i32 0, i32* @yyerrflag
- store i32 -1, i32* @yychar
- %reg113 = load i16** @yyss ; <i16*> [#uses=1]
- %cond581 = icmp ne i16* %reg113, null ; <i1> [#uses=1]
- br i1 %cond581, label %bb3, label %bb2
-
-bb2: ; preds = %bb0
- %reg584 = call i32 @yygrowstack( ) ; <i32> [#uses=1]
- %cond584 = icmp ne i32 %reg584, 0 ; <i1> [#uses=1]
- br i1 %cond584, label %bb113, label %bb3
-
-bb3: ; preds = %bb2, %bb0
- %reg115 = load i16** @yyss ; <i16*> [#uses=1]
- store i16* %reg115, i16** @yyssp
- %reg116 = load %YYSTYPE** @yyvs ; <%YYSTYPE*> [#uses=1]
- store %YYSTYPE* %reg116, %YYSTYPE** @yyvsp
- %reg117 = load i16** @yyssp ; <i16*> [#uses=1]
- store i16 0, i16* %reg117
- br label %bb4
-
-bb4: ; preds = %bb112, %bb102, %bb35, %bb31, %bb15, %bb14, %bb3
- %reg458 = phi i32 [ %reg476, %bb112 ], [ 1, %bb102 ], [ %reg458, %bb35 ], [ %cast768, %bb31 ], [ %cast658, %bb15 ], [ %cast658, %bb14 ], [ 0, %bb3 ] ; <i32> [#uses=2]
- %reg458-idxcast = zext i32 %reg458 to i64 ; <i64> [#uses=3]
- %reg594 = getelementptr [43 x i16]* @yydefred, i64 0, i64 %reg458-idxcast ; <i16*> [#uses=1]
- %reg125 = load i16* %reg594 ; <i16> [#uses=1]
- %cast599 = sext i16 %reg125 to i32 ; <i32> [#uses=2]
- %cond600 = icmp ne i32 %cast599, 0 ; <i1> [#uses=1]
- br i1 %cond600, label %bb36, label %bb5
-
-bb5: ; preds = %bb4
- %reg127 = load i32* @yychar ; <i32> [#uses=1]
- %cond603 = icmp sge i32 %reg127, 0 ; <i1> [#uses=1]
- br i1 %cond603, label %bb8, label %bb6
-
-bb6: ; preds = %bb5
- %reg607 = call i32 @yylex( ) ; <i32> [#uses=1]
- store i32 %reg607, i32* @yychar
- %reg129 = load i32* @yychar ; <i32> [#uses=1]
- %cond609 = icmp sge i32 %reg129, 0 ; <i1> [#uses=1]
- br i1 %cond609, label %bb8, label %bb7
-
-bb7: ; preds = %bb6
- store i32 0, i32* @yychar
- br label %bb8
-
-bb8: ; preds = %bb7, %bb6, %bb5
- %reg615 = getelementptr [43 x i16]* @yysindex, i64 0, i64 %reg458-idxcast ; <i16*> [#uses=1]
- %reg137 = load i16* %reg615 ; <i16> [#uses=1]
- %cast620 = sext i16 %reg137 to i32 ; <i32> [#uses=2]
- %cond621 = icmp eq i32 %cast620, 0 ; <i1> [#uses=1]
- br i1 %cond621, label %bb16, label %bb9
-
-bb9: ; preds = %bb8
- %reg139 = load i32* @yychar ; <i32> [#uses=2]
- %reg460 = add i32 %cast620, %reg139 ; <i32> [#uses=3]
- %cond624 = icmp slt i32 %reg460, 0 ; <i1> [#uses=1]
- br i1 %cond624, label %bb16, label %bb10
-
-bb10: ; preds = %bb9
- %cond627 = icmp sgt i32 %reg460, 262 ; <i1> [#uses=1]
- br i1 %cond627, label %bb16, label %bb11
-
-bb11: ; preds = %bb10
- %reg460-idxcast = sext i32 %reg460 to i64 ; <i64> [#uses=2]
- %reg632 = getelementptr [263 x i16]* @yycheck, i64 0, i64 %reg460-idxcast ; <i16*> [#uses=1]
- %reg148 = load i16* %reg632 ; <i16> [#uses=1]
- %cast637 = sext i16 %reg148 to i32 ; <i32> [#uses=1]
- %cond639 = icmp ne i32 %cast637, %reg139 ; <i1> [#uses=1]
- br i1 %cond639, label %bb16, label %bb12
-
-bb12: ; preds = %bb11
- %reg150 = load i16** @yyssp ; <i16*> [#uses=1]
- %cast640 = bitcast i16* %reg150 to i8* ; <i8*> [#uses=1]
- %reg151 = load i16** @yysslim ; <i16*> [#uses=1]
- %cast641 = bitcast i16* %reg151 to i8* ; <i8*> [#uses=1]
- %cond642 = icmp ult i8* %cast640, %cast641 ; <i1> [#uses=1]
- br i1 %cond642, label %bb14, label %bb13
-
-bb13: ; preds = %bb12
- %reg644 = call i32 @yygrowstack( ) ; <i32> [#uses=1]
- %cond644 = icmp ne i32 %reg644, 0 ; <i1> [#uses=1]
- br i1 %cond644, label %bb113, label %bb14
-
-bb14: ; preds = %bb13, %bb12
- %reg153 = load i16** @yyssp ; <i16*> [#uses=1]
- %reg647 = getelementptr i16* %reg153, i64 1 ; <i16*> [#uses=2]
- store i16* %reg647, i16** @yyssp
- %reg653 = getelementptr [263 x i16]* @yytable, i64 0, i64 %reg460-idxcast ; <i16*> [#uses=1]
- %reg162 = load i16* %reg653 ; <i16> [#uses=2]
- %cast658 = sext i16 %reg162 to i32 ; <i32> [#uses=2]
- store i16 %reg162, i16* %reg647
- %reg164 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %reg661 = getelementptr %YYSTYPE* %reg164, i64 1 ; <%YYSTYPE*> [#uses=1]
- store %YYSTYPE* %reg661, %YYSTYPE** @yyvsp
- %reg167 = load %IntList* getelementptr (%YYSTYPE* @yylval, i64 0, i32 0) ; <%IntList> [#uses=1]
- %reg661.idx1 = getelementptr %YYSTYPE* %reg164, i64 1, i32 0 ; <%IntList*> [#uses=1]
- store %IntList %reg167, %IntList* %reg661.idx1
- store i32 -1, i32* @yychar
- %reg169 = load i32* @yyerrflag ; <i32> [#uses=2]
- %cond669 = icmp sle i32 %reg169, 0 ; <i1> [#uses=1]
- br i1 %cond669, label %bb4, label %bb15
-
-bb15: ; preds = %bb14
- %reg171 = add i32 %reg169, -1 ; <i32> [#uses=1]
- store i32 %reg171, i32* @yyerrflag
- br label %bb4
-
-bb16: ; preds = %bb11, %bb10, %bb9, %bb8
- %reg677 = getelementptr [43 x i16]* @yyrindex, i64 0, i64 %reg458-idxcast ; <i16*> [#uses=1]
- %reg178 = load i16* %reg677 ; <i16> [#uses=1]
- %cast682 = sext i16 %reg178 to i32 ; <i32> [#uses=2]
- %cond683 = icmp eq i32 %cast682, 0 ; <i1> [#uses=1]
- br i1 %cond683, label %bb21, label %bb17
-
-bb17: ; preds = %bb16
- %reg180 = load i32* @yychar ; <i32> [#uses=2]
- %reg463 = add i32 %cast682, %reg180 ; <i32> [#uses=3]
- %cond686 = icmp slt i32 %reg463, 0 ; <i1> [#uses=1]
- br i1 %cond686, label %bb21, label %bb18
-
-bb18: ; preds = %bb17
- %cond689 = icmp sgt i32 %reg463, 262 ; <i1> [#uses=1]
- br i1 %cond689, label %bb21, label %bb19
-
-bb19: ; preds = %bb18
- %reg463-idxcast = sext i32 %reg463 to i64 ; <i64> [#uses=2]
- %reg694 = getelementptr [263 x i16]* @yycheck, i64 0, i64 %reg463-idxcast ; <i16*> [#uses=1]
- %reg189 = load i16* %reg694 ; <i16> [#uses=1]
- %cast699 = sext i16 %reg189 to i32 ; <i32> [#uses=1]
- %cond701 = icmp ne i32 %cast699, %reg180 ; <i1> [#uses=1]
- br i1 %cond701, label %bb21, label %bb20
-
-bb20: ; preds = %bb19
- %reg704 = getelementptr [263 x i16]* @yytable, i64 0, i64 %reg463-idxcast ; <i16*> [#uses=1]
- %reg197 = load i16* %reg704 ; <i16> [#uses=1]
- %cast709 = sext i16 %reg197 to i32 ; <i32> [#uses=1]
- br label %bb36
-
-bb21: ; preds = %bb19, %bb18, %bb17, %bb16
- %reg198 = load i32* @yyerrflag ; <i32> [#uses=1]
- %cond711 = icmp ne i32 %reg198, 0 ; <i1> [#uses=1]
- br i1 %cond711, label %bb23, label %bb22
-
-bb22: ; preds = %bb21
- call void @yyerror( i8* getelementptr ([13 x i8]* @.LC01, i64 0, i64 0) )
- %reg200 = load i32* @yynerrs ; <i32> [#uses=1]
- %reg201 = add i32 %reg200, 1 ; <i32> [#uses=1]
- store i32 %reg201, i32* @yynerrs
- br label %bb23
-
-bb23: ; preds = %bb22, %bb21
- %reg202 = load i32* @yyerrflag ; <i32> [#uses=1]
- %cond719 = icmp sgt i32 %reg202, 2 ; <i1> [#uses=1]
- br i1 %cond719, label %bb34, label %bb24
-
-bb24: ; preds = %bb23
- store i32 3, i32* @yyerrflag
- %reg241 = load i16** @yyss ; <i16*> [#uses=1]
- %cast778 = bitcast i16* %reg241 to i8* ; <i8*> [#uses=1]
- br label %bb25
-
-bb25: ; preds = %bb33, %bb24
- %reg204 = load i16** @yyssp ; <i16*> [#uses=4]
- %reg206 = load i16* %reg204 ; <i16> [#uses=1]
- %reg206-idxcast = sext i16 %reg206 to i64 ; <i64> [#uses=1]
- %reg727 = getelementptr [43 x i16]* @yysindex, i64 0, i64 %reg206-idxcast ; <i16*> [#uses=1]
- %reg212 = load i16* %reg727 ; <i16> [#uses=2]
- %cast732 = sext i16 %reg212 to i32 ; <i32> [#uses=2]
- %cond733 = icmp eq i32 %cast732, 0 ; <i1> [#uses=1]
- br i1 %cond733, label %bb32, label %bb26
-
-bb26: ; preds = %bb25
- %reg466 = add i32 %cast732, 256 ; <i32> [#uses=2]
- %cond736 = icmp slt i32 %reg466, 0 ; <i1> [#uses=1]
- br i1 %cond736, label %bb32, label %bb27
-
-bb27: ; preds = %bb26
- %cond739 = icmp sgt i32 %reg466, 262 ; <i1> [#uses=1]
- br i1 %cond739, label %bb32, label %bb28
-
-bb28: ; preds = %bb27
- %reg212-idxcast = sext i16 %reg212 to i64 ; <i64> [#uses=1]
- %reg212-idxcast-offset = add i64 %reg212-idxcast, 256 ; <i64> [#uses=2]
- %reg744 = getelementptr [263 x i16]* @yycheck, i64 0, i64 %reg212-idxcast-offset ; <i16*> [#uses=1]
- %reg221 = load i16* %reg744 ; <i16> [#uses=1]
- %cond748 = icmp ne i16 %reg221, 256 ; <i1> [#uses=1]
- br i1 %cond748, label %bb32, label %bb29
-
-bb29: ; preds = %bb28
- %cast750 = bitcast i16* %reg204 to i8* ; <i8*> [#uses=1]
- %reg223 = load i16** @yysslim ; <i16*> [#uses=1]
- %cast751 = bitcast i16* %reg223 to i8* ; <i8*> [#uses=1]
- %cond752 = icmp ult i8* %cast750, %cast751 ; <i1> [#uses=1]
- br i1 %cond752, label %bb31, label %bb30
-
-bb30: ; preds = %bb29
- %reg754 = call i32 @yygrowstack( ) ; <i32> [#uses=1]
- %cond754 = icmp ne i32 %reg754, 0 ; <i1> [#uses=1]
- br i1 %cond754, label %bb113, label %bb31
-
-bb31: ; preds = %bb30, %bb29
- %reg225 = load i16** @yyssp ; <i16*> [#uses=1]
- %reg757 = getelementptr i16* %reg225, i64 1 ; <i16*> [#uses=2]
- store i16* %reg757, i16** @yyssp
- %reg763 = getelementptr [263 x i16]* @yytable, i64 0, i64 %reg212-idxcast-offset ; <i16*> [#uses=1]
- %reg234 = load i16* %reg763 ; <i16> [#uses=2]
- %cast768 = sext i16 %reg234 to i32 ; <i32> [#uses=1]
- store i16 %reg234, i16* %reg757
- %reg236 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %reg771 = getelementptr %YYSTYPE* %reg236, i64 1 ; <%YYSTYPE*> [#uses=1]
- store %YYSTYPE* %reg771, %YYSTYPE** @yyvsp
- %reg239 = load %IntList* getelementptr (%YYSTYPE* @yylval, i64 0, i32 0) ; <%IntList> [#uses=1]
- %reg771.idx1 = getelementptr %YYSTYPE* %reg236, i64 1, i32 0 ; <%IntList*> [#uses=1]
- store %IntList %reg239, %IntList* %reg771.idx1
- br label %bb4
-
-bb32: ; preds = %bb28, %bb27, %bb26, %bb25
- %cast777 = bitcast i16* %reg204 to i8* ; <i8*> [#uses=1]
- %cond779 = icmp ule i8* %cast777, %cast778 ; <i1> [#uses=1]
- br i1 %cond779, label %UnifiedExitNode, label %bb33
-
-bb33: ; preds = %bb32
- %reg781 = getelementptr i16* %reg204, i64 -1 ; <i16*> [#uses=1]
- store i16* %reg781, i16** @yyssp
- %reg244 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=1]
- %reg786 = getelementptr %YYSTYPE* %reg244, i64 -1 ; <%YYSTYPE*> [#uses=1]
- store %YYSTYPE* %reg786, %YYSTYPE** @yyvsp
- br label %bb25
-
-bb34: ; preds = %bb23
- %reg246 = load i32* @yychar ; <i32> [#uses=1]
- %cond791 = icmp eq i32 %reg246, 0 ; <i1> [#uses=1]
- br i1 %cond791, label %UnifiedExitNode, label %bb35
-
-bb35: ; preds = %bb34
- store i32 -1, i32* @yychar
- br label %bb4
-
-bb36: ; preds = %bb20, %bb4
- %reg468 = phi i32 [ %cast709, %bb20 ], [ %cast599, %bb4 ] ; <i32> [#uses=31]
- %reg468-idxcast = sext i32 %reg468 to i64 ; <i64> [#uses=2]
- %reg796 = getelementptr [25 x i16]* @yylen, i64 0, i64 %reg468-idxcast ; <i16*> [#uses=1]
- %reg254 = load i16* %reg796 ; <i16> [#uses=2]
- %reg259 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=1]
- %reg254-idxcast = sext i16 %reg254 to i64 ; <i64> [#uses=1]
- %reg254-idxcast-scale = mul i64 %reg254-idxcast, -1 ; <i64> [#uses=1]
- %reg254-idxcast-scale-offset = add i64 %reg254-idxcast-scale, 1 ; <i64> [#uses=1]
- %reg261.idx1 = getelementptr %YYSTYPE* %reg259, i64 %reg254-idxcast-scale-offset, i32 0 ; <%IntList*> [#uses=1]
- %reg261 = load %IntList* %reg261.idx1 ; <%IntList> [#uses=1]
- store %IntList %reg261, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- %cond812 = icmp eq i32 %reg468, 13 ; <i1> [#uses=1]
- br i1 %cond812, label %bb85, label %bb37
-
-bb37: ; preds = %bb36
- %cond814 = icmp sgt i32 %reg468, 13 ; <i1> [#uses=1]
- br i1 %cond814, label %bb56, label %bb38
-
-bb38: ; preds = %bb37
- %cond817 = icmp eq i32 %reg468, 7 ; <i1> [#uses=1]
- br i1 %cond817, label %bb79, label %bb39
-
-bb39: ; preds = %bb38
- %cond819 = icmp sgt i32 %reg468, 7 ; <i1> [#uses=1]
- br i1 %cond819, label %bb48, label %bb40
-
-bb40: ; preds = %bb39
- %cond822 = icmp eq i32 %reg468, 4 ; <i1> [#uses=1]
- br i1 %cond822, label %bb76, label %bb41
-
-bb41: ; preds = %bb40
- %cond824 = icmp sgt i32 %reg468, 4 ; <i1> [#uses=1]
- br i1 %cond824, label %bb45, label %bb42
-
-bb42: ; preds = %bb41
- %cond827 = icmp eq i32 %reg468, 2 ; <i1> [#uses=1]
- br i1 %cond827, label %bb74, label %bb43
-
-bb43: ; preds = %bb42
- %cond829 = icmp eq i32 %reg468, 3 ; <i1> [#uses=1]
- br i1 %cond829, label %bb75, label %bb97
-
-bb45: ; preds = %bb41
- %cond831 = icmp eq i32 %reg468, 5 ; <i1> [#uses=1]
- br i1 %cond831, label %bb77, label %bb46
-
-bb46: ; preds = %bb45
- %cond833 = icmp eq i32 %reg468, 6 ; <i1> [#uses=1]
- br i1 %cond833, label %bb78, label %bb97
-
-bb48: ; preds = %bb39
- %cond835 = icmp eq i32 %reg468, 10 ; <i1> [#uses=1]
- br i1 %cond835, label %bb82, label %bb49
-
-bb49: ; preds = %bb48
- %cond837 = icmp sgt i32 %reg468, 10 ; <i1> [#uses=1]
- br i1 %cond837, label %bb53, label %bb50
-
-bb50: ; preds = %bb49
- %cond840 = icmp eq i32 %reg468, 8 ; <i1> [#uses=1]
- br i1 %cond840, label %bb80, label %bb51
-
-bb51: ; preds = %bb50
- %cond842 = icmp eq i32 %reg468, 9 ; <i1> [#uses=1]
- br i1 %cond842, label %bb81, label %bb97
-
-bb53: ; preds = %bb49
- %cond844 = icmp eq i32 %reg468, 11 ; <i1> [#uses=1]
- br i1 %cond844, label %bb83, label %bb54
-
-bb54: ; preds = %bb53
- %cond846 = icmp eq i32 %reg468, 12 ; <i1> [#uses=1]
- br i1 %cond846, label %bb84, label %bb97
-
-bb56: ; preds = %bb37
- %cond848 = icmp eq i32 %reg468, 19 ; <i1> [#uses=1]
- br i1 %cond848, label %bb91, label %bb57
-
-bb57: ; preds = %bb56
- %cond850 = icmp sgt i32 %reg468, 19 ; <i1> [#uses=1]
- br i1 %cond850, label %bb66, label %bb58
-
-bb58: ; preds = %bb57
- %cond853 = icmp eq i32 %reg468, 16 ; <i1> [#uses=1]
- br i1 %cond853, label %bb88, label %bb59
-
-bb59: ; preds = %bb58
- %cond855 = icmp sgt i32 %reg468, 16 ; <i1> [#uses=1]
- br i1 %cond855, label %bb63, label %bb60
-
-bb60: ; preds = %bb59
- %cond858 = icmp eq i32 %reg468, 14 ; <i1> [#uses=1]
- br i1 %cond858, label %bb86, label %bb61
-
-bb61: ; preds = %bb60
- %cond860 = icmp eq i32 %reg468, 15 ; <i1> [#uses=1]
- br i1 %cond860, label %bb87, label %bb97
-
-bb63: ; preds = %bb59
- %cond862 = icmp eq i32 %reg468, 17 ; <i1> [#uses=1]
- br i1 %cond862, label %bb89, label %bb64
-
-bb64: ; preds = %bb63
- %cond864 = icmp eq i32 %reg468, 18 ; <i1> [#uses=1]
- br i1 %cond864, label %bb90, label %bb97
-
-bb66: ; preds = %bb57
- %cond866 = icmp eq i32 %reg468, 22 ; <i1> [#uses=1]
- br i1 %cond866, label %bb94, label %bb67
-
-bb67: ; preds = %bb66
- %cond868 = icmp sgt i32 %reg468, 22 ; <i1> [#uses=1]
- br i1 %cond868, label %bb71, label %bb68
-
-bb68: ; preds = %bb67
- %cond871 = icmp eq i32 %reg468, 20 ; <i1> [#uses=1]
- br i1 %cond871, label %bb92, label %bb69
-
-bb69: ; preds = %bb68
- %cond873 = icmp eq i32 %reg468, 21 ; <i1> [#uses=1]
- br i1 %cond873, label %bb93, label %bb97
-
-bb71: ; preds = %bb67
- %cond875 = icmp eq i32 %reg468, 23 ; <i1> [#uses=1]
- br i1 %cond875, label %bb95, label %bb72
-
-bb72: ; preds = %bb71
- %cond877 = icmp eq i32 %reg468, 24 ; <i1> [#uses=1]
- br i1 %cond877, label %bb96, label %bb97
-
-bb74: ; preds = %bb42
- call void @yyfinished( )
- br label %bb97
-
-bb75: ; preds = %bb43
- %reg262 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %reg264.idx1 = getelementptr %YYSTYPE* %reg262, i64 -2, i32 0 ; <%IntList*> [#uses=1]
- %reg264 = load %IntList* %reg264.idx1 ; <%IntList> [#uses=1]
- %reg265.idx = getelementptr %YYSTYPE* %reg262, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg265 = load %IntList* %reg265.idx ; <%IntList> [#uses=1]
- %cast889 = bitcast %IntList %reg265 to %List ; <%List> [#uses=1]
- %cast890 = bitcast %IntList %reg264 to %List ; <%List> [#uses=1]
- call void @doSpec( %List %cast890, %List %cast889 )
- br label %bb97
-
-bb76: ; preds = %bb40
- store %IntList null, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb77: ; preds = %bb45
- %reg269 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %cast894 = getelementptr %YYSTYPE* %reg269, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg271 = load %IntList* %cast894 ; <%IntList> [#uses=1]
- %reg271.upgrd.1 = bitcast %IntList %reg271 to i8* ; <i8*> [#uses=1]
- %reg272.idx1 = getelementptr %YYSTYPE* %reg269, i64 -1, i32 0 ; <%IntList*> [#uses=1]
- %reg272 = load %IntList* %reg272.idx1 ; <%IntList> [#uses=1]
- %cast901 = bitcast %IntList %reg272 to %List ; <%List> [#uses=1]
- %reg901 = call %List @newList( i8* %reg271.upgrd.1, %List %cast901 ) ; <%List> [#uses=1]
- bitcast %List %reg901 to %IntList ; <%IntList>:0 [#uses=1]
- store %IntList %0, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb78: ; preds = %bb46
- %reg275 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=1]
- %reg277.idx = getelementptr %YYSTYPE* %reg275, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg277 = load %IntList* %reg277.idx ; <%IntList> [#uses=1]
- %cast907 = bitcast %IntList %reg277 to %List ; <%List> [#uses=1]
- %reg907 = call %Arity @newArity( i32 -1, %List %cast907 ) ; <%Arity> [#uses=1]
- bitcast %Arity %reg907 to %IntList ; <%IntList>:1 [#uses=1]
- store %IntList %1, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb79: ; preds = %bb38
- store %IntList null, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- %reg281 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=1]
- %cast912 = getelementptr %YYSTYPE* %reg281, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg282 = load %IntList* %cast912 ; <%IntList> [#uses=1]
- %reg282.upgrd.2 = bitcast %IntList %reg282 to %List ; <%List> [#uses=1]
- call void @doGram( %List %reg282.upgrd.2 )
- br label %bb97
-
-bb80: ; preds = %bb50
- store %IntList null, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- %reg285 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=1]
- %cast917 = getelementptr %YYSTYPE* %reg285, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg286 = load %IntList* %cast917 ; <%IntList> [#uses=1]
- %reg286.upgrd.3 = bitcast %IntList %reg286 to i8* ; <i8*> [#uses=1]
- call void @doStart( i8* %reg286.upgrd.3 )
- br label %bb97
-
-bb81: ; preds = %bb51
- store %IntList null, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb82: ; preds = %bb48
- %reg290 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %cast923 = getelementptr %YYSTYPE* %reg290, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg292 = load %IntList* %cast923 ; <%IntList> [#uses=1]
- %reg292.upgrd.4 = bitcast %IntList %reg292 to i8* ; <i8*> [#uses=1]
- %reg293.idx1 = getelementptr %YYSTYPE* %reg290, i64 -1, i32 0 ; <%IntList*> [#uses=1]
- %reg293 = load %IntList* %reg293.idx1 ; <%IntList> [#uses=1]
- %cast930 = bitcast %IntList %reg293 to %List ; <%List> [#uses=1]
- %reg930 = call %List @newList( i8* %reg292.upgrd.4, %List %cast930 ) ; <%List> [#uses=1]
- bitcast %List %reg930 to %IntList ; <%IntList>:2 [#uses=1]
- store %IntList %2, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb83: ; preds = %bb53
- store %IntList null, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb84: ; preds = %bb54
- %reg298 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %cast936 = getelementptr %YYSTYPE* %reg298, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg300 = load %IntList* %cast936 ; <%IntList> [#uses=1]
- %reg300.upgrd.5 = bitcast %IntList %reg300 to i8* ; <i8*> [#uses=1]
- %reg301.idx1 = getelementptr %YYSTYPE* %reg298, i64 -1, i32 0 ; <%IntList*> [#uses=1]
- %reg301 = load %IntList* %reg301.idx1 ; <%IntList> [#uses=1]
- %cast943 = bitcast %IntList %reg301 to %List ; <%List> [#uses=1]
- %reg943 = call %List @newList( i8* %reg300.upgrd.5, %List %cast943 ) ; <%List> [#uses=1]
- bitcast %List %reg943 to %IntList ; <%IntList>:3 [#uses=1]
- store %IntList %3, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb85: ; preds = %bb36
- %reg304 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %cast9521 = getelementptr %YYSTYPE* %reg304, i64 -2, i32 0 ; <%IntList*> [#uses=1]
- %reg306 = load %IntList* %cast9521 ; <%IntList> [#uses=1]
- %reg306.upgrd.6 = bitcast %IntList %reg306 to i8* ; <i8*> [#uses=1]
- %cast953 = bitcast %YYSTYPE* %reg304 to i32* ; <i32*> [#uses=1]
- %reg307 = load i32* %cast953 ; <i32> [#uses=1]
- %reg955 = call %Binding @newBinding( i8* %reg306.upgrd.6, i32 %reg307 ) ; <%Binding> [#uses=1]
- bitcast %Binding %reg955 to %IntList ; <%IntList>:4 [#uses=1]
- store %IntList %4, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb86: ; preds = %bb60
- store %IntList null, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb87: ; preds = %bb61
- %reg312 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %cast961 = getelementptr %YYSTYPE* %reg312, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg314 = load %IntList* %cast961 ; <%IntList> [#uses=1]
- %reg314.upgrd.7 = bitcast %IntList %reg314 to i8* ; <i8*> [#uses=1]
- %reg315.idx1 = getelementptr %YYSTYPE* %reg312, i64 -1, i32 0 ; <%IntList*> [#uses=1]
- %reg315 = load %IntList* %reg315.idx1 ; <%IntList> [#uses=1]
- %cast968 = bitcast %IntList %reg315 to %List ; <%List> [#uses=1]
- %reg968 = call %List @newList( i8* %reg314.upgrd.7, %List %cast968 ) ; <%List> [#uses=1]
- bitcast %List %reg968 to %IntList ; <%IntList>:5 [#uses=1]
- store %IntList %5, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb88: ; preds = %bb58
- %reg318 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=4]
- %cast9791 = getelementptr %YYSTYPE* %reg318, i64 -6, i32 0 ; <%IntList*> [#uses=1]
- %reg322 = load %IntList* %cast9791 ; <%IntList> [#uses=1]
- %reg322.upgrd.8 = bitcast %IntList %reg322 to i8* ; <i8*> [#uses=1]
- %reg323.idx1 = getelementptr %YYSTYPE* %reg318, i64 -4, i32 0 ; <%IntList*> [#uses=1]
- %reg323 = load %IntList* %reg323.idx1 ; <%IntList> [#uses=1]
- %reg987 = getelementptr %YYSTYPE* %reg318, i64 -2 ; <%YYSTYPE*> [#uses=1]
- %cast989 = bitcast %YYSTYPE* %reg987 to i32* ; <i32*> [#uses=1]
- %reg324 = load i32* %cast989 ; <i32> [#uses=1]
- %reg325.idx1 = getelementptr %YYSTYPE* %reg318, i64 -1, i32 0 ; <%IntList*> [#uses=1]
- %reg325 = load %IntList* %reg325.idx1 ; <%IntList> [#uses=1]
- %cast998 = bitcast %IntList %reg323 to %PatternAST ; <%PatternAST> [#uses=1]
- %reg996 = call %RuleAST @newRuleAST( i8* %reg322.upgrd.8, %PatternAST %cast998, i32 %reg324, %IntList %reg325 ) ; <%RuleAST> [#uses=1]
- bitcast %RuleAST %reg996 to %IntList ; <%IntList>:6 [#uses=1]
- store %IntList %6, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb89: ; preds = %bb63
- %reg328 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=1]
- %cast1002 = getelementptr %YYSTYPE* %reg328, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg329 = load %IntList* %cast1002 ; <%IntList> [#uses=1]
- %reg329.upgrd.9 = bitcast %IntList %reg329 to i8* ; <i8*> [#uses=1]
- %reg1004 = call %PatternAST @newPatternAST( i8* %reg329.upgrd.9, %List null ) ; <%PatternAST> [#uses=1]
- bitcast %PatternAST %reg1004 to %IntList ; <%IntList>:7 [#uses=1]
- store %IntList %7, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb90: ; preds = %bb64
- %reg333 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %cast10131 = getelementptr %YYSTYPE* %reg333, i64 -1, i32 0 ; <%IntList*> [#uses=1]
- %reg335 = load %IntList* %cast10131 ; <%IntList> [#uses=1]
- %reg335.upgrd.10 = bitcast %IntList %reg335 to i8* ; <i8*> [#uses=1]
- %reg1015 = call %List @newList( i8* %reg335.upgrd.10, %List null ) ; <%List> [#uses=1]
- %cast10211 = getelementptr %YYSTYPE* %reg333, i64 -3, i32 0 ; <%IntList*> [#uses=1]
- %reg338 = load %IntList* %cast10211 ; <%IntList> [#uses=1]
- %reg338.upgrd.11 = bitcast %IntList %reg338 to i8* ; <i8*> [#uses=1]
- %reg1023 = call %PatternAST @newPatternAST( i8* %reg338.upgrd.11, %List %reg1015 ) ; <%PatternAST> [#uses=1]
- bitcast %PatternAST %reg1023 to %IntList ; <%IntList>:8 [#uses=1]
- store %IntList %8, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb91: ; preds = %bb56
- %reg341 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=3]
- %cast10331 = getelementptr %YYSTYPE* %reg341, i64 -1, i32 0 ; <%IntList*> [#uses=1]
- %reg344 = load %IntList* %cast10331 ; <%IntList> [#uses=1]
- %reg344.upgrd.12 = bitcast %IntList %reg344 to i8* ; <i8*> [#uses=1]
- %reg1035 = call %List @newList( i8* %reg344.upgrd.12, %List null ) ; <%List> [#uses=1]
- %cast10411 = getelementptr %YYSTYPE* %reg341, i64 -3, i32 0 ; <%IntList*> [#uses=1]
- %reg347 = load %IntList* %cast10411 ; <%IntList> [#uses=1]
- %reg347.upgrd.13 = bitcast %IntList %reg347 to i8* ; <i8*> [#uses=1]
- %reg1043 = call %List @newList( i8* %reg347.upgrd.13, %List %reg1035 ) ; <%List> [#uses=1]
- %cast10491 = getelementptr %YYSTYPE* %reg341, i64 -5, i32 0 ; <%IntList*> [#uses=1]
- %reg349 = load %IntList* %cast10491 ; <%IntList> [#uses=1]
- %reg349.upgrd.14 = bitcast %IntList %reg349 to i8* ; <i8*> [#uses=1]
- %reg1051 = call %PatternAST @newPatternAST( i8* %reg349.upgrd.14, %List %reg1043 ) ; <%PatternAST> [#uses=1]
- bitcast %PatternAST %reg1051 to %IntList ; <%IntList>:9 [#uses=1]
- store %IntList %9, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb92: ; preds = %bb68
- store %IntList null, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb93: ; preds = %bb69
- %reg354 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %reg1059 = getelementptr %YYSTYPE* %reg354, i64 -2 ; <%YYSTYPE*> [#uses=1]
- %cast1061 = bitcast %YYSTYPE* %reg1059 to i32* ; <i32*> [#uses=1]
- %reg356 = load i32* %cast1061 ; <i32> [#uses=1]
- %reg357.idx1 = getelementptr %YYSTYPE* %reg354, i64 -1, i32 0 ; <%IntList*> [#uses=1]
- %reg357 = load %IntList* %reg357.idx1 ; <%IntList> [#uses=1]
- %reg1068 = call %IntList @newIntList( i32 %reg356, %IntList %reg357 ) ; <%IntList> [#uses=1]
- store %IntList %reg1068, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb94: ; preds = %bb66
- store %IntList null, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb95: ; preds = %bb71
- %reg362 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %reg1076 = getelementptr %YYSTYPE* %reg362, i64 -1 ; <%YYSTYPE*> [#uses=1]
- %cast1078 = bitcast %YYSTYPE* %reg1076 to i32* ; <i32*> [#uses=1]
- %reg364 = load i32* %cast1078 ; <i32> [#uses=1]
- %reg365.idx = getelementptr %YYSTYPE* %reg362, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg365 = load %IntList* %reg365.idx ; <%IntList> [#uses=1]
- %reg1081 = call %IntList @newIntList( i32 %reg364, %IntList %reg365 ) ; <%IntList> [#uses=1]
- store %IntList %reg1081, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb96: ; preds = %bb72
- %reg368 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %reg1088 = getelementptr %YYSTYPE* %reg368, i64 -1 ; <%YYSTYPE*> [#uses=1]
- %cast1090 = bitcast %YYSTYPE* %reg1088 to i32* ; <i32*> [#uses=1]
- %reg370 = load i32* %cast1090 ; <i32> [#uses=1]
- %reg371.idx = getelementptr %YYSTYPE* %reg368, i64 0, i32 0 ; <%IntList*> [#uses=1]
- %reg371 = load %IntList* %reg371.idx ; <%IntList> [#uses=1]
- %reg1093 = call %IntList @newIntList( i32 %reg370, %IntList %reg371 ) ; <%IntList> [#uses=1]
- store %IntList %reg1093, %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0)
- br label %bb97
-
-bb97: ; preds = %bb96, %bb95, %bb94, %bb93, %bb92, %bb91, %bb90, %bb89, %bb88, %bb87, %bb86, %bb85, %bb84, %bb83, %bb82, %bb81, %bb80, %bb79, %bb78, %bb77, %bb76, %bb75, %bb74, %bb72, %bb69, %bb64, %bb61, %bb54, %bb51, %bb46, %bb43
- %cast1097 = sext i16 %reg254 to i64 ; <i64> [#uses=3]
- %reg375 = add i64 %cast1097, %cast1097 ; <i64> [#uses=1]
- %reg377 = load i16** @yyssp ; <i16*> [#uses=1]
- %cast379 = ptrtoint i16* %reg377 to i64 ; <i64> [#uses=1]
- %reg381 = sub i64 %cast379, %reg375 ; <i64> [#uses=1]
- %cast1099 = inttoptr i64 %reg381 to i16* ; <i16*> [#uses=1]
- store i16* %cast1099, i16** @yyssp
- %reg382 = load i16** @yyssp ; <i16*> [#uses=3]
- %reg383 = load i16* %reg382 ; <i16> [#uses=1]
- %cast1103 = sext i16 %reg383 to i32 ; <i32> [#uses=3]
- %reg385 = mul i64 %cast1097, 8 ; <i64> [#uses=1]
- %reg387 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=1]
- %cast389 = ptrtoint %YYSTYPE* %reg387 to i64 ; <i64> [#uses=1]
- %reg391 = sub i64 %cast389, %reg385 ; <i64> [#uses=1]
- %cast1108 = inttoptr i64 %reg391 to %YYSTYPE* ; <%YYSTYPE*> [#uses=1]
- store %YYSTYPE* %cast1108, %YYSTYPE** @yyvsp
- %reg1111 = getelementptr [25 x i16]* @yylhs, i64 0, i64 %reg468-idxcast ; <i16*> [#uses=1]
- %reg398 = load i16* %reg1111 ; <i16> [#uses=2]
- %cast1116 = sext i16 %reg398 to i32 ; <i32> [#uses=1]
- %cond1117 = icmp ne i32 %cast1103, 0 ; <i1> [#uses=1]
- br i1 %cond1117, label %bb104, label %bb98
-
-bb98: ; preds = %bb97
- %cond1119 = icmp ne i32 %cast1116, 0 ; <i1> [#uses=1]
- br i1 %cond1119, label %bb104, label %bb99
-
-bb99: ; preds = %bb98
- %reg1122 = getelementptr i16* %reg382, i64 1 ; <i16*> [#uses=2]
- store i16* %reg1122, i16** @yyssp
- store i16 1, i16* %reg1122
- %reg403 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %reg1128 = getelementptr %YYSTYPE* %reg403, i64 1 ; <%YYSTYPE*> [#uses=1]
- store %YYSTYPE* %reg1128, %YYSTYPE** @yyvsp
- %reg406 = load %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0) ; <%IntList> [#uses=1]
- %reg1128.idx1 = getelementptr %YYSTYPE* %reg403, i64 1, i32 0 ; <%IntList*> [#uses=1]
- store %IntList %reg406, %IntList* %reg1128.idx1
- %reg407 = load i32* @yychar ; <i32> [#uses=1]
- %cond1135 = icmp sge i32 %reg407, 0 ; <i1> [#uses=1]
- br i1 %cond1135, label %bb102, label %bb100
-
-bb100: ; preds = %bb99
- %reg1139 = call i32 @yylex( ) ; <i32> [#uses=1]
- store i32 %reg1139, i32* @yychar
- %reg409 = load i32* @yychar ; <i32> [#uses=1]
- %cond1141 = icmp sge i32 %reg409, 0 ; <i1> [#uses=1]
- br i1 %cond1141, label %bb102, label %bb101
-
-bb101: ; preds = %bb100
- store i32 0, i32* @yychar
- br label %bb102
-
-bb102: ; preds = %bb101, %bb100, %bb99
- %reg411 = load i32* @yychar ; <i32> [#uses=1]
- %cond1146 = icmp ne i32 %reg411, 0 ; <i1> [#uses=1]
- br i1 %cond1146, label %bb4, label %UnifiedExitNode
-
-bb104: ; preds = %bb98, %bb97
- %reg398-idxcast = sext i16 %reg398 to i64 ; <i64> [#uses=2]
- %reg1150 = getelementptr [12 x i16]* @yygindex, i64 0, i64 %reg398-idxcast ; <i16*> [#uses=1]
- %reg418 = load i16* %reg1150 ; <i16> [#uses=1]
- %cast1155 = sext i16 %reg418 to i32 ; <i32> [#uses=2]
- %cond1156 = icmp eq i32 %cast1155, 0 ; <i1> [#uses=1]
- br i1 %cond1156, label %bb109, label %bb105
-
-bb105: ; preds = %bb104
- %reg473 = add i32 %cast1155, %cast1103 ; <i32> [#uses=3]
- %cond1158 = icmp slt i32 %reg473, 0 ; <i1> [#uses=1]
- br i1 %cond1158, label %bb109, label %bb106
-
-bb106: ; preds = %bb105
- %cond1161 = icmp sgt i32 %reg473, 262 ; <i1> [#uses=1]
- br i1 %cond1161, label %bb109, label %bb107
-
-bb107: ; preds = %bb106
- %reg473-idxcast = sext i32 %reg473 to i64 ; <i64> [#uses=2]
- %reg1166 = getelementptr [263 x i16]* @yycheck, i64 0, i64 %reg473-idxcast ; <i16*> [#uses=1]
- %reg428 = load i16* %reg1166 ; <i16> [#uses=1]
- %cast1171 = sext i16 %reg428 to i32 ; <i32> [#uses=1]
- %cond1172 = icmp ne i32 %cast1171, %cast1103 ; <i1> [#uses=1]
- br i1 %cond1172, label %bb109, label %bb108
-
-bb108: ; preds = %bb107
- %reg1175 = getelementptr [263 x i16]* @yytable, i64 0, i64 %reg473-idxcast ; <i16*> [#uses=1]
- %reg435 = load i16* %reg1175 ; <i16> [#uses=1]
- %cast1180 = sext i16 %reg435 to i32 ; <i32> [#uses=1]
- br label %bb110
-
-bb109: ; preds = %bb107, %bb106, %bb105, %bb104
- %reg1183 = getelementptr [12 x i16]* @yydgoto, i64 0, i64 %reg398-idxcast ; <i16*> [#uses=1]
- %reg442 = load i16* %reg1183 ; <i16> [#uses=1]
- %cast1188 = sext i16 %reg442 to i32 ; <i32> [#uses=1]
- br label %bb110
-
-bb110: ; preds = %bb109, %bb108
- %reg476 = phi i32 [ %cast1188, %bb109 ], [ %cast1180, %bb108 ] ; <i32> [#uses=2]
- %cast1189 = bitcast i16* %reg382 to i8* ; <i8*> [#uses=1]
- %reg444 = load i16** @yysslim ; <i16*> [#uses=1]
- %cast1190 = bitcast i16* %reg444 to i8* ; <i8*> [#uses=1]
- %cond1191 = icmp ult i8* %cast1189, %cast1190 ; <i1> [#uses=1]
- br i1 %cond1191, label %bb112, label %bb111
-
-bb111: ; preds = %bb110
- %reg1193 = call i32 @yygrowstack( ) ; <i32> [#uses=1]
- %cond1193 = icmp ne i32 %reg1193, 0 ; <i1> [#uses=1]
- br i1 %cond1193, label %bb113, label %bb112
-
-bb112: ; preds = %bb111, %bb110
- %reg446 = load i16** @yyssp ; <i16*> [#uses=1]
- %reg1196 = getelementptr i16* %reg446, i64 1 ; <i16*> [#uses=2]
- store i16* %reg1196, i16** @yyssp
- %cast1357 = trunc i32 %reg476 to i16 ; <i16> [#uses=1]
- store i16 %cast1357, i16* %reg1196
- %reg449 = load %YYSTYPE** @yyvsp ; <%YYSTYPE*> [#uses=2]
- %reg1202 = getelementptr %YYSTYPE* %reg449, i64 1 ; <%YYSTYPE*> [#uses=1]
- store %YYSTYPE* %reg1202, %YYSTYPE** @yyvsp
- %reg452 = load %IntList* getelementptr (%YYSTYPE* @yyval, i64 0, i32 0) ; <%IntList> [#uses=1]
- %reg1202.idx1 = getelementptr %YYSTYPE* %reg449, i64 1, i32 0 ; <%IntList*> [#uses=1]
- store %IntList %reg452, %IntList* %reg1202.idx1
- br label %bb4
-
-bb113: ; preds = %bb111, %bb30, %bb13, %bb2
- call void @yyerror( i8* getelementptr ([20 x i8]* @.LC1, i64 0, i64 0) )
- br label %UnifiedExitNode
-
-UnifiedExitNode: ; preds = %bb113, %bb102, %bb34, %bb32
- %UnifiedRetVal = phi i32 [ 1, %bb113 ], [ 1, %bb34 ], [ 1, %bb32 ], [ 0, %bb102 ] ; <i32> [#uses=1]
- ret i32 %UnifiedRetVal
-}
-
-declare %List @newList(i8*, %List)
-
-declare %IntList @newIntList(i32, %IntList)
-
-declare void @doStart(i8*)
-
-declare void @yyerror(i8*)
-
-declare void @doSpec(%List, %List)
-
-declare %Arity @newArity(i32, %List)
-
-declare %Binding @newBinding(i8*, i32)
-
-declare %PatternAST @newPatternAST(i8*, %List)
-
-declare %RuleAST @newRuleAST(i8*, %PatternAST, i32, %IntList)
-
-declare void @yyfinished()
-
-declare i32 @yylex()
-
-declare void @doGram(%List)
-
-declare i32 @yygrowstack()
diff --git a/test/CodeGen/Generic/badlive.ll b/test/CodeGen/Generic/badlive.ll
deleted file mode 100644
index 43b03e3..0000000
--- a/test/CodeGen/Generic/badlive.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llc < %s
-
-define i32 @main() {
-bb0:
- %reg109 = malloc i32, i32 100 ; <i32*> [#uses=2]
- br label %bb2
-
-bb2: ; preds = %bb2, %bb0
- %cann-indvar1 = phi i32 [ 0, %bb0 ], [ %add1-indvar1, %bb2 ] ; <i32> [#uses=2]
- %reg127 = mul i32 %cann-indvar1, 2 ; <i32> [#uses=1]
- %add1-indvar1 = add i32 %cann-indvar1, 1 ; <i32> [#uses=1]
- store i32 999, i32* %reg109
- %cond1015 = icmp sle i32 1, 99 ; <i1> [#uses=1]
- %reg128 = add i32 %reg127, 2 ; <i32> [#uses=0]
- br i1 %cond1015, label %bb2, label %bb4
-
-bb4: ; preds = %bb4, %bb2
- %cann-indvar = phi i32 [ %add1-indvar, %bb4 ], [ 0, %bb2 ] ; <i32> [#uses=1]
- %add1-indvar = add i32 %cann-indvar, 1 ; <i32> [#uses=2]
- store i32 333, i32* %reg109
- %reg131 = add i32 %add1-indvar, 3 ; <i32> [#uses=1]
- %cond1017 = icmp ule i32 %reg131, 99 ; <i1> [#uses=1]
- br i1 %cond1017, label %bb4, label %bb5
-
-bb5: ; preds = %bb4
- ret i32 0
-}
-
diff --git a/test/CodeGen/Generic/builtin-expect.ll b/test/CodeGen/Generic/builtin-expect.ll
new file mode 100644
index 0000000..e8cd07b
--- /dev/null
+++ b/test/CodeGen/Generic/builtin-expect.ll
@@ -0,0 +1,225 @@
+; RUN: llc < %s
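+; llvm.expect carries branch-probability hints; SelectionDAG lowers the call to
+; its first argument, so each function here should codegen without crashing.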
+
+define i32 @test1(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %cmp = icmp sgt i32 %tmp, 1
+ %conv = zext i1 %cmp to i32
+ %conv1 = sext i32 %conv to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1)
+ %tobool = icmp ne i64 %expval, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+declare i64 @llvm.expect.i64(i64, i64) nounwind readnone
+
+declare i32 @f(...)
+
+define i32 @test2(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %conv = sext i32 %tmp to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+ %tobool = icmp ne i64 %expval, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define i32 @test3(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %tobool = icmp ne i32 %tmp, 0
+ %lnot = xor i1 %tobool, true
+ %lnot.ext = zext i1 %lnot to i32
+ %conv = sext i32 %lnot.ext to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+ %tobool1 = icmp ne i64 %expval, 0
+ br i1 %tobool1, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define i32 @test4(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %tobool = icmp ne i32 %tmp, 0
+ %lnot = xor i1 %tobool, true
+ %lnot1 = xor i1 %lnot, true
+ %lnot.ext = zext i1 %lnot1 to i32
+ %conv = sext i32 %lnot.ext to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+ %tobool2 = icmp ne i64 %expval, 0
+ br i1 %tobool2, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define i32 @test5(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %cmp = icmp slt i32 %tmp, 0
+ %conv = zext i1 %cmp to i32
+ %conv1 = sext i32 %conv to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 0)
+ %tobool = icmp ne i64 %expval, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define i32 @test6(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %conv = sext i32 %tmp to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+ switch i64 %expval, label %sw.epilog [
+ i64 1, label %sw.bb
+ i64 2, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry
+ store i32 0, i32* %retval
+ br label %return
+
+sw.epilog: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %sw.epilog, %sw.bb
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define i32 @test7(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %conv = sext i32 %tmp to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+ switch i64 %expval, label %sw.epilog [
+ i64 2, label %sw.bb
+ i64 3, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry
+ %tmp1 = load i32* %x.addr, align 4
+ store i32 %tmp1, i32* %retval
+ br label %return
+
+sw.epilog: ; preds = %entry
+ store i32 0, i32* %retval
+ br label %return
+
+return: ; preds = %sw.epilog, %sw.bb
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define i32 @test8(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %cmp = icmp sgt i32 %tmp, 1
+ %conv = zext i1 %cmp to i32
+ %expval = call i32 @llvm.expect.i32(i32 %conv, i32 1)
+ %tobool = icmp ne i32 %expval, 0
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+declare i32 @llvm.expect.i32(i32, i32) nounwind readnone
+
diff --git a/test/CodeGen/Generic/crash.ll b/test/CodeGen/Generic/crash.ll
index e7cc7e3..d889389 100644
--- a/test/CodeGen/Generic/crash.ll
+++ b/test/CodeGen/Generic/crash.ll
@@ -1,7 +1,7 @@
; RUN: llc %s -o -
; PR6332
-%struct.AVCodecTag = type opaque
+%struct.AVCodecTag = type {}
@ff_codec_bmp_tags = external global [0 x %struct.AVCodecTag]
@tags = global [1 x %struct.AVCodecTag*] [%struct.AVCodecTag* getelementptr
inbounds ([0 x %struct.AVCodecTag]* @ff_codec_bmp_tags, i32 0, i32 0)]
diff --git a/test/CodeGen/Generic/edge-bundles-blockIDs.ll b/test/CodeGen/Generic/edge-bundles-blockIDs.ll
new file mode 100644
index 0000000..b4ae415
--- /dev/null
+++ b/test/CodeGen/Generic/edge-bundles-blockIDs.ll
@@ -0,0 +1,81 @@
+; Make sure EdgeBundles handles the case when the function size is less than
+; the number of block IDs.
+; RUN: llc -regalloc=fast < %s
+
+define void @foo() nounwind {
+entry:
+ br i1 undef, label %bb5.i1632, label %bb1.i1605
+
+bb1.i1605: ; preds = %entry
+ br i1 undef, label %bb5.i73.i, label %bb3.i68.i
+
+bb3.i68.i: ; preds = %bb1.i1605
+ unreachable
+
+bb5.i73.i: ; preds = %bb1.i1605
+ br i1 undef, label %bb7.i79.i, label %bb6.i76.i
+
+bb6.i76.i: ; preds = %bb5.i73.i
+ unreachable
+
+bb7.i79.i: ; preds = %bb5.i73.i
+ br i1 undef, label %bb.i.i1608, label %bb8.i82.i
+
+bb8.i82.i: ; preds = %bb7.i79.i
+ unreachable
+
+bb.i.i1608: ; preds = %bb.i.i1608, %bb7.i79.i
+ br i1 undef, label %bb1.i.dis.preheader_crit_edge.i, label %bb.i.i1608
+
+bb1.i.dis.preheader_crit_edge.i: ; preds = %bb.i.i1608
+ br label %dis.i
+
+bb3.i.i1610: ; preds = %bb8.i.i, %bb7.i.i1615
+ br i1 undef, label %bb5.i.i1613, label %bb4.i.i1611
+
+bb4.i.i1611: ; preds = %bb3.i.i1610
+ br label %bb5.i.i1613
+
+bb5.i.i1613: ; preds = %bb4.i.i1611, %bb3.i.i1610
+ unreachable
+
+bb7.i.i1615: ; preds = %getfloder.exit.i
+ br i1 undef, label %bb3.i.i1610, label %bb8.i.i
+
+bb8.i.i: ; preds = %bb7.i.i1615
+ br i1 undef, label %bb3.i.i1610, label %bb9.i.i
+
+bb9.i.i: ; preds = %bb8.i.i
+ br label %bb12.i.i
+
+bb12.i.i: ; preds = %bb12.i.i, %bb9.i.i
+ br i1 undef, label %bb13.i.bb14.i_crit_edge.i, label %bb12.i.i
+
+bb13.i.bb14.i_crit_edge.i: ; preds = %bb12.i.i
+ br i1 undef, label %bb25.i.i, label %bb20.i.i
+
+bb19.i.i: ; preds = %bb20.i.i
+ br label %bb20.i.i
+
+bb20.i.i: ; preds = %bb19.i.i, %bb13.i.bb14.i_crit_edge.i
+ %or.cond.i = or i1 undef, undef
+ br i1 %or.cond.i, label %bb25.i.i, label %bb19.i.i
+
+bb25.i.i: ; preds = %bb20.i.i, %bb13.i.bb14.i_crit_edge.i
+ unreachable
+
+bb5.i1632: ; preds = %entry
+ unreachable
+
+dis.i: ; preds = %getfloder.exit.i, %bb1.i.dis.preheader_crit_edge.i
+ br i1 undef, label %bb.i96.i, label %bb1.i102.i
+
+bb.i96.i: ; preds = %dis.i
+ br label %getfloder.exit.i
+
+bb1.i102.i: ; preds = %dis.i
+ br label %getfloder.exit.i
+
+getfloder.exit.i: ; preds = %bb1.i102.i, %bb.i96.i
+ br i1 undef, label %bb7.i.i1615, label %dis.i
+}
diff --git a/test/CodeGen/Generic/getresult-undef.ll b/test/CodeGen/Generic/getresult-undef.ll
deleted file mode 100644
index c675535..0000000
--- a/test/CodeGen/Generic/getresult-undef.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llc < %s
-
-define double @foo() {
- %t = getresult {double, double} undef, 1
- ret double %t
-}
diff --git a/test/CodeGen/Generic/invalid-memcpy.ll b/test/CodeGen/Generic/invalid-memcpy.ll
index 8448565..2dfa28b 100644
--- a/test/CodeGen/Generic/invalid-memcpy.ll
+++ b/test/CodeGen/Generic/invalid-memcpy.ll
@@ -4,16 +4,14 @@
; greater than the alignment guaranteed for Qux or C.0.1173), but it
; should compile, not crash the code generator.
-@C.0.1173 = external constant [33 x i8] ; <[33 x i8]*> [#uses=1]
+@C.0.1173 = external constant [33 x i8]
define void @Bork() {
entry:
- %Qux = alloca [33 x i8] ; <[33 x i8]*> [#uses=1]
- %Qux1 = bitcast [33 x i8]* %Qux to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i64( i8* %Qux1, i8* getelementptr ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8 )
- ret void
+ %Qux = alloca [33 x i8]
+ %Qux1 = bitcast [33 x i8]* %Qux to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %Qux1, i8* getelementptr inbounds ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8, i1 false)
+ ret void
}
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/Generic/legalize-dbg-value.ll b/test/CodeGen/Generic/legalize-dbg-value.ll
deleted file mode 100644
index b71aa8a..0000000
--- a/test/CodeGen/Generic/legalize-dbg-value.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -o /dev/null
-
-; llvm.dbg.value instructions can have types which are not legal for the
-; target. CodeGen should handle this.
-
-define i128 @__mulvti3(i128 %a, i128 %b) nounwind {
-entry:
- tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11
- unreachable
-}
-
-declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
-
-!0 = metadata !{i128 170141183460469231731687303715884105727}
-!1 = metadata !{i32 524544, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8} ; [ DW_TAG_auto_variable ]
-!2 = metadata !{i32 524299, metadata !3, i32 26, i32 0} ; [ DW_TAG_lexical_block ]
-!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"__mulvti3", metadata !"__mulvti3", metadata !"__mulvti3", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
-!4 = metadata !{i32 524329, metadata !"mulvti3.c", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !5} ; [ DW_TAG_file_type ]
-!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"mulvti3.c", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build 2328)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
-!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
-!7 = metadata !{metadata !8, metadata !8, metadata !8}
-!8 = metadata !{i32 524310, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
-!9 = metadata !{i32 524329, metadata !"int_lib.h", metadata !"/Volumes/Sandbox/llvm/swb/Libcompiler_rt-6.roots/Libcompiler_rt-6/lib", metadata !5} ; [ DW_TAG_file_type ]
-!10 = metadata !{i32 524324, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
-!11 = metadata !{i32 29, i32 0, metadata !2, null}
diff --git a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
index 282e973..a3cab5d 100644
--- a/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
+++ b/test/CodeGen/Generic/multiple-return-values-cross-block-with-invoke.ll
@@ -6,9 +6,9 @@ define void @foo(i64* %p, double* %q) nounwind {
%t = invoke { i64, double } @wild() to label %normal unwind label %handler
normal:
- %mrv_gr = getresult { i64, double } %t, 0
+ %mrv_gr = extractvalue { i64, double } %t, 0
store i64 %mrv_gr, i64* %p
- %mrv_gr12681 = getresult { i64, double } %t, 1
+ %mrv_gr12681 = extractvalue { i64, double } %t, 1
store double %mrv_gr12681, double* %q
ret void
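The getresult instruction on the removed lines above was dropped from the IR; extractvalue is its replacement for reading members out of first-class aggregates such as the { i64, double } returned by @wild. A minimal standalone sketch (hypothetical function name):

define i64 @first_member({ i64, double } %agg) {
entry:
  ; index 0 selects the i64 member of the struct value
  %v = extractvalue { i64, double } %agg, 0
  ret i64 %v
}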
diff --git a/test/CodeGen/Generic/spillccr.ll b/test/CodeGen/Generic/spillccr.ll
deleted file mode 100644
index 0a774c6..0000000
--- a/test/CodeGen/Generic/spillccr.ll
+++ /dev/null
@@ -1,49 +0,0 @@
-; RUN: llc < %s
-
-; July 6, 2002 -- LLC Regression test
-; This test case checks if the integer CC register %xcc (or %ccr)
-; is correctly spilled. The code fragment came from function
-; MakeGraph in Olden-mst.
-; The original code made all comparisons with 0, so that the %xcc
-; register is not needed for the branch in the first basic block.
-; Replace 0 with 1 in the first comparison so that the
-; branch-on-register instruction cannot be used directly, i.e.,
-; the %xcc register is needed for the first branch.
-;
-
- %Graph = type %struct.graph_st*
- %Hash = type %struct.hash*
- %HashEntry = type %struct.hash_entry*
- %Vertex = type %struct.vert_st*
- %struct.graph_st = type { [1 x %Vertex] }
- %struct.hash = type { %HashEntry*, i32 (i32)*, i32 }
- %struct.hash_entry = type { i32, i8*, %HashEntry }
- %struct.vert_st = type { i32, %Vertex, %Hash }
-@HashRange = external global i32 ; <i32*> [#uses=0]
-@.LC0 = internal global [13 x i8] c"Make phase 2\00" ; <[13 x i8]*> [#uses=0]
-@.LC1 = internal global [13 x i8] c"Make phase 3\00" ; <[13 x i8]*> [#uses=0]
-@.LC2 = internal global [13 x i8] c"Make phase 4\00" ; <[13 x i8]*> [#uses=0]
-@.LC3 = internal global [15 x i8] c"Make returning\00" ; <[15 x i8]*> [#uses=0]
-
-define %Graph @MakeGraph(i32 %numvert, i32 %numproc) {
-bb1:
- %reg111 = add i32 %numproc, -1 ; <i32> [#uses=2]
- %cond275 = icmp slt i32 %reg111, 1 ; <i1> [#uses=1]
- %cond276 = icmp sle i32 %reg111, 0 ; <i1> [#uses=1]
- %cond277 = icmp sge i32 %numvert, 0 ; <i1> [#uses=1]
- %reg162 = add i32 %numvert, 3 ; <i32> [#uses=0]
- br i1 %cond275, label %bb7, label %bb4
-
-bb4: ; preds = %bb1
- br i1 %cond276, label %bb7, label %bb5
-
-bb5: ; preds = %bb4
- br i1 %cond277, label %bb7, label %bb6
-
-bb6: ; preds = %bb5
- ret %Graph null
-
-bb7: ; preds = %bb5, %bb4, %bb1
- ret %Graph null
-}
-
diff --git a/test/CodeGen/Mips/2008-07-15-SmallSection.ll b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
index 91efd68..4795e47 100644
--- a/test/CodeGen/Mips/2008-07-15-SmallSection.ll
+++ b/test/CodeGen/Mips/2008-07-15-SmallSection.ll
@@ -7,7 +7,7 @@
; RUN: not grep {sbss} %t1
; RUN: not grep {gp_rel} %t1
; RUN: grep {\%hi} %t1 | count 2
-; RUN: grep {\%lo} %t1 | count 2
+; RUN: grep {\%lo} %t1 | count 3
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-unknown-psp-elf"
diff --git a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
index 41ae5dd..855194a 100644
--- a/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
+++ b/test/CodeGen/Mips/2008-07-16-SignExtInReg.ll
@@ -5,13 +5,13 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "mipsallegrexel-unknown-psp-elf"
-define i8 @A(i8 %e.0, i8 signext %sum) signext nounwind {
+define signext i8 @A(i8 %e.0, i8 signext %sum) nounwind {
entry:
add i8 %sum, %e.0 ; <i8>:0 [#uses=1]
ret i8 %0
}
-define i16 @B(i16 %e.0, i16 signext %sum) signext nounwind {
+define signext i16 @B(i16 %e.0, i16 signext %sum) nounwind {
entry:
add i16 %sum, %e.0 ; <i16>:0 [#uses=1]
ret i16 %0
diff --git a/test/CodeGen/Mips/alloca.ll b/test/CodeGen/Mips/alloca.ll
index 50eeecf..fb4f56c 100644
--- a/test/CodeGen/Mips/alloca.ll
+++ b/test/CodeGen/Mips/alloca.ll
@@ -4,15 +4,13 @@ define i32 @twoalloca(i32 %size) nounwind {
entry:
; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
; CHECK: addu $sp, $zero, $[[T0]]
-; CHECK: addu $[[SP1:[0-9]+]], $zero, $sp
-; CHECK: subu $[[T1:[0-9]+]], $sp, $[[SZ]]
-; CHECK: addu $sp, $zero, $[[T1]]
-; CHECK: addu $[[SP2:[0-9]+]], $zero, $sp
-; CHECK: lw $25, %call16(foo)($gp)
-; CHECK: addiu $4, $[[SP1]], 24
-; CHECK: jalr $25
-; CHECK: lw $25, %call16(foo)($gp)
-; CHECK: addiu $4, $[[SP2]], 24
+; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF:[0-9]+]]
+; CHECK: subu $[[T2:[0-9]+]], $sp, $[[SZ]]
+; CHECK: addu $sp, $zero, $[[T2]]
+; CHECK: addiu $[[T3:[0-9]+]], $sp, [[OFF]]
+; CHECK: lw $[[T4:[0-9]+]], %call16(foo)($gp)
+; CHECK: addu $25, $zero, $[[T4]]
+; CHECK: addu $4, $zero, $[[T1]]
; CHECK: jalr $25
%tmp1 = alloca i8, i32 %size, align 4
%add.ptr = getelementptr inbounds i8* %tmp1, i32 5
@@ -29,3 +27,72 @@ declare void @foo2(double, double, i32)
declare i32 @foo(i8*)
+@.str = private unnamed_addr constant [22 x i8] c"%d %d %d %d %d %d %d\0A\00", align 1
+
+define i32 @alloca2(i32 %size) nounwind {
+entry:
+; dynamically allocated stack area and $gp restore slot have the same offset
+; relative to $sp.
+;
+; CHECK: alloca2
+; CHECK: .cprestore [[OFF:[0-9]+]]
+; CHECK: subu $[[T0:[0-9]+]], $sp, $[[SZ:[0-9]+]]
+; CHECK: addu $sp, $zero, $[[T0]]
+; CHECK: addiu $[[T1:[0-9]+]], $sp, [[OFF]]
+
+ %tmp1 = alloca i8, i32 %size, align 4
+ %0 = bitcast i8* %tmp1 to i32*
+ %cmp = icmp sgt i32 %size, 10
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+; CHECK: addiu $4, $[[T1]], 40
+
+ %add.ptr = getelementptr inbounds i8* %tmp1, i32 40
+ %1 = bitcast i8* %add.ptr to i32*
+ call void @foo3(i32* %1) nounwind
+ %arrayidx15.pre = getelementptr inbounds i8* %tmp1, i32 12
+ %.pre = bitcast i8* %arrayidx15.pre to i32*
+ br label %if.end
+
+if.else: ; preds = %entry
+; CHECK: addiu $4, $[[T1]], 12
+
+ %add.ptr5 = getelementptr inbounds i8* %tmp1, i32 12
+ %2 = bitcast i8* %add.ptr5 to i32*
+ call void @foo3(i32* %2) nounwind
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+; CHECK: lw $5, 0($[[T1]])
+; CHECK: lw $25, %call16(printf)
+
+ %.pre-phi = phi i32* [ %2, %if.else ], [ %.pre, %if.then ]
+ %tmp7 = load i32* %0, align 4, !tbaa !0
+ %arrayidx9 = getelementptr inbounds i8* %tmp1, i32 4
+ %3 = bitcast i8* %arrayidx9 to i32*
+ %tmp10 = load i32* %3, align 4, !tbaa !0
+ %arrayidx12 = getelementptr inbounds i8* %tmp1, i32 8
+ %4 = bitcast i8* %arrayidx12 to i32*
+ %tmp13 = load i32* %4, align 4, !tbaa !0
+ %tmp16 = load i32* %.pre-phi, align 4, !tbaa !0
+ %arrayidx18 = getelementptr inbounds i8* %tmp1, i32 16
+ %5 = bitcast i8* %arrayidx18 to i32*
+ %tmp19 = load i32* %5, align 4, !tbaa !0
+ %arrayidx21 = getelementptr inbounds i8* %tmp1, i32 20
+ %6 = bitcast i8* %arrayidx21 to i32*
+ %tmp22 = load i32* %6, align 4, !tbaa !0
+ %arrayidx24 = getelementptr inbounds i8* %tmp1, i32 24
+ %7 = bitcast i8* %arrayidx24 to i32*
+ %tmp25 = load i32* %7, align 4, !tbaa !0
+ %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([22 x i8]* @.str, i32 0, i32 0), i32 %tmp7, i32 %tmp10, i32 %tmp13, i32 %tmp16, i32 %tmp19, i32 %tmp22, i32 %tmp25) nounwind
+ ret i32 0
+}
+
+declare void @foo3(i32*)
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
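The alloca2 comment above states the invariant being tested: after a variable-size alloca moves $sp, the .cprestore offset (the $gp restore slot) must still coincide with the start of the dynamically allocated area. The triggering IR shape is simply a dynamic alloca followed by a call through the PIC $gp; a minimal sketch with hypothetical names:

define void @dyn(i32 %n) {
entry:
  ; variable-size alloca adjusts $sp at run time
  %buf = alloca i8, i32 %n, align 4
  ; the call goes through $25/$gp, exercising the restore-slot offset
  call void @use(i8* %buf) nounwind
  ret void
}

declare void @use(i8*)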
diff --git a/test/CodeGen/Mips/i64arg.ll b/test/CodeGen/Mips/i64arg.ll
index 9a30453..560f2e9 100644
--- a/test/CodeGen/Mips/i64arg.ll
+++ b/test/CodeGen/Mips/i64arg.ll
@@ -10,8 +10,8 @@ entry:
; CHECK: jalr
tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind
; CHECK: lw $25, %call16(ff2)
-; CHECK: lw $[[R2:[0-9]+]], 80($sp)
-; CHECK: lw $[[R3:[0-9]+]], 84($sp)
+; CHECK: lw $[[R2:[0-9]+]], 88($sp)
+; CHECK: lw $[[R3:[0-9]+]], 92($sp)
; CHECK: addu $4, $zero, $[[R2]]
; CHECK: addu $5, $zero, $[[R3]]
; CHECK: jalr $25
diff --git a/test/CodeGen/Mips/inlineasmmemop.ll b/test/CodeGen/Mips/inlineasmmemop.ll
new file mode 100644
index 0000000..c565892
--- /dev/null
+++ b/test/CodeGen/Mips/inlineasmmemop.ll
@@ -0,0 +1,23 @@
+; RUN: llc -march=mipsel -mcpu=4ke < %s | FileCheck %s
+
+@g1 = external global i32
+
+define i32 @f1(i32 %x) nounwind {
+entry:
+; CHECK: addiu $[[T0:[0-9]+]], $sp
+; CHECK: #APP
+; CHECK: sw $4, 0($[[T0]])
+; CHECK: #NO_APP
+; CHECK: lw $[[T1:[0-9]+]], %got(g1)($gp)
+; CHECK: #APP
+; CHECK: lw $[[T3:[0-9]+]], 0($[[T0]])
+; CHECK: #NO_APP
+; CHECK: sw $[[T3]], 0($[[T1]])
+
+ %l1 = alloca i32, align 4
+ call void asm "sw $1, $0", "=*m,r"(i32* %l1, i32 %x) nounwind
+ %0 = call i32 asm "lw $0, $1", "=r,*m"(i32* %l1) nounwind
+ store i32 %0, i32* @g1, align 4
+ ret i32 %0
+}
+
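The new test exercises indirect memory-operand constraints in inline asm: "=*m" passes a pointer whose pointee the asm writes, and "*m" passes one whose pointee it reads, so the operand is printed as a memory reference rather than a register. A reduced sketch of the pattern (hypothetical function name, mirroring the calls above):

define i32 @roundtrip(i32 %x) {
entry:
  %slot = alloca i32, align 4
  ; store %x through the "=*m" output operand...
  call void asm "sw $1, $0", "=*m,r"(i32* %slot, i32 %x) nounwind
  ; ...then read it back through the "*m" input operand
  %v = call i32 asm "lw $0, $1", "=r,*m"(i32* %slot) nounwind
  ret i32 %v
}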
diff --git a/test/CodeGen/Mips/internalfunc.ll b/test/CodeGen/Mips/internalfunc.ll
index 50d0993..c2a4e5c 100644
--- a/test/CodeGen/Mips/internalfunc.ll
+++ b/test/CodeGen/Mips/internalfunc.ll
@@ -15,7 +15,7 @@ entry:
define void @caller(i32 %a0, i32 %a1) nounwind {
entry:
; CHECK: lw $[[R1:[0-9]+]], %got(caller.sf1)($gp)
-; CHECK: addiu ${{[0-9]+}}, $[[R1]], %lo(caller.sf1)
+; CHECK: lw $25, %lo(caller.sf1)($[[R1]])
%tobool = icmp eq i32 %a1, 0
br i1 %tobool, label %if.end, label %if.then
@@ -26,9 +26,9 @@ if.then: ; preds = %entry
if.end: ; preds = %entry, %if.then
; CHECK: lw $[[R2:[0-9]+]], %got(sf2)($gp)
-; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)($gp)
; CHECK: addiu ${{[0-9]+}}, $[[R2]], %lo(sf2)
-; CHECK: addiu ${{[0-9]+}}, $[[R3]], %lo(caller.sf1)
+; CHECK: lw $[[R3:[0-9]+]], %got(caller.sf1)($gp)
+; CHECK: sw ${{[0-9]+}}, %lo(caller.sf1)($[[R3]])
%tobool3 = icmp ne i32 %a0, 0
%tmp4 = load void (...)** @gf1, align 4
%cond = select i1 %tobool3, void (...)* %tmp4, void (...)* bitcast (void ()* @sf2 to void (...)*)
diff --git a/test/CodeGen/Mips/largeimmprinting.ll b/test/CodeGen/Mips/largeimmprinting.ll
index fd7ae9e..fcc20f7 100644
--- a/test/CodeGen/Mips/largeimmprinting.ll
+++ b/test/CodeGen/Mips/largeimmprinting.ll
@@ -8,7 +8,7 @@ define void @f() nounwind {
entry:
; CHECK: lui $at, 65534
; CHECK: addu $at, $sp, $at
-; CHECK: addiu $sp, $at, -16
+; CHECK: addiu $sp, $at, -24
; CHECK: .cprestore 65536
%agg.tmp = alloca %struct.S1, align 1
diff --git a/test/CodeGen/Mips/o32_cc_byval.ll b/test/CodeGen/Mips/o32_cc_byval.ll
index b78c393..f5e1a87 100644
--- a/test/CodeGen/Mips/o32_cc_byval.ll
+++ b/test/CodeGen/Mips/o32_cc_byval.ll
@@ -24,7 +24,7 @@ entry:
; CHECK: sw $[[R4]], 28($sp)
; CHECK: sw $[[R5]], 32($sp)
; CHECK: sw $[[R6]], 36($sp)
-; CHECK: lw $6, 0($[[R0]])
+; CHECK: lw $6, %lo(f1.s1)($[[R1]])
; CHECK: lw $7, 4($[[R0]])
%agg.tmp10 = alloca %struct.S3, align 4
call void @callee1(float 2.000000e+01, %struct.S1* byval bitcast (%0* @f1.s1 to %struct.S1*)) nounwind
diff --git a/test/CodeGen/Mips/private.ll b/test/CodeGen/Mips/private.ll
index 34b7547..4cc48f0 100644
--- a/test/CodeGen/Mips/private.ll
+++ b/test/CodeGen/Mips/private.ll
@@ -6,8 +6,6 @@
; RUN: grep \\\$baz: %t
; RUN: grep lw.*\\\$baz %t
-declare void @foo()
-
define private void @foo() {
ret void
}
diff --git a/test/CodeGen/PTX/add.ll b/test/CodeGen/PTX/add.ll
index 235b00e..293aebe 100644
--- a/test/CodeGen/PTX/add.ll
+++ b/test/CodeGen/PTX/add.ll
@@ -1,70 +1,70 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
-; CHECK: add.u16 rh0, rh1, rh2;
+; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
%z = add i16 %x, %y
ret i16 %z
}
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
-; CHECK: add.u32 r0, r1, r2;
+; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%z = add i32 %x, %y
ret i32 %z
}
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
-; CHECK: add.u64 rd0, rd1, rd2;
+; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%z = add i64 %x, %y
ret i64 %z
}
define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: add.f32 f0, f1, f2
+; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fadd float %x, %y
ret float %z
}
define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: add.f64 fd0, fd1, fd2
+; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fadd double %x, %y
ret double %z
}
define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: add.u16 rh0, rh1, 1;
+; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, 1;
; CHECK-NEXT: ret;
%z = add i16 %x, 1
ret i16 %z
}
define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: add.u32 r0, r1, 1;
+; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, 1;
; CHECK-NEXT: ret;
%z = add i32 %x, 1
ret i32 %z
}
define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: add.u64 rd0, rd1, 1;
+; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, 1;
; CHECK-NEXT: ret;
%z = add i64 %x, 1
ret i64 %z
}
define ptx_device float @t2_f32(float %x) {
-; CHECK: add.f32 f0, f1, 0F3F800000;
+; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F3F800000;
; CHECK-NEXT: ret;
%z = fadd float %x, 1.0
ret float %z
}
define ptx_device double @t2_f64(double %x) {
-; CHECK: add.f64 fd0, fd1, 0D3FF0000000000000;
+; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D3FF0000000000000;
; CHECK-NEXT: ret;
%z = fadd double %x, 1.0
ret double %z
diff --git a/test/CodeGen/PTX/aggregates.ll b/test/CodeGen/PTX/aggregates.ll
new file mode 100644
index 0000000..23f28a7
--- /dev/null
+++ b/test/CodeGen/PTX/aggregates.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
+
+%complex = type { float, float }
+
+define ptx_device %complex @complex_add(%complex %a, %complex %b) {
+entry:
+; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1];
+; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3];
+; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2];
+; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4];
+; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]];
+; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]];
+; CHECK-NEXT: ret;
+ %a.real = extractvalue %complex %a, 0
+ %a.imag = extractvalue %complex %a, 1
+ %b.real = extractvalue %complex %b, 0
+ %b.imag = extractvalue %complex %b, 1
+ %ret.real = fadd float %a.real, %b.real
+ %ret.imag = fadd float %a.imag, %b.imag
+ %ret.0 = insertvalue %complex undef, float %ret.real, 0
+ %ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1
+ ret %complex %ret.1
+}
diff --git a/test/CodeGen/PTX/bitwise.ll b/test/CodeGen/PTX/bitwise.ll
index dbc77e5..3859280 100644
--- a/test/CodeGen/PTX/bitwise.ll
+++ b/test/CodeGen/PTX/bitwise.ll
@@ -3,21 +3,21 @@
; preds
define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) {
-; CHECK: and.pred p0, p1, p2
+; CHECK: and.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
%c = and i1 %x, %y
%d = zext i1 %c to i32
ret i32 %d
}
define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) {
-; CHECK: or.pred p0, p1, p2
+; CHECK: or.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
%a = or i1 %x, %y
%b = zext i1 %a to i32
ret i32 %b
}
define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) {
-; CHECK: xor.pred p0, p1, p2
+; CHECK: xor.pred p{{[0-9]+}}, p{{[0-9]+}}, p{{[0-9]+}}
%a = xor i1 %x, %y
%b = zext i1 %a to i32
ret i32 %b
diff --git a/test/CodeGen/PTX/bra.ll b/test/CodeGen/PTX/bra.ll
index 49383eb..7cc9444 100644
--- a/test/CodeGen/PTX/bra.ll
+++ b/test/CodeGen/PTX/bra.ll
@@ -10,15 +10,15 @@ loop:
define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) {
entry:
-; CHECK: setp.le.u32 p0, r1, r2
+; CHECK: setp.le.u32 p0, r[[R0:[0-9]+]], r[[R1:[0-9]+]]
%p = icmp ugt i32 %x, %y
; CHECK-NEXT: @p0 bra
; CHECK-NOT: bra
br i1 %p, label %clause.if, label %clause.else
clause.if:
-; CHECK: mov.u32 r0, r1
+; CHECK: mov.u32 r{{[0-9]+}}, r[[R0]]
ret i32 %x
clause.else:
-; CHECK: mov.u32 r0, r2
+; CHECK: mov.u32 r{{[0-9]+}}, r[[R1]]
ret i32 %y
}
diff --git a/test/CodeGen/PTX/cvt.ll b/test/CodeGen/PTX/cvt.ll
index 2f793de..853abaf 100644
--- a/test/CodeGen/PTX/cvt.ll
+++ b/test/CodeGen/PTX/cvt.ll
@@ -4,8 +4,10 @@
; (note: we convert back to i32 to return)
define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
-; CHECK: cvt.pred.u16 p0, rh1;
-; CHECK: ret;
+; CHECK: setp.gt.u16 p[[P0:[0-9]+]], rh{{[0-9]+}}, 0
+; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
+; CHECK-NEXT: ret;
%a = trunc i16 %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
@@ -13,8 +15,10 @@ define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
}
define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
-; CHECK: cvt.pred.u32 p0, r1;
-; CHECK: ret;
+; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0
+; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
+; CHECK-NEXT: ret;
%a = trunc i32 %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
@@ -22,8 +26,10 @@ define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
}
define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
-; CHECK: cvt.pred.u64 p0, rd1;
-; CHECK: ret;
+; CHECK: setp.gt.u64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0
+; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
+; CHECK-NEXT: ret;
%a = trunc i64 %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
@@ -31,8 +37,10 @@ define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
}
define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
-; CHECK: cvt.rni.pred.f32 p0, f1;
-; CHECK: ret;
+; CHECK: setp.gt.f32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0
+; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
+; CHECK-NEXT: ret;
%a = fptoui float %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
@@ -40,8 +48,10 @@ define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
}
define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
-; CHECK: cvt.rni.pred.f64 p0, fd1;
-; CHECK: ret;
+; CHECK: setp.gt.f64 p[[P0:[0-9]+]], rd{{[0-9]+}}, 0
+; CHECK-NEXT: and.pred p0, p[[P0:[0-9]+]], p{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0:[0-9]+]];
+; CHECK-NEXT: ret;
%a = fptoui double %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
@@ -51,36 +61,36 @@ define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
; i16
define ptx_device i16 @cvt_i16_preds(i1 %x) {
-; CHECK: cvt.u16.pred rh0, p1;
-; CHECK: ret;
+; CHECK: selp.u16 rh{{[0-9]+}}, 1, 0, p{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = zext i1 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_i32(i32 %x) {
-; CHECK: cvt.u16.u32 rh0, r1;
-; CHECK: ret;
+; CHECK: cvt.u16.u32 rh{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = trunc i32 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_i64(i64 %x) {
-; CHECK: cvt.u16.u64 rh0, rd1;
-; CHECK: ret;
+; CHECK: cvt.u16.u64 rh{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = trunc i64 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_f32(float %x) {
-; CHECK: cvt.rni.u16.f32 rh0, f1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u16.f32 rh{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = fptoui float %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_f64(double %x) {
-; CHECK: cvt.rni.u16.f64 rh0, fd1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u16.f64 rh{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = fptoui double %x to i16
ret i16 %a
}
@@ -88,36 +98,36 @@ define ptx_device i16 @cvt_i16_f64(double %x) {
; i32
define ptx_device i32 @cvt_i32_preds(i1 %x) {
-; CHECK: cvt.u32.pred r0, p1;
-; CHECK: ret;
+; CHECK: selp.u32 r{{[0-9]+}}, 1, 0, p{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = zext i1 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_i16(i16 %x) {
-; CHECK: cvt.u32.u16 r0, rh1;
-; CHECK: ret;
+; CHECK: cvt.u32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = zext i16 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_i64(i64 %x) {
-; CHECK: cvt.u32.u64 r0, rd1;
-; CHECK: ret;
+; CHECK: cvt.u32.u64 r{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = trunc i64 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_f32(float %x) {
-; CHECK: cvt.rni.u32.f32 r0, f1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u32.f32 r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = fptoui float %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_f64(double %x) {
-; CHECK: cvt.rni.u32.f64 r0, fd1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u32.f64 r{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = fptoui double %x to i32
ret i32 %a
}
@@ -125,35 +135,35 @@ define ptx_device i32 @cvt_i32_f64(double %x) {
; i64
define ptx_device i64 @cvt_i64_preds(i1 %x) {
-; CHECK: cvt.u64.pred rd0, p1;
-; CHECK: ret;
+; CHECK: selp.u64 rd{{[0-9]+}}, 1, 0, p{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = zext i1 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_i16(i16 %x) {
-; CHECK: cvt.u64.u16 rd0, rh1;
-; CHECK: ret;
+; CHECK: cvt.u64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = zext i16 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_i32(i32 %x) {
-; CHECK: cvt.u64.u32 rd0, r1;
-; CHECK: ret;
+; CHECK: cvt.u64.u32 rd{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = zext i32 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_f32(float %x) {
-; CHECK: cvt.rni.u64.f32 rd0, f1;
-; CHECK: ret;
+; CHECK: cvt.rzi.u64.f32 rd{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = fptoui float %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_f64(double %x) {
-; CHECK: cvt.rni.u64.f64 rd0, fd1;
+; CHECK: cvt.rzi.u64.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK: ret;
%a = fptoui double %x to i64
ret i64 %a
@@ -162,36 +172,36 @@ define ptx_device i64 @cvt_i64_f64(double %x) {
; f32
define ptx_device float @cvt_f32_preds(i1 %x) {
-; CHECK: cvt.rn.f32.pred f0, p1;
-; CHECK: ret;
+; CHECK: selp.f32 r{{[0-9]+}}, 0F3F800000, 0F00000000, p{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = uitofp i1 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i16(i16 %x) {
-; CHECK: cvt.rn.f32.u16 f0, rh1;
-; CHECK: ret;
+; CHECK: cvt.rn.f32.u16 r{{[0-9]+}}, rh{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = uitofp i16 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i32(i32 %x) {
-; CHECK: cvt.rn.f32.u32 f0, r1;
-; CHECK: ret;
+; CHECK: cvt.rn.f32.u32 r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = uitofp i32 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i64(i64 %x) {
-; CHECK: cvt.rn.f32.u64 f0, rd1;
-; CHECK: ret;
+; CHECK: cvt.rn.f32.u64 r{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = uitofp i64 %x to float
ret float %a
}
define ptx_device float @cvt_f32_f64(double %x) {
-; CHECK: cvt.rn.f32.f64 f0, fd1;
-; CHECK: ret;
+; CHECK: cvt.rn.f32.f64 r{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = fptrunc double %x to float
ret float %a
}
@@ -199,36 +209,36 @@ define ptx_device float @cvt_f32_f64(double %x) {
; f64
define ptx_device double @cvt_f64_preds(i1 %x) {
-; CHECK: cvt.rn.f64.pred fd0, p1;
-; CHECK: ret;
+; CHECK: selp.f64 rd{{[0-9]+}}, 0D3F80000000000000, 0D0000000000000000, p{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = uitofp i1 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i16(i16 %x) {
-; CHECK: cvt.rn.f64.u16 fd0, rh1;
-; CHECK: ret;
+; CHECK: cvt.rn.f64.u16 rd{{[0-9]+}}, rh{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = uitofp i16 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i32(i32 %x) {
-; CHECK: cvt.rn.f64.u32 fd0, r1;
-; CHECK: ret;
+; CHECK: cvt.rn.f64.u32 rd{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = uitofp i32 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i64(i64 %x) {
-; CHECK: cvt.rn.f64.u64 fd0, rd1;
-; CHECK: ret;
+; CHECK: cvt.rn.f64.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = uitofp i64 %x to double
ret double %a
}
define ptx_device double @cvt_f64_f32(float %x) {
-; CHECK: cvt.f64.f32 fd0, f1;
-; CHECK: ret;
+; CHECK: cvt.f64.f32 rd{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: ret;
%a = fpext float %x to double
ret double %a
}
diff --git a/test/CodeGen/PTX/fdiv-sm10.ll b/test/CodeGen/PTX/fdiv-sm10.ll
index 121360c..049d891 100644
--- a/test/CodeGen/PTX/fdiv-sm10.ll
+++ b/test/CodeGen/PTX/fdiv-sm10.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: div.approx.f32 f0, f1, f2;
+; CHECK: div.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fdiv float %x, %y
ret float %a
}
define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: div.f64 fd0, fd1, fd2;
+; CHECK: div.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fdiv double %x, %y
ret double %a
diff --git a/test/CodeGen/PTX/fdiv-sm13.ll b/test/CodeGen/PTX/fdiv-sm13.ll
index 0ec7bae..2d95339 100644
--- a/test/CodeGen/PTX/fdiv-sm13.ll
+++ b/test/CodeGen/PTX/fdiv-sm13.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: div.approx.f32 f0, f1, f2;
+; CHECK: div.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fdiv float %x, %y
ret float %a
}
define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: div.rn.f64 fd0, fd1, fd2;
+; CHECK: div.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fdiv double %x, %y
ret double %a
diff --git a/test/CodeGen/PTX/fneg.ll b/test/CodeGen/PTX/fneg.ll
index 22eeda3..66ca74a 100644
--- a/test/CodeGen/PTX/fneg.ll
+++ b/test/CodeGen/PTX/fneg.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device float @t1_f32(float %x) {
-; CHECK: neg.f32 f0, f1;
+; CHECK: neg.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%y = fsub float -0.000000e+00, %x
ret float %y
}
define ptx_device double @t1_f64(double %x) {
-; CHECK: neg.f64 fd0, fd1;
+; CHECK: neg.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%y = fsub double -0.000000e+00, %x
ret double %y
diff --git a/test/CodeGen/PTX/intrinsic.ll b/test/CodeGen/PTX/intrinsic.ll
index cea4182..af987d6 100644
--- a/test/CodeGen/PTX/intrinsic.ll
+++ b/test/CodeGen/PTX/intrinsic.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 r0, %tid.x;
diff --git a/test/CodeGen/PTX/ld.ll b/test/CodeGen/PTX/ld.ll
index 377a95a..d184d12 100644
--- a/test/CodeGen/PTX/ld.ll
+++ b/test/CodeGen/PTX/ld.ll
@@ -63,7 +63,7 @@
define ptx_device i16 @t1_u16(i16* %p) {
entry:
-;CHECK: ld.global.u16 rh0, [r1];
+;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load i16* %p
ret i16 %x
@@ -71,7 +71,7 @@ entry:
define ptx_device i32 @t1_u32(i32* %p) {
entry:
-;CHECK: ld.global.u32 r0, [r1];
+;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load i32* %p
ret i32 %x
@@ -79,7 +79,7 @@ entry:
define ptx_device i64 @t1_u64(i64* %p) {
entry:
-;CHECK: ld.global.u64 rd0, [r1];
+;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load i64* %p
ret i64 %x
@@ -87,7 +87,7 @@ entry:
define ptx_device float @t1_f32(float* %p) {
entry:
-;CHECK: ld.global.f32 f0, [r1];
+;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load float* %p
ret float %x
@@ -95,7 +95,7 @@ entry:
define ptx_device double @t1_f64(double* %p) {
entry:
-;CHECK: ld.global.f64 fd0, [r1];
+;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}];
;CHECK-NEXT: ret;
%x = load double* %p
ret double %x
@@ -103,7 +103,7 @@ entry:
define ptx_device i16 @t2_u16(i16* %p) {
entry:
-;CHECK: ld.global.u16 rh0, [r1+2];
+;CHECK: ld.global.u16 rh{{[0-9]+}}, [r{{[0-9]+}}+2];
;CHECK-NEXT: ret;
%i = getelementptr i16* %p, i32 1
%x = load i16* %i
@@ -112,7 +112,7 @@ entry:
define ptx_device i32 @t2_u32(i32* %p) {
entry:
-;CHECK: ld.global.u32 r0, [r1+4];
+;CHECK: ld.global.u32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 1
%x = load i32* %i
@@ -121,7 +121,7 @@ entry:
define ptx_device i64 @t2_u64(i64* %p) {
entry:
-;CHECK: ld.global.u64 rd0, [r1+8];
+;CHECK: ld.global.u64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
;CHECK-NEXT: ret;
%i = getelementptr i64* %p, i32 1
%x = load i64* %i
@@ -130,7 +130,7 @@ entry:
define ptx_device float @t2_f32(float* %p) {
entry:
-;CHECK: ld.global.f32 f0, [r1+4];
+;CHECK: ld.global.f32 r{{[0-9]+}}, [r{{[0-9]+}}+4];
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 1
%x = load float* %i
@@ -139,7 +139,7 @@ entry:
define ptx_device double @t2_f64(double* %p) {
entry:
-;CHECK: ld.global.f64 fd0, [r1+8];
+;CHECK: ld.global.f64 rd{{[0-9]+}}, [r{{[0-9]+}}+8];
;CHECK-NEXT: ret;
%i = getelementptr double* %p, i32 1
%x = load double* %i
@@ -148,9 +148,9 @@ entry:
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
entry:
-;CHECK: shl.b32 r0, r2, 1;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: ld.global.u16 rh0, [r0];
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
%i = getelementptr i16* %p, i32 %q
%x = load i16* %i
ret i16 %x
@@ -158,9 +158,9 @@ entry:
define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
entry:
-;CHECK: shl.b32 r0, r2, 2;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: ld.global.u32 r0, [r0];
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
%i = getelementptr i32* %p, i32 %q
%x = load i32* %i
ret i32 %x
@@ -168,9 +168,9 @@ entry:
define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
entry:
-;CHECK: shl.b32 r0, r2, 3;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: ld.global.u64 rd0, [r0];
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
%i = getelementptr i64* %p, i32 %q
%x = load i64* %i
ret i64 %x
@@ -178,9 +178,9 @@ entry:
define ptx_device float @t3_f32(float* %p, i32 %q) {
entry:
-;CHECK: shl.b32 r0, r2, 2;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: ld.global.f32 f0, [r0];
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
%i = getelementptr float* %p, i32 %q
%x = load float* %i
ret float %x
@@ -188,9 +188,9 @@ entry:
define ptx_device double @t3_f64(double* %p, i32 %q) {
entry:
-;CHECK: shl.b32 r0, r2, 3;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: ld.global.f64 fd0, [r0];
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
%i = getelementptr double* %p, i32 %q
%x = load double* %i
ret double %x
@@ -198,8 +198,8 @@ entry:
define ptx_device i16 @t4_global_u16() {
entry:
-;CHECK: mov.u32 r0, array_i16;
-;CHECK-NEXT: ld.global.u16 rh0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
+;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
%x = load i16* %i
@@ -208,8 +208,8 @@ entry:
define ptx_device i32 @t4_global_u32() {
entry:
-;CHECK: mov.u32 r0, array_i32;
-;CHECK-NEXT: ld.global.u32 r0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
+;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
%x = load i32* %i
@@ -218,8 +218,8 @@ entry:
define ptx_device i64 @t4_global_u64() {
entry:
-;CHECK: mov.u32 r0, array_i64;
-;CHECK-NEXT: ld.global.u64 rd0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
+;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
%x = load i64* %i
@@ -228,8 +228,8 @@ entry:
define ptx_device float @t4_global_f32() {
entry:
-;CHECK: mov.u32 r0, array_float;
-;CHECK-NEXT: ld.global.f32 f0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
+;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
%x = load float* %i
@@ -238,8 +238,8 @@ entry:
define ptx_device double @t4_global_f64() {
entry:
-;CHECK: mov.u32 r0, array_double;
-;CHECK-NEXT: ld.global.f64 fd0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
+;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
%x = load double* %i
@@ -248,8 +248,8 @@ entry:
define ptx_device i16 @t4_const_u16() {
entry:
-;CHECK: mov.u32 r0, array_constant_i16;
-;CHECK-NEXT: ld.const.u16 rh0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i16;
+;CHECK-NEXT: ld.const.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
%x = load i16 addrspace(1)* %i
@@ -258,8 +258,8 @@ entry:
define ptx_device i32 @t4_const_u32() {
entry:
-;CHECK: mov.u32 r0, array_constant_i32;
-;CHECK-NEXT: ld.const.u32 r0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i32;
+;CHECK-NEXT: ld.const.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
%x = load i32 addrspace(1)* %i
@@ -268,8 +268,8 @@ entry:
define ptx_device i64 @t4_const_u64() {
entry:
-;CHECK: mov.u32 r0, array_constant_i64;
-;CHECK-NEXT: ld.const.u64 rd0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_i64;
+;CHECK-NEXT: ld.const.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
%x = load i64 addrspace(1)* %i
@@ -278,8 +278,8 @@ entry:
define ptx_device float @t4_const_f32() {
entry:
-;CHECK: mov.u32 r0, array_constant_float;
-;CHECK-NEXT: ld.const.f32 f0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_float;
+;CHECK-NEXT: ld.const.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
%x = load float addrspace(1)* %i
@@ -288,8 +288,8 @@ entry:
define ptx_device double @t4_const_f64() {
entry:
-;CHECK: mov.u32 r0, array_constant_double;
-;CHECK-NEXT: ld.const.f64 fd0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_constant_double;
+;CHECK-NEXT: ld.const.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
%x = load double addrspace(1)* %i
@@ -298,8 +298,8 @@ entry:
define ptx_device i16 @t4_local_u16() {
entry:
-;CHECK: mov.u32 r0, array_local_i16;
-;CHECK-NEXT: ld.local.u16 rh0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
+;CHECK-NEXT: ld.local.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
%x = load i16 addrspace(2)* %i
@@ -308,8 +308,8 @@ entry:
define ptx_device i32 @t4_local_u32() {
entry:
-;CHECK: mov.u32 r0, array_local_i32;
-;CHECK-NEXT: ld.local.u32 r0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32;
+;CHECK-NEXT: ld.local.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
%x = load i32 addrspace(2)* %i
@@ -318,8 +318,8 @@ entry:
define ptx_device i64 @t4_local_u64() {
entry:
-;CHECK: mov.u32 r0, array_local_i64;
-;CHECK-NEXT: ld.local.u64 rd0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64;
+;CHECK-NEXT: ld.local.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
%x = load i64 addrspace(2)* %i
@@ -328,8 +328,8 @@ entry:
define ptx_device float @t4_local_f32() {
entry:
-;CHECK: mov.u32 r0, array_local_float;
-;CHECK-NEXT: ld.local.f32 f0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float;
+;CHECK-NEXT: ld.local.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
%x = load float addrspace(2)* %i
@@ -338,8 +338,8 @@ entry:
define ptx_device double @t4_local_f64() {
entry:
-;CHECK: mov.u32 r0, array_local_double;
-;CHECK-NEXT: ld.local.f64 fd0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double;
+;CHECK-NEXT: ld.local.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
%x = load double addrspace(2)* %i
@@ -348,8 +348,8 @@ entry:
define ptx_device i16 @t4_shared_u16() {
entry:
-;CHECK: mov.u32 r0, array_shared_i16;
-;CHECK-NEXT: ld.shared.u16 rh0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
+;CHECK-NEXT: ld.shared.u16 rh{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
%x = load i16 addrspace(4)* %i
@@ -358,8 +358,8 @@ entry:
define ptx_device i32 @t4_shared_u32() {
entry:
-;CHECK: mov.u32 r0, array_shared_i32;
-;CHECK-NEXT: ld.shared.u32 r0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32;
+;CHECK-NEXT: ld.shared.u32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
%x = load i32 addrspace(4)* %i
@@ -368,8 +368,8 @@ entry:
define ptx_device i64 @t4_shared_u64() {
entry:
-;CHECK: mov.u32 r0, array_shared_i64;
-;CHECK-NEXT: ld.shared.u64 rd0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64;
+;CHECK-NEXT: ld.shared.u64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
%x = load i64 addrspace(4)* %i
@@ -378,8 +378,8 @@ entry:
define ptx_device float @t4_shared_f32() {
entry:
-;CHECK: mov.u32 r0, array_shared_float;
-;CHECK-NEXT: ld.shared.f32 f0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float;
+;CHECK-NEXT: ld.shared.f32 r{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
%x = load float addrspace(4)* %i
@@ -388,8 +388,8 @@ entry:
define ptx_device double @t4_shared_f64() {
entry:
-;CHECK: mov.u32 r0, array_shared_double;
-;CHECK-NEXT: ld.shared.f64 fd0, [r0];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double;
+;CHECK-NEXT: ld.shared.f64 rd{{[0-9]+}}, [r[[R0]]];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
%x = load double addrspace(4)* %i
@@ -398,8 +398,8 @@ entry:
define ptx_device i16 @t5_u16() {
entry:
-;CHECK: mov.u32 r0, array_i16;
-;CHECK-NEXT: ld.global.u16 rh0, [r0+2];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
+;CHECK-NEXT: ld.global.u16 rh{{[0-9]+}}, [r[[R0]]+2];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
%x = load i16* %i
@@ -408,8 +408,8 @@ entry:
define ptx_device i32 @t5_u32() {
entry:
-;CHECK: mov.u32 r0, array_i32;
-;CHECK-NEXT: ld.global.u32 r0, [r0+4];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
+;CHECK-NEXT: ld.global.u32 r{{[0-9]+}}, [r[[R0]]+4];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
%x = load i32* %i
@@ -418,8 +418,8 @@ entry:
define ptx_device i64 @t5_u64() {
entry:
-;CHECK: mov.u32 r0, array_i64;
-;CHECK-NEXT: ld.global.u64 rd0, [r0+8];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
+;CHECK-NEXT: ld.global.u64 rd{{[0-9]+}}, [r[[R0]]+8];
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
%x = load i64* %i
@@ -428,8 +428,8 @@ entry:
define ptx_device float @t5_f32() {
entry:
-;CHECK: mov.u32 r0, array_float;
-;CHECK-NEXT: ld.global.f32 f0, [r0+4];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
+;CHECK-NEXT: ld.global.f32 r{{[0-9]+}}, [r[[R0]]+4];
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
%x = load float* %i
@@ -438,8 +438,8 @@ entry:
define ptx_device double @t5_f64() {
entry:
-;CHECK: mov.u32 r0, array_double;
-;CHECK-NEXT: ld.global.f64 fd0, [r0+8];
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
+;CHECK-NEXT: ld.global.f64 rd{{[0-9]+}}, [r[[R0]]+8];
;CHECK-NEXT: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
%x = load double* %i
diff --git a/test/CodeGen/PTX/llvm-intrinsic.ll b/test/CodeGen/PTX/llvm-intrinsic.ll
index 1e265f5..4611c54 100644
--- a/test/CodeGen/PTX/llvm-intrinsic.ll
+++ b/test/CodeGen/PTX/llvm-intrinsic.ll
@@ -1,8 +1,8 @@
-; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s
+; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
define ptx_device float @test_sqrt_f32(float %x) {
entry:
-; CHECK: sqrt.rn.f32 f0, f1;
+; CHECK: sqrt.rn.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call float @llvm.sqrt.f32(float %x)
ret float %y
@@ -10,7 +10,7 @@ entry:
define ptx_device double @test_sqrt_f64(double %x) {
entry:
-; CHECK: sqrt.rn.f64 fd0, fd1;
+; CHECK: sqrt.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call double @llvm.sqrt.f64(double %x)
ret double %y
@@ -18,7 +18,7 @@ entry:
define ptx_device float @test_sin_f32(float %x) {
entry:
-; CHECK: sin.approx.f32 f0, f1;
+; CHECK: sin.approx.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call float @llvm.sin.f32(float %x)
ret float %y
@@ -26,7 +26,7 @@ entry:
define ptx_device double @test_sin_f64(double %x) {
entry:
-; CHECK: sin.approx.f64 fd0, fd1;
+; CHECK: sin.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call double @llvm.sin.f64(double %x)
ret double %y
@@ -34,7 +34,7 @@ entry:
define ptx_device float @test_cos_f32(float %x) {
entry:
-; CHECK: cos.approx.f32 f0, f1;
+; CHECK: cos.approx.f32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call float @llvm.cos.f32(float %x)
ret float %y
@@ -42,7 +42,7 @@ entry:
define ptx_device double @test_cos_f64(double %x) {
entry:
-; CHECK: cos.approx.f64 fd0, fd1;
+; CHECK: cos.approx.f64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%y = call double @llvm.cos.f64(double %x)
ret double %y
diff --git a/test/CodeGen/PTX/mad.ll b/test/CodeGen/PTX/mad.ll
index 0c25f2c..0e4d3f9 100644
--- a/test/CodeGen/PTX/mad.ll
+++ b/test/CodeGen/PTX/mad.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y, float %z) {
-; CHECK: mad.rn.f32 f0, f1, f2, f3;
+; CHECK: mad.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fmul float %x, %y
%b = fadd float %a, %z
@@ -9,7 +9,7 @@ define ptx_device float @t1_f32(float %x, float %y, float %z) {
}
define ptx_device double @t1_f64(double %x, double %y, double %z) {
-; CHECK: mad.rn.f64 fd0, fd1, fd2, fd3;
+; CHECK: mad.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%a = fmul double %x, %y
%b = fadd double %a, %z
diff --git a/test/CodeGen/PTX/mov.ll b/test/CodeGen/PTX/mov.ll
index 120572a..cce6a5b 100644
--- a/test/CodeGen/PTX/mov.ll
+++ b/test/CodeGen/PTX/mov.ll
@@ -1,61 +1,61 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i16 @t1_u16() {
-; CHECK: mov.u16 rh0, 0;
+; CHECK: mov.u16 rh{{[0-9]+}}, 0;
; CHECK: ret;
ret i16 0
}
define ptx_device i32 @t1_u32() {
-; CHECK: mov.u32 r0, 0;
+; CHECK: mov.u32 r{{[0-9]+}}, 0;
; CHECK: ret;
ret i32 0
}
define ptx_device i64 @t1_u64() {
-; CHECK: mov.u64 rd0, 0;
+; CHECK: mov.u64 rd{{[0-9]+}}, 0;
; CHECK: ret;
ret i64 0
}
define ptx_device float @t1_f32() {
-; CHECK: mov.f32 f0, 0F00000000;
+; CHECK: mov.f32 r{{[0-9]+}}, 0F00000000;
; CHECK: ret;
ret float 0.0
}
define ptx_device double @t1_f64() {
-; CHECK: mov.f64 fd0, 0D0000000000000000;
+; CHECK: mov.f64 rd{{[0-9]+}}, 0D0000000000000000;
; CHECK: ret;
ret double 0.0
}
define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: mov.u16 rh0, rh1;
+; CHECK: mov.u16 rh{{[0-9]+}}, rh{{[0-9]+}};
; CHECK: ret;
ret i16 %x
}
define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: mov.u32 r0, r1;
+; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK: ret;
ret i32 %x
}
define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: mov.u64 rd0, rd1;
+; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK: ret;
ret i64 %x
}
define ptx_device float @t3_f32(float %x) {
-; CHECK: mov.f32 f0, f1;
+; CHECK: mov.u32 r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
ret float %x
}
define ptx_device double @t3_f64(double %x) {
-; CHECK: mov.f64 fd0, fd1;
+; CHECK: mov.u64 rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
ret double %x
}
diff --git a/test/CodeGen/PTX/mul.ll b/test/CodeGen/PTX/mul.ll
index 5ce0426..491cc74 100644
--- a/test/CodeGen/PTX/mul.ll
+++ b/test/CodeGen/PTX/mul.ll
@@ -11,28 +11,28 @@
;}
define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: mul.f32 f0, f1, f2
+; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fmul float %x, %y
ret float %z
}
define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: mul.f64 fd0, fd1, fd2
+; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fmul double %x, %y
ret double %z
}
define ptx_device float @t2_f32(float %x) {
-; CHECK: mul.f32 f0, f1, 0F40A00000;
+; CHECK: mul.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0F40A00000;
; CHECK-NEXT: ret;
%z = fmul float %x, 5.0
ret float %z
}
define ptx_device double @t2_f64(double %x) {
-; CHECK: mul.f64 fd0, fd1, 0D4014000000000000;
+; CHECK: mul.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0D4014000000000000;
; CHECK-NEXT: ret;
%z = fmul double %x, 5.0
ret double %z
diff --git a/test/CodeGen/PTX/options.ll b/test/CodeGen/PTX/options.ll
index 92effa6..0fb6602 100644
--- a/test/CodeGen/PTX/options.ll
+++ b/test/CodeGen/PTX/options.ll
@@ -5,6 +5,8 @@
; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
+; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".address_size 32"
+; RUN: llc < %s -march=ptx64 -mattr=ptx23 | grep ".address_size 64"
define ptx_device void @t1() {
ret void
diff --git a/test/CodeGen/PTX/parameter-order.ll b/test/CodeGen/PTX/parameter-order.ll
index 95d4a32..b16556e 100644
--- a/test/CodeGen/PTX/parameter-order.ll
+++ b/test/CodeGen/PTX/parameter-order.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
-; CHECK: .func (.reg .u32 r0) test_parameter_order (.reg .f32 f1, .reg .u32 r1, .reg .u32 r2, .reg .f32 f2)
+; CHECK: .func (.reg .b32 r{{[0-9]+}}) test_parameter_order (.reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}}, .reg .b32 r{{[0-9]+}})
define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
-; CHECK: sub.u32 r0, r1, r2
+; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
%result = sub i32 %b, %c
ret i32 %result
}
diff --git a/test/CodeGen/PTX/selp.ll b/test/CodeGen/PTX/selp.ll
index 6f1b03e..e705fbe 100644
--- a/test/CodeGen/PTX/selp.ll
+++ b/test/CodeGen/PTX/selp.ll
@@ -1,25 +1,25 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
-; CHECK: selp.u32 r0, r1, r2, p1;
+; CHECK: selp.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}};
%a = select i1 %x, i32 %y, i32 %z
ret i32 %a
}
define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
-; CHECK: selp.u64 rd0, rd1, rd2, p1;
+; CHECK: selp.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}};
%a = select i1 %x, i64 %y, i64 %z
ret i64 %a
}
define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
-; CHECK: selp.f32 f0, f1, f2, p1;
+; CHECK: selp.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}, p{{[0-9]+}};
%a = select i1 %x, float %y, float %z
ret float %a
}
define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
-; CHECK: selp.f64 fd0, fd1, fd2, p1;
+; CHECK: selp.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}, p{{[0-9]+}};
%a = select i1 %x, double %y, double %z
ret double %a
}
diff --git a/test/CodeGen/PTX/setp.ll b/test/CodeGen/PTX/setp.ll
index 5836122..e0044d6 100644
--- a/test/CodeGen/PTX/setp.ll
+++ b/test/CodeGen/PTX/setp.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.eq.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp eq i32 %x, %y
%z = zext i1 %p to i32
@@ -10,8 +10,8 @@ define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.ne.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ne i32 %x, %y
%z = zext i1 %p to i32
@@ -19,8 +19,8 @@ define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.lt.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ult i32 %x, %y
%z = zext i1 %p to i32
@@ -28,8 +28,8 @@ define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.le.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.le.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ule i32 %x, %y
%z = zext i1 %p to i32
@@ -37,8 +37,8 @@ define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.gt.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ugt i32 %x, %y
%z = zext i1 %p to i32
@@ -46,17 +46,53 @@ define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
}
define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
-; CHECK: setp.ge.u32 p0, r1, r2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.ge.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp uge i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
+define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
+; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
+; CHECK-NEXT: ret;
+ %p = icmp slt i32 %x, %y
+ %z = zext i1 %p to i32
+ ret i32 %z
+}
+
+define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
+; CHECK: setp.le.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
+; CHECK-NEXT: ret;
+ %p = icmp sle i32 %x, %y
+ %z = zext i1 %p to i32
+ ret i32 %z
+}
+
+define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
+; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
+; CHECK-NEXT: ret;
+ %p = icmp sgt i32 %x, %y
+ %z = zext i1 %p to i32
+ ret i32 %z
+}
+
+define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
+; CHECK: setp.ge.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
+; CHECK-NEXT: ret;
+ %p = icmp sge i32 %x, %y
+ %z = zext i1 %p to i32
+ ret i32 %z
+}
+
define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
-; CHECK: setp.eq.u32 p0, r1, 1;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp eq i32 %x, 1
%z = zext i1 %p to i32
@@ -64,8 +100,8 @@ define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
-; CHECK: setp.ne.u32 p0, r1, 1;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ne i32 %x, 1
%z = zext i1 %p to i32
@@ -73,8 +109,8 @@ define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
-; CHECK: setp.eq.u32 p0, r1, 0;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.eq.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ult i32 %x, 1
%z = zext i1 %p to i32
@@ -82,8 +118,8 @@ define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
-; CHECK: setp.lt.u32 p0, r1, 2;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.lt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ule i32 %x, 1
%z = zext i1 %p to i32
@@ -91,8 +127,8 @@ define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
-; CHECK: setp.gt.u32 p0, r1, 1;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp ugt i32 %x, 1
%z = zext i1 %p to i32
@@ -100,18 +136,54 @@ define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
}
define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
-; CHECK: setp.ne.u32 p0, r1, 0;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.ne.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%p = icmp uge i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
+define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
+; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
+; CHECK-NEXT: ret;
+ %p = icmp slt i32 %x, 1
+ %z = zext i1 %p to i32
+ ret i32 %z
+}
+
+define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
+; CHECK: setp.lt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 2;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
+; CHECK-NEXT: ret;
+ %p = icmp sle i32 %x, 1
+ %z = zext i1 %p to i32
+ ret i32 %z
+}
+
+define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
+; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 1;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
+; CHECK-NEXT: ret;
+ %p = icmp sgt i32 %x, 1
+ %z = zext i1 %p to i32
+ ret i32 %z
+}
+
+define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
+; CHECK: setp.gt.s32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
+; CHECK-NEXT: ret;
+ %p = icmp sge i32 %x, 1
+ %z = zext i1 %p to i32
+ ret i32 %z
+}
+
define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
-; CHECK: setp.gt.u32 p0, r3, r4;
-; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, p0;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, r{{[0-9]+}};
+; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, p[[P0]];
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%c = icmp eq i32 %x, %y
%d = icmp ugt i32 %u, %v
@@ -121,9 +193,9 @@ define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
}
define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
-; CHECK: cvt.pred.u32 p0, r3;
-; CHECK-NEXT: setp.eq.and.u32 p0, r1, r2, !p0;
-; CHECK-NEXT: cvt.u32.pred r0, p0;
+; CHECK: setp.gt.u32 p[[P0:[0-9]+]], r{{[0-9]+}}, 0;
+; CHECK-NEXT: setp.eq.and.u32 p[[P0]], r{{[0-9]+}}, r{{[0-9]+}}, !p[[P0]];
+; CHECK-NEXT: selp.u32 r{{[0-9]+}}, 1, 0, p[[P0]];
; CHECK-NEXT: ret;
%c = trunc i32 %w to i1
%d = icmp eq i32 %x, %y
diff --git a/test/CodeGen/PTX/shl.ll b/test/CodeGen/PTX/shl.ll
index 6e72c92..b3818e1 100644
--- a/test/CodeGen/PTX/shl.ll
+++ b/test/CodeGen/PTX/shl.ll
@@ -1,21 +1,21 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i32 @t1(i32 %x, i32 %y) {
-; CHECK: shl.b32 r0, r1, r2
+; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
%z = shl i32 %x, %y
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t2(i32 %x) {
-; CHECK: shl.b32 r0, r1, 3
+; CHECK: shl.b32 r{{[0-9]+}}, r{{[0-9]+}}, 3
%z = shl i32 %x, 3
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t3(i32 %x) {
-; CHECK: shl.b32 r0, 3, r1
+; CHECK: shl.b32 r{{[0-9]+}}, 3, r{{[0-9]+}}
%z = shl i32 3, %x
; CHECK: ret;
ret i32 %z
diff --git a/test/CodeGen/PTX/shr.ll b/test/CodeGen/PTX/shr.ll
index 8693e0e..cb57546 100644
--- a/test/CodeGen/PTX/shr.ll
+++ b/test/CodeGen/PTX/shr.ll
@@ -1,42 +1,42 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i32 @t1(i32 %x, i32 %y) {
-; CHECK: shr.u32 r0, r1, r2
+; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
%z = lshr i32 %x, %y
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t2(i32 %x) {
-; CHECK: shr.u32 r0, r1, 3
+; CHECK: shr.u32 r{{[0-9]+}}, r{{[0-9]+}}, 3
%z = lshr i32 %x, 3
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t3(i32 %x) {
-; CHECK: shr.u32 r0, 3, r1
+; CHECK: shr.u32 r{{[0-9]+}}, 3, r{{[0-9]+}}
%z = lshr i32 3, %x
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t4(i32 %x, i32 %y) {
-; CHECK: shr.s32 r0, r1, r2
+; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
%z = ashr i32 %x, %y
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t5(i32 %x) {
-; CHECK: shr.s32 r0, r1, 3
+; CHECK: shr.s32 r{{[0-9]+}}, r{{[0-9]+}}, 3
%z = ashr i32 %x, 3
; CHECK: ret;
ret i32 %z
}
define ptx_device i32 @t6(i32 %x) {
-; CHECK: shr.s32 r0, -3, r1
+; CHECK: shr.s32 r{{[0-9]+}}, -3, r{{[0-9]+}}
%z = ashr i32 -3, %x
; CHECK: ret;
ret i32 %z
diff --git a/test/CodeGen/PTX/st.ll b/test/CodeGen/PTX/st.ll
index dee5c61..b08528e 100644
--- a/test/CodeGen/PTX/st.ll
+++ b/test/CodeGen/PTX/st.ll
@@ -63,7 +63,7 @@
define ptx_device void @t1_u16(i16* %p, i16 %x) {
entry:
-;CHECK: st.global.u16 [r1], rh1;
+;CHECK: st.global.u16 [r{{[0-9]+}}], rh{{[0-9]+}};
;CHECK-NEXT: ret;
store i16 %x, i16* %p
ret void
@@ -71,7 +71,7 @@ entry:
define ptx_device void @t1_u32(i32* %p, i32 %x) {
entry:
-;CHECK: st.global.u32 [r1], r2;
+;CHECK: st.global.u32 [r{{[0-9]+}}], r{{[0-9]+}};
;CHECK-NEXT: ret;
store i32 %x, i32* %p
ret void
@@ -79,7 +79,7 @@ entry:
define ptx_device void @t1_u64(i64* %p, i64 %x) {
entry:
-;CHECK: st.global.u64 [r1], rd1;
+;CHECK: st.global.u64 [r{{[0-9]+}}], rd{{[0-9]+}};
;CHECK-NEXT: ret;
store i64 %x, i64* %p
ret void
@@ -87,7 +87,7 @@ entry:
define ptx_device void @t1_f32(float* %p, float %x) {
entry:
-;CHECK: st.global.f32 [r1], f1;
+;CHECK: st.global.f32 [r{{[0-9]+}}], r{{[0-9]+}};
;CHECK-NEXT: ret;
store float %x, float* %p
ret void
@@ -95,7 +95,7 @@ entry:
define ptx_device void @t1_f64(double* %p, double %x) {
entry:
-;CHECK: st.global.f64 [r1], fd1;
+;CHECK: st.global.f64 [r{{[0-9]+}}], rd{{[0-9]+}};
;CHECK-NEXT: ret;
store double %x, double* %p
ret void
@@ -103,7 +103,7 @@ entry:
define ptx_device void @t2_u16(i16* %p, i16 %x) {
entry:
-;CHECK: st.global.u16 [r1+2], rh1;
+;CHECK: st.global.u16 [r{{[0-9]+}}+2], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i16* %p, i32 1
store i16 %x, i16* %i
@@ -112,7 +112,7 @@ entry:
define ptx_device void @t2_u32(i32* %p, i32 %x) {
entry:
-;CHECK: st.global.u32 [r1+4], r2;
+;CHECK: st.global.u32 [r{{[0-9]+}}+4], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 1
store i32 %x, i32* %i
@@ -121,7 +121,7 @@ entry:
define ptx_device void @t2_u64(i64* %p, i64 %x) {
entry:
-;CHECK: st.global.u64 [r1+8], rd1;
+;CHECK: st.global.u64 [r{{[0-9]+}}+8], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i64* %p, i32 1
store i64 %x, i64* %i
@@ -130,7 +130,7 @@ entry:
define ptx_device void @t2_f32(float* %p, float %x) {
entry:
-;CHECK: st.global.f32 [r1+4], f1;
+;CHECK: st.global.f32 [r{{[0-9]+}}+4], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 1
store float %x, float* %i
@@ -139,7 +139,7 @@ entry:
define ptx_device void @t2_f64(double* %p, double %x) {
entry:
-;CHECK: st.global.f64 [r1+8], fd1;
+;CHECK: st.global.f64 [r{{[0-9]+}}+8], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr double* %p, i32 1
store double %x, double* %i
@@ -148,9 +148,9 @@ entry:
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
entry:
-;CHECK: shl.b32 r0, r2, 1;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: st.global.u16 [r0], rh1;
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 1;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i16* %p, i32 %q
store i16 %x, i16* %i
@@ -159,9 +159,9 @@ entry:
define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
entry:
-;CHECK: shl.b32 r0, r2, 2;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: st.global.u32 [r0], r3;
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i32* %p, i32 %q
store i32 %x, i32* %i
@@ -170,9 +170,9 @@ entry:
define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
entry:
-;CHECK: shl.b32 r0, r2, 3;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: st.global.u64 [r0], rd1;
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr i64* %p, i32 %q
store i64 %x, i64* %i
@@ -181,9 +181,9 @@ entry:
define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
entry:
-;CHECK: shl.b32 r0, r2, 2;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: st.global.f32 [r0], f1;
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 2;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr float* %p, i32 %q
store float %x, float* %i
@@ -192,9 +192,9 @@ entry:
define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
entry:
-;CHECK: shl.b32 r0, r2, 3;
-;CHECK-NEXT: add.u32 r0, r1, r0;
-;CHECK-NEXT: st.global.f64 [r0], fd1;
+;CHECK: shl.b32 r[[R0:[0-9]+]], r{{[0-9]+}}, 3;
+;CHECK-NEXT: add.u32 r[[R0]], r{{[0-9]+}}, r[[R0]];
+;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr double* %p, i32 %q
store double %x, double* %i
@@ -203,8 +203,8 @@ entry:
define ptx_device void @t4_global_u16(i16 %x) {
entry:
-;CHECK: mov.u32 r0, array_i16;
-;CHECK-NEXT: st.global.u16 [r0], rh1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
+;CHECK-NEXT: st.global.u16 [r[[R0]]], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
store i16 %x, i16* %i
@@ -213,8 +213,8 @@ entry:
define ptx_device void @t4_global_u32(i32 %x) {
entry:
-;CHECK: mov.u32 r0, array_i32;
-;CHECK-NEXT: st.global.u32 [r0], r1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
+;CHECK-NEXT: st.global.u32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
store i32 %x, i32* %i
@@ -223,8 +223,8 @@ entry:
define ptx_device void @t4_global_u64(i64 %x) {
entry:
-;CHECK: mov.u32 r0, array_i64;
-;CHECK-NEXT: st.global.u64 [r0], rd1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
+;CHECK-NEXT: st.global.u64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
store i64 %x, i64* %i
@@ -233,8 +233,8 @@ entry:
define ptx_device void @t4_global_f32(float %x) {
entry:
-;CHECK: mov.u32 r0, array_float;
-;CHECK-NEXT: st.global.f32 [r0], f1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
+;CHECK-NEXT: st.global.f32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
store float %x, float* %i
@@ -243,8 +243,8 @@ entry:
define ptx_device void @t4_global_f64(double %x) {
entry:
-;CHECK: mov.u32 r0, array_double;
-;CHECK-NEXT: st.global.f64 [r0], fd1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
+;CHECK-NEXT: st.global.f64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
store double %x, double* %i
@@ -253,8 +253,8 @@ entry:
define ptx_device void @t4_local_u16(i16 %x) {
entry:
-;CHECK: mov.u32 r0, array_local_i16;
-;CHECK-NEXT: st.local.u16 [r0], rh1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i16;
+;CHECK-NEXT: st.local.u16 [r[[R0]]], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
store i16 %x, i16 addrspace(2)* %i
@@ -263,8 +263,8 @@ entry:
define ptx_device void @t4_local_u32(i32 %x) {
entry:
-;CHECK: mov.u32 r0, array_local_i32;
-;CHECK-NEXT: st.local.u32 [r0], r1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i32;
+;CHECK-NEXT: st.local.u32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
store i32 %x, i32 addrspace(2)* %i
@@ -273,8 +273,8 @@ entry:
define ptx_device void @t4_local_u64(i64 %x) {
entry:
-;CHECK: mov.u32 r0, array_local_i64;
-;CHECK-NEXT: st.local.u64 [r0], rd1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_i64;
+;CHECK-NEXT: st.local.u64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
store i64 %x, i64 addrspace(2)* %i
@@ -283,8 +283,8 @@ entry:
define ptx_device void @t4_local_f32(float %x) {
entry:
-;CHECK: mov.u32 r0, array_local_float;
-;CHECK-NEXT: st.local.f32 [r0], f1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_float;
+;CHECK-NEXT: st.local.f32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
store float %x, float addrspace(2)* %i
@@ -293,8 +293,8 @@ entry:
define ptx_device void @t4_local_f64(double %x) {
entry:
-;CHECK: mov.u32 r0, array_local_double;
-;CHECK-NEXT: st.local.f64 [r0], fd1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_local_double;
+;CHECK-NEXT: st.local.f64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
store double %x, double addrspace(2)* %i
@@ -303,8 +303,8 @@ entry:
define ptx_device void @t4_shared_u16(i16 %x) {
entry:
-;CHECK: mov.u32 r0, array_shared_i16;
-;CHECK-NEXT: st.shared.u16 [r0], rh1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i16;
+;CHECK-NEXT: st.shared.u16 [r[[R0]]], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
store i16 %x, i16 addrspace(4)* %i
@@ -313,8 +313,8 @@ entry:
define ptx_device void @t4_shared_u32(i32 %x) {
entry:
-;CHECK: mov.u32 r0, array_shared_i32;
-;CHECK-NEXT: st.shared.u32 [r0], r1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i32;
+;CHECK-NEXT: st.shared.u32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
store i32 %x, i32 addrspace(4)* %i
@@ -323,8 +323,8 @@ entry:
define ptx_device void @t4_shared_u64(i64 %x) {
entry:
-;CHECK: mov.u32 r0, array_shared_i64;
-;CHECK-NEXT: st.shared.u64 [r0], rd1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_i64;
+;CHECK-NEXT: st.shared.u64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
store i64 %x, i64 addrspace(4)* %i
@@ -333,8 +333,8 @@ entry:
define ptx_device void @t4_shared_f32(float %x) {
entry:
-;CHECK: mov.u32 r0, array_shared_float;
-;CHECK-NEXT: st.shared.f32 [r0], f1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_float;
+;CHECK-NEXT: st.shared.f32 [r[[R0]]], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
store float %x, float addrspace(4)* %i
@@ -343,8 +343,8 @@ entry:
define ptx_device void @t4_shared_f64(double %x) {
entry:
-;CHECK: mov.u32 r0, array_shared_double;
-;CHECK-NEXT: st.shared.f64 [r0], fd1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_shared_double;
+;CHECK-NEXT: st.shared.f64 [r[[R0]]], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
store double %x, double addrspace(4)* %i
@@ -353,8 +353,8 @@ entry:
define ptx_device void @t5_u16(i16 %x) {
entry:
-;CHECK: mov.u32 r0, array_i16;
-;CHECK-NEXT: st.global.u16 [r0+2], rh1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i16;
+;CHECK-NEXT: st.global.u16 [r[[R0]]+2], rh{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
store i16 %x, i16* %i
@@ -363,8 +363,8 @@ entry:
define ptx_device void @t5_u32(i32 %x) {
entry:
-;CHECK: mov.u32 r0, array_i32;
-;CHECK-NEXT: st.global.u32 [r0+4], r1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i32;
+;CHECK-NEXT: st.global.u32 [r[[R0]]+4], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
store i32 %x, i32* %i
@@ -373,8 +373,8 @@ entry:
define ptx_device void @t5_u64(i64 %x) {
entry:
-;CHECK: mov.u32 r0, array_i64;
-;CHECK-NEXT: st.global.u64 [r0+8], rd1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_i64;
+;CHECK-NEXT: st.global.u64 [r[[R0]]+8], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
store i64 %x, i64* %i
@@ -383,8 +383,8 @@ entry:
define ptx_device void @t5_f32(float %x) {
entry:
-;CHECK: mov.u32 r0, array_float;
-;CHECK-NEXT: st.global.f32 [r0+4], f1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_float;
+;CHECK-NEXT: st.global.f32 [r[[R0]]+4], r{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
store float %x, float* %i
@@ -393,8 +393,8 @@ entry:
define ptx_device void @t5_f64(double %x) {
entry:
-;CHECK: mov.u32 r0, array_double;
-;CHECK-NEXT: st.global.f64 [r0+8], fd1;
+;CHECK: mov.u32 r[[R0:[0-9]+]], array_double;
+;CHECK-NEXT: st.global.f64 [r[[R0]]+8], rd{{[0-9]+}};
;CHECK-NEXT: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
store double %x, double* %i
diff --git a/test/CodeGen/PTX/sub.ll b/test/CodeGen/PTX/sub.ll
index 7dd2c6f..acef396 100644
--- a/test/CodeGen/PTX/sub.ll
+++ b/test/CodeGen/PTX/sub.ll
@@ -1,70 +1,70 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
-; CHECK: sub.u16 rh0, rh1, rh2;
+; CHECK: sub.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, rh{{[0-9]+}};
; CHECK-NEXT: ret;
%z = sub i16 %x, %y
ret i16 %z
}
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
-; CHECK: sub.u32 r0, r1, r2;
+; CHECK: sub.u32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}};
; CHECK-NEXT: ret;
%z = sub i32 %x, %y
ret i32 %z
}
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
-; CHECK: sub.u64 rd0, rd1, rd2;
+; CHECK: sub.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}};
; CHECK-NEXT: ret;
%z = sub i64 %x, %y
ret i64 %z
}
define ptx_device float @t1_f32(float %x, float %y) {
-; CHECK: sub.f32 f0, f1, f2
+; CHECK: sub.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, r{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fsub float %x, %y
ret float %z
}
define ptx_device double @t1_f64(double %x, double %y) {
-; CHECK: sub.f64 fd0, fd1, fd2
+; CHECK: sub.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, rd{{[0-9]+}}
; CHECK-NEXT: ret;
%z = fsub double %x, %y
ret double %z
}
define ptx_device i16 @t2_u16(i16 %x) {
-; CHECK: add.u16 rh0, rh1, -1;
+; CHECK: add.u16 rh{{[0-9]+}}, rh{{[0-9]+}}, -1;
; CHECK-NEXT: ret;
%z = sub i16 %x, 1
ret i16 %z
}
define ptx_device i32 @t2_u32(i32 %x) {
-; CHECK: add.u32 r0, r1, -1;
+; CHECK: add.u32 r{{[0-9]+}}, r{{[0-9]+}}, -1;
; CHECK-NEXT: ret;
%z = sub i32 %x, 1
ret i32 %z
}
define ptx_device i64 @t2_u64(i64 %x) {
-; CHECK: add.u64 rd0, rd1, -1;
+; CHECK: add.u64 rd{{[0-9]+}}, rd{{[0-9]+}}, -1;
; CHECK-NEXT: ret;
%z = sub i64 %x, 1
ret i64 %z
}
define ptx_device float @t2_f32(float %x) {
-; CHECK: add.f32 f0, f1, 0FBF800000;
+; CHECK: add.rn.f32 r{{[0-9]+}}, r{{[0-9]+}}, 0FBF800000;
; CHECK-NEXT: ret;
%z = fsub float %x, 1.0
ret float %z
}
define ptx_device double @t2_f64(double %x) {
-; CHECK: add.f64 fd0, fd1, 0DBFF0000000000000;
+; CHECK: add.rn.f64 rd{{[0-9]+}}, rd{{[0-9]+}}, 0DBFF0000000000000;
; CHECK-NEXT: ret;
%z = fsub double %x, 1.0
ret double %z
diff --git a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
index 7b00ac6..c779288 100644
--- a/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
+++ b/test/CodeGen/PowerPC/2007-01-15-AsmDialect.ll
@@ -7,7 +7,6 @@ entry:
%temp = alloca i32, align 4 ; <i32*> [#uses=2]
%ctz_x = alloca i32, align 4 ; <i32*> [#uses=3]
%ctz_c = alloca i32, align 4 ; <i32*> [#uses=2]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 61440, i32* %ctz_x
%tmp = load i32* %ctz_x ; <i32> [#uses=1]
%tmp1 = sub i32 0, %tmp ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
index 42f2152..c141551 100644
--- a/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
+++ b/test/CodeGen/PowerPC/2007-05-22-tailmerge-3.ll
@@ -13,7 +13,6 @@ entry:
%i_addr = alloca i32 ; <i32*> [#uses=2]
%q_addr = alloca i32 ; <i32*> [#uses=2]
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %i, i32* %i_addr
store i32 %q, i32* %q_addr
%tmp = load i32* %i_addr ; <i32> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
index 2938c70..72e93a9 100644
--- a/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
+++ b/test/CodeGen/PowerPC/2007-05-30-dagcombine-miscomp.ll
@@ -4,7 +4,7 @@ target triple = "powerpc-apple-darwin8.8.0"
; RUN: llc < %s -march=ppc32 | grep {rlwinm r3, r3, 23, 30, 30}
; PR1473
-define i8 @foo(i16 zeroext %a) zeroext {
+define zeroext i8 @foo(i16 zeroext %a) {
%tmp2 = lshr i16 %a, 10 ; <i16> [#uses=1]
%tmp23 = trunc i16 %tmp2 to i8 ; <i8> [#uses=1]
%tmp4 = shl i8 %tmp23, 1 ; <i8> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll b/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
index 8776d9a..01c83cb 100644
--- a/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
+++ b/test/CodeGen/PowerPC/2008-03-24-CoalescerBug.ll
@@ -18,7 +18,7 @@ define void @"-[PFTPersistentSymbols saveSymbolWithName:address:path:lineNumber:
entry:
br i1 false, label %bb12, label %bb21
bb12: ; preds = %entry
- %tmp17 = tail call i8 inttoptr (i64 4294901504 to i8 (%struct..0objc_object*, %struct.objc_selector*, %struct.NSArray*)*)( %struct..0objc_object* null, %struct.objc_selector* null, %struct.NSArray* bitcast (%struct.__builtin_CFString* @0 to %struct.NSArray*) ) signext nounwind ; <i8> [#uses=0]
+ %tmp17 = tail call signext i8 inttoptr (i64 4294901504 to i8 (%struct..0objc_object*, %struct.objc_selector*, %struct.NSArray*)*)( %struct..0objc_object* null, %struct.objc_selector* null, %struct.NSArray* bitcast (%struct.__builtin_CFString* @0 to %struct.NSArray*) ) nounwind ; <i8> [#uses=0]
br i1 false, label %bb25, label %bb21
bb21: ; preds = %bb12, %entry
%tmp24 = or i64 %flags, 4 ; <i64> [#uses=1]
diff --git a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
index 5cd8c34..21b0c61 100644
--- a/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
+++ b/test/CodeGen/PowerPC/2008-07-15-SignExtendInreg.ll
@@ -2,7 +2,7 @@
target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f128:64:128"
target triple = "powerpc-apple-darwin9"
-define i16 @t(i16* %dct) signext nounwind {
+define signext i16 @t(i16* %dct) nounwind {
entry:
load i16* null, align 2 ; <i16>:0 [#uses=2]
lshr i16 %0, 11 ; <i16>:1 [#uses=0]
diff --git a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll b/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
deleted file mode 100644
index 83f3f6f..0000000
--- a/test/CodeGen/PowerPC/2008-10-30-IllegalShift.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc < %s -march=ppc32
-; PR2986
-@argc = external global i32 ; <i32*> [#uses=1]
-@buffer = external global [32 x i8], align 4 ; <[32 x i8]*> [#uses=1]
-
-define void @test1() nounwind noinline {
-entry:
- %0 = load i32* @argc, align 4 ; <i32> [#uses=1]
- %1 = trunc i32 %0 to i8 ; <i8> [#uses=1]
- tail call void @llvm.memset.i32(i8* getelementptr ([32 x i8]* @buffer, i32 0, i32 0), i8 %1, i32 17, i32 4)
- unreachable
-}
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll b/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
deleted file mode 100644
index 54f4b2e..0000000
--- a/test/CodeGen/PowerPC/2009-11-15-ReMatBug.ll
+++ /dev/null
@@ -1,155 +0,0 @@
-; RUN: llc < %s -mtriple=powerpc-apple-darwin8
-
-%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
-%struct.__gcov_var = type { %struct.FILE*, i32, i32, i32, i32, i32, i32, [1025 x i32] }
-%struct.__sFILEX = type opaque
-%struct.__sbuf = type { i8*, i32 }
-%struct.gcov_ctr_info = type { i32, i64*, void (i64*, i32)* }
-%struct.gcov_ctr_summary = type { i32, i32, i64, i64, i64 }
-%struct.gcov_fn_info = type { i32, i32, [0 x i32] }
-%struct.gcov_info = type { i32, %struct.gcov_info*, i32, i8*, i32, %struct.gcov_fn_info*, i32, [0 x %struct.gcov_ctr_info] }
-%struct.gcov_summary = type { i32, [1 x %struct.gcov_ctr_summary] }
-
-@__gcov_var = external global %struct.__gcov_var ; <%struct.__gcov_var*> [#uses=1]
-@__sF = external global [0 x %struct.FILE] ; <[0 x %struct.FILE]*> [#uses=1]
-@.str = external constant [56 x i8], align 4 ; <[56 x i8]*> [#uses=1]
-@gcov_list = external global %struct.gcov_info* ; <%struct.gcov_info**> [#uses=1]
-@.str7 = external constant [35 x i8], align 4 ; <[35 x i8]*> [#uses=1]
-@.str8 = external constant [9 x i8], align 4 ; <[9 x i8]*> [#uses=1]
-@.str9 = external constant [10 x i8], align 4 ; <[10 x i8]*> [#uses=1]
-@.str10 = external constant [36 x i8], align 4 ; <[36 x i8]*> [#uses=1]
-
-declare i32 @"\01_fprintf$LDBL128"(%struct.FILE*, i8*, ...) nounwind
-
-define void @gcov_exit() nounwind {
-entry:
- %gi_ptr.0357 = load %struct.gcov_info** @gcov_list, align 4 ; <%struct.gcov_info*> [#uses=1]
- %0 = alloca i8, i32 undef, align 1 ; <i8*> [#uses=3]
- br i1 undef, label %return, label %bb.nph341
-
-bb.nph341: ; preds = %entry
- %object27 = bitcast %struct.gcov_summary* undef to i8* ; <i8*> [#uses=1]
- br label %bb25
-
-bb25: ; preds = %read_fatal, %bb.nph341
- %gi_ptr.1329 = phi %struct.gcov_info* [ %gi_ptr.0357, %bb.nph341 ], [ undef, %read_fatal ] ; <%struct.gcov_info*> [#uses=1]
- call void @llvm.memset.i32(i8* %object27, i8 0, i32 36, i32 8)
- br i1 undef, label %bb49.1, label %bb48
-
-bb48: ; preds = %bb25
- br label %bb49.1
-
-bb51: ; preds = %bb48.4, %bb49.3
- switch i32 undef, label %bb58 [
- i32 0, label %rewrite
- i32 1734567009, label %bb59
- ]
-
-bb58: ; preds = %bb51
- %1 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([35 x i8]* @.str7, i32 0, i32 0), i8* %0) nounwind ; <i32> [#uses=0]
- br label %read_fatal
-
-bb59: ; preds = %bb51
- br i1 undef, label %bb60, label %bb3.i156
-
-bb3.i156: ; preds = %bb59
- store i8 52, i8* undef, align 1
- store i8 42, i8* undef, align 1
- %2 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([56 x i8]* @.str, i32 0, i32 0), i8* %0, i8* undef, i8* undef) nounwind ; <i32> [#uses=0]
- br label %read_fatal
-
-bb60: ; preds = %bb59
- br i1 undef, label %bb78.preheader, label %rewrite
-
-bb78.preheader: ; preds = %bb60
- br i1 undef, label %bb62, label %bb80
-
-bb62: ; preds = %bb78.preheader
- br i1 undef, label %bb64, label %read_mismatch
-
-bb64: ; preds = %bb62
- br i1 undef, label %bb65, label %read_mismatch
-
-bb65: ; preds = %bb64
- br i1 undef, label %bb75, label %read_mismatch
-
-read_mismatch: ; preds = %bb98, %bb119.preheader, %bb72, %bb71, %bb65, %bb64, %bb62
- %3 = icmp eq i32 undef, -1 ; <i1> [#uses=1]
- %iftmp.11.0 = select i1 %3, i8* getelementptr inbounds ([10 x i8]* @.str9, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8]* @.str8, i32 0, i32 0) ; <i8*> [#uses=1]
- %4 = call i32 (%struct.FILE*, i8*, ...)* @"\01_fprintf$LDBL128"(%struct.FILE* getelementptr inbounds ([0 x %struct.FILE]* @__sF, i32 0, i32 2), i8* getelementptr inbounds ([36 x i8]* @.str10, i32 0, i32 0), i8* %0, i8* %iftmp.11.0) nounwind ; <i32> [#uses=0]
- br label %read_fatal
-
-bb71: ; preds = %bb75
- %5 = load i32* undef, align 4 ; <i32> [#uses=1]
- %6 = getelementptr inbounds %struct.gcov_info* %gi_ptr.1329, i32 0, i32 7, i32 undef, i32 2 ; <void (i64*, i32)**> [#uses=1]
- %7 = load void (i64*, i32)** %6, align 4 ; <void (i64*, i32)*> [#uses=1]
- %8 = call i32 @__gcov_read_unsigned() nounwind ; <i32> [#uses=1]
- %9 = call i32 @__gcov_read_unsigned() nounwind ; <i32> [#uses=1]
- %10 = icmp eq i32 %tmp386, %8 ; <i1> [#uses=1]
- br i1 %10, label %bb72, label %read_mismatch
-
-bb72: ; preds = %bb71
- %11 = icmp eq i32 undef, %9 ; <i1> [#uses=1]
- br i1 %11, label %bb73, label %read_mismatch
-
-bb73: ; preds = %bb72
- call void %7(i64* null, i32 %5) nounwind
- unreachable
-
-bb74: ; preds = %bb75
- %12 = add i32 %13, 1 ; <i32> [#uses=1]
- br label %bb75
-
-bb75: ; preds = %bb74, %bb65
- %13 = phi i32 [ %12, %bb74 ], [ 0, %bb65 ] ; <i32> [#uses=2]
- %tmp386 = add i32 0, 27328512 ; <i32> [#uses=1]
- %14 = shl i32 1, %13 ; <i32> [#uses=1]
- %15 = load i32* undef, align 4 ; <i32> [#uses=1]
- %16 = and i32 %15, %14 ; <i32> [#uses=1]
- %17 = icmp eq i32 %16, 0 ; <i1> [#uses=1]
- br i1 %17, label %bb74, label %bb71
-
-bb80: ; preds = %bb78.preheader
- unreachable
-
-read_fatal: ; preds = %read_mismatch, %bb3.i156, %bb58
- br i1 undef, label %return, label %bb25
-
-rewrite: ; preds = %bb60, %bb51
- store i32 -1, i32* getelementptr inbounds (%struct.__gcov_var* @__gcov_var, i32 0, i32 6), align 4
- br i1 undef, label %bb94, label %bb119.preheader
-
-bb94: ; preds = %rewrite
- unreachable
-
-bb119.preheader: ; preds = %rewrite
- br i1 undef, label %read_mismatch, label %bb98
-
-bb98: ; preds = %bb119.preheader
- br label %read_mismatch
-
-return: ; preds = %read_fatal, %entry
- ret void
-
-bb49.1: ; preds = %bb48, %bb25
- br i1 undef, label %bb49.2, label %bb48.2
-
-bb49.2: ; preds = %bb48.2, %bb49.1
- br i1 undef, label %bb49.3, label %bb48.3
-
-bb48.2: ; preds = %bb49.1
- br label %bb49.2
-
-bb49.3: ; preds = %bb48.3, %bb49.2
- br i1 undef, label %bb51, label %bb48.4
-
-bb48.3: ; preds = %bb49.2
- br label %bb49.3
-
-bb48.4: ; preds = %bb49.3
- br label %bb51
-}
-
-declare i32 @__gcov_read_unsigned() nounwind
-
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
diff --git a/test/CodeGen/PowerPC/and-elim.ll b/test/CodeGen/PowerPC/and-elim.ll
index 3685361..a1ec29b 100644
--- a/test/CodeGen/PowerPC/and-elim.ll
+++ b/test/CodeGen/PowerPC/and-elim.ll
@@ -9,7 +9,7 @@ define void @test(i8* %P) {
ret void
}
-define i16 @test2(i16 zeroext %crc) zeroext {
+define zeroext i16 @test2(i16 zeroext %crc) {
; No and's should be needed for the i16s here.
%tmp.1 = lshr i16 %crc, 1
%tmp.7 = xor i16 %tmp.1, 40961
diff --git a/test/CodeGen/PowerPC/and_sext.ll b/test/CodeGen/PowerPC/and_sext.ll
index c6d234e..df48ccf 100644
--- a/test/CodeGen/PowerPC/and_sext.ll
+++ b/test/CodeGen/PowerPC/and_sext.ll
@@ -9,7 +9,7 @@ define i32 @test1(i32 %mode.0.i.0) {
ret i32 %tmp.81
}
-define i16 @test2(i16 signext %X, i16 signext %x) signext {
+define signext i16 @test2(i16 signext %X, i16 signext %x) {
%tmp = sext i16 %X to i32
%tmp1 = sext i16 %x to i32
%tmp2 = add i32 %tmp, %tmp1
@@ -20,7 +20,7 @@ define i16 @test2(i16 signext %X, i16 signext %x) signext {
ret i16 %retval
}
-define i16 @test3(i32 zeroext %X) signext {
+define signext i16 @test3(i32 zeroext %X) {
%tmp1 = lshr i32 %X, 16
%tmp2 = trunc i32 %tmp1 to i16
ret i16 %tmp2
diff --git a/test/CodeGen/PowerPC/atomic-1.ll b/test/CodeGen/PowerPC/atomic-1.ll
index ec4e42d..a2cf170 100644
--- a/test/CodeGen/PowerPC/atomic-1.ll
+++ b/test/CodeGen/PowerPC/atomic-1.ll
@@ -1,21 +1,23 @@
; RUN: llc < %s -march=ppc32 | grep lwarx | count 3
; RUN: llc < %s -march=ppc32 | grep stwcx. | count 4
-define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
- %tmp = call i32 @llvm.atomic.load.add.i32( i32* %mem, i32 %val )
- ret i32 %tmp
+define i32 @exchange_and_add(i32* %mem, i32 %val) nounwind {
+ %tmp = call i32 @llvm.atomic.load.add.i32.p0i32(i32* %mem, i32 %val)
+ ret i32 %tmp
}
-define i32 @exchange_and_cmp(i32* %mem) nounwind {
- %tmp = call i32 @llvm.atomic.cmp.swap.i32( i32* %mem, i32 0, i32 1 )
- ret i32 %tmp
+define i32 @exchange_and_cmp(i32* %mem) nounwind {
+ %tmp = call i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* %mem, i32 0, i32 1)
+ ret i32 %tmp
}
-define i32 @exchange(i32* %mem, i32 %val) nounwind {
- %tmp = call i32 @llvm.atomic.swap.i32( i32* %mem, i32 1 )
- ret i32 %tmp
+define i32 @exchange(i32* %mem, i32 %val) nounwind {
+ %tmp = call i32 @llvm.atomic.swap.i32.p0i32(i32* %mem, i32 1)
+ ret i32 %tmp
}
-declare i32 @llvm.atomic.load.add.i32(i32*, i32) nounwind
-declare i32 @llvm.atomic.cmp.swap.i32(i32*, i32, i32) nounwind
-declare i32 @llvm.atomic.swap.i32(i32*, i32) nounwind
+declare i32 @llvm.atomic.load.add.i32.p0i32(i32* nocapture, i32) nounwind
+
+declare i32 @llvm.atomic.cmp.swap.i32.p0i32(i32* nocapture, i32, i32) nounwind
+
+declare i32 @llvm.atomic.swap.i32.p0i32(i32* nocapture, i32) nounwind
diff --git a/test/CodeGen/PowerPC/atomic-2.ll b/test/CodeGen/PowerPC/atomic-2.ll
index 6d9daef9..0fa2a29 100644
--- a/test/CodeGen/PowerPC/atomic-2.ll
+++ b/test/CodeGen/PowerPC/atomic-2.ll
@@ -1,21 +1,23 @@
; RUN: llc < %s -march=ppc64 | grep ldarx | count 3
; RUN: llc < %s -march=ppc64 | grep stdcx. | count 4
-define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
- %tmp = call i64 @llvm.atomic.load.add.i64( i64* %mem, i64 %val )
- ret i64 %tmp
+define i64 @exchange_and_add(i64* %mem, i64 %val) nounwind {
+ %tmp = call i64 @llvm.atomic.load.add.i64.p0i64(i64* %mem, i64 %val)
+ ret i64 %tmp
}
-define i64 @exchange_and_cmp(i64* %mem) nounwind {
- %tmp = call i64 @llvm.atomic.cmp.swap.i64( i64* %mem, i64 0, i64 1 )
- ret i64 %tmp
+define i64 @exchange_and_cmp(i64* %mem) nounwind {
+ %tmp = call i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* %mem, i64 0, i64 1)
+ ret i64 %tmp
}
-define i64 @exchange(i64* %mem, i64 %val) nounwind {
- %tmp = call i64 @llvm.atomic.swap.i64( i64* %mem, i64 1 )
- ret i64 %tmp
+define i64 @exchange(i64* %mem, i64 %val) nounwind {
+ %tmp = call i64 @llvm.atomic.swap.i64.p0i64(i64* %mem, i64 1)
+ ret i64 %tmp
}
-declare i64 @llvm.atomic.load.add.i64(i64*, i64) nounwind
-declare i64 @llvm.atomic.cmp.swap.i64(i64*, i64, i64) nounwind
-declare i64 @llvm.atomic.swap.i64(i64*, i64) nounwind
+declare i64 @llvm.atomic.load.add.i64.p0i64(i64* nocapture, i64) nounwind
+
+declare i64 @llvm.atomic.cmp.swap.i64.p0i64(i64* nocapture, i64, i64) nounwind
+
+declare i64 @llvm.atomic.swap.i64.p0i64(i64* nocapture, i64) nounwind
diff --git a/test/CodeGen/PowerPC/calls.ll b/test/CodeGen/PowerPC/calls.ll
index 0db184f..29bcb20 100644
--- a/test/CodeGen/PowerPC/calls.ll
+++ b/test/CodeGen/PowerPC/calls.ll
@@ -1,7 +1,7 @@
; Test various forms of calls.
; RUN: llc < %s -march=ppc32 | \
-; RUN: grep {bl } | count 2
+; RUN: grep {bl } | count 1
; RUN: llc < %s -march=ppc32 | \
; RUN: grep {bctrl} | count 1
; RUN: llc < %s -march=ppc32 | \
@@ -14,11 +14,6 @@ define void @test_direct() {
ret void
}
-define void @test_extsym(i8* %P) {
- free i8* %P
- ret void
-}
-
define void @test_indirect(void ()* %fp) {
call void %fp( )
ret void
diff --git a/test/CodeGen/PowerPC/invalid-memcpy.ll b/test/CodeGen/PowerPC/invalid-memcpy.ll
deleted file mode 100644
index 3b1f306..0000000
--- a/test/CodeGen/PowerPC/invalid-memcpy.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=ppc32
-; RUN: llc < %s -march=ppc64
-
-; This testcase is invalid (the alignment specified for memcpy is
-; greater than the alignment guaranteed for Qux or C.0.1173), but it
-; should compile, not crash the code generator.
-
-@C.0.1173 = external constant [33 x i8] ; <[33 x i8]*> [#uses=1]
-
-define void @Bork() {
-entry:
- %Qux = alloca [33 x i8] ; <[33 x i8]*> [#uses=1]
- %Qux1 = bitcast [33 x i8]* %Qux to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i64( i8* %Qux1, i8* getelementptr ([33 x i8]* @C.0.1173, i32 0, i32 0), i64 33, i32 8 )
- ret void
-}
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
-
-
diff --git a/test/CodeGen/PowerPC/mul-with-overflow.ll b/test/CodeGen/PowerPC/mul-with-overflow.ll
index f03e3cb..76d06df 100644
--- a/test/CodeGen/PowerPC/mul-with-overflow.ll
+++ b/test/CodeGen/PowerPC/mul-with-overflow.ll
@@ -1,14 +1,14 @@
; RUN: llc < %s -march=ppc32
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-define i1 @a(i32 %x) zeroext nounwind {
+define zeroext i1 @a(i32 %x) nounwind {
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
%obil = extractvalue {i32, i1} %res, 1
ret i1 %obil
}
declare {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
-define i1 @b(i32 %x) zeroext nounwind {
+define zeroext i1 @b(i32 %x) nounwind {
%res = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %x, i32 3)
%obil = extractvalue {i32, i1} %res, 1
ret i1 %obil
diff --git a/test/CodeGen/PowerPC/multiple-return-values.ll b/test/CodeGen/PowerPC/multiple-return-values.ll
deleted file mode 100644
index b9317f9..0000000
--- a/test/CodeGen/PowerPC/multiple-return-values.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: llc < %s -march=ppc32
-; RUN: llc < %s -march=ppc64
-
-define {i64, float} @bar(i64 %a, float %b) {
- %y = add i64 %a, 7
- %z = fadd float %b, 7.0
- ret i64 %y, float %z
-}
-
-define i64 @foo() {
- %M = call {i64, float} @bar(i64 21, float 21.0)
- %N = getresult {i64, float} %M, 0
- %O = getresult {i64, float} %M, 1
- %P = fptosi float %O to i64
- %Q = add i64 %P, %N
- ret i64 %Q
-}
diff --git a/test/CodeGen/PowerPC/ppc32-vaarg.ll b/test/CodeGen/PowerPC/ppc32-vaarg.ll
new file mode 100644
index 0000000..6042991
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc32-vaarg.ll
@@ -0,0 +1,167 @@
+; RUN: llc -O0 < %s | FileCheck %s
+; ModuleID = 'test.c'
+target datalayout = "E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32"
+target triple = "powerpc-unknown-freebsd9.0"
+
+%struct.__va_list_tag = type { i8, i8, i16, i8*, i8* }
+
+@var1 = common global i64 0, align 8
+@var2 = common global double 0.0, align 8
+@var3 = common global i32 0, align 4
+
+define void @ppcvaargtest(%struct.__va_list_tag* %ap) nounwind {
+ entry:
+ %x = va_arg %struct.__va_list_tag* %ap, i64 ; Get from r5, r6
+; CHECK: lbz 4, 0(3)
+; CHECK-NEXT: lwz 5, 4(3)
+; CHECK-NEXT: rlwinm 6, 4, 0, 31, 31
+; CHECK-NEXT: cmplwi 0, 6, 0
+; CHECK-NEXT: addi 6, 4, 1
+; CHECK-NEXT: stw 3, -4(1)
+; CHECK-NEXT: stw 6, -8(1)
+; CHECK-NEXT: stw 4, -12(1)
+; CHECK-NEXT: stw 5, -16(1)
+; CHECK-NEXT: bne 0, .LBB0_2
+; CHECK-NEXT: # BB#1: # %entry
+; CHECK-NEXT: lwz 3, -12(1)
+; CHECK-NEXT: stw 3, -8(1)
+; CHECK-NEXT: .LBB0_2: # %entry
+; CHECK-NEXT: lwz 3, -8(1)
+; CHECK-NEXT: lwz 4, -4(1)
+; CHECK-NEXT: lwz 5, 8(4)
+; CHECK-NEXT: slwi 6, 3, 2
+; CHECK-NEXT: addi 7, 3, 2
+; CHECK-NEXT: cmpwi 0, 3, 8
+; CHECK-NEXT: lwz 3, -16(1)
+; CHECK-NEXT: addi 8, 3, 4
+; CHECK-NEXT: add 5, 5, 6
+; CHECK-NEXT: mfcr 0 # cr0
+; CHECK-NEXT: stw 0, -20(1)
+; CHECK-NEXT: stw 5, -24(1)
+; CHECK-NEXT: stw 3, -28(1)
+; CHECK-NEXT: stw 7, -32(1)
+; CHECK-NEXT: stw 8, -36(1)
+; CHECK-NEXT: blt 0, .LBB0_4
+; CHECK-NEXT: # BB#3: # %entry
+; CHECK-NEXT: lwz 3, -36(1)
+; CHECK-NEXT: stw 3, -28(1)
+; CHECK-NEXT: .LBB0_4: # %entry
+; CHECK-NEXT: lwz 3, -28(1)
+; CHECK-NEXT: lwz 4, -32(1)
+; CHECK-NEXT: lwz 5, -4(1)
+; CHECK-NEXT: stb 4, 0(5)
+; CHECK-NEXT: lwz 4, -24(1)
+; CHECK-NEXT: lwz 0, -20(1)
+; CHECK-NEXT: mtcrf 128, 0
+; CHECK-NEXT: stw 3, -40(1)
+; CHECK-NEXT: stw 4, -44(1)
+; CHECK-NEXT: blt 0, .LBB0_6
+; CHECK-NEXT: # BB#5: # %entry
+; CHECK-NEXT: lwz 3, -16(1)
+; CHECK-NEXT: stw 3, -44(1)
+; CHECK-NEXT: .LBB0_6: # %entry
+; CHECK-NEXT: lwz 3, -44(1)
+; CHECK-NEXT: lwz 4, -40(1)
+; CHECK-NEXT: lwz 5, -4(1)
+; CHECK-NEXT: stw 4, 4(5)
+ store i64 %x, i64* @var1, align 8
+; CHECK-NEXT: lis 4, var1@ha
+; CHECK-NEXT: lwz 6, 4(3)
+; CHECK-NEXT: lwz 3, 0(3)
+; CHECK-NEXT: la 7, var1@l(4)
+; CHECK-NEXT: stw 3, var1@l(4)
+; CHECK-NEXT: stw 6, 4(7)
+ %y = va_arg %struct.__va_list_tag* %ap, double ; From f1
+; CHECK-NEXT: lbz 3, 1(5)
+; CHECK-NEXT: lwz 4, 4(5)
+; CHECK-NEXT: lwz 6, 8(5)
+; CHECK-NEXT: slwi 7, 3, 3
+; CHECK-NEXT: add 6, 6, 7
+; CHECK-NEXT: addi 7, 3, 1
+; CHECK-NEXT: cmpwi 0, 3, 8
+; CHECK-NEXT: addi 3, 4, 8
+; CHECK-NEXT: addi 6, 6, 32
+; CHECK-NEXT: mr 8, 4
+; CHECK-NEXT: mfcr 0 # cr0
+; CHECK-NEXT: stw 0, -48(1)
+; CHECK-NEXT: stw 4, -52(1)
+; CHECK-NEXT: stw 6, -56(1)
+; CHECK-NEXT: stw 7, -60(1)
+; CHECK-NEXT: stw 3, -64(1)
+; CHECK-NEXT: stw 8, -68(1)
+; CHECK-NEXT: blt 0, .LBB0_8
+; CHECK-NEXT: # BB#7: # %entry
+; CHECK-NEXT: lwz 3, -64(1)
+; CHECK-NEXT: stw 3, -68(1)
+; CHECK-NEXT: .LBB0_8: # %entry
+; CHECK-NEXT: lwz 3, -68(1)
+; CHECK-NEXT: lwz 4, -60(1)
+; CHECK-NEXT: lwz 5, -4(1)
+; CHECK-NEXT: stb 4, 1(5)
+; CHECK-NEXT: lwz 4, -56(1)
+; CHECK-NEXT: lwz 0, -48(1)
+; CHECK-NEXT: mtcrf 128, 0
+; CHECK-NEXT: stw 4, -72(1)
+; CHECK-NEXT: stw 3, -76(1)
+; CHECK-NEXT: blt 0, .LBB0_10
+; CHECK-NEXT: # BB#9: # %entry
+; CHECK-NEXT: lwz 3, -52(1)
+; CHECK-NEXT: stw 3, -72(1)
+; CHECK-NEXT: .LBB0_10: # %entry
+; CHECK-NEXT: lwz 3, -72(1)
+; CHECK-NEXT: lwz 4, -76(1)
+; CHECK-NEXT: lwz 5, -4(1)
+; CHECK-NEXT: stw 4, 4(5)
+; CHECK-NEXT: lfd 0, 0(3)
+ store double %y, double* @var2, align 8
+; CHECK-NEXT: lis 3, var2@ha
+; CHECK-NEXT: stfd 0, var2@l(3)
+ %z = va_arg %struct.__va_list_tag* %ap, i32 ; From r7
+; CHECK-NEXT: lbz 3, 0(5)
+; CHECK-NEXT: lwz 4, 4(5)
+; CHECK-NEXT: lwz 6, 8(5)
+; CHECK-NEXT: slwi 7, 3, 2
+; CHECK-NEXT: addi 8, 3, 1
+; CHECK-NEXT: cmpwi 0, 3, 8
+; CHECK-NEXT: addi 3, 4, 4
+; CHECK-NEXT: add 6, 6, 7
+; CHECK-NEXT: mr 7, 4
+; CHECK-NEXT: stw 6, -80(1)
+; CHECK-NEXT: stw 8, -84(1)
+; CHECK-NEXT: stw 3, -88(1)
+; CHECK-NEXT: stw 4, -92(1)
+; CHECK-NEXT: stw 7, -96(1)
+; CHECK-NEXT: mfcr 0 # cr0
+; CHECK-NEXT: stw 0, -100(1)
+; CHECK-NEXT: blt 0, .LBB0_12
+; CHECK-NEXT: # BB#11: # %entry
+; CHECK-NEXT: lwz 3, -88(1)
+; CHECK-NEXT: stw 3, -96(1)
+; CHECK-NEXT: .LBB0_12: # %entry
+; CHECK-NEXT: lwz 3, -96(1)
+; CHECK-NEXT: lwz 4, -84(1)
+; CHECK-NEXT: lwz 5, -4(1)
+; CHECK-NEXT: stb 4, 0(5)
+; CHECK-NEXT: lwz 4, -80(1)
+; CHECK-NEXT: lwz 0, -100(1)
+; CHECK-NEXT: mtcrf 128, 0
+; CHECK-NEXT: stw 4, -104(1)
+; CHECK-NEXT: stw 3, -108(1)
+; CHECK-NEXT: blt 0, .LBB0_14
+; CHECK-NEXT: # BB#13: # %entry
+; CHECK-NEXT: lwz 3, -92(1)
+; CHECK-NEXT: stw 3, -104(1)
+; CHECK-NEXT: .LBB0_14: # %entry
+; CHECK-NEXT: lwz 3, -104(1)
+; CHECK-NEXT: lwz 4, -108(1)
+; CHECK-NEXT: lwz 5, -4(1)
+; CHECK-NEXT: stw 4, 4(5)
+; CHECK-NEXT: lwz 3, 0(3)
+ store i32 %z, i32* @var3, align 4
+; CHECK-NEXT: lis 4, var3@ha
+; CHECK-NEXT: stw 3, var3@l(4)
+ ret void
+; CHECK-NEXT: stw 5, -112(1)
+; CHECK-NEXT: blr
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64-32bit-addic.ll b/test/CodeGen/PowerPC/ppc64-32bit-addic.ll
new file mode 100644
index 0000000..4d323da
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-32bit-addic.ll
@@ -0,0 +1,29 @@
+; Check that the ADDIC optimizations are not applied on PPC64
+; RUN: llc < %s | FileCheck %s
+; ModuleID = 'os_unix.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd9.0"
+
+define i32 @notZero(i32 %call) nounwind {
+entry:
+; CHECK-NOT: addic
+ %not.tobool = icmp ne i32 %call, 0
+ %. = zext i1 %not.tobool to i32
+ ret i32 %.
+}
+
+define i32 @isMinusOne(i32 %call) nounwind {
+entry:
+; CHECK-NOT: addic
+ %not.tobool = icmp eq i32 %call, -1
+ %. = zext i1 %not.tobool to i32
+ ret i32 %.
+}
+
+define i32 @isNotMinusOne(i32 %call) nounwind {
+entry:
+; CHECK-NOT: addic
+ %not.tobool = icmp ne i32 %call, -1
+ %. = zext i1 %not.tobool to i32
+ ret i32 %.
+}
diff --git a/test/CodeGen/PowerPC/ppc64-crash.ll b/test/CodeGen/PowerPC/ppc64-crash.ll
new file mode 100644
index 0000000..073c322
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64-crash.ll
@@ -0,0 +1,14 @@
+; RUN: llc %s -o -
+
+; ModuleID = 'undo.c'
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-unknown-freebsd"
+
+%struct.__sFILE = type {}
+%struct.pos_T = type { i64 }
+
+; Check that we do not copy values between R and X registers.
+define internal void @serialize_pos(%struct.pos_T* byval %pos, %struct.__sFILE* %fp) nounwind {
+entry:
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/small-arguments.ll b/test/CodeGen/PowerPC/small-arguments.ll
index 31bcee6..b4767b0 100644
--- a/test/CodeGen/PowerPC/small-arguments.ll
+++ b/test/CodeGen/PowerPC/small-arguments.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=ppc32 | not grep {extsh\\|rlwinm}
-declare i16 @foo() signext
+declare signext i16 @foo()
define i32 @test1(i16 signext %X) {
%Y = sext i16 %X to i32 ;; dead
@@ -14,12 +14,12 @@ define i32 @test2(i16 zeroext %X) {
}
define void @test3() {
- %tmp.0 = call i16 @foo() signext ;; no extsh!
+ %tmp.0 = call signext i16 @foo() ;; no extsh!
%tmp.1 = icmp slt i16 %tmp.0, 1234
br i1 %tmp.1, label %then, label %UnifiedReturnBlock
then:
- call i32 @test1(i16 0 signext)
+ call i32 @test1(i16 signext 0)
ret void
UnifiedReturnBlock:
ret void
@@ -46,7 +46,7 @@ define i32 @test6(i32* %P) {
ret i32 %tmp.2
}
-define i16 @test7(float %a) zeroext {
+define zeroext i16 @test7(float %a) {
%tmp.1 = fptoui float %a to i16
ret i16 %tmp.1
}
diff --git a/test/CodeGen/PowerPC/vector.ll b/test/CodeGen/PowerPC/vector.ll
index ee4da31..e4c3b0d 100644
--- a/test/CodeGen/PowerPC/vector.ll
+++ b/test/CodeGen/PowerPC/vector.ll
@@ -1,6 +1,6 @@
; Test that vectors are scalarized/lowered correctly.
; RUN: llc < %s -march=ppc32 -mcpu=g5 > %t
-; RUN: llc < %s -march=ppc32 -mcpu=g3 > %t
+; RUN: llc < %s -march=ppc32 -mcpu=g3 >> %t
%d8 = type <8 x double>
%f1 = type <1 x float>
diff --git a/test/CodeGen/SPARC/private.ll b/test/CodeGen/SPARC/private.ll
index f091aa6..f06ccd0 100644
--- a/test/CodeGen/SPARC/private.ll
+++ b/test/CodeGen/SPARC/private.ll
@@ -6,8 +6,6 @@
; RUN: grep .baz: %t
; RUN: grep ld.*\.baz %t
-declare void @foo()
-
define private void @foo() {
ret void
}
diff --git a/test/CodeGen/SystemZ/02-MemArith.ll b/test/CodeGen/SystemZ/02-MemArith.ll
index 04022a0..ee9e5e9 100644
--- a/test/CodeGen/SystemZ/02-MemArith.ll
+++ b/test/CodeGen/SystemZ/02-MemArith.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=systemz | FileCheck %s
-define i32 @foo1(i32 %a, i32 *%b, i64 %idx) signext {
+define signext i32 @foo1(i32 %a, i32 *%b, i64 %idx) {
; CHECK: foo1:
; CHECK: a %r2, 4(%r1,%r3)
entry:
@@ -11,7 +11,7 @@ entry:
ret i32 %d
}
-define i32 @foo2(i32 %a, i32 *%b, i64 %idx) signext {
+define signext i32 @foo2(i32 %a, i32 *%b, i64 %idx) {
; CHECK: foo2:
; CHECK: ay %r2, -4(%r1,%r3)
entry:
@@ -22,7 +22,7 @@ entry:
ret i32 %d
}
-define i64 @foo3(i64 %a, i64 *%b, i64 %idx) signext {
+define signext i64 @foo3(i64 %a, i64 *%b, i64 %idx) {
; CHECK: foo3:
; CHECK: ag %r2, 8(%r1,%r3)
entry:
@@ -33,7 +33,7 @@ entry:
ret i64 %d
}
-define i32 @foo4(i32 %a, i32 *%b, i64 %idx) signext {
+define signext i32 @foo4(i32 %a, i32 *%b, i64 %idx) {
; CHECK: foo4:
; CHECK: n %r2, 4(%r1,%r3)
entry:
@@ -44,7 +44,7 @@ entry:
ret i32 %d
}
-define i32 @foo5(i32 %a, i32 *%b, i64 %idx) signext {
+define signext i32 @foo5(i32 %a, i32 *%b, i64 %idx) {
; CHECK: foo5:
; CHECK: ny %r2, -4(%r1,%r3)
entry:
@@ -55,7 +55,7 @@ entry:
ret i32 %d
}
-define i64 @foo6(i64 %a, i64 *%b, i64 %idx) signext {
+define signext i64 @foo6(i64 %a, i64 *%b, i64 %idx) {
; CHECK: foo6:
; CHECK: ng %r2, 8(%r1,%r3)
entry:
@@ -66,7 +66,7 @@ entry:
ret i64 %d
}
-define i32 @foo7(i32 %a, i32 *%b, i64 %idx) signext {
+define signext i32 @foo7(i32 %a, i32 *%b, i64 %idx) {
; CHECK: foo7:
; CHECK: o %r2, 4(%r1,%r3)
entry:
@@ -77,7 +77,7 @@ entry:
ret i32 %d
}
-define i32 @foo8(i32 %a, i32 *%b, i64 %idx) signext {
+define signext i32 @foo8(i32 %a, i32 *%b, i64 %idx) {
; CHECK: foo8:
; CHECK: oy %r2, -4(%r1,%r3)
entry:
@@ -88,7 +88,7 @@ entry:
ret i32 %d
}
-define i64 @foo9(i64 %a, i64 *%b, i64 %idx) signext {
+define signext i64 @foo9(i64 %a, i64 *%b, i64 %idx) {
; CHECK: foo9:
; CHECK: og %r2, 8(%r1,%r3)
entry:
@@ -99,7 +99,7 @@ entry:
ret i64 %d
}
-define i32 @foo10(i32 %a, i32 *%b, i64 %idx) signext {
+define signext i32 @foo10(i32 %a, i32 *%b, i64 %idx) {
; CHECK: foo10:
; CHECK: x %r2, 4(%r1,%r3)
entry:
@@ -110,7 +110,7 @@ entry:
ret i32 %d
}
-define i32 @foo11(i32 %a, i32 *%b, i64 %idx) signext {
+define signext i32 @foo11(i32 %a, i32 *%b, i64 %idx) {
; CHECK: foo11:
; CHECK: xy %r2, -4(%r1,%r3)
entry:
@@ -121,7 +121,7 @@ entry:
ret i32 %d
}
-define i64 @foo12(i64 %a, i64 *%b, i64 %idx) signext {
+define signext i64 @foo12(i64 %a, i64 *%b, i64 %idx) {
; CHECK: foo12:
; CHECK: xg %r2, 8(%r1,%r3)
entry:
diff --git a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
index 0a81271..0a7f5ee 100644
--- a/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetAddImmSubreg.ll
@@ -16,25 +16,25 @@ entry:
ret i32 %c
}
-define i32 @foo3(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo3(i32 %a, i32 %b) {
entry:
%c = add i32 %a, 1
ret i32 %c
}
-define i32 @foo4(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo4(i32 %a, i32 %b) {
entry:
%c = add i32 %a, 131072
ret i32 %c
}
-define i32 @foo5(i32 %a, i32 %b) signext {
+define signext i32 @foo5(i32 %a, i32 %b) {
entry:
%c = add i32 %a, 1
ret i32 %c
}
-define i32 @foo6(i32 %a, i32 %b) signext {
+define signext i32 @foo6(i32 %a, i32 %b) {
entry:
%c = add i32 %a, 131072
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetAddSubreg.ll b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
index 2787083..337bb3f 100644
--- a/test/CodeGen/SystemZ/03-RetAddSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetAddSubreg.ll
@@ -8,13 +8,13 @@ entry:
ret i32 %c
}
-define i32 @foo1(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo1(i32 %a, i32 %b) {
entry:
%c = add i32 %a, %b
ret i32 %c
}
-define i32 @foo2(i32 %a, i32 %b) signext {
+define signext i32 @foo2(i32 %a, i32 %b) {
entry:
%c = add i32 %a, %b
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
index 32673dd..c5326ab 100644
--- a/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetAndImmSubreg.ll
@@ -12,25 +12,25 @@ entry:
ret i32 %c
}
-define i32 @foo3(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo3(i32 %a, i32 %b) {
entry:
%c = and i32 %a, 1
ret i32 %c
}
-define i32 @foo4(i32 %a, i32 %b) signext {
+define signext i32 @foo4(i32 %a, i32 %b) {
entry:
%c = and i32 %a, 131072
ret i32 %c
}
-define i32 @foo5(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo5(i32 %a, i32 %b) {
entry:
%c = and i32 %a, 1
ret i32 %c
}
-define i32 @foo6(i32 %a, i32 %b) signext {
+define signext i32 @foo6(i32 %a, i32 %b) {
entry:
%c = and i32 %a, 131072
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetAndSubreg.ll b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
index ed5e526..75dc90a 100644
--- a/test/CodeGen/SystemZ/03-RetAndSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetAndSubreg.ll
@@ -7,13 +7,13 @@ entry:
ret i32 %c
}
-define i32 @foo1(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo1(i32 %a, i32 %b) {
entry:
%c = and i32 %a, %b
ret i32 %c
}
-define i32 @foo2(i32 %a, i32 %b) signext {
+define signext i32 @foo2(i32 %a, i32 %b) {
entry:
%c = and i32 %a, %b
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetArgSubreg.ll b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
index 0c9bb14..476821a 100644
--- a/test/CodeGen/SystemZ/03-RetArgSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetArgSubreg.ll
@@ -8,12 +8,12 @@ entry:
ret i32 %b
}
-define i32 @foo1(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo1(i32 %a, i32 %b) {
entry:
ret i32 %b
}
-define i32 @foo2(i32 %a, i32 %b) signext {
+define signext i32 @foo2(i32 %a, i32 %b) {
entry:
ret i32 %b
}
diff --git a/test/CodeGen/SystemZ/03-RetImmSubreg.ll b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
index 343e30b..70da913 100644
--- a/test/CodeGen/SystemZ/03-RetImmSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetImmSubreg.ll
@@ -30,12 +30,12 @@ entry:
ret i32 4294967295
}
-define i32 @foo6() zeroext {
+define zeroext i32 @foo6() {
entry:
ret i32 4294967295
}
-define i32 @foo7() signext {
+define signext i32 @foo7() {
entry:
ret i32 4294967295
}
diff --git a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
index 6d118b5e..99adea8 100644
--- a/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetOrImmSubreg.ll
@@ -22,37 +22,37 @@ entry:
ret i32 %c
}
-define i32 @foo3(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo3(i32 %a, i32 %b) {
entry:
%c = or i32 %a, 1
ret i32 %c
}
-define i32 @foo8(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo8(i32 %a, i32 %b) {
entry:
%c = or i32 %a, 123456
ret i32 %c
}
-define i32 @foo4(i32 %a, i32 %b) signext {
+define signext i32 @foo4(i32 %a, i32 %b) {
entry:
%c = or i32 %a, 131072
ret i32 %c
}
-define i32 @foo5(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo5(i32 %a, i32 %b) {
entry:
%c = or i32 %a, 1
ret i32 %c
}
-define i32 @foo6(i32 %a, i32 %b) signext {
+define signext i32 @foo6(i32 %a, i32 %b) {
entry:
%c = or i32 %a, 131072
ret i32 %c
}
-define i32 @foo9(i32 %a, i32 %b) signext {
+define signext i32 @foo9(i32 %a, i32 %b) {
entry:
%c = or i32 %a, 123456
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetOrSubreg.ll b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
index 4d7661a..7dab5ca 100644
--- a/test/CodeGen/SystemZ/03-RetOrSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetOrSubreg.ll
@@ -9,13 +9,13 @@ entry:
ret i32 %c
}
-define i32 @foo1(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo1(i32 %a, i32 %b) {
entry:
%c = or i32 %a, %b
ret i32 %c
}
-define i32 @foo2(i32 %a, i32 %b) signext {
+define signext i32 @foo2(i32 %a, i32 %b) {
entry:
%c = or i32 %a, %b
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
index 11ca796..21ea9b5 100644
--- a/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetSubImmSubreg.ll
@@ -16,25 +16,25 @@ entry:
ret i32 %c
}
-define i32 @foo3(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo3(i32 %a, i32 %b) {
entry:
%c = sub i32 %a, 1
ret i32 %c
}
-define i32 @foo4(i32 %a, i32 %b) signext {
+define signext i32 @foo4(i32 %a, i32 %b) {
entry:
%c = sub i32 %a, 131072
ret i32 %c
}
-define i32 @foo5(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo5(i32 %a, i32 %b) {
entry:
%c = sub i32 %a, 1
ret i32 %c
}
-define i32 @foo6(i32 %a, i32 %b) signext {
+define signext i32 @foo6(i32 %a, i32 %b) {
entry:
%c = sub i32 %a, 131072
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetSubSubreg.ll b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
index b3e1ac2..24b7631 100644
--- a/test/CodeGen/SystemZ/03-RetSubSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetSubSubreg.ll
@@ -8,13 +8,13 @@ entry:
ret i32 %c
}
-define i32 @foo1(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo1(i32 %a, i32 %b) {
entry:
%c = sub i32 %a, %b
ret i32 %c
}
-define i32 @foo2(i32 %a, i32 %b) signext {
+define signext i32 @foo2(i32 %a, i32 %b) {
entry:
%c = sub i32 %a, %b
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
index 0033126..70ee454 100644
--- a/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetXorImmSubreg.ll
@@ -20,37 +20,37 @@ entry:
ret i32 %c
}
-define i32 @foo3(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo3(i32 %a, i32 %b) {
entry:
%c = xor i32 %a, 1
ret i32 %c
}
-define i32 @foo8(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo8(i32 %a, i32 %b) {
entry:
%c = xor i32 %a, 123456
ret i32 %c
}
-define i32 @foo4(i32 %a, i32 %b) signext {
+define signext i32 @foo4(i32 %a, i32 %b) {
entry:
%c = xor i32 %a, 131072
ret i32 %c
}
-define i32 @foo5(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo5(i32 %a, i32 %b) {
entry:
%c = xor i32 %a, 1
ret i32 %c
}
-define i32 @foo6(i32 %a, i32 %b) signext {
+define signext i32 @foo6(i32 %a, i32 %b) {
entry:
%c = xor i32 %a, 131072
ret i32 %c
}
-define i32 @foo9(i32 %a, i32 %b) signext {
+define signext i32 @foo9(i32 %a, i32 %b) {
entry:
%c = xor i32 %a, 123456
ret i32 %c
diff --git a/test/CodeGen/SystemZ/03-RetXorSubreg.ll b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
index a9af231..02c4a2a 100644
--- a/test/CodeGen/SystemZ/03-RetXorSubreg.ll
+++ b/test/CodeGen/SystemZ/03-RetXorSubreg.ll
@@ -9,13 +9,13 @@ entry:
ret i32 %c
}
-define i32 @foo1(i32 %a, i32 %b) zeroext {
+define zeroext i32 @foo1(i32 %a, i32 %b) {
entry:
%c = xor i32 %a, %b
ret i32 %c
}
-define i32 @foo2(i32 %a, i32 %b) signext {
+define signext i32 @foo2(i32 %a, i32 %b) {
entry:
%c = xor i32 %a, %b
ret i32 %c
diff --git a/test/CodeGen/SystemZ/11-BSwap.ll b/test/CodeGen/SystemZ/11-BSwap.ll
index b170a80..1aa9c67 100644
--- a/test/CodeGen/SystemZ/11-BSwap.ll
+++ b/test/CodeGen/SystemZ/11-BSwap.ll
@@ -5,34 +5,34 @@ target datalayout = "E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:3
target triple = "s390x-ibm-linux"
-define i16 @foo(i16 zeroext %a) zeroext {
+define zeroext i16 @foo(i16 zeroext %a) {
%res = tail call i16 @llvm.bswap.i16(i16 %a)
ret i16 %res
}
-define i32 @foo2(i32 zeroext %a) zeroext {
+define zeroext i32 @foo2(i32 zeroext %a) {
; CHECK: foo2:
-; CHECK: lrvr %r1, %r2
+; CHECK: lrvr [[R1:%r.]], %r2
%res = tail call i32 @llvm.bswap.i32(i32 %a)
ret i32 %res
}
-define i64 @foo3(i64 %a) zeroext {
+define zeroext i64 @foo3(i64 %a) {
; CHECK: foo3:
; CHECK: lrvgr %r2, %r2
%res = tail call i64 @llvm.bswap.i64(i64 %a)
ret i64 %res
}
-define i16 @foo4(i16* %b) zeroext {
+define zeroext i16 @foo4(i16* %b) {
%a = load i16* %b
%res = tail call i16 @llvm.bswap.i16(i16 %a)
ret i16 %res
}
-define i32 @foo5(i32* %b) zeroext {
+define zeroext i32 @foo5(i32* %b) {
; CHECK: foo5:
-; CHECK: lrv %r1, 0(%r2)
+; CHECK: lrv [[R1:%r.]], 0(%r2)
%a = load i32* %b
%res = tail call i32 @llvm.bswap.i32(i32 %a)
ret i32 %res
diff --git a/test/CodeGen/Thumb/2007-03-06-AddR7.ll b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
deleted file mode 100644
index 8d139e9..0000000
--- a/test/CodeGen/Thumb/2007-03-06-AddR7.ll
+++ /dev/null
@@ -1,117 +0,0 @@
-; RUN: llc < %s -march=thumb
-; RUN: llc < %s -mtriple=thumb-apple-darwin -relocation-model=pic \
-; RUN: -mattr=+v6,+vfp2 | not grep {add r., r7, #2 \\* 4}
-
- %struct.__fooAllocator = type opaque
- %struct.__fooY = type { %struct.fooXBase, %struct.__fooString*, %struct.__fooU*, %struct.__fooV*, i8** }
- %struct.__fooZ = type opaque
- %struct.__fooU = type opaque
- %struct.__fooString = type opaque
- %struct.__fooV = type opaque
- %struct.fooXBase = type { i32, [4 x i8] }
- %struct.fooXClass = type { i32, i8*, void (i8*)*, i8* (%struct.__fooAllocator*, i8*)*, void (i8*)*, i8 (i8*, i8*) zeroext *, i32 (i8*)*, %struct.__fooString* (i8*, %struct.__fooZ*)*, %struct.__fooString* (i8*)* }
- %struct.aa_cache = type { i32, i32, [1 x %struct.aa_method*] }
- %struct.aa_class = type { %struct.aa_class*, %struct.aa_class*, i8*, i32, i32, i32, %struct.aa_ivar_list*, %struct.aa_method_list**, %struct.aa_cache*, %struct.aa_protocol_list* }
- %struct.aa_ivar = type { i8*, i8*, i32 }
- %struct.aa_ivar_list = type { i32, [1 x %struct.aa_ivar] }
- %struct.aa_method = type { %struct.aa_ss*, i8*, %struct.aa_object* (%struct.aa_object*, %struct.aa_ss*, ...)* }
- %struct.aa_method_list = type { %struct.aa_method_list*, i32, [1 x %struct.aa_method] }
- %struct.aa_object = type { %struct.aa_class* }
- %struct.aa_protocol_list = type { %struct.aa_protocol_list*, i32, [1 x %struct.aa_object*] }
- %struct.aa_ss = type opaque
-@__kfooYTypeID = external global i32 ; <i32*> [#uses=3]
-@__fooYClass = external constant %struct.fooXClass ; <%struct.fooXClass*> [#uses=1]
-@__fooXClassTableSize = external global i32 ; <i32*> [#uses=1]
-@__fooXAaClassTable = external global i32* ; <i32**> [#uses=1]
-@s.10319 = external global %struct.aa_ss* ; <%struct.aa_ss**> [#uses=2]
-@str15 = external constant [24 x i8] ; <[24 x i8]*> [#uses=1]
-
-
-define i8 @test(%struct.__fooY* %calendar, double* %atp, i8* %componentDesc, ...) zeroext {
-entry:
- %args = alloca i8*, align 4 ; <i8**> [#uses=5]
- %args4 = bitcast i8** %args to i8* ; <i8*> [#uses=2]
- call void @llvm.va_start( i8* %args4 )
- %tmp6 = load i32* @__kfooYTypeID ; <i32> [#uses=1]
- icmp eq i32 %tmp6, 0 ; <i1>:0 [#uses=1]
- br i1 %0, label %cond_true, label %cond_next
-
-cond_true: ; preds = %entry
- %tmp7 = call i32 @_fooXRegisterClass( %struct.fooXClass* @__fooYClass ) ; <i32> [#uses=1]
- store i32 %tmp7, i32* @__kfooYTypeID
- br label %cond_next
-
-cond_next: ; preds = %cond_true, %entry
- %tmp8 = load i32* @__kfooYTypeID ; <i32> [#uses=2]
- %tmp15 = load i32* @__fooXClassTableSize ; <i32> [#uses=1]
- icmp ugt i32 %tmp15, %tmp8 ; <i1>:1 [#uses=1]
- br i1 %1, label %cond_next18, label %cond_true58
-
-cond_next18: ; preds = %cond_next
- %tmp21 = getelementptr %struct.__fooY* %calendar, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp22 = load i32* %tmp21 ; <i32> [#uses=2]
- %tmp29 = load i32** @__fooXAaClassTable ; <i32*> [#uses=1]
- %tmp31 = getelementptr i32* %tmp29, i32 %tmp8 ; <i32*> [#uses=1]
- %tmp32 = load i32* %tmp31 ; <i32> [#uses=1]
- icmp eq i32 %tmp22, %tmp32 ; <i1>:2 [#uses=1]
- %.not = xor i1 %2, true ; <i1> [#uses=1]
- icmp ugt i32 %tmp22, 4095 ; <i1>:3 [#uses=1]
- %bothcond = and i1 %.not, %3 ; <i1> [#uses=1]
- br i1 %bothcond, label %cond_true58, label %bb48
-
-bb48: ; preds = %cond_next18
- %tmp78 = call i32 @strlen( i8* %componentDesc ) ; <i32> [#uses=4]
- %tmp92 = alloca i32, i32 %tmp78 ; <i32*> [#uses=2]
- icmp sgt i32 %tmp78, 0 ; <i1>:4 [#uses=1]
- br i1 %4, label %cond_true111, label %bb114
-
-cond_true58: ; preds = %cond_next18, %cond_next
- %tmp59 = load %struct.aa_ss** @s.10319 ; <%struct.aa_ss*> [#uses=2]
- icmp eq %struct.aa_ss* %tmp59, null ; <i1>:5 [#uses=1]
- %tmp6869 = bitcast %struct.__fooY* %calendar to i8* ; <i8*> [#uses=2]
- br i1 %5, label %cond_true60, label %cond_next64
-
-cond_true60: ; preds = %cond_true58
- %tmp63 = call %struct.aa_ss* @sel_registerName( i8* getelementptr ([24 x i8]* @str15, i32 0, i32 0) ) ; <%struct.aa_ss*> [#uses=2]
- store %struct.aa_ss* %tmp63, %struct.aa_ss** @s.10319
- %tmp66137 = volatile load i8** %args ; <i8*> [#uses=1]
- %tmp73138 = call i8 (i8*, %struct.aa_ss*, ...) zeroext * bitcast (%struct.aa_object* (%struct.aa_object*, %struct.aa_ss*, ...)* @aa_mm to i8 (i8*, %struct.aa_ss*, ...) zeroext *)( i8* %tmp6869, %struct.aa_ss* %tmp63, double* %atp, i8* %componentDesc, i8* %tmp66137) zeroext ; <i8> [#uses=1]
- ret i8 %tmp73138
-
-cond_next64: ; preds = %cond_true58
- %tmp66 = volatile load i8** %args ; <i8*> [#uses=1]
- %tmp73 = call i8 (i8*, %struct.aa_ss*, ...) zeroext * bitcast (%struct.aa_object* (%struct.aa_object*, %struct.aa_ss*, ...)* @aa_mm to i8 (i8*, %struct.aa_ss*, ...) zeroext *)( i8* %tmp6869, %struct.aa_ss* %tmp59, double* %atp, i8* %componentDesc, i8* %tmp66 ) zeroext ; <i8> [#uses=1]
- ret i8 %tmp73
-
-cond_true111: ; preds = %cond_true111, %bb48
- %idx.2132.0 = phi i32 [ 0, %bb48 ], [ %indvar.next, %cond_true111 ] ; <i32> [#uses=2]
- %tmp95 = volatile load i8** %args ; <i8*> [#uses=2]
- %tmp97 = getelementptr i8* %tmp95, i32 4 ; <i8*> [#uses=1]
- volatile store i8* %tmp97, i8** %args
- %tmp9899 = bitcast i8* %tmp95 to i32* ; <i32*> [#uses=1]
- %tmp100 = load i32* %tmp9899 ; <i32> [#uses=1]
- %tmp104 = getelementptr i32* %tmp92, i32 %idx.2132.0 ; <i32*> [#uses=1]
- store i32 %tmp100, i32* %tmp104
- %indvar.next = add i32 %idx.2132.0, 1 ; <i32> [#uses=2]
- icmp eq i32 %indvar.next, %tmp78 ; <i1>:6 [#uses=1]
- br i1 %6, label %bb114, label %cond_true111
-
-bb114: ; preds = %cond_true111, %bb48
- call void @llvm.va_end( i8* %args4 )
- %tmp122 = call i8 @_fooYCCV( %struct.__fooY* %calendar, double* %atp, i8* %componentDesc, i32* %tmp92, i32 %tmp78 ) zeroext ; <i8> [#uses=1]
- ret i8 %tmp122
-}
-
-declare i32 @_fooXRegisterClass(%struct.fooXClass*)
-
-declare i8 @_fooYCCV(%struct.__fooY*, double*, i8*, i32*, i32) zeroext
-
-declare %struct.aa_object* @aa_mm(%struct.aa_object*, %struct.aa_ss*, ...)
-
-declare %struct.aa_ss* @sel_registerName(i8*)
-
-declare void @llvm.va_start(i8*)
-
-declare i32 @strlen(i8*)
-
-declare void @llvm.va_end(i8*)
diff --git a/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll b/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll
deleted file mode 100644
index 9cdcd31..0000000
--- a/test/CodeGen/Thumb/2009-07-19-SPDecBug.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llc < %s -mtriple=thumbv6-elf | not grep "subs sp"
-; PR4567
-
-define i8* @__gets_chk(i8* %s, i32 %slen) nounwind {
-entry:
- br i1 undef, label %bb, label %bb1
-
-bb: ; preds = %entry
- ret i8* undef
-
-bb1: ; preds = %entry
- br i1 undef, label %bb3, label %bb2
-
-bb2: ; preds = %bb1
- %0 = alloca i8, i32 undef, align 4 ; <i8*> [#uses=0]
- br label %bb4
-
-bb3: ; preds = %bb1
- %1 = malloc i8, i32 undef ; <i8*> [#uses=0]
- br label %bb4
-
-bb4: ; preds = %bb3, %bb2
- br i1 undef, label %bb5, label %bb6
-
-bb5: ; preds = %bb4
- %2 = call i8* @gets(i8* %s) nounwind ; <i8*> [#uses=1]
- ret i8* %2
-
-bb6: ; preds = %bb4
- unreachable
-}
-
-declare i8* @gets(i8*) nounwind
diff --git a/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll b/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
new file mode 100644
index 0000000..d39a760
--- /dev/null
+++ b/test/CodeGen/Thumb/2011-06-16-NoGPRs.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s
+;
+; This test would crash because isel creates a GPR register for the return
+; value from f1. The register is only used by tBLXr_r9, which accepts a full
+; GPR register, but we cannot have live GPRs in Thumb mode because we don't
+; know how to spill them.
+;
+; <rdar://problem/9624323>
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv6-apple-darwin10"
+
+%0 = type opaque
+
+declare i8* (i8*, i8*, ...)* @f1(i8*, i8*) optsize
+declare i8* @f2(i8*, i8*, ...)
+
+define internal void @f(i8* %self, i8* %_cmd, %0* %inObjects, %0* %inIndexes) optsize ssp {
+entry:
+ %call14 = tail call i8* (i8*, i8*, ...)* (i8*, i8*)* @f1(i8* undef, i8* %_cmd) optsize
+ %0 = bitcast i8* (i8*, i8*, ...)* %call14 to void (i8*, i8*, %0*, %0*)*
+ tail call void %0(i8* %self, i8* %_cmd, %0* %inObjects, %0* %inIndexes) optsize
+ tail call void bitcast (i8* (i8*, i8*, ...)* @f2 to void (i8*, i8*, i32, %0*, %0*)*)(i8* %self, i8* undef, i32 2, %0* %inIndexes, %0* undef) optsize
+ ret void
+}
diff --git a/test/CodeGen/Thumb/barrier.ll b/test/CodeGen/Thumb/barrier.ll
index 419c3ba..d39b50f 100644
--- a/test/CodeGen/Thumb/barrier.ll
+++ b/test/CodeGen/Thumb/barrier.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=thumbv6-apple-darwin | FileCheck %s -check-prefix=V6
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=-db | FileCheck %s -check-prefix=V6
-; RUN: llc < %s -march=thumb -mattr=+v6m | FileCheck %s -check-prefix=V6M
+; RUN: llc < %s -march=thumb -mcpu=cortex-m0 | FileCheck %s -check-prefix=V6M
declare void @llvm.memory.barrier(i1 , i1 , i1 , i1 , i1)
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
index cd35be6..f3f0834 100644
--- a/test/CodeGen/Thumb/dyn-stackalloc.ll
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -60,7 +60,7 @@ define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
%tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
%tmp6.len = call i32 @strlen( i8* %tmp6 )
%tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
- call void @llvm.memcpy.i32( i8* %tmp6.indexed, i8* getelementptr ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1 )
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp6.indexed, i8* getelementptr inbounds ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1, i1 false)
%tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
ret void
@@ -72,6 +72,6 @@ declare i8* @strcat(i8*, i8*)
declare fastcc void @comment_add(%struct.comment*, i8*)
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare i8* @strcpy(i8*, i8*)
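
As the updated declaration above shows, llvm.memcpy is now overloaded on its pointer and length types (hence the `.p0i8.p0i8.i32` suffix) and takes an explicit `i1` volatile flag as a fifth argument. A minimal sketch of a call in the new form; the wrapper function and buffer names are illustrative:

declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind

define void @copy16(i8* %dst, i8* %src) nounwind {
  ; copy 16 bytes with 4-byte alignment, non-volatile
  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 16, i32 4, i1 false)
  ret void
}
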
diff --git a/test/CodeGen/Thumb/inlineasm-thumb.ll b/test/CodeGen/Thumb/inlineasm-thumb.ll
new file mode 100644
index 0000000..f2683c8
--- /dev/null
+++ b/test/CodeGen/Thumb/inlineasm-thumb.ll
@@ -0,0 +1,7 @@
+; RUN: llc < %s -march=thumb | FileCheck %s
+define i32 @t1(i32 %x, i32 %y) nounwind {
+entry:
+ ; CHECK: mov r0, r12
+ %0 = tail call i32 asm "mov $0, $1", "=l,h"(i32 %y) nounwind
+ ret i32 %0
+}
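
In the constraint string "=l,h" above, 'l' requests a Thumb low register (r0-r7) for the output and 'h' a high register for the input, which is what forces the checked `mov r0, r12`. A minimal sketch using the low-register constraint on both operands; the function name is illustrative:

define i32 @low_copy(i32 %x) nounwind {
  ; both operands constrained to low registers r0-r7
  %0 = tail call i32 asm "mov $0, $1", "=l,l"(i32 %x) nounwind
  ret i32 %0
}
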
diff --git a/test/CodeGen/Thumb/select.ll b/test/CodeGen/Thumb/select.ll
index 780e5fa..3f10b05 100644
--- a/test/CodeGen/Thumb/select.ll
+++ b/test/CodeGen/Thumb/select.ll
@@ -1,10 +1,5 @@
-; RUN: llc < %s -march=thumb | grep beq | count 1
-; RUN: llc < %s -march=thumb | grep bgt | count 1
-; RUN: llc < %s -march=thumb | grep blt | count 3
-; RUN: llc < %s -march=thumb | grep ble | count 1
-; RUN: llc < %s -march=thumb | grep bls | count 1
-; RUN: llc < %s -march=thumb | grep bhi | count 1
-; RUN: llc < %s -mtriple=thumb-apple-darwin | grep __ltdf2
+; RUN: llc < %s -mtriple=thumb-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=thumb-pc-linux-gnueabi | FileCheck -check-prefix=CHECK-EABI %s
define i32 @f1(i32 %a.s) {
entry:
@@ -12,6 +7,10 @@ entry:
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
+; CHECK: f1:
+; CHECK: beq
+; CHECK-EABI: f1:
+; CHECK-EABI: beq
define i32 @f2(i32 %a.s) {
entry:
@@ -19,6 +18,10 @@ entry:
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
+; CHECK: f2:
+; CHECK: bgt
+; CHECK-EABI: f2:
+; CHECK-EABI: bgt
define i32 @f3(i32 %a.s, i32 %b.s) {
entry:
@@ -26,6 +29,10 @@ entry:
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
+; CHECK: f3:
+; CHECK: blt
+; CHECK-EABI: f3:
+; CHECK-EABI: blt
define i32 @f4(i32 %a.s, i32 %b.s) {
entry:
@@ -33,6 +40,10 @@ entry:
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
+; CHECK: f4:
+; CHECK: ble
+; CHECK-EABI: f4:
+; CHECK-EABI: ble
define i32 @f5(i32 %a.u, i32 %b.u) {
entry:
@@ -40,6 +51,10 @@ entry:
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
+; CHECK: f5:
+; CHECK: bls
+; CHECK-EABI: f5:
+; CHECK-EABI: bls
define i32 @f6(i32 %a.u, i32 %b.u) {
entry:
@@ -47,9 +62,21 @@ entry:
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
+; CHECK: f6:
+; CHECK: bhi
+; CHECK-EABI: f6:
+; CHECK-EABI: bhi
define double @f7(double %a, double %b) {
%tmp = fcmp olt double %a, 1.234e+00
%tmp1 = select i1 %tmp, double -1.000e+00, double %b
ret double %tmp1
}
+; CHECK: f7:
+; CHECK: blt
+; CHECK: blt
+; CHECK: __ltdf2
+; CHECK-EABI: f7:
+; CHECK-EABI: __aeabi_dcmplt
+; CHECK-EABI: bne
+; CHECK-EABI: bne
diff --git a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
index 41b3029..0b56103 100644
--- a/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-02-CoalescerBug.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
- type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16)*, i32 } ; type %0
- type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*)*, i32 } ; type %1
- type { void (%"struct.xalanc_1_8::FormatterToXML"*, %"struct.xalanc_1_8::XalanDOMString"*)*, i32 } ; type %2
- type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32, i32)*, i32 } ; type %3
- type { void (%"struct.xalanc_1_8::FormatterToXML"*)*, i32 } ; type %4
+ %0 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16)*, i32 } ; type %0
+ %1 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*)*, i32 } ; type %1
+ %2 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, %"struct.xalanc_1_8::XalanDOMString"*)*, i32 } ; type %2
+ %3 = type { void (%"struct.xalanc_1_8::FormatterToXML"*, i16*, i32, i32)*, i32 } ; type %3
+ %4 = type { void (%"struct.xalanc_1_8::FormatterToXML"*)*, i32 } ; type %4
%"struct.std::CharVectorType" = type { %"struct.std::_Vector_base<char,std::allocator<char> >" }
%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
%"struct.std::_Bit_iterator_base" = type { i32*, i32 }
diff --git a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
index b832637..28ac28b 100644
--- a/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-CoalescerBug.ll
@@ -1,10 +1,10 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim
- type { %struct.GAP } ; type %0
- type { i16, i8, i8 } ; type %1
- type { [2 x i32], [2 x i32] } ; type %2
- type { %struct.rec* } ; type %3
- type { i8, i8, i16, i8, i8, i8, i8 } ; type %4
+ %0 = type { %struct.GAP } ; type %0
+ %1 = type { i16, i8, i8 } ; type %1
+ %2 = type { [2 x i32], [2 x i32] } ; type %2
+ %3 = type { %struct.rec* } ; type %3
+ %4 = type { i8, i8, i16, i8, i8, i8, i8 } ; type %4
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.FILE_POS = type { i8, i8, i16, i32 }
%struct.FIRST_UNION = type { %struct.FILE_POS }
diff --git a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
index 02fad4b..88accf8 100644
--- a/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-ScavengerAssert.ll
@@ -1,11 +1,11 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -relocation-model=pic -disable-fp-elim -O3
- type { i16, i8, i8 } ; type %0
- type { [2 x i32], [2 x i32] } ; type %1
- type { %struct.GAP } ; type %2
- type { %struct.rec* } ; type %3
- type { i8, i8, i16, i8, i8, i8, i8 } ; type %4
- type { i8, i8, i8, i8 } ; type %5
+ %0 = type { i16, i8, i8 } ; type %0
+ %1 = type { [2 x i32], [2 x i32] } ; type %1
+ %2 = type { %struct.GAP } ; type %2
+ %3 = type { %struct.rec* } ; type %3
+ %4 = type { i8, i8, i16, i8, i8, i8, i8 } ; type %4
+ %5 = type { i8, i8, i8, i8 } ; type %5
%struct.COMPOSITE = type { i8, i16, i16 }
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.FILE_POS = type { i8, i8, i16, i32 }
diff --git a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
index bfea003..779e100 100644
--- a/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
+++ b/test/CodeGen/Thumb2/2009-08-04-SubregLoweringBug.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | not grep fcpys
; rdar://7117307
@@ -13,7 +12,7 @@ entry:
br i1 undef, label %bb, label %bb6.preheader
bb6.preheader: ; preds = %entry
- call void @llvm.memcpy.i32(i8* undef, i8* undef, i32 12, i32 4)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* undef, i8* undef, i32 12, i32 4, i1 false)
br i1 undef, label %bb15, label %bb13
bb: ; preds = %entry
@@ -31,4 +30,4 @@ bb15: ; preds = %bb13, %bb6.preheader
ret void
}
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
index 9aee910..18c2e0b 100644
--- a/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
+++ b/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
@@ -13,7 +13,7 @@ define weak arm_aapcs_vfpcc i32 @_ZNKSs7compareERKSs(%"struct.std::basic_string<
; CHECK: _ZNKSs7compareERKSs:
; CHECK: it eq
; CHECK-NEXT: subeq{{(.w)?}} r0, r{{[0-9]+}}, r{{[0-9]+}}
-; CHECK-NEXT: ldmia.w sp!,
+; CHECK-NEXT: pop.w
entry:
%0 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
%1 = tail call arm_aapcs_vfpcc i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]
diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
index 9ed6a01..01fb0a5 100644
--- a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
+++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O3 -relocation-model=pic -mattr=+thumb2 -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -O3 -relocation-model=pic -mattr=+thumb2 -mcpu=cortex-a8 -disable-branch-fold | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
@@ -26,7 +26,7 @@ entry:
; CHECK: vldr.64 [[LDR:d.*]],
; CHECK: LPC0_0:
; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
-; CHECK: vmov.f64 [[LDR]]
+; CHECK-NOT: vmov.f64 [[ADD]]
%5 = fadd <2 x double> %3, %3 ; <<2 x double>> [#uses=2]
%6 = fadd <2 x double> %4, %4 ; <<2 x double>> [#uses=2]
%tmp7 = extractelement <2 x double> %5, i32 0 ; <double> [#uses=1]
diff --git a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
index 9e6d78e..b1ce3bb 100644
--- a/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
+++ b/test/CodeGen/Thumb2/2011-06-07-TwoAddrEarlyClobber.ll
@@ -1,13 +1,11 @@
-; RUN: llc < %s | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
+; RUN: llc -mtriple=thumbv7-apple-darwin10 < %s | FileCheck %s
%struct.op = type { %struct.op*, %struct.op*, %struct.op* ()*, i32, i16, i16, i8, i8 }
; CHECK: Perl_ck_sort
-; CHECK: ldr
-; CHECK: mov [[REGISTER:(r[0-9]+)|(lr)]]
-; CHECK: str {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24
+; CHECK: ldreq
+; CHECK: moveq [[REGISTER:(r[0-9]+)|(lr)]]
+; CHECK: streq {{(r[0-9])|(lr)}}, {{\[}}[[REGISTER]]{{\]}}, #24
define void @Perl_ck_sort() nounwind optsize {
entry:
diff --git a/test/CodeGen/Thumb2/ldr-str-imm12.ll b/test/CodeGen/Thumb2/ldr-str-imm12.ll
index e1932bd..4597ba5 100644
--- a/test/CodeGen/Thumb2/ldr-str-imm12.ll
+++ b/test/CodeGen/Thumb2/ldr-str-imm12.ll
@@ -22,7 +22,7 @@
define %union.rec* @Manifest(%union.rec* %x, %union.rec* %env, %struct.STYLE* %style, %union.rec** %bthr, %union.rec** %fthr, %union.rec** %target, %union.rec** %crs, i32 %ok, i32 %need_expand, %union.rec** %enclose, i32 %fcr) nounwind {
entry:
-; CHECK: ldr.w {{(r[0-9]+)|(lr)}}, [r7, #28]
+; CHECK: ldr{{(.w)?}} {{(r[0-9]+)|(lr)}}, [r7, #28]
%xgaps.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0]
%ycomp.i = alloca [32 x %union.rec*], align 4 ; <[32 x %union.rec*]*> [#uses=0]
br label %bb20
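
The `ldr{{(.w)?}}` pattern above uses FileCheck's inline-regex syntax: anything inside `{{ }}` is matched as a regular expression, so the test now accepts either the narrow `ldr` or the wide `ldr.w` encoding. A minimal sketch of the idiom; the function and CHECK line are illustrative:

define i32 @flex(i32* %p) nounwind {
; Matches either the 16-bit "ldr" or the 32-bit "ldr.w" encoding.
; CHECK: ldr{{(.w)?}} r0, [r0]
  %v = load i32* %p
  ret i32 %v
}
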
diff --git a/test/CodeGen/Thumb2/lsr-deficiency.ll b/test/CodeGen/Thumb2/lsr-deficiency.ll
index ad957a1..9ff114e 100644
--- a/test/CodeGen/Thumb2/lsr-deficiency.ll
+++ b/test/CodeGen/Thumb2/lsr-deficiency.ll
@@ -13,16 +13,16 @@
define void @t() nounwind optsize {
; CHECK: t:
-; CHECK: mov.w r2, #1000
+; CHECK: mov{{.*}}, #1000
entry:
%.pre = load i32* @G, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
; CHECK: LBB0_1:
-; CHECK: cmp r2, #0
-; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], r2, #1
-; CHECK: mov r2, [[REGISTER]]
+; CHECK: cmp [[R2:r[0-9]+]], #0
+; CHECK: sub{{(.w)?}} [[REGISTER:(r[0-9]+)|(lr)]], [[R2]], #1
+; CHECK: mov [[R2]], [[REGISTER]]
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
diff --git a/test/CodeGen/Thumb2/machine-licm.ll b/test/CodeGen/Thumb2/machine-licm.ll
index ee054a1..b199d69 100644
--- a/test/CodeGen/Thumb2/machine-licm.ll
+++ b/test/CodeGen/Thumb2/machine-licm.ll
@@ -8,26 +8,25 @@
define void @t1(i32* nocapture %vals, i32 %c) nounwind {
entry:
; CHECK: t1:
-; CHECK: cbz
+; CHECK: bxeq lr
+
%0 = icmp eq i32 %c, 0 ; <i1> [#uses=1]
br i1 %0, label %return, label %bb.nph
bb.nph: ; preds = %entry
-; CHECK: BB#1
; CHECK: movw r[[R2:[0-9]+]], :lower16:L_GV$non_lazy_ptr
; CHECK: movt r[[R2]], :upper16:L_GV$non_lazy_ptr
; CHECK: ldr{{(.w)?}} r[[R2b:[0-9]+]], [r[[R2]]
; CHECK: ldr{{.*}}, [r[[R2b]]
-; CHECK: LBB0_2
+; CHECK: LBB0_
; CHECK-NOT: LCPI0_0:
-; PIC: BB#1
; PIC: movw r[[R2:[0-9]+]], :lower16:(L_GV$non_lazy_ptr-(LPC0_0+4))
; PIC: movt r[[R2]], :upper16:(L_GV$non_lazy_ptr-(LPC0_0+4))
; PIC: add r[[R2]], pc
; PIC: ldr{{(.w)?}} r[[R2b:[0-9]+]], [r[[R2]]
; PIC: ldr{{.*}}, [r[[R2b]]
-; PIC: LBB0_2
+; PIC: LBB0_
; PIC-NOT: LCPI0_0:
; PIC: .section
%.pre = load i32* @GV, align 4 ; <i32> [#uses=1]
@@ -52,8 +51,8 @@ return: ; preds = %bb, %entry
define void @t2(i8* %ptr1, i8* %ptr2) nounwind {
entry:
; CHECK: t2:
-; CHECK: mov.w r3, #1065353216
-; CHECK: vdup.32 q{{.*}}, r3
+; CHECK: mov.w [[R3:r[0-9]+]], #1065353216
+; CHECK: vdup.32 q{{.*}}, [[R3]]
br i1 undef, label %bb1, label %bb2
bb1:
diff --git a/test/CodeGen/Thumb2/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll
index 5e25cf6..66fca13 100644
--- a/test/CodeGen/Thumb2/thumb2-add.ll
+++ b/test/CodeGen/Thumb2/thumb2-add.ll
@@ -1,48 +1,81 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #255
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #256
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #257
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4094
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4095
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep #4096
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep add | grep lsl | grep #8
+; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
define i32 @t2ADDrc_255(i32 %lhs) {
+; CHECK: t2ADDrc_255:
+; CHECK-NOT: bx lr
+; CHECK: add{{.*}} #255
+; CHECK: bx lr
+
%Rd = add i32 %lhs, 255
ret i32 %Rd
}
define i32 @t2ADDrc_256(i32 %lhs) {
+; CHECK: t2ADDrc_256:
+; CHECK-NOT: bx lr
+; CHECK: add{{.*}} #256
+; CHECK: bx lr
+
%Rd = add i32 %lhs, 256
ret i32 %Rd
}
define i32 @t2ADDrc_257(i32 %lhs) {
+; CHECK: t2ADDrc_257:
+; CHECK-NOT: bx lr
+; CHECK: add{{.*}} #257
+; CHECK: bx lr
+
%Rd = add i32 %lhs, 257
ret i32 %Rd
}
define i32 @t2ADDrc_4094(i32 %lhs) {
+; CHECK: t2ADDrc_4094:
+; CHECK-NOT: bx lr
+; CHECK: add{{.*}} #4094
+; CHECK: bx lr
+
%Rd = add i32 %lhs, 4094
ret i32 %Rd
}
define i32 @t2ADDrc_4095(i32 %lhs) {
+; CHECK: t2ADDrc_4095:
+; CHECK-NOT: bx lr
+; CHECK: add{{.*}} #4095
+; CHECK: bx lr
+
%Rd = add i32 %lhs, 4095
ret i32 %Rd
}
define i32 @t2ADDrc_4096(i32 %lhs) {
+; CHECK: t2ADDrc_4096:
+; CHECK-NOT: bx lr
+; CHECK: add{{.*}} #4096
+; CHECK: bx lr
+
%Rd = add i32 %lhs, 4096
ret i32 %Rd
}
define i32 @t2ADDrr(i32 %lhs, i32 %rhs) {
+; CHECK: t2ADDrr:
+; CHECK-NOT: bx lr
+; CHECK: add
+; CHECK: bx lr
+
%Rd = add i32 %lhs, %rhs
ret i32 %Rd
}
define i32 @t2ADDrs(i32 %lhs, i32 %rhs) {
+; CHECK: t2ADDrs:
+; CHECK-NOT: bx lr
+; CHECK: add{{.*}} lsl #8
+; CHECK: bx lr
+
%tmp = shl i32 %rhs, 8
%Rd = add i32 %lhs, %tmp
ret i32 %Rd
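
Each converted function above relies on the same FileCheck ordering idiom: the `CHECK-NOT: bx lr` between the label and the `add` rejects any return emitted before the add, and the trailing `CHECK: bx lr` then ties the add to this function's body rather than a later one. A minimal sketch of the idiom; the immediate is illustrative:

define i32 @t2ADDrc_42(i32 %lhs) {
; CHECK: t2ADDrc_42:
; CHECK-NOT: bx lr
; CHECK: add{{.*}} #42
; CHECK: bx lr
  %Rd = add i32 %lhs, 42
  ret i32 %Rd
}
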
diff --git a/test/CodeGen/Thumb2/thumb2-bcc.ll b/test/CodeGen/Thumb2/thumb2-bcc.ll
index aae9f5c..70febc06 100644
--- a/test/CodeGen/Thumb2/thumb2-bcc.ll
+++ b/test/CodeGen/Thumb2/thumb2-bcc.ll
@@ -1,5 +1,8 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2 | FileCheck %s
; RUN: llc < %s -march=thumb -mattr=+thumb2 | not grep it
+; If-conversion defeats the purpose of this test, which is to check CBZ
+; generation, so use a memory barrier instruction to make sure it doesn't
+; happen and we get actual branches.
define i32 @t1(i32 %a, i32 %b, i32 %c) {
; CHECK: t1:
@@ -8,12 +11,16 @@ define i32 @t1(i32 %a, i32 %b, i32 %c) {
br i1 %tmp2, label %cond_false, label %cond_true
cond_true:
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
%tmp5 = add i32 %b, 1
%tmp6 = and i32 %tmp5, %c
ret i32 %tmp6
cond_false:
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
%tmp7 = add i32 %b, -1
%tmp8 = xor i32 %tmp7, %c
ret i32 %tmp8
}
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
diff --git a/test/CodeGen/Thumb2/thumb2-branch.ll b/test/CodeGen/Thumb2/thumb2-branch.ll
index 1d2af7a..4d9eda0 100644
--- a/test/CodeGen/Thumb2/thumb2-branch.ll
+++ b/test/CodeGen/Thumb2/thumb2-branch.ll
@@ -1,4 +1,7 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+thumb2 | FileCheck %s
+; If-conversion defeats the purpose of this test, which is to check conditional
+; branch generation, so use a memory barrier instruction to make sure it doesn't
+; happen and we get actual branches.
define i32 @f1(i32 %a, i32 %b, i32* %v) {
entry:
@@ -8,10 +11,12 @@ entry:
br i1 %tmp, label %cond_true, label %return
cond_true: ; preds = %entry
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
store i32 0, i32* %v
ret i32 0
return: ; preds = %entry
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
ret i32 1
}
@@ -23,10 +28,12 @@ entry:
br i1 %tmp, label %cond_true, label %return
cond_true: ; preds = %entry
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
store i32 0, i32* %v
ret i32 0
return: ; preds = %entry
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
ret i32 1
}
@@ -38,10 +45,12 @@ entry:
br i1 %tmp, label %cond_true, label %return
cond_true: ; preds = %entry
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
store i32 0, i32* %v
ret i32 0
return: ; preds = %entry
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
ret i32 1
}
@@ -53,9 +62,13 @@ entry:
br i1 %tmp, label %return, label %cond_true
cond_true: ; preds = %entry
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
store i32 0, i32* %v
ret i32 0
return: ; preds = %entry
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
ret i32 1
}
+
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
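
The barriers work because the if-converter will not predicate or merge a block containing an llvm.memory.barrier call, so each successor keeps its own conditional branch for FileCheck to find. A minimal sketch of the pattern; the comparison and block names are illustrative:

define i32 @branchy(i32 %a, i32 %b) {
entry:
  %t = icmp eq i32 %a, 0
  br i1 %t, label %then, label %else

then:
  ; the barrier keeps this block from being folded into a predicated sequence
  call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
  ret i32 %b

else:
  call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 false)
  ret i32 0
}

declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
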
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
index 74728bf..00a54a0 100644
--- a/test/CodeGen/Thumb2/thumb2-clz.ll
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7 | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
index d842d4d..a4035bb 100644
--- a/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
+++ b/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
@@ -2,8 +2,10 @@
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK: t1:
-; CHECK: it ne
+; CHECK: ittt ne
; CHECK: cmpne
+; CHECK: addne
+; CHECK: bxne lr
switch i32 %c, label %cond_next [
i32 1, label %cond_true
i32 7, label %cond_true
@@ -70,8 +72,9 @@ entry:
define void @t3(i32 %a, i32 %b) nounwind {
entry:
; CHECK: t3:
-; CHECK: it lt
-; CHECK: poplt {r7, pc}
+; CHECK: itt ge
+; CHECK: movge r0, r1
+; CHECK: blge _foo
%tmp1 = icmp sgt i32 %a, 10 ; <i1> [#uses=1]
br i1 %tmp1, label %cond_true, label %UnifiedReturnBlock
diff --git a/test/CodeGen/Thumb2/thumb2-mulhi.ll b/test/CodeGen/Thumb2/thumb2-mulhi.ll
index 5d47770..9d4840a 100644
--- a/test/CodeGen/Thumb2/thumb2-mulhi.ll
+++ b/test/CodeGen/Thumb2/thumb2-mulhi.ll
@@ -1,7 +1,8 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep smmul | count 1
-; RUN: llc < %s -march=thumb -mattr=+thumb2 | grep umull | count 1
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2dsp | FileCheck %s
define i32 @smulhi(i32 %x, i32 %y) {
+; CHECK: smulhi
+; CHECK: smmul r0, r1, r0
%tmp = sext i32 %x to i64 ; <i64> [#uses=1]
%tmp1 = sext i32 %y to i64 ; <i64> [#uses=1]
%tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
@@ -11,6 +12,8 @@ define i32 @smulhi(i32 %x, i32 %y) {
}
define i32 @umulhi(i32 %x, i32 %y) {
+; CHECK: umulhi
+; CHECK: umull r1, r0, r1, r0
%tmp = zext i32 %x to i64 ; <i64> [#uses=1]
%tmp1 = zext i32 %y to i64 ; <i64> [#uses=1]
%tmp2 = mul i64 %tmp1, %tmp ; <i64> [#uses=1]
diff --git a/test/CodeGen/Thumb2/thumb2-rev.ll b/test/CodeGen/Thumb2/thumb2-rev.ll
index 2cee2e3..b469bbd 100644
--- a/test/CodeGen/Thumb2/thumb2-rev.ll
+++ b/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7a,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+v7,+t2xtpk | FileCheck %s
define i32 @f1(i32 %a) {
; CHECK: f1:
diff --git a/test/CodeGen/Thumb2/thumb2-sbc.ll b/test/CodeGen/Thumb2/thumb2-sbc.ll
index 53f45ea..492e5f0 100644
--- a/test/CodeGen/Thumb2/thumb2-sbc.ll
+++ b/test/CodeGen/Thumb2/thumb2-sbc.ll
@@ -56,7 +56,6 @@ define i64 @f6(i64 %a) {
;
; CHECK: livecarry:
; CHECK: adds
-; CHECK: adcs
; CHECK: adc
define i64 @livecarry(i64 %carry, i32 %digit) nounwind {
%ch = lshr i64 %carry, 32
diff --git a/test/CodeGen/Thumb2/thumb2-smla.ll b/test/CodeGen/Thumb2/thumb2-smla.ll
index bd4dcbe..c128ecc 100644
--- a/test/CodeGen/Thumb2/thumb2-smla.ll
+++ b/test/CodeGen/Thumb2/thumb2-smla.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
define i32 @f3(i32 %a, i16 %x, i32 %y) {
; CHECK: f3
diff --git a/test/CodeGen/Thumb2/thumb2-smul.ll b/test/CodeGen/Thumb2/thumb2-smul.ll
index ae17535..7a13269 100644
--- a/test/CodeGen/Thumb2/thumb2-smul.ll
+++ b/test/CodeGen/Thumb2/thumb2-smul.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
+; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk,+t2dsp | FileCheck %s
@x = weak global i16 0 ; <i16*> [#uses=1]
@y = weak global i16 0 ; <i16*> [#uses=0]
diff --git a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
index 4b685a8..f3d0edf 100644
--- a/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-sxt_rot.ll
@@ -7,7 +7,7 @@ define i32 @test0(i8 %A) {
ret i32 %B
}
-define i8 @test1(i32 %A) signext {
+define signext i8 @test1(i32 %A) {
; CHECK: test1
; CHECK: sxtb.w r0, r0, ror #8
%B = lshr i32 %A, 8
@@ -17,7 +17,7 @@ define i8 @test1(i32 %A) signext {
ret i8 %E
}
-define i32 @test2(i32 %A, i32 %X) signext {
+define signext i32 @test2(i32 %A, i32 %X) {
; CHECK: test2
; CHECK: lsrs r0, r0, #8
; CHECK: sxtab r0, r1, r0
diff --git a/test/CodeGen/Thumb2/thumb2-teq.ll b/test/CodeGen/Thumb2/thumb2-teq.ll
index 566408a..00c928f 100644
--- a/test/CodeGen/Thumb2/thumb2-teq.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq.ll
@@ -4,15 +4,6 @@
; test as 'mov.w r0, #0'. So far, that requires physreg joining.
; 0x000000bb = 187
-define i1 @f1(i32 %a) {
- %tmp = xor i32 %a, 187
- %tmp1 = icmp ne i32 %tmp, 0
- ret i1 %tmp1
-}
-; CHECK: f1:
-; CHECK: teq.w r0, #187
-
-; 0x000000bb = 187
define i1 @f2(i32 %a) {
%tmp = xor i32 %a, 187
%tmp1 = icmp eq i32 0, %tmp
@@ -30,24 +21,6 @@ define i1 @f3(i32 %a) {
; CHECK: f3:
; CHECK: teq.w r0, #11141290
-; 0x00aa00aa = 11141290
-define i1 @f4(i32 %a) {
- %tmp = xor i32 %a, 11141290
- %tmp1 = icmp ne i32 0, %tmp
- ret i1 %tmp1
-}
-; CHECK: f4:
-; CHECK: teq.w r0, #11141290
-
-; 0xcc00cc00 = 3422604288
-define i1 @f5(i32 %a) {
- %tmp = xor i32 %a, 3422604288
- %tmp1 = icmp ne i32 %tmp, 0
- ret i1 %tmp1
-}
-; CHECK: f5:
-; CHECK: teq.w r0, #-872363008
-
; 0xcc00cc00 = 3422604288
define i1 @f6(i32 %a) {
%tmp = xor i32 %a, 3422604288
@@ -72,17 +45,6 @@ define i1 @f8(i32 %a) {
%tmp1 = icmp ne i32 0, %tmp
ret i1 %tmp1
}
-; CHECK: f8:
-; CHECK: teq.w r0, #-572662307
-
-; 0x00110000 = 1114112
-define i1 @f9(i32 %a) {
- %tmp = xor i32 %a, 1114112
- %tmp1 = icmp ne i32 %tmp, 0
- ret i1 %tmp1
-}
-; CHECK: f9:
-; CHECK: teq.w r0, #1114112
; 0x00110000 = 1114112
define i1 @f10(i32 %a) {
diff --git a/test/CodeGen/Thumb2/thumb2-teq2.ll b/test/CodeGen/Thumb2/thumb2-teq2.ll
index cdd3489..8acae90 100644
--- a/test/CodeGen/Thumb2/thumb2-teq2.ll
+++ b/test/CodeGen/Thumb2/thumb2-teq2.ll
@@ -3,14 +3,6 @@
; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
-define i1 @f1(i32 %a, i32 %b) {
-; CHECK: f1
-; CHECK: teq.w r0, r1
- %tmp = xor i32 %a, %b
- %tmp1 = icmp ne i32 %tmp, 0
- ret i1 %tmp1
-}
-
define i1 @f2(i32 %a, i32 %b) {
; CHECK: f2
; CHECK: teq.w r0, r1
@@ -19,14 +11,6 @@ define i1 @f2(i32 %a, i32 %b) {
ret i1 %tmp1
}
-define i1 @f3(i32 %a, i32 %b) {
-; CHECK: f3
-; CHECK: teq.w r0, r1
- %tmp = xor i32 %a, %b
- %tmp1 = icmp ne i32 0, %tmp
- ret i1 %tmp1
-}
-
define i1 @f4(i32 %a, i32 %b) {
; CHECK: f4
; CHECK: teq.w r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-tst.ll b/test/CodeGen/Thumb2/thumb2-tst.ll
index 47f553f..43e208c 100644
--- a/test/CodeGen/Thumb2/thumb2-tst.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst.ll
@@ -4,15 +4,6 @@
; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
; 0x000000bb = 187
-define i1 @f1(i32 %a) {
- %tmp = and i32 %a, 187
- %tmp1 = icmp ne i32 %tmp, 0
- ret i1 %tmp1
-}
-; CHECK: f1:
-; CHECK: tst.w r0, #187
-
-; 0x000000bb = 187
define i1 @f2(i32 %a) {
%tmp = and i32 %a, 187
%tmp1 = icmp eq i32 0, %tmp
@@ -30,24 +21,6 @@ define i1 @f3(i32 %a) {
; CHECK: f3:
; CHECK: tst.w r0, #11141290
-; 0x00aa00aa = 11141290
-define i1 @f4(i32 %a) {
- %tmp = and i32 %a, 11141290
- %tmp1 = icmp ne i32 0, %tmp
- ret i1 %tmp1
-}
-; CHECK: f4:
-; CHECK: tst.w r0, #11141290
-
-; 0xcc00cc00 = 3422604288
-define i1 @f5(i32 %a) {
- %tmp = and i32 %a, 3422604288
- %tmp1 = icmp ne i32 %tmp, 0
- ret i1 %tmp1
-}
-; CHECK: f5:
-; CHECK: tst.w r0, #-872363008
-
; 0xcc00cc00 = 3422604288
define i1 @f6(i32 %a) {
%tmp = and i32 %a, 3422604288
@@ -66,24 +39,6 @@ define i1 @f7(i32 %a) {
; CHECK: f7:
; CHECK: tst.w r0, #-572662307
-; 0xdddddddd = 3722304989
-define i1 @f8(i32 %a) {
- %tmp = and i32 %a, 3722304989
- %tmp1 = icmp ne i32 0, %tmp
- ret i1 %tmp1
-}
-; CHECK: f8:
-; CHECK: tst.w r0, #-572662307
-
-; 0x00110000 = 1114112
-define i1 @f9(i32 %a) {
- %tmp = and i32 %a, 1114112
- %tmp1 = icmp ne i32 %tmp, 0
- ret i1 %tmp1
-}
-; CHECK: f9:
-; CHECK: tst.w r0, #1114112
-
; 0x00110000 = 1114112
define i1 @f10(i32 %a) {
%tmp = and i32 %a, 1114112
diff --git a/test/CodeGen/Thumb2/thumb2-tst2.ll b/test/CodeGen/Thumb2/thumb2-tst2.ll
index 405b3bb..bfe016f 100644
--- a/test/CodeGen/Thumb2/thumb2-tst2.ll
+++ b/test/CodeGen/Thumb2/thumb2-tst2.ll
@@ -3,14 +3,6 @@
; These tests implicitly depend on 'movs r0, #0' being rematerialized below the
; tst as 'mov.w r0, #0'. So far, that requires physreg joining.
-define i1 @f1(i32 %a, i32 %b) {
-; CHECK: f1:
-; CHECK: tst r0, r1
- %tmp = and i32 %a, %b
- %tmp1 = icmp ne i32 %tmp, 0
- ret i1 %tmp1
-}
-
define i1 @f2(i32 %a, i32 %b) {
; CHECK: f2:
; CHECK: tst r0, r1
@@ -19,14 +11,6 @@ define i1 @f2(i32 %a, i32 %b) {
ret i1 %tmp1
}
-define i1 @f3(i32 %a, i32 %b) {
-; CHECK: f3:
-; CHECK: tst r0, r1
- %tmp = and i32 %a, %b
- %tmp1 = icmp ne i32 0, %tmp
- ret i1 %tmp1
-}
-
define i1 @f4(i32 %a, i32 %b) {
; CHECK: f4:
; CHECK: tst r0, r1
diff --git a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
index b8e4381..03189aa 100644
--- a/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
+++ b/test/CodeGen/Thumb2/thumb2-uxt_rot.ll
@@ -1,13 +1,13 @@
; RUN: llc < %s -march=thumb -mattr=+thumb2,+t2xtpk | FileCheck %s
-define i8 @test1(i32 %A.u) zeroext {
+define zeroext i8 @test1(i32 %A.u) {
; CHECK: test1
; CHECK: uxtb r0, r0
%B.u = trunc i32 %A.u to i8
ret i8 %B.u
}
-define i32 @test2(i32 %A.u, i32 %B.u) zeroext {
+define zeroext i32 @test2(i32 %A.u, i32 %B.u) {
; CHECK: test2
; CHECK: uxtab r0, r0, r1
%C.u = trunc i32 %B.u to i8
@@ -16,7 +16,7 @@ define i32 @test2(i32 %A.u, i32 %B.u) zeroext {
ret i32 %E.u
}
-define i32 @test3(i32 %A.u) zeroext {
+define zeroext i32 @test3(i32 %A.u) {
; CHECK: test3
; CHECK: uxth.w r0, r0, ror #8
%B.u = lshr i32 %A.u, 8
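
As elsewhere in this commit, return-value attributes such as zeroext and signext now precede the return type instead of trailing the parameter list. A minimal sketch of the old and new spellings; the function is illustrative:

; old: define i8 @narrow(i32 %x) zeroext { ... }
define zeroext i8 @narrow(i32 %x) {
  %t = trunc i32 %x to i8
  ret i8 %t
}
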
diff --git a/test/CodeGen/X86/2004-02-12-Memcpy.ll b/test/CodeGen/X86/2004-02-12-Memcpy.ll
deleted file mode 100644
index f15a1b4..0000000
--- a/test/CodeGen/X86/2004-02-12-Memcpy.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i686-pc-linux-gnu | grep movs | count 1
-
-@A = global [32 x i32] zeroinitializer
-@B = global [32 x i32] zeroinitializer
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define void @main() nounwind {
- ; dword copy
- call void @llvm.memcpy.i32(i8* bitcast ([32 x i32]* @A to i8*),
- i8* bitcast ([32 x i32]* @B to i8*),
- i32 128, i32 4 )
-
- ; word copy
- call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
- i8* bitcast ([32 x i32]* @B to i8*),
- i32 128, i32 2 )
-
- ; byte copy
- call void @llvm.memcpy.i32( i8* bitcast ([32 x i32]* @A to i8*),
- i8* bitcast ([32 x i32]* @B to i8*),
- i32 128, i32 1 )
-
- ret void
-}
diff --git a/test/CodeGen/X86/2006-11-12-CSRetCC.ll b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
index 91210ea..6ec9a48 100644
--- a/test/CodeGen/X86/2006-11-12-CSRetCC.ll
+++ b/test/CodeGen/X86/2006-11-12-CSRetCC.ll
@@ -1,9 +1,14 @@
-; RUN: llc < %s -march=x86 | grep {subl \$4, %esp}
+; RUN: llc < %s -march=x86 | FileCheck %s
target triple = "i686-pc-linux-gnu"
@str = internal constant [9 x i8] c"%f+%f*i\0A\00" ; <[9 x i8]*> [#uses=1]
define i32 @main() {
+; CHECK: main:
+; CHECK-NOT: ret
+; CHECK: subl $4, %{{.*}}
+; CHECK: ret
+
entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
%tmp = alloca { double, double }, align 16 ; <{ double, double }*> [#uses=4]
diff --git a/test/CodeGen/X86/2006-11-28-Memcpy.ll b/test/CodeGen/X86/2006-11-28-Memcpy.ll
deleted file mode 100644
index 8c1573f..0000000
--- a/test/CodeGen/X86/2006-11-28-Memcpy.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; PR1022, PR1023
-; RUN: llc < %s -march=x86 | grep -- -573785174 | count 2
-; RUN: llc < %s -march=x86 | grep -E {movl _?bytes2} | count 1
-
-@fmt = constant [4 x i8] c"%x\0A\00" ; <[4 x i8]*> [#uses=2]
-@bytes = constant [4 x i8] c"\AA\BB\CC\DD" ; <[4 x i8]*> [#uses=1]
-@bytes2 = global [4 x i8] c"\AA\BB\CC\DD" ; <[4 x i8]*> [#uses=1]
-
-define i32 @test1() nounwind {
- %y = alloca i32 ; <i32*> [#uses=2]
- %c = bitcast i32* %y to i8* ; <i8*> [#uses=1]
- %z = getelementptr [4 x i8]* @bytes, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %c, i8* %z, i32 4, i32 1 )
- %r = load i32* %y ; <i32> [#uses=1]
- %t = bitcast [4 x i8]* @fmt to i8* ; <i8*> [#uses=1]
- %tmp = call i32 (i8*, ...)* @printf( i8* %t, i32 %r ) ; <i32> [#uses=0]
- ret i32 0
-}
-
-define void @test2() nounwind {
- %y = alloca i32 ; <i32*> [#uses=2]
- %c = bitcast i32* %y to i8* ; <i8*> [#uses=1]
- %z = getelementptr [4 x i8]* @bytes2, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %c, i8* %z, i32 4, i32 1 )
- %r = load i32* %y ; <i32> [#uses=1]
- %t = bitcast [4 x i8]* @fmt to i8* ; <i8*> [#uses=1]
- %tmp = call i32 (i8*, ...)* @printf( i8* %t, i32 %r ) ; <i32> [#uses=0]
- ret void
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare i32 @printf(i8*, ...)
-
diff --git a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
index 10bbe74..b0eb1c54 100644
--- a/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
+++ b/test/CodeGen/X86/2007-02-04-OrAddrMode.ll
@@ -1,8 +1,12 @@
-; RUN: llc < %s -march=x86 | grep {orl \$1, %eax}
-; RUN: llc < %s -march=x86 | grep {leal 3(,%eax,8)}
+; RUN: llc < %s -march=x86 | FileCheck %s
;; This example can't fold the or into an LEA.
define i32 @test(float ** %tmp2, i32 %tmp12) nounwind {
+; CHECK: test:
+; CHECK-NOT: ret
+; CHECK: orl $1, %{{.*}}
+; CHECK: ret
+
%tmp3 = load float** %tmp2
%tmp132 = shl i32 %tmp12, 2 ; <i32> [#uses=1]
%tmp4 = bitcast float* %tmp3 to i8* ; <i8*> [#uses=1]
@@ -12,9 +16,13 @@ define i32 @test(float ** %tmp2, i32 %tmp12) nounwind {
ret i32 %tmp14
}
-
;; This can!
define i32 @test2(i32 %a, i32 %b) nounwind {
+; CHECK: test2:
+; CHECK-NOT: ret
+; CHECK: leal 3(,%{{.*}},8)
+; CHECK: ret
+
%c = shl i32 %a, 3
%d = or i32 %c, 3
ret i32 %d
diff --git a/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll b/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
index a8f0e57..b48ce84 100644
--- a/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
+++ b/test/CodeGen/X86/2007-02-23-DAGCombine-Miscompile.ll
@@ -1,13 +1,17 @@
; PR1219
-; RUN: llc < %s -march=x86 | grep {movl \$1, %eax}
+; RUN: llc < %s -march=x86 | FileCheck %s
define i32 @test(i1 %X) {
-old_entry1:
- %hvar2 = zext i1 %X to i32
+; CHECK: test:
+; CHECK-NOT: ret
+; CHECK: movl $1, %eax
+; CHECK: ret
+
+ %hvar2 = zext i1 %X to i32
%C = icmp sgt i32 %hvar2, -1
br i1 %C, label %cond_true15, label %cond_true
cond_true15:
- ret i32 1
+ ret i32 1
cond_true:
- ret i32 2
+ ret i32 2
}
diff --git a/test/CodeGen/X86/2007-03-16-InlineAsm.ll b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
index 9580726..3bd6d59 100644
--- a/test/CodeGen/X86/2007-03-16-InlineAsm.ll
+++ b/test/CodeGen/X86/2007-03-16-InlineAsm.ll
@@ -9,7 +9,6 @@ entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=2]
%tmp = alloca i32, align 4 ; <i32*> [#uses=2]
%ret = alloca i32, align 4 ; <i32*> [#uses=2]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %A, i32* %A_addr
store i32 %B, i32* %B_addr
%tmp1 = load i32* %A_addr ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
index 30453d5..e2cd750 100644
--- a/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
+++ b/test/CodeGen/X86/2007-03-24-InlineAsmXConstraint.ll
@@ -1,9 +1,14 @@
-; RUN: llc < %s -march=x86 | grep {psrlw \$8, %xmm0}
+; RUN: llc < %s -march=x86 | FileCheck %s
target datalayout = "e-p:32:32"
target triple = "i686-apple-darwin9"
define void @test() {
- tail call void asm sideeffect "psrlw $0, %xmm0", "X,~{dirflag},~{fpsr},~{flags}"( i32 8 )
- ret void
+; CHECK: test:
+; CHECK-NOT: ret
+; CHECK: psrlw $8, %xmm0
+; CHECK: ret
+
+ tail call void asm sideeffect "psrlw $0, %xmm0", "X,~{dirflag},~{fpsr},~{flags}"( i32 8 )
+ ret void
}
diff --git a/test/CodeGen/X86/2007-05-05-Personality.ll b/test/CodeGen/X86/2007-05-05-Personality.ll
index 0f49d2e..d1fc70d 100644
--- a/test/CodeGen/X86/2007-05-05-Personality.ll
+++ b/test/CodeGen/X86/2007-05-05-Personality.ll
@@ -3,35 +3,35 @@
; CHECK: .cfi_personality 0, __gnat_eh_personality
; CHECK: .cfi_lsda 0, .Lexception0
-@error = external global i8 ; <i8*> [#uses=2]
+@error = external global i8
define void @_ada_x() {
entry:
- invoke void @raise( )
- to label %eh_then unwind label %unwind
-
-unwind: ; preds = %entry
- %eh_ptr = tail call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error ) ; <i32> [#uses=1]
- %eh_typeid = tail call i32 @llvm.eh.typeid.for.i32( i8* @error ) ; <i32> [#uses=1]
- %tmp2 = icmp eq i32 %eh_select, %eh_typeid ; <i1> [#uses=1]
- br i1 %tmp2, label %eh_then, label %Unwind
-
-eh_then: ; preds = %unwind, %entry
- ret void
-
-Unwind: ; preds = %unwind
- tail call i32 (...)* @_Unwind_Resume( i8* %eh_ptr ) ; <i32>:0 [#uses=0]
- unreachable
+ invoke void @raise()
+ to label %eh_then unwind label %unwind
+
+unwind: ; preds = %entry
+ %eh_ptr = tail call i8* @llvm.eh.exception()
+ %eh_select = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i8* @error)
+ %eh_typeid = tail call i32 @llvm.eh.typeid.for(i8* @error)
+ %tmp2 = icmp eq i32 %eh_select, %eh_typeid
+ br i1 %tmp2, label %eh_then, label %Unwind
+
+eh_then: ; preds = %unwind, %entry
+ ret void
+
+Unwind: ; preds = %unwind
+ %0 = tail call i32 (...)* @_Unwind_Resume(i8* %eh_ptr)
+ unreachable
}
declare void @raise()
-declare i8* @llvm.eh.exception()
+declare i8* @llvm.eh.exception() nounwind readonly
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...)
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
-declare i32 @llvm.eh.typeid.for.i32(i8*)
+declare i32 @llvm.eh.typeid.for(i8*) nounwind
declare i32 @__gnat_eh_personality(...)
diff --git a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
index ae49bd0..22e2750 100644
--- a/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
+++ b/test/CodeGen/X86/2007-05-07-InvokeSRet.ll
@@ -7,7 +7,7 @@ declare void @invokee(%struct.S* sret )
define void @invoker(%struct.S* %name.0.0) {
entry:
- invoke void @invokee( %struct.S* %name.0.0 sret )
+ invoke void @invokee( %struct.S* sret %name.0.0 )
to label %return unwind label %return
return: ; preds = %entry, %entry
diff --git a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
index 8ef2538..ecc5835 100644
--- a/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
+++ b/test/CodeGen/X86/2007-05-14-LiveIntervalAssert.ll
@@ -3,7 +3,7 @@
%struct.XDesc = type <{ i32, %struct.OpaqueXDataStorageType** }>
%struct.OpaqueXDataStorageType = type opaque
-declare i16 @GetParamDesc(%struct.XDesc*, i32, i32, %struct.XDesc*) signext
+declare signext i16 @GetParamDesc(%struct.XDesc*, i32, i32, %struct.XDesc*)
declare void @r_raise(i64, i8*, ...)
@@ -18,7 +18,7 @@ cond_true109: ; preds = %entry
br i1 false, label %cond_next164, label %cond_true239
cond_next164: ; preds = %cond_true109
- %tmp176 = call i16 @GetParamDesc( %struct.XDesc* null, i32 1701999219, i32 1413830740, %struct.XDesc* null ) signext ; <i16> [#uses=0]
+ %tmp176 = call signext i16 @GetParamDesc( %struct.XDesc* null, i32 1701999219, i32 1413830740, %struct.XDesc* null )
call void (i64, i8*, ...)* @r_raise( i64 0, i8* null )
unreachable
diff --git a/test/CodeGen/X86/2007-06-04-tailmerge4.ll b/test/CodeGen/X86/2007-06-04-tailmerge4.ll
deleted file mode 100644
index d5ec089..0000000
--- a/test/CodeGen/X86/2007-06-04-tailmerge4.ll
+++ /dev/null
@@ -1,454 +0,0 @@
-; RUN: llc < %s -asm-verbose | grep invcont131
-; PR 1496: tail merge was incorrectly removing this block
-
-; ModuleID = 'report.1.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-pc-linux-gnu"
- %struct.ALLOC = type { %struct.string___XUB, [2 x i8] }
- %struct.RETURN = type { i32, i32, i32, i64 }
- %struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
- %struct.ada__tags__dispatch_table = type { [1 x i8*] }
- %struct.ada__text_io__text_afcb = type { %struct.system__file_control_block__afcb, i32, i32, i32, i32, i32, %struct.ada__text_io__text_afcb*, i8, i8 }
- %struct.string___XUB = type { i32, i32 }
- %struct.string___XUP = type { i8*, %struct.string___XUB* }
- %struct.system__file_control_block__afcb = type { %struct.ada__streams__root_stream_type, i32, %struct.string___XUP, i32, %struct.string___XUP, i8, i8, i8, i8, i8, i8, i8, %struct.system__file_control_block__afcb*, %struct.system__file_control_block__afcb* }
- %struct.system__secondary_stack__mark_id = type { i8*, i32 }
- %struct.wide_string___XUP = type { i16*, %struct.string___XUB* }
-@report_E = global i8 0 ; <i8*> [#uses=0]
-@report__test_status = internal global i8 1 ; <i8*> [#uses=8]
-@report__test_name = internal global [15 x i8] zeroinitializer ; <[15 x i8]*> [#uses=10]
-@report__test_name_len = internal global i32 0 ; <i32*> [#uses=15]
-@.str = internal constant [12 x i8] c"report.adb\00\00" ; <[12 x i8]*> [#uses=1]
-@C.26.599 = internal constant %struct.string___XUB { i32 1, i32 1 } ; <%struct.string___XUB*> [#uses=1]
-@.str1 = internal constant [1 x i8] c":" ; <[1 x i8]*> [#uses=1]
-@.str2 = internal constant [1 x i8] c" " ; <[1 x i8]*> [#uses=1]
-@.str3 = internal constant [1 x i8] c"-" ; <[1 x i8]*> [#uses=1]
-@.str5 = internal constant [10 x i8] c"0123456789" ; <[10 x i8]*> [#uses=12]
-@C.59.855 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@C.69.876 = internal constant %struct.string___XUB { i32 1, i32 3 } ; <%struct.string___XUB*> [#uses=1]
-@C.70.879 = internal constant %struct.string___XUB { i32 1, i32 6 } ; <%struct.string___XUB*> [#uses=1]
-@C.81.900 = internal constant %struct.string___XUB { i32 1, i32 5 } ; <%struct.string___XUB*> [#uses=1]
-@.str6 = internal constant [0 x i8] zeroinitializer ; <[0 x i8]*> [#uses=1]
-@.str7 = internal constant [3 x i8] c"2.5" ; <[3 x i8]*> [#uses=1]
-@.str8 = internal constant [6 x i8] c"ACATS " ; <[6 x i8]*> [#uses=1]
-@.str9 = internal constant [5 x i8] c",.,. " ; <[5 x i8]*> [#uses=1]
-@.str10 = internal constant [1 x i8] c"." ; <[1 x i8]*> [#uses=1]
-@.str11 = internal constant [5 x i8] c"---- " ; <[5 x i8]*> [#uses=1]
-@.str12 = internal constant [5 x i8] c" - " ; <[5 x i8]*> [#uses=1]
-@.str13 = internal constant [5 x i8] c" * " ; <[5 x i8]*> [#uses=1]
-@.str14 = internal constant [5 x i8] c" + " ; <[5 x i8]*> [#uses=1]
-@.str15 = internal constant [5 x i8] c" ! " ; <[5 x i8]*> [#uses=1]
-@C.209.1380 = internal constant %struct.string___XUB { i32 1, i32 37 } ; <%struct.string___XUB*> [#uses=1]
-@.str16 = internal constant [37 x i8] c" PASSED ============================." ; <[37 x i8]*> [#uses=1]
-@.str17 = internal constant [5 x i8] c"==== " ; <[5 x i8]*> [#uses=1]
-@.str18 = internal constant [37 x i8] c" NOT-APPLICABLE ++++++++++++++++++++." ; <[37 x i8]*> [#uses=1]
-@.str19 = internal constant [5 x i8] c"++++ " ; <[5 x i8]*> [#uses=1]
-@.str20 = internal constant [37 x i8] c" TENTATIVELY PASSED !!!!!!!!!!!!!!!!." ; <[37 x i8]*> [#uses=1]
-@.str21 = internal constant [5 x i8] c"!!!! " ; <[5 x i8]*> [#uses=1]
-@.str22 = internal constant [37 x i8] c" SEE '!' COMMENTS FOR SPECIAL NOTES!!" ; <[37 x i8]*> [#uses=1]
-@.str23 = internal constant [37 x i8] c" FAILED ****************************." ; <[37 x i8]*> [#uses=1]
-@.str24 = internal constant [5 x i8] c"**** " ; <[5 x i8]*> [#uses=1]
-@__gnat_others_value = external constant i32 ; <i32*> [#uses=2]
-@system__soft_links__abort_undefer = external global void ()* ; <void ()**> [#uses=1]
-@C.320.1854 = internal constant %struct.string___XUB { i32 2, i32 6 } ; <%struct.string___XUB*> [#uses=1]
-
-declare void @report__put_msg(i64 %msg.0.0)
-
-declare void @__gnat_rcheck_05(i8*, i32)
-
-declare void @__gnat_rcheck_12(i8*, i32)
-
-declare %struct.ada__text_io__text_afcb* @ada__text_io__standard_output()
-
-declare void @ada__text_io__set_col(%struct.ada__text_io__text_afcb*, i32)
-
-declare void @ada__text_io__put_line(%struct.ada__text_io__text_afcb*, i64)
-
-declare void @report__time_stamp(%struct.string___XUP* sret %agg.result)
-
-declare i64 @ada__calendar__clock()
-
-declare void @ada__calendar__split(%struct.RETURN* sret , i64)
-
-declare void @system__string_ops_concat_5__str_concat_5(%struct.string___XUP* sret , i64, i64, i64, i64, i64)
-
-declare void @system__string_ops_concat_3__str_concat_3(%struct.string___XUP* sret , i64, i64, i64)
-
-declare i8* @system__secondary_stack__ss_allocate(i32)
-
-declare void @report__test(i64 %name.0.0, i64 %descr.0.0)
-
-declare void @system__secondary_stack__ss_mark(%struct.system__secondary_stack__mark_id* sret )
-
-declare i8* @llvm.eh.exception()
-
-declare i32 @llvm.eh.selector(i8*, i8*, ...)
-
-declare i32 @llvm.eh.typeid.for(i8*)
-
-declare i32 @__gnat_eh_personality(...)
-
-declare i32 @_Unwind_Resume(...)
-
-declare void @__gnat_rcheck_07(i8*, i32)
-
-declare void @system__secondary_stack__ss_release(i64)
-
-declare void @report__comment(i64 %descr.0.0)
-
-declare void @report__failed(i64 %descr.0.0)
-
-declare void @report__not_applicable(i64 %descr.0.0)
-
-declare void @report__special_action(i64 %descr.0.0)
-
-define void @report__result() {
-entry:
- %tmp = alloca %struct.system__secondary_stack__mark_id, align 8 ; <%struct.system__secondary_stack__mark_id*> [#uses=3]
- %A.210 = alloca %struct.string___XUB, align 8 ; <%struct.string___XUB*> [#uses=3]
- %tmp5 = alloca %struct.string___XUP, align 8 ; <%struct.string___XUP*> [#uses=3]
- %A.229 = alloca %struct.string___XUB, align 8 ; <%struct.string___XUB*> [#uses=3]
- %tmp10 = alloca %struct.string___XUP, align 8 ; <%struct.string___XUP*> [#uses=3]
- %A.248 = alloca %struct.string___XUB, align 8 ; <%struct.string___XUB*> [#uses=3]
- %tmp15 = alloca %struct.string___XUP, align 8 ; <%struct.string___XUP*> [#uses=3]
- %A.270 = alloca %struct.string___XUB, align 8 ; <%struct.string___XUB*> [#uses=3]
- %tmp20 = alloca %struct.string___XUP, align 8 ; <%struct.string___XUP*> [#uses=3]
- %A.284 = alloca %struct.string___XUB, align 8 ; <%struct.string___XUB*> [#uses=3]
- %tmp25 = alloca %struct.string___XUP, align 8 ; <%struct.string___XUP*> [#uses=3]
- call void @system__secondary_stack__ss_mark( %struct.system__secondary_stack__mark_id* %tmp sret )
- %tmp28 = getelementptr %struct.system__secondary_stack__mark_id* %tmp, i32 0, i32 0 ; <i8**> [#uses=1]
- %tmp29 = load i8** %tmp28 ; <i8*> [#uses=2]
- %tmp31 = getelementptr %struct.system__secondary_stack__mark_id* %tmp, i32 0, i32 1 ; <i32*> [#uses=1]
- %tmp32 = load i32* %tmp31 ; <i32> [#uses=2]
- %tmp33 = load i8* @report__test_status ; <i8> [#uses=1]
- switch i8 %tmp33, label %bb483 [
- i8 0, label %bb
- i8 2, label %bb143
- i8 3, label %bb261
- ]
-
-bb: ; preds = %entry
- %tmp34 = load i32* @report__test_name_len ; <i32> [#uses=4]
- %tmp35 = icmp sgt i32 %tmp34, 0 ; <i1> [#uses=2]
- %tmp40 = icmp sgt i32 %tmp34, 15 ; <i1> [#uses=1]
- %bothcond139 = and i1 %tmp35, %tmp40 ; <i1> [#uses=1]
- br i1 %bothcond139, label %cond_true43, label %cond_next44
-
-cond_true43: ; preds = %bb
- invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 212 )
- to label %UnifiedUnreachableBlock unwind label %unwind
-
-unwind: ; preds = %invcont589, %cond_next567, %bb555, %cond_true497, %invcont249, %cond_next227, %bb215, %cond_true157, %invcont131, %cond_next109, %bb97, %cond_true43
- %eh_ptr = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=1]
- br label %cleanup717
-
-cond_next44: ; preds = %bb
- %tmp72 = getelementptr %struct.string___XUB* %A.210, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp72
- %tmp73 = getelementptr %struct.string___XUB* %A.210, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %tmp34, i32* %tmp73
- br i1 %tmp35, label %cond_true80, label %cond_next109
-
-cond_true80: ; preds = %cond_next44
- %tmp45.off = add i32 %tmp34, -1 ; <i32> [#uses=1]
- %bothcond = icmp ugt i32 %tmp45.off, 14 ; <i1> [#uses=1]
- br i1 %bothcond, label %bb97, label %cond_next109
-
-bb97: ; preds = %cond_true80
- invoke void @__gnat_rcheck_05( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 212 )
- to label %UnifiedUnreachableBlock unwind label %unwind
-
-cond_next109: ; preds = %cond_true80, %cond_next44
- %A.210128 = ptrtoint %struct.string___XUB* %A.210 to i32 ; <i32> [#uses=1]
- %A.210128129 = zext i32 %A.210128 to i64 ; <i64> [#uses=1]
- %A.210128129130 = shl i64 %A.210128129, 32 ; <i64> [#uses=1]
- %A.210128129130.ins = or i64 %A.210128129130, zext (i32 ptrtoint ([15 x i8]* @report__test_name to i32) to i64) ; <i64> [#uses=1]
- invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp5 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str17 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.210128129130.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str16 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
- to label %invcont131 unwind label %unwind
-
-invcont131: ; preds = %cond_next109
- %tmp133 = getelementptr %struct.string___XUP* %tmp5, i32 0, i32 0 ; <i8**> [#uses=1]
- %tmp134 = load i8** %tmp133 ; <i8*> [#uses=1]
- %tmp134120 = ptrtoint i8* %tmp134 to i32 ; <i32> [#uses=1]
- %tmp134120121 = zext i32 %tmp134120 to i64 ; <i64> [#uses=1]
- %tmp136 = getelementptr %struct.string___XUP* %tmp5, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %tmp137 = load %struct.string___XUB** %tmp136 ; <%struct.string___XUB*> [#uses=1]
- %tmp137116 = ptrtoint %struct.string___XUB* %tmp137 to i32 ; <i32> [#uses=1]
- %tmp137116117 = zext i32 %tmp137116 to i64 ; <i64> [#uses=1]
- %tmp137116117118 = shl i64 %tmp137116117, 32 ; <i64> [#uses=1]
- %tmp137116117118.ins = or i64 %tmp137116117118, %tmp134120121 ; <i64> [#uses=1]
- invoke fastcc void @report__put_msg( i64 %tmp137116117118.ins )
- to label %cond_next618 unwind label %unwind
-
-bb143: ; preds = %entry
- %tmp144 = load i32* @report__test_name_len ; <i32> [#uses=4]
- %tmp147 = icmp sgt i32 %tmp144, 0 ; <i1> [#uses=2]
- %tmp154 = icmp sgt i32 %tmp144, 15 ; <i1> [#uses=1]
- %bothcond140 = and i1 %tmp147, %tmp154 ; <i1> [#uses=1]
- br i1 %bothcond140, label %cond_true157, label %cond_next160
-
-cond_true157: ; preds = %bb143
- invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 215 )
- to label %UnifiedUnreachableBlock unwind label %unwind
-
-cond_next160: ; preds = %bb143
- %tmp189 = getelementptr %struct.string___XUB* %A.229, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp189
- %tmp190 = getelementptr %struct.string___XUB* %A.229, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %tmp144, i32* %tmp190
- br i1 %tmp147, label %cond_true197, label %cond_next227
-
-cond_true197: ; preds = %cond_next160
- %tmp161.off = add i32 %tmp144, -1 ; <i32> [#uses=1]
- %bothcond1 = icmp ugt i32 %tmp161.off, 14 ; <i1> [#uses=1]
- br i1 %bothcond1, label %bb215, label %cond_next227
-
-bb215: ; preds = %cond_true197
- invoke void @__gnat_rcheck_05( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 215 )
- to label %UnifiedUnreachableBlock unwind label %unwind
-
-cond_next227: ; preds = %cond_true197, %cond_next160
- %A.229105 = ptrtoint %struct.string___XUB* %A.229 to i32 ; <i32> [#uses=1]
- %A.229105106 = zext i32 %A.229105 to i64 ; <i64> [#uses=1]
- %A.229105106107 = shl i64 %A.229105106, 32 ; <i64> [#uses=1]
- %A.229105106107.ins = or i64 %A.229105106107, zext (i32 ptrtoint ([15 x i8]* @report__test_name to i32) to i64) ; <i64> [#uses=1]
- invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp10 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str19 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.229105106107.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str18 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
- to label %invcont249 unwind label %unwind
-
-invcont249: ; preds = %cond_next227
- %tmp251 = getelementptr %struct.string___XUP* %tmp10, i32 0, i32 0 ; <i8**> [#uses=1]
- %tmp252 = load i8** %tmp251 ; <i8*> [#uses=1]
- %tmp25297 = ptrtoint i8* %tmp252 to i32 ; <i32> [#uses=1]
- %tmp2529798 = zext i32 %tmp25297 to i64 ; <i64> [#uses=1]
- %tmp254 = getelementptr %struct.string___XUP* %tmp10, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %tmp255 = load %struct.string___XUB** %tmp254 ; <%struct.string___XUB*> [#uses=1]
- %tmp25593 = ptrtoint %struct.string___XUB* %tmp255 to i32 ; <i32> [#uses=1]
- %tmp2559394 = zext i32 %tmp25593 to i64 ; <i64> [#uses=1]
- %tmp255939495 = shl i64 %tmp2559394, 32 ; <i64> [#uses=1]
- %tmp255939495.ins = or i64 %tmp255939495, %tmp2529798 ; <i64> [#uses=1]
- invoke fastcc void @report__put_msg( i64 %tmp255939495.ins )
- to label %cond_next618 unwind label %unwind
-
-bb261: ; preds = %entry
- %tmp262 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=2]
- %tmp263 = load i32* @report__test_name_len ; <i32> [#uses=4]
- %tmp266 = icmp sgt i32 %tmp263, 0 ; <i1> [#uses=2]
- %tmp273 = icmp sgt i32 %tmp263, 15 ; <i1> [#uses=1]
- %bothcond141 = and i1 %tmp266, %tmp273 ; <i1> [#uses=1]
- br i1 %bothcond141, label %cond_true276, label %cond_next281
-
-cond_true276: ; preds = %bb261
- invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 218 )
- to label %UnifiedUnreachableBlock unwind label %unwind277
-
-unwind277: ; preds = %invcont467, %cond_next442, %invcont370, %cond_next348, %bb336, %cond_true276
- %eh_ptr278 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=1]
- call void @llvm.stackrestore( i8* %tmp262 )
- br label %cleanup717
-
-cond_next281: ; preds = %bb261
- %tmp310 = getelementptr %struct.string___XUB* %A.248, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp310
- %tmp311 = getelementptr %struct.string___XUB* %A.248, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %tmp263, i32* %tmp311
- br i1 %tmp266, label %cond_true318, label %cond_next348
-
-cond_true318: ; preds = %cond_next281
- %tmp282.off = add i32 %tmp263, -1 ; <i32> [#uses=1]
- %bothcond2 = icmp ugt i32 %tmp282.off, 14 ; <i1> [#uses=1]
- br i1 %bothcond2, label %bb336, label %cond_next348
-
-bb336: ; preds = %cond_true318
- invoke void @__gnat_rcheck_05( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 218 )
- to label %UnifiedUnreachableBlock unwind label %unwind277
-
-cond_next348: ; preds = %cond_true318, %cond_next281
- %A.24882 = ptrtoint %struct.string___XUB* %A.248 to i32 ; <i32> [#uses=1]
- %A.2488283 = zext i32 %A.24882 to i64 ; <i64> [#uses=1]
- %A.248828384 = shl i64 %A.2488283, 32 ; <i64> [#uses=1]
- %A.248828384.ins = or i64 %A.248828384, zext (i32 ptrtoint ([15 x i8]* @report__test_name to i32) to i64) ; <i64> [#uses=1]
- invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp15 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str21 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.248828384.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str20 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
- to label %invcont370 unwind label %unwind277
-
-invcont370: ; preds = %cond_next348
- %tmp372 = getelementptr %struct.string___XUP* %tmp15, i32 0, i32 0 ; <i8**> [#uses=1]
- %tmp373 = load i8** %tmp372 ; <i8*> [#uses=1]
- %tmp37374 = ptrtoint i8* %tmp373 to i32 ; <i32> [#uses=1]
- %tmp3737475 = zext i32 %tmp37374 to i64 ; <i64> [#uses=1]
- %tmp375 = getelementptr %struct.string___XUP* %tmp15, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %tmp376 = load %struct.string___XUB** %tmp375 ; <%struct.string___XUB*> [#uses=1]
- %tmp37670 = ptrtoint %struct.string___XUB* %tmp376 to i32 ; <i32> [#uses=1]
- %tmp3767071 = zext i32 %tmp37670 to i64 ; <i64> [#uses=1]
- %tmp376707172 = shl i64 %tmp3767071, 32 ; <i64> [#uses=1]
- %tmp376707172.ins = or i64 %tmp376707172, %tmp3737475 ; <i64> [#uses=1]
- invoke fastcc void @report__put_msg( i64 %tmp376707172.ins )
- to label %invcont381 unwind label %unwind277
-
-invcont381: ; preds = %invcont370
- %tmp382 = load i32* @report__test_name_len ; <i32> [#uses=6]
- %tmp415 = icmp sgt i32 %tmp382, -1 ; <i1> [#uses=1]
- %max416 = select i1 %tmp415, i32 %tmp382, i32 0 ; <i32> [#uses=1]
- %tmp417 = alloca i8, i32 %max416 ; <i8*> [#uses=3]
- %tmp423 = icmp sgt i32 %tmp382, 0 ; <i1> [#uses=1]
- br i1 %tmp423, label %bb427, label %cond_next442
-
-bb427: ; preds = %invcont381
- store i8 32, i8* %tmp417
- %tmp434 = icmp eq i32 %tmp382, 1 ; <i1> [#uses=1]
- br i1 %tmp434, label %cond_next442, label %cond_next438.preheader
-
-cond_next438.preheader: ; preds = %bb427
- %tmp. = add i32 %tmp382, -1 ; <i32> [#uses=1]
- br label %cond_next438
-
-cond_next438: ; preds = %cond_next438, %cond_next438.preheader
- %indvar = phi i32 [ 0, %cond_next438.preheader ], [ %J130b.513.5, %cond_next438 ] ; <i32> [#uses=1]
- %J130b.513.5 = add i32 %indvar, 1 ; <i32> [#uses=3]
- %tmp43118 = getelementptr i8* %tmp417, i32 %J130b.513.5 ; <i8*> [#uses=1]
- store i8 32, i8* %tmp43118
- %exitcond = icmp eq i32 %J130b.513.5, %tmp. ; <i1> [#uses=1]
- br i1 %exitcond, label %cond_next442, label %cond_next438
-
-cond_next442: ; preds = %cond_next438, %bb427, %invcont381
- %tmp448 = getelementptr %struct.string___XUB* %A.270, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp448
- %tmp449 = getelementptr %struct.string___XUB* %A.270, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %tmp382, i32* %tmp449
- %tmp41762 = ptrtoint i8* %tmp417 to i32 ; <i32> [#uses=1]
- %tmp4176263 = zext i32 %tmp41762 to i64 ; <i64> [#uses=1]
- %A.27058 = ptrtoint %struct.string___XUB* %A.270 to i32 ; <i32> [#uses=1]
- %A.2705859 = zext i32 %A.27058 to i64 ; <i64> [#uses=1]
- %A.270585960 = shl i64 %A.2705859, 32 ; <i64> [#uses=1]
- %A.270585960.ins = or i64 %tmp4176263, %A.270585960 ; <i64> [#uses=1]
- invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp20 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str21 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.270585960.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str22 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
- to label %invcont467 unwind label %unwind277
-
-invcont467: ; preds = %cond_next442
- %tmp469 = getelementptr %struct.string___XUP* %tmp20, i32 0, i32 0 ; <i8**> [#uses=1]
- %tmp470 = load i8** %tmp469 ; <i8*> [#uses=1]
- %tmp47050 = ptrtoint i8* %tmp470 to i32 ; <i32> [#uses=1]
- %tmp4705051 = zext i32 %tmp47050 to i64 ; <i64> [#uses=1]
- %tmp472 = getelementptr %struct.string___XUP* %tmp20, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %tmp473 = load %struct.string___XUB** %tmp472 ; <%struct.string___XUB*> [#uses=1]
- %tmp47346 = ptrtoint %struct.string___XUB* %tmp473 to i32 ; <i32> [#uses=1]
- %tmp4734647 = zext i32 %tmp47346 to i64 ; <i64> [#uses=1]
- %tmp473464748 = shl i64 %tmp4734647, 32 ; <i64> [#uses=1]
- %tmp473464748.ins = or i64 %tmp473464748, %tmp4705051 ; <i64> [#uses=1]
- invoke fastcc void @report__put_msg( i64 %tmp473464748.ins )
- to label %cleanup unwind label %unwind277
-
-cleanup: ; preds = %invcont467
- call void @llvm.stackrestore( i8* %tmp262 )
- br label %cond_next618
-
-bb483: ; preds = %entry
- %tmp484 = load i32* @report__test_name_len ; <i32> [#uses=4]
- %tmp487 = icmp sgt i32 %tmp484, 0 ; <i1> [#uses=2]
- %tmp494 = icmp sgt i32 %tmp484, 15 ; <i1> [#uses=1]
- %bothcond142 = and i1 %tmp487, %tmp494 ; <i1> [#uses=1]
- br i1 %bothcond142, label %cond_true497, label %cond_next500
-
-cond_true497: ; preds = %bb483
- invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 223 )
- to label %UnifiedUnreachableBlock unwind label %unwind
-
-cond_next500: ; preds = %bb483
- %tmp529 = getelementptr %struct.string___XUB* %A.284, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp529
- %tmp530 = getelementptr %struct.string___XUB* %A.284, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %tmp484, i32* %tmp530
- br i1 %tmp487, label %cond_true537, label %cond_next567
-
-cond_true537: ; preds = %cond_next500
- %tmp501.off = add i32 %tmp484, -1 ; <i32> [#uses=1]
- %bothcond3 = icmp ugt i32 %tmp501.off, 14 ; <i1> [#uses=1]
- br i1 %bothcond3, label %bb555, label %cond_next567
-
-bb555: ; preds = %cond_true537
- invoke void @__gnat_rcheck_05( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 223 )
- to label %UnifiedUnreachableBlock unwind label %unwind
-
-cond_next567: ; preds = %cond_true537, %cond_next500
- %A.28435 = ptrtoint %struct.string___XUB* %A.284 to i32 ; <i32> [#uses=1]
- %A.2843536 = zext i32 %A.28435 to i64 ; <i64> [#uses=1]
- %A.284353637 = shl i64 %A.2843536, 32 ; <i64> [#uses=1]
- %A.284353637.ins = or i64 %A.284353637, zext (i32 ptrtoint ([15 x i8]* @report__test_name to i32) to i64) ; <i64> [#uses=1]
- invoke void @system__string_ops_concat_3__str_concat_3( %struct.string___XUP* %tmp25 sret , i64 or (i64 zext (i32 ptrtoint ([5 x i8]* @.str24 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.81.900 to i32) to i64), i64 32)), i64 %A.284353637.ins, i64 or (i64 zext (i32 ptrtoint ([37 x i8]* @.str23 to i32) to i64), i64 shl (i64 zext (i32 ptrtoint (%struct.string___XUB* @C.209.1380 to i32) to i64), i64 32)) )
- to label %invcont589 unwind label %unwind
-
-invcont589: ; preds = %cond_next567
- %tmp591 = getelementptr %struct.string___XUP* %tmp25, i32 0, i32 0 ; <i8**> [#uses=1]
- %tmp592 = load i8** %tmp591 ; <i8*> [#uses=1]
- %tmp59228 = ptrtoint i8* %tmp592 to i32 ; <i32> [#uses=1]
- %tmp5922829 = zext i32 %tmp59228 to i64 ; <i64> [#uses=1]
- %tmp594 = getelementptr %struct.string___XUP* %tmp25, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %tmp595 = load %struct.string___XUB** %tmp594 ; <%struct.string___XUB*> [#uses=1]
- %tmp59524 = ptrtoint %struct.string___XUB* %tmp595 to i32 ; <i32> [#uses=1]
- %tmp5952425 = zext i32 %tmp59524 to i64 ; <i64> [#uses=1]
- %tmp595242526 = shl i64 %tmp5952425, 32 ; <i64> [#uses=1]
- %tmp595242526.ins = or i64 %tmp595242526, %tmp5922829 ; <i64> [#uses=1]
- invoke fastcc void @report__put_msg( i64 %tmp595242526.ins )
- to label %cond_next618 unwind label %unwind
-
-cond_next618: ; preds = %invcont589, %cleanup, %invcont249, %invcont131
- store i8 1, i8* @report__test_status
- store i32 7, i32* @report__test_name_len
- store i8 78, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 0)
- store i8 79, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 1)
- store i8 95, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 2)
- store i8 78, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 3)
- store i8 65, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 4)
- store i8 77, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 5)
- store i8 69, i8* getelementptr ([15 x i8]* @report__test_name, i32 0, i32 6)
- %CHAIN.310.0.0.0.val5.i = ptrtoint i8* %tmp29 to i32 ; <i32> [#uses=1]
- %CHAIN.310.0.0.0.val56.i = zext i32 %CHAIN.310.0.0.0.val5.i to i64 ; <i64> [#uses=1]
- %CHAIN.310.0.0.1.val2.i = zext i32 %tmp32 to i64 ; <i64> [#uses=1]
- %CHAIN.310.0.0.1.val23.i = shl i64 %CHAIN.310.0.0.1.val2.i, 32 ; <i64> [#uses=1]
- %CHAIN.310.0.0.1.val23.ins.i = or i64 %CHAIN.310.0.0.1.val23.i, %CHAIN.310.0.0.0.val56.i ; <i64> [#uses=1]
- call void @system__secondary_stack__ss_release( i64 %CHAIN.310.0.0.1.val23.ins.i )
- ret void
-
-cleanup717: ; preds = %unwind277, %unwind
- %eh_exception.0 = phi i8* [ %eh_ptr278, %unwind277 ], [ %eh_ptr, %unwind ] ; <i8*> [#uses=1]
- %CHAIN.310.0.0.0.val5.i8 = ptrtoint i8* %tmp29 to i32 ; <i32> [#uses=1]
- %CHAIN.310.0.0.0.val56.i9 = zext i32 %CHAIN.310.0.0.0.val5.i8 to i64 ; <i64> [#uses=1]
- %CHAIN.310.0.0.1.val2.i10 = zext i32 %tmp32 to i64 ; <i64> [#uses=1]
- %CHAIN.310.0.0.1.val23.i11 = shl i64 %CHAIN.310.0.0.1.val2.i10, 32 ; <i64> [#uses=1]
- %CHAIN.310.0.0.1.val23.ins.i12 = or i64 %CHAIN.310.0.0.1.val23.i11, %CHAIN.310.0.0.0.val56.i9 ; <i64> [#uses=1]
- call void @system__secondary_stack__ss_release( i64 %CHAIN.310.0.0.1.val23.ins.i12 )
- call i32 (...)* @_Unwind_Resume( i8* %eh_exception.0 ) ; <i32>:0 [#uses=0]
- unreachable
-
-UnifiedUnreachableBlock: ; preds = %bb555, %cond_true497, %bb336, %cond_true276, %bb215, %cond_true157, %bb97, %cond_true43
- unreachable
-}
-
-declare i8* @llvm.stacksave()
-
-declare void @llvm.stackrestore(i8*)
-
-declare i32 @report__ident_int(i32 %x)
-
-declare i8 @report__equal(i32 %x, i32 %y)
-
-declare i8 @report__ident_char(i8 zeroext %x)
-
-declare i16 @report__ident_wide_char(i16 zeroext %x)
-
-declare i8 @report__ident_bool(i8 %x)
-
-declare void @report__ident_str(%struct.string___XUP* sret %agg.result, i64 %x.0.0)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @report__ident_wide_str(%struct.wide_string___XUP* sret %agg.result, i64 %x.0.0)
-
-declare void @__gnat_begin_handler(i8*)
-
-declare void @__gnat_end_handler(i8*)
-
-declare void @report__legal_file_name(%struct.string___XUP* sret %agg.result, i32 %x, i64 %nam.0.0)
-
-declare void @__gnat_rcheck_06(i8*, i32)
-
-declare void @system__string_ops__str_concat_cs(%struct.string___XUP* sret , i8 zeroext , i64)
diff --git a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll b/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
deleted file mode 100644
index 36a97ef..0000000
--- a/test/CodeGen/X86/2007-06-05-LSR-Dominator.ll
+++ /dev/null
@@ -1,129 +0,0 @@
-; PR1495
-; RUN: llc < %s -march=x86
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-pc-linux-gnu"
- %struct.AVRational = type { i32, i32 }
- %struct.FFTComplex = type { float, float }
- %struct.FFTContext = type { i32, i32, i16*, %struct.FFTComplex*, %struct.FFTComplex*, void (%struct.FFTContext*, %struct.FFTComplex*)*, void (%struct.MDCTContext*, float*, float*, float*)* }
- %struct.MDCTContext = type { i32, i32, float*, float*, %struct.FFTContext }
- %struct.Minima = type { i32, i32, i32, i32 }
- %struct.codebook_t = type { i32, i8*, i32*, i32, float, float, i32, i32, i32*, float*, float* }
- %struct.floor_class_t = type { i32, i32, i32, i32* }
- %struct.floor_t = type { i32, i32*, i32, %struct.floor_class_t*, i32, i32, i32, %struct.Minima* }
- %struct.mapping_t = type { i32, i32*, i32*, i32*, i32, i32*, i32* }
- %struct.residue_t = type { i32, i32, i32, i32, i32, i32, [8 x i8]*, [2 x float]* }
- %struct.venc_context_t = type { i32, i32, [2 x i32], [2 x %struct.MDCTContext], [2 x float*], i32, float*, float*, float*, float*, float, i32, %struct.codebook_t*, i32, %struct.floor_t*, i32, %struct.residue_t*, i32, %struct.mapping_t*, i32, %struct.AVRational* }
-
-define fastcc i32 @put_main_header(%struct.venc_context_t* %venc, i8** %out) {
-entry:
- br i1 false, label %bb1820, label %bb288.bb148_crit_edge
-
-bb288.bb148_crit_edge: ; preds = %entry
- ret i32 0
-
-cond_next1712: ; preds = %bb1820.bb1680_crit_edge
- ret i32 0
-
-bb1817: ; preds = %bb1820.bb1680_crit_edge
- br label %bb1820
-
-bb1820: ; preds = %bb1817, %entry
- %pb.1.50 = phi i32 [ %tmp1693, %bb1817 ], [ 8, %entry ] ; <i32> [#uses=3]
- br i1 false, label %bb2093, label %bb1820.bb1680_crit_edge
-
-bb1820.bb1680_crit_edge: ; preds = %bb1820
- %tmp1693 = add i32 %pb.1.50, 8 ; <i32> [#uses=2]
- %tmp1702 = icmp slt i32 %tmp1693, 0 ; <i1> [#uses=1]
- br i1 %tmp1702, label %cond_next1712, label %bb1817
-
-bb2093: ; preds = %bb1820
- %tmp2102 = add i32 %pb.1.50, 65 ; <i32> [#uses=0]
- %tmp2236 = add i32 %pb.1.50, 72 ; <i32> [#uses=1]
- %tmp2237 = sdiv i32 %tmp2236, 8 ; <i32> [#uses=2]
- br i1 false, label %bb2543, label %bb2536.bb2396_crit_edge
-
-bb2536.bb2396_crit_edge: ; preds = %bb2093
- ret i32 0
-
-bb2543: ; preds = %bb2093
- br i1 false, label %cond_next2576, label %bb2690
-
-cond_next2576: ; preds = %bb2543
- ret i32 0
-
-bb2682: ; preds = %bb2690
- ret i32 0
-
-bb2690: ; preds = %bb2543
- br i1 false, label %bb2682, label %bb2698
-
-bb2698: ; preds = %bb2690
- br i1 false, label %cond_next2726, label %bb2831
-
-cond_next2726: ; preds = %bb2698
- ret i32 0
-
-bb2831: ; preds = %bb2698
- br i1 false, label %cond_next2859, label %bb2964
-
-cond_next2859: ; preds = %bb2831
- br i1 false, label %bb2943, label %cond_true2866
-
-cond_true2866: ; preds = %cond_next2859
- br i1 false, label %cond_true2874, label %cond_false2897
-
-cond_true2874: ; preds = %cond_true2866
- ret i32 0
-
-cond_false2897: ; preds = %cond_true2866
- ret i32 0
-
-bb2943: ; preds = %cond_next2859
- ret i32 0
-
-bb2964: ; preds = %bb2831
- br i1 false, label %cond_next2997, label %bb4589
-
-cond_next2997: ; preds = %bb2964
- ret i32 0
-
-bb3103: ; preds = %bb4589
- ret i32 0
-
-bb4589: ; preds = %bb2964
- br i1 false, label %bb3103, label %bb4597
-
-bb4597: ; preds = %bb4589
- br i1 false, label %cond_next4630, label %bb4744
-
-cond_next4630: ; preds = %bb4597
- br i1 false, label %bb4744, label %cond_true4724
-
-cond_true4724: ; preds = %cond_next4630
- br i1 false, label %bb4736, label %bb7531
-
-bb4736: ; preds = %cond_true4724
- ret i32 0
-
-bb4744: ; preds = %cond_next4630, %bb4597
- ret i32 0
-
-bb7531: ; preds = %cond_true4724
- %v_addr.023.0.i6 = add i32 %tmp2237, -255 ; <i32> [#uses=1]
- br label %bb.i14
-
-bb.i14: ; preds = %bb.i14, %bb7531
- %n.021.0.i8 = phi i32 [ 0, %bb7531 ], [ %indvar.next, %bb.i14 ] ; <i32> [#uses=2]
- %tmp..i9 = mul i32 %n.021.0.i8, -255 ; <i32> [#uses=1]
- %tmp5.i11 = add i32 %v_addr.023.0.i6, %tmp..i9 ; <i32> [#uses=1]
- %tmp10.i12 = icmp ugt i32 %tmp5.i11, 254 ; <i1> [#uses=1]
- %indvar.next = add i32 %n.021.0.i8, 1 ; <i32> [#uses=1]
- br i1 %tmp10.i12, label %bb.i14, label %bb12.loopexit.i18
-
-bb12.loopexit.i18: ; preds = %bb.i14
- call void @llvm.memcpy.i32( i8* null, i8* null, i32 %tmp2237, i32 1 )
- ret i32 0
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
index 3cd8052..62624a7 100644
--- a/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
+++ b/test/CodeGen/X86/2007-08-01-LiveVariablesBug.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 | not grep movl
-define i8 @t(i8 zeroext %x, i8 zeroext %y) zeroext {
+define zeroext i8 @t(i8 zeroext %x, i8 zeroext %y) {
%tmp2 = add i8 %x, 2
%tmp4 = add i8 %y, -2
%tmp5 = mul i8 %tmp4, %tmp2
diff --git a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
index e93092f..77291f0 100644
--- a/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
+++ b/test/CodeGen/X86/2007-08-10-SignExtSubreg.ll
@@ -2,7 +2,7 @@
@X = global i32 0 ; <i32*> [#uses=1]
-define i8 @_Z3fooi(i32 %x) signext {
+define signext i8 @_Z3fooi(i32 %x) {
entry:
store i32 %x, i32* @X, align 4
%retval67 = trunc i32 %x to i8 ; <i8> [#uses=1]
diff --git a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
index 1e43272..15466a1 100644
--- a/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
+++ b/test/CodeGen/X86/2007-09-17-ObjcFrameEH.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -disable-cfi -march=x86 -mtriple=i686-apple-darwin | grep {isNullOrNil].eh"} | FileCheck %s
+; RUN: llc < %s -disable-cfi -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
; CHECK: "_-[NSString(local) isNullOrNil].eh":
@@ -26,7 +26,7 @@
[1 x %struct._objc_method] [ %struct._objc_method {
%struct.objc_selector* bitcast ([12 x i8]* @"\01L_OBJC_METH_VAR_NAME_0" to %struct.objc_selector*),
i8* getelementptr ([7 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0),
- i8* bitcast (i8 (%struct.NSString*, %struct.objc_selector*) signext * @"-[NSString(local) isNullOrNil]" to i8*) } ] }, section "__OBJC,__cat_inst_meth,regular,no_dead_strip" ; <{ i32, i32, [1 x %struct._objc_method] }*> [#uses=3]
+ i8* bitcast (i8 (%struct.NSString*, %struct.objc_selector*) * @"-[NSString(local) isNullOrNil]" to i8*) } ] }, section "__OBJC,__cat_inst_meth,regular,no_dead_strip" ; <{ i32, i32, [1 x %struct._objc_method] }*> [#uses=3]
@"\01L_OBJC_CATEGORY_NSString_local" = internal global { i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 } {
i8* getelementptr ([6 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0),
i8* getelementptr ([9 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0),
@@ -51,7 +51,7 @@
@"\01L_OBJC_METH_VAR_TYPE_0" = internal global [7 x i8] c"c8@0:4\00", section "__TEXT,__cstring,cstring_literals" ; <[7 x i8]*> [#uses=2]
@llvm.used = appending global [11 x i8*] [ i8* bitcast ({ i32, i32, i16, i16, [1 x %struct._objc_category*] }* @"\01L_OBJC_SYMBOLS" to i8*), i8* bitcast ({ i32, i32, [1 x %struct._objc_method] }* @"\01L_OBJC_CATEGORY_INSTANCE_METHODS_NSString_local" to i8*), i8* bitcast ({ i8*, i8*, %struct._objc_method_list*, i32, i32, i32, i32 }* @"\01L_OBJC_CATEGORY_NSString_local" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*), i8* bitcast (i32* @"\01.objc_category_name_NSString_local" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_2", i32 0, i32 0), i8* getelementptr ([9 x i8]* @"\01L_OBJC_CLASS_NAME_1", i32 0, i32 0), i8* getelementptr ([6 x i8]* @"\01L_OBJC_CLASS_NAME_0", i32 0, i32 0), i8* getelementptr ([12 x i8]* @"\01L_OBJC_METH_VAR_NAME_0", i32 0, i32 0), i8* getelementptr ([7 x i8]* @"\01L_OBJC_METH_VAR_TYPE_0", i32 0, i32 0) ], section "llvm.metadata" ; <[11 x i8*]*> [#uses=0]
-define internal i8 @"-[NSString(local) isNullOrNil]"(%struct.NSString* %self, %struct.objc_selector* %_cmd) signext {
+define internal signext i8 @"-[NSString(local) isNullOrNil]"(%struct.NSString* %self, %struct.objc_selector* %_cmd) {
entry:
%self_addr = alloca %struct.NSString* ; <%struct.NSString**> [#uses=1]
%_cmd_addr = alloca %struct.objc_selector* ; <%struct.objc_selector**> [#uses=1]
diff --git a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
index b4a986f..f7ffb93 100644
--- a/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
+++ b/test/CodeGen/X86/2007-09-27-LDIntrinsics.ll
@@ -22,7 +22,7 @@ entry:
; CHECK: bar:
; CHECK: fldt 4(%esp)
; CHECK-NEXT: fld %st(0)
-; CHECK-NEXT: fmul %st(1), %st(0)
+; CHECK-NEXT: fmul %st(1)
; CHECK-NEXT: fmulp
; CHECK-NEXT: ret
}
diff --git a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
deleted file mode 100644
index 2c2706d..0000000
--- a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
+++ /dev/null
@@ -1,48 +0,0 @@
-; RUN: llc < %s -march=x86 | grep lea
-
- %struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
- %struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
- %struct.node = type { i16, double, [3 x double], i32, i32 }
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
-entry:
- %0 = malloc %struct.anon ; <%struct.anon*> [#uses=2]
- %1 = getelementptr %struct.anon* %0, i32 0, i32 2 ; <%struct.node**> [#uses=1]
- br label %bb14.i
-
-bb14.i: ; preds = %bb14.i, %entry
- %i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %2, %bb14.i ] ; <i32> [#uses=1]
- %2 = add i32 %i8.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
- %exitcond74.i = icmp eq i32 %2, 32 ; <i1> [#uses=1]
- br i1 %exitcond74.i, label %bb32.i, label %bb14.i
-
-bb32.i: ; preds = %bb32.i, %bb14.i
- %tmp.0.reg2mem.0.i = phi i32 [ %indvar.next63.i, %bb32.i ], [ 0, %bb14.i ] ; <i32> [#uses=1]
- %indvar.next63.i = add i32 %tmp.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
- %exitcond64.i = icmp eq i32 %indvar.next63.i, 64 ; <i1> [#uses=1]
- br i1 %exitcond64.i, label %bb47.loopexit.i, label %bb32.i
-
-bb.i.i: ; preds = %bb47.loopexit.i
- unreachable
-
-stepsystem.exit.i: ; preds = %bb47.loopexit.i
- store %struct.node* null, %struct.node** %1, align 4
- br label %bb.i6.i
-
-bb.i6.i: ; preds = %bb.i6.i, %stepsystem.exit.i
- %tmp.0.i.i = add i32 0, -1 ; <i32> [#uses=1]
- %3 = icmp slt i32 %tmp.0.i.i, 0 ; <i1> [#uses=1]
- br i1 %3, label %bb107.i.i, label %bb.i6.i
-
-bb107.i.i: ; preds = %bb107.i.i, %bb.i6.i
- %q_addr.0.i.i.in = phi %struct.bnode** [ null, %bb107.i.i ], [ %4, %bb.i6.i ] ; <%struct.bnode**> [#uses=1]
- %q_addr.0.i.i = load %struct.bnode** %q_addr.0.i.i.in ; <%struct.bnode*> [#uses=1]
- %q_addr.1 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 1
- store %struct.bnode* %q_addr.0.i.i, %struct.bnode** %q_addr.1, align 4
- br label %bb107.i.i
-
-bb47.loopexit.i: ; preds = %bb32.i
- %4 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 0 ; <%struct.bnode**> [#uses=1]
- %5 = icmp eq %struct.node* null, null ; <i1> [#uses=1]
- br i1 %5, label %stepsystem.exit.i, label %bb.i.i
-}
diff --git a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
index db13fde..8091bd1 100644
--- a/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
+++ b/test/CodeGen/X86/2007-10-12-CoalesceExtSubReg.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 | not grep movb
-define i16 @f(i32* %bp, i32* %ss) signext {
+define signext i16 @f(i32* %bp, i32* %ss) {
entry:
br label %cond_next127
diff --git a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
index a3872ad..7a3d72d 100644
--- a/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
+++ b/test/CodeGen/X86/2007-10-12-SpillerUnfold2.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 | grep sarl | not grep esp
-define i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext %acBaseRes, i16 signext %acMaskRes, i16 signext %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) signext {
+define signext i16 @t(i16* %qmatrix, i16* %dct, i16* %acBaseTable, i16* %acExtTable, i16 signext %acBaseRes, i16 signext %acMaskRes, i16 signext %acExtRes, i32* %bitptr, i32* %source, i32 %markerPrefix, i8** %byteptr, i32 %scale, i32 %round, i32 %bits) {
entry:
br label %cond_next127
diff --git a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
index 1e4ae84..2b56b4e 100644
--- a/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
+++ b/test/CodeGen/X86/2007-10-15-CoalescerCrash.ll
@@ -362,7 +362,7 @@ bb1159: ; preds = %cond_next1150
cond_true1169: ; preds = %bb1159
%tmp11741175 = trunc i64 %lsum.11225.0 to i32 ; <i32> [#uses=1]
- %tmp1178 = tail call i32 (%struct._IO_FILE* noalias , i8* noalias , ...)* @fprintf( %struct._IO_FILE* %file noalias , i8* getelementptr ([49 x i8]* @.str32, i32 0, i64 0) noalias , i32 %tmp11741175, i32 0 ) ; <i32> [#uses=0]
+ %tmp1178 = tail call i32 (%struct._IO_FILE* , i8* , ...)* @fprintf( %struct._IO_FILE* noalias %file , i8* getelementptr ([49 x i8]* @.str32, i32 0, i64 0) , i32 %tmp11741175, i32 0 ) ; <i32> [#uses=0]
ret void
UnifiedReturnBlock: ; preds = %bb1159
@@ -379,9 +379,9 @@ declare i32 @reg_preferred_class(i32)
declare i32 @reg_alternate_class(i32)
-declare i8 @maybe_hot_bb_p(%struct.basic_block_def*) zeroext
+declare zeroext i8 @maybe_hot_bb_p(%struct.basic_block_def*)
-declare i8 @probably_never_executed_bb_p(%struct.basic_block_def*) zeroext
+declare zeroext i8 @probably_never_executed_bb_p(%struct.basic_block_def*)
declare void @dump_regset(%struct.bitmap_head_def*, %struct._IO_FILE*)
diff --git a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
index 600bd1f..d3120f3 100644
--- a/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
+++ b/test/CodeGen/X86/2007-10-19-SpillerUnfold.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 -x86-asm-syntax=intel | grep inc | not grep PTR
-define i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) signext {
+define signext i16 @t(i32* %bitptr, i32* %source, i8** %byteptr, i32 %scale, i32 %round) {
entry:
br label %bb
diff --git a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
index 86d3bbf..573a217 100644
--- a/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
+++ b/test/CodeGen/X86/2007-10-29-ExtendSetCC.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 | grep mov | count 1
-define i16 @t() signext {
+define signext i16 @t() {
entry:
%tmp180 = load i16* null, align 2 ; <i16> [#uses=3]
%tmp180181 = sext i16 %tmp180 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/2007-11-02-BadAsm.ll b/test/CodeGen/X86/2007-11-02-BadAsm.ll
deleted file mode 100644
index 4e11cda..0000000
--- a/test/CodeGen/X86/2007-11-02-BadAsm.ll
+++ /dev/null
@@ -1,144 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movl | not grep rax
-
- %struct.color_sample = type { i64 }
- %struct.gs_matrix = type { float, i64, float, i64, float, i64, float, i64, float, i64, float, i64 }
- %struct.ref = type { %struct.color_sample, i16, i16 }
- %struct.status = type { %struct.gs_matrix, i8*, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i32 }
-
-define i32 @ztype1imagepath(%struct.ref* %op) {
-entry:
- br i1 false, label %cond_next, label %UnifiedReturnBlock
-
-cond_next: ; preds = %entry
- br i1 false, label %cond_next68, label %UnifiedReturnBlock
-
-cond_next68: ; preds = %cond_next
- %tmp5.i.i = malloc i8, i32 0 ; <i8*> [#uses=2]
- br i1 false, label %bb81.outer.i, label %xit.i
-
-bb81.outer.i: ; preds = %bb87.i, %cond_next68
- %tmp67.i = add i32 0, 1 ; <i32> [#uses=1]
- br label %bb81.i
-
-bb61.i: ; preds = %bb81.i
- %tmp71.i = getelementptr i8* %tmp5.i.i, i64 0 ; <i8*> [#uses=1]
- %tmp72.i = load i8* %tmp71.i, align 1 ; <i8> [#uses=1]
- %tmp73.i = icmp eq i8 %tmp72.i, 0 ; <i1> [#uses=1]
- br i1 %tmp73.i, label %bb81.i, label %xit.i
-
-bb81.i: ; preds = %bb61.i, %bb81.outer.i
- br i1 false, label %bb87.i, label %bb61.i
-
-bb87.i: ; preds = %bb81.i
- br i1 false, label %bb81.outer.i, label %xit.i
-
-xit.i: ; preds = %bb87.i, %bb61.i, %cond_next68
- %lsbx.0.reg2mem.1.i = phi i32 [ 0, %cond_next68 ], [ 0, %bb61.i ], [ %tmp67.i, %bb87.i ] ; <i32> [#uses=1]
- %tmp6162.i.i = fptrunc double 0.000000e+00 to float ; <float> [#uses=1]
- %tmp67.i15.i = fptrunc double 0.000000e+00 to float ; <float> [#uses=1]
- %tmp24.i27.i = icmp eq i64 0, 0 ; <i1> [#uses=1]
- br i1 %tmp24.i27.i, label %cond_next.i79.i, label %cond_true.i34.i
-
-cond_true.i34.i: ; preds = %xit.i
- ret i32 0
-
-cond_next.i79.i: ; preds = %xit.i
- %phitmp167.i = fptosi double 0.000000e+00 to i64 ; <i64> [#uses=1]
- %tmp142143.i = fpext float %tmp6162.i.i to double ; <double> [#uses=1]
- %tmp2.i139.i = fadd double %tmp142143.i, 5.000000e-01 ; <double> [#uses=1]
- %tmp23.i140.i = fptosi double %tmp2.i139.i to i64 ; <i64> [#uses=1]
- br i1 false, label %cond_true.i143.i, label %round_coord.exit148.i
-
-cond_true.i143.i: ; preds = %cond_next.i79.i
- %tmp8.i142.i = icmp sgt i64 %tmp23.i140.i, -32768 ; <i1> [#uses=1]
- br i1 %tmp8.i142.i, label %cond_true11.i145.i, label %round_coord.exit148.i
-
-cond_true11.i145.i: ; preds = %cond_true.i143.i
- ret i32 0
-
-round_coord.exit148.i: ; preds = %cond_true.i143.i, %cond_next.i79.i
- %tmp144149.i = phi i32 [ 32767, %cond_next.i79.i ], [ -32767, %cond_true.i143.i ] ; <i32> [#uses=1]
- store i32 %tmp144149.i, i32* null, align 8
- %tmp147148.i = fpext float %tmp67.i15.i to double ; <double> [#uses=1]
- %tmp2.i128.i = fadd double %tmp147148.i, 5.000000e-01 ; <double> [#uses=1]
- %tmp23.i129.i = fptosi double %tmp2.i128.i to i64 ; <i64> [#uses=2]
- %tmp5.i130.i = icmp slt i64 %tmp23.i129.i, 32768 ; <i1> [#uses=1]
- br i1 %tmp5.i130.i, label %cond_true.i132.i, label %round_coord.exit137.i
-
-cond_true.i132.i: ; preds = %round_coord.exit148.i
- %tmp8.i131.i = icmp sgt i64 %tmp23.i129.i, -32768 ; <i1> [#uses=1]
- br i1 %tmp8.i131.i, label %cond_true11.i134.i, label %round_coord.exit137.i
-
-cond_true11.i134.i: ; preds = %cond_true.i132.i
- br label %round_coord.exit137.i
-
-round_coord.exit137.i: ; preds = %cond_true11.i134.i, %cond_true.i132.i, %round_coord.exit148.i
- %tmp149138.i = phi i32 [ 0, %cond_true11.i134.i ], [ 32767, %round_coord.exit148.i ], [ -32767, %cond_true.i132.i ] ; <i32> [#uses=1]
- br i1 false, label %cond_true.i121.i, label %round_coord.exit126.i
-
-cond_true.i121.i: ; preds = %round_coord.exit137.i
- br i1 false, label %cond_true11.i123.i, label %round_coord.exit126.i
-
-cond_true11.i123.i: ; preds = %cond_true.i121.i
- br label %round_coord.exit126.i
-
-round_coord.exit126.i: ; preds = %cond_true11.i123.i, %cond_true.i121.i, %round_coord.exit137.i
- %tmp153127.i = phi i32 [ 0, %cond_true11.i123.i ], [ 32767, %round_coord.exit137.i ], [ -32767, %cond_true.i121.i ] ; <i32> [#uses=1]
- br i1 false, label %cond_true.i110.i, label %round_coord.exit115.i
-
-cond_true.i110.i: ; preds = %round_coord.exit126.i
- br i1 false, label %cond_true11.i112.i, label %round_coord.exit115.i
-
-cond_true11.i112.i: ; preds = %cond_true.i110.i
- br label %round_coord.exit115.i
-
-round_coord.exit115.i: ; preds = %cond_true11.i112.i, %cond_true.i110.i, %round_coord.exit126.i
- %tmp157116.i = phi i32 [ 0, %cond_true11.i112.i ], [ 32767, %round_coord.exit126.i ], [ -32767, %cond_true.i110.i ] ; <i32> [#uses=2]
- br i1 false, label %cond_true.i99.i, label %round_coord.exit104.i
-
-cond_true.i99.i: ; preds = %round_coord.exit115.i
- br i1 false, label %cond_true11.i101.i, label %round_coord.exit104.i
-
-cond_true11.i101.i: ; preds = %cond_true.i99.i
- %tmp1213.i100.i = trunc i64 %phitmp167.i to i32 ; <i32> [#uses=1]
- br label %cond_next172.i
-
-round_coord.exit104.i: ; preds = %cond_true.i99.i, %round_coord.exit115.i
- %UnifiedRetVal.i102.i = phi i32 [ 32767, %round_coord.exit115.i ], [ -32767, %cond_true.i99.i ] ; <i32> [#uses=1]
- %tmp164.i = call fastcc i32 @put_int( %struct.status* null, i32 %tmp157116.i ) ; <i32> [#uses=0]
- br label %cond_next172.i
-
-cond_next172.i: ; preds = %round_coord.exit104.i, %cond_true11.i101.i
- %tmp161105.reg2mem.0.i = phi i32 [ %tmp1213.i100.i, %cond_true11.i101.i ], [ %UnifiedRetVal.i102.i, %round_coord.exit104.i ] ; <i32> [#uses=1]
- %tmp174.i = icmp eq i32 %tmp153127.i, 0 ; <i1> [#uses=1]
- %bothcond.i = and i1 false, %tmp174.i ; <i1> [#uses=1]
- %tmp235.i = call fastcc i32 @put_int( %struct.status* null, i32 %tmp149138.i ) ; <i32> [#uses=0]
- %tmp245.i = load i8** null, align 8 ; <i8*> [#uses=2]
- %tmp246.i = getelementptr i8* %tmp245.i, i64 1 ; <i8*> [#uses=1]
- br i1 %bothcond.i, label %cond_next254.i, label %bb259.i
-
-cond_next254.i: ; preds = %cond_next172.i
- store i8 13, i8* %tmp245.i, align 1
- br label %bb259.i
-
-bb259.i: ; preds = %cond_next254.i, %cond_next172.i
- %storemerge.i = phi i8* [ %tmp246.i, %cond_next254.i ], [ null, %cond_next172.i ] ; <i8*> [#uses=0]
- %tmp261.i = shl i32 %lsbx.0.reg2mem.1.i, 2 ; <i32> [#uses=1]
- store i32 %tmp261.i, i32* null, align 8
- %tmp270.i = add i32 0, %tmp157116.i ; <i32> [#uses=1]
- store i32 %tmp270.i, i32* null, align 8
- %tmp275.i = add i32 0, %tmp161105.reg2mem.0.i ; <i32> [#uses=0]
- br i1 false, label %trace_cells.exit.i, label %bb.preheader.i.i
-
-bb.preheader.i.i: ; preds = %bb259.i
- ret i32 0
-
-trace_cells.exit.i: ; preds = %bb259.i
- free i8* %tmp5.i.i
- ret i32 0
-
-UnifiedReturnBlock: ; preds = %cond_next, %entry
- ret i32 -20
-}
-
-declare fastcc i32 @put_int(%struct.status*, i32)
diff --git a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll b/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
deleted file mode 100644
index ca995cc..0000000
--- a/test/CodeGen/X86/2007-12-11-FoldImpDefSpill.ll
+++ /dev/null
@@ -1,680 +0,0 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin | not grep IMPLICIT_DEF
-
- %struct.__sbuf = type { i8*, i32 }
- %struct.ggBRDF = type { i32 (...)** }
- %"struct.ggBST<ggMaterial>" = type { %"struct.ggBSTNode<ggMaterial>"*, i32 }
- %"struct.ggBST<ggRasterSurfaceTexture>" = type { %"struct.ggBSTNode<ggRasterSurfaceTexture>"*, i32 }
- %"struct.ggBST<ggSolidTexture>" = type { %"struct.ggBSTNode<ggSolidTexture>"*, i32 }
- %"struct.ggBST<ggSpectrum>" = type { %"struct.ggBSTNode<ggSpectrum>"*, i32 }
- %"struct.ggBST<mrObjectRecord>" = type { %"struct.ggBSTNode<mrObjectRecord>"*, i32 }
- %"struct.ggBSTNode<ggMaterial>" = type { %"struct.ggBSTNode<ggMaterial>"*, %"struct.ggBSTNode<ggMaterial>"*, %struct.ggString, %struct.ggMaterial* }
- %"struct.ggBSTNode<ggRasterSurfaceTexture>" = type { %"struct.ggBSTNode<ggRasterSurfaceTexture>"*, %"struct.ggBSTNode<ggRasterSurfaceTexture>"*, %struct.ggString, %struct.ggRasterSurfaceTexture* }
- %"struct.ggBSTNode<ggSolidTexture>" = type { %"struct.ggBSTNode<ggSolidTexture>"*, %"struct.ggBSTNode<ggSolidTexture>"*, %struct.ggString, %struct.ggBRDF* }
- %"struct.ggBSTNode<ggSpectrum>" = type { %"struct.ggBSTNode<ggSpectrum>"*, %"struct.ggBSTNode<ggSpectrum>"*, %struct.ggString, %struct.ggSpectrum* }
- %"struct.ggBSTNode<mrObjectRecord>" = type { %"struct.ggBSTNode<mrObjectRecord>"*, %"struct.ggBSTNode<mrObjectRecord>"*, %struct.ggString, %struct.mrObjectRecord* }
- %"struct.ggDictionary<ggMaterial>" = type { %"struct.ggBST<ggMaterial>" }
- %"struct.ggDictionary<ggRasterSurfaceTexture>" = type { %"struct.ggBST<ggRasterSurfaceTexture>" }
- %"struct.ggDictionary<ggSolidTexture>" = type { %"struct.ggBST<ggSolidTexture>" }
- %"struct.ggDictionary<ggSpectrum>" = type { %"struct.ggBST<ggSpectrum>" }
- %"struct.ggDictionary<mrObjectRecord>" = type { %"struct.ggBST<mrObjectRecord>" }
- %struct.ggHAffineMatrix3 = type { %struct.ggHMatrix3 }
- %struct.ggHBoxMatrix3 = type { %struct.ggHAffineMatrix3 }
- %struct.ggHMatrix3 = type { [4 x [4 x double]] }
- %struct.ggMaterial = type { i32 (...)**, %struct.ggBRDF* }
- %struct.ggPoint3 = type { [3 x double] }
- %"struct.ggRGBPixel<char>" = type { [3 x i8], i8 }
- %"struct.ggRaster<ggRGBPixel<unsigned char> >" = type { i32, i32, %"struct.ggRGBPixel<char>"* }
- %struct.ggRasterSurfaceTexture = type { %"struct.ggRaster<ggRGBPixel<unsigned char> >"* }
- %struct.ggSolidNoise3 = type { i32, [256 x %struct.ggPoint3], [256 x i32] }
- %struct.ggSpectrum = type { [8 x float] }
- %struct.ggString = type { %"struct.ggString::StringRep"* }
- %"struct.ggString::StringRep" = type { i32, i32, [1 x i8] }
- %"struct.ggTrain<mrPixelRenderer*>" = type { %struct.ggBRDF**, i32, i32 }
- %struct.mrObjectRecord = type { %struct.ggHBoxMatrix3, %struct.ggHBoxMatrix3, %struct.mrSurfaceList, %struct.ggMaterial*, i32, %struct.ggRasterSurfaceTexture*, %struct.ggBRDF*, i32, i32 }
- %struct.mrScene = type { %struct.ggSpectrum, %struct.ggSpectrum, %struct.ggBRDF*, %struct.ggBRDF*, %struct.ggBRDF*, i32, double, %"struct.ggDictionary<mrObjectRecord>", %"struct.ggDictionary<ggRasterSurfaceTexture>", %"struct.ggDictionary<ggSolidTexture>", %"struct.ggDictionary<ggSpectrum>", %"struct.ggDictionary<ggMaterial>" }
- %struct.mrSurfaceList = type { %struct.ggBRDF, %"struct.ggTrain<mrPixelRenderer*>" }
- %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>" = type { %"struct.std::locale::facet" }
- %"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"* }
- %"struct.std::basic_istream<char,std::char_traits<char> >" = type { i32 (...)**, i32, %"struct.std::basic_ios<char,std::char_traits<char> >" }
- %"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
- %"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
- %"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }
- %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %struct.__sbuf, [8 x %struct.__sbuf], i32, %struct.__sbuf*, %"struct.std::locale" }
- %"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
- %"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
- %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
- %"struct.std::locale::facet" = type { i32 (...)**, i32 }
-@.str80 = external constant [7 x i8] ; <[7 x i8]*> [#uses=1]
-@.str81 = external constant [11 x i8] ; <[11 x i8]*> [#uses=1]
-
-define fastcc void @_ZN7mrScene4ReadERSi(%struct.mrScene* %this, %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces) {
-entry:
- %tmp6.i.i8288 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit unwind label %lpad ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit: ; preds = %entry
- %tmp6.i.i8995 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit96 unwind label %lpad3825 ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit96: ; preds = %_ZN8ggStringC1Ei.exit
- %tmp6.i.i97103 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit104 unwind label %lpad3829 ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit104: ; preds = %_ZN8ggStringC1Ei.exit96
- %tmp6.i.i105111 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit112 unwind label %lpad3833 ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit112: ; preds = %_ZN8ggStringC1Ei.exit104
- %tmp6.i.i122128 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit129 unwind label %lpad3837 ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit129: ; preds = %_ZN8ggStringC1Ei.exit112
- %tmp6.i.i132138 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit139 unwind label %lpad3841 ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit139: ; preds = %_ZN8ggStringC1Ei.exit129
- %tmp295 = invoke i8* @_Znwm( i32 16 )
- to label %invcont294 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-invcont294: ; preds = %_ZN8ggStringC1Ei.exit139
- %tmp10.i.i141 = invoke i8* @_Znam( i32 16 )
- to label %_ZN13mrSurfaceListC1Ev.exit unwind label %lpad3849 ; <i8*> [#uses=0]
-
-_ZN13mrSurfaceListC1Ev.exit: ; preds = %invcont294
- %tmp3.i148 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i.noexc: ; preds = %_ZN13mrSurfaceListC1Ev.exit
- %tmp15.i149 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i.noexc unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i.noexc: ; preds = %tmp3.i.noexc
- br i1 false, label %bb308, label %bb.i
-
-bb.i: ; preds = %tmp15.i.noexc
- ret void
-
-bb308: ; preds = %tmp15.i.noexc
- br i1 false, label %bb3743.preheader, label %bb315
-
-bb3743.preheader: ; preds = %bb308
- %tmp16.i3862 = getelementptr %struct.ggPoint3* null, i32 0, i32 0, i32 0 ; <double*> [#uses=1]
- %tmp16.i3859 = getelementptr %struct.ggPoint3* null, i32 0, i32 0, i32 0 ; <double*> [#uses=3]
- br label %bb3743
-
-bb315: ; preds = %bb308
- ret void
-
-bb333: ; preds = %invcont3758, %invcont335
- %tmp3.i167180 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i167.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i167.noexc: ; preds = %bb333
- %tmp15.i182 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i.noexc181 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i.noexc181: ; preds = %tmp3.i167.noexc
- br i1 false, label %invcont335, label %bb.i178
-
-bb.i178: ; preds = %tmp15.i.noexc181
- ret void
-
-invcont335: ; preds = %tmp15.i.noexc181
- br i1 false, label %bb3743, label %bb333
-
-bb345: ; preds = %invcont3758
- br i1 false, label %bb353, label %bb360
-
-bb353: ; preds = %bb345
- %tmp356 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
- to label %bb3743 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-bb360: ; preds = %bb345
- br i1 false, label %bb368, label %bb374
-
-bb368: ; preds = %bb360
- %tmp373 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
- to label %bb3743 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-bb374: ; preds = %bb360
- br i1 false, label %bb396, label %bb421
-
-bb396: ; preds = %bb374
- ret void
-
-bb421: ; preds = %bb374
- br i1 false, label %bb429, label %bb530
-
-bb429: ; preds = %bb421
- ret void
-
-bb530: ; preds = %bb421
- br i1 false, label %bb538, label %bb673
-
-bb538: ; preds = %bb530
- ret void
-
-bb673: ; preds = %bb530
- br i1 false, label %bb681, label %bb778
-
-bb681: ; preds = %bb673
- ret void
-
-bb778: ; preds = %bb673
- br i1 false, label %bb786, label %bb891
-
-bb786: ; preds = %bb778
- ret void
-
-bb891: ; preds = %bb778
- br i1 false, label %bb899, label %bb998
-
-bb899: ; preds = %bb891
- ret void
-
-bb998: ; preds = %bb891
- br i1 false, label %bb1168, label %bb1190
-
-bb1168: ; preds = %bb998
- ret void
-
-bb1190: ; preds = %bb998
- br i1 false, label %bb1198, label %bb1220
-
-bb1198: ; preds = %bb1190
- ret void
-
-bb1220: ; preds = %bb1190
- br i1 false, label %bb1228, label %bb1250
-
-bb1228: ; preds = %bb1220
- ret void
-
-bb1250: ; preds = %bb1220
- br i1 false, label %bb1258, label %bb1303
-
-bb1258: ; preds = %bb1250
- ret void
-
-bb1303: ; preds = %bb1250
- br i1 false, label %bb1311, label %bb1366
-
-bb1311: ; preds = %bb1303
- ret void
-
-bb1366: ; preds = %bb1303
- br i1 false, label %bb1374, label %bb1432
-
-bb1374: ; preds = %bb1366
- ret void
-
-bb1432: ; preds = %bb1366
- br i1 false, label %bb1440, label %bb1495
-
-bb1440: ; preds = %bb1432
- ret void
-
-bb1495: ; preds = %bb1432
- br i1 false, label %bb1503, label %bb1561
-
-bb1503: ; preds = %bb1495
- ret void
-
-bb1561: ; preds = %bb1495
- br i1 false, label %bb1569, label %bb1624
-
-bb1569: ; preds = %bb1561
- ret void
-
-bb1624: ; preds = %bb1561
- br i1 false, label %bb1632, label %bb1654
-
-bb1632: ; preds = %bb1624
- store double 0.000000e+00, double* %tmp16.i3859, align 8
- %tmp3.i38383852 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3838.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3838.noexc: ; preds = %bb1632
- %tmp15.i38473853 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i3847.noexc unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i3847.noexc: ; preds = %tmp3.i3838.noexc
- br i1 false, label %invcont1634, label %bb.i3850
-
-bb.i3850: ; preds = %tmp15.i3847.noexc
- ret void
-
-invcont1634: ; preds = %tmp15.i3847.noexc
- %tmp3.i38173831 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3817.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3817.noexc: ; preds = %invcont1634
- %tmp15.i38263832 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i3826.noexc unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i3826.noexc: ; preds = %tmp3.i3817.noexc
- br i1 false, label %invcont1636, label %bb.i3829
-
-bb.i3829: ; preds = %tmp15.i3826.noexc
- ret void
-
-invcont1636: ; preds = %tmp15.i3826.noexc
- %tmp8.i38083811 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* %tmp16.i3862 )
- to label %tmp8.i3808.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-tmp8.i3808.noexc: ; preds = %invcont1636
- %tmp9.i38093812 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp8.i38083811, double* null )
- to label %tmp9.i3809.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-tmp9.i3809.noexc: ; preds = %tmp8.i3808.noexc
- %tmp10.i38103813 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp9.i38093812, double* null )
- to label %invcont1638 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-invcont1638: ; preds = %tmp9.i3809.noexc
- %tmp8.i37983801 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* %tmp16.i3859 )
- to label %tmp8.i3798.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-tmp8.i3798.noexc: ; preds = %invcont1638
- %tmp9.i37993802 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp8.i37983801, double* null )
- to label %tmp9.i3799.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-tmp9.i3799.noexc: ; preds = %tmp8.i3798.noexc
- %tmp10.i38003803 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp9.i37993802, double* null )
- to label %invcont1640 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-invcont1640: ; preds = %tmp9.i3799.noexc
- %tmp3.i3778 = load double* %tmp16.i3859, align 8 ; <double> [#uses=1]
- %tmp1643 = invoke i8* @_Znwm( i32 76 )
- to label %invcont1642 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-invcont1642: ; preds = %invcont1640
- %tmp18.i3770 = fsub double %tmp3.i3778, 0.000000e+00 ; <double> [#uses=0]
- invoke fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i( %struct.mrScene* %this, %struct.ggBRDF* null, %struct.ggString* null, %struct.ggString* null, i32 0 )
- to label %bb3743 unwind label %lpad3845
-
-bb1654: ; preds = %bb1624
- br i1 false, label %bb1662, label %bb1693
-
-bb1662: ; preds = %bb1654
- %tmp3.i37143728 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3714.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3714.noexc: ; preds = %bb1662
- %tmp15.i37233729 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i3723.noexc unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i3723.noexc: ; preds = %tmp3.i3714.noexc
- ret void
-
-bb1693: ; preds = %bb1654
- br i1 false, label %bb1701, label %bb1745
-
-bb1701: ; preds = %bb1693
- %tmp3.i36493663 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3649.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3649.noexc: ; preds = %bb1701
- ret void
-
-bb1745: ; preds = %bb1693
- br i1 false, label %bb1753, label %bb1797
-
-bb1753: ; preds = %bb1745
- ret void
-
-bb1797: ; preds = %bb1745
- br i1 false, label %bb1805, label %bb1847
-
-bb1805: ; preds = %bb1797
- ret void
-
-bb1847: ; preds = %bb1797
- br i1 false, label %bb1855, label %bb1897
-
-bb1855: ; preds = %bb1847
- %tmp3.i34633477 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3463.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3463.noexc: ; preds = %bb1855
- %tmp15.i34723478 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i3472.noexc unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i3472.noexc: ; preds = %tmp3.i3463.noexc
- br i1 false, label %invcont1857, label %bb.i3475
-
-bb.i3475: ; preds = %tmp15.i3472.noexc
- invoke fastcc void @_ZN8ggStringaSEPKc( %struct.ggString* null, i8* null )
- to label %invcont1857 unwind label %lpad3845
-
-invcont1857: ; preds = %bb.i3475, %tmp15.i3472.noexc
- %tmp1860 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
- to label %invcont1859 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-invcont1859: ; preds = %invcont1857
- %tmp1862 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp1860, double* null )
- to label %invcont1861 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-invcont1861: ; preds = %invcont1859
- %tmp1864 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp1862, double* null )
- to label %invcont1863 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-invcont1863: ; preds = %invcont1861
- %tmp1866 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp1864, double* null )
- to label %invcont1865 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-invcont1865: ; preds = %invcont1863
- %tmp1868 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp1866, double* null )
- to label %invcont1867 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-invcont1867: ; preds = %invcont1865
- %tmp1881 = invoke i8 @_ZNKSt9basic_iosIcSt11char_traitsIcEE4goodEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null ) zeroext
- to label %invcont1880 unwind label %lpad3845 ; <i8> [#uses=0]
-
-invcont1880: ; preds = %invcont1867
- %tmp1883 = invoke i8* @_Znwm( i32 24 )
- to label %invcont1882 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-invcont1882: ; preds = %invcont1880
- invoke fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i( %struct.mrScene* %this, %struct.ggBRDF* null, %struct.ggString* null, %struct.ggString* null, i32 0 )
- to label %bb3743 unwind label %lpad3845
-
-bb1897: ; preds = %bb1847
- br i1 false, label %bb1905, label %bb1947
-
-bb1905: ; preds = %bb1897
- ret void
-
-bb1947: ; preds = %bb1897
- br i1 false, label %bb1955, label %bb2000
-
-bb1955: ; preds = %bb1947
- ret void
-
-bb2000: ; preds = %bb1947
- br i1 false, label %bb2008, label %bb2053
-
-bb2008: ; preds = %bb2000
- ret void
-
-bb2053: ; preds = %bb2000
- br i1 false, label %bb2061, label %bb2106
-
-bb2061: ; preds = %bb2053
- %tmp3.i32433257 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3243.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3243.noexc: ; preds = %bb2061
- %tmp15.i32523258 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %bb.i3255 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-bb.i3255: ; preds = %tmp3.i3243.noexc
- invoke fastcc void @_ZN8ggStringaSEPKc( %struct.ggString* null, i8* null )
- to label %invcont2063 unwind label %lpad3845
-
-invcont2063: ; preds = %bb.i3255
- ret void
-
-bb2106: ; preds = %bb2053
- %tmp7.i3214 = call i32 @strcmp( i8* %tmp5.i161, i8* getelementptr ([7 x i8]* @.str80, i32 0, i32 0) ) nounwind readonly ; <i32> [#uses=0]
- br i1 false, label %bb2114, label %bb2136
-
-bb2114: ; preds = %bb2106
- %tmp3.i31923206 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3192.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3192.noexc: ; preds = %bb2114
- %tmp15.i32013207 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i3201.noexc unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i3201.noexc: ; preds = %tmp3.i3192.noexc
- br i1 false, label %invcont2116, label %bb.i3204
-
-bb.i3204: ; preds = %tmp15.i3201.noexc
- ret void
-
-invcont2116: ; preds = %tmp15.i3201.noexc
- %tmp3.i31713185 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3171.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3171.noexc: ; preds = %invcont2116
- %tmp15.i31803186 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i3180.noexc unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i3180.noexc: ; preds = %tmp3.i3171.noexc
- br i1 false, label %invcont2118, label %bb.i3183
-
-bb.i3183: ; preds = %tmp15.i3180.noexc
- ret void
-
-invcont2118: ; preds = %tmp15.i3180.noexc
- %tmp8.i31623165 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
- to label %tmp8.i3162.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-tmp8.i3162.noexc: ; preds = %invcont2118
- %tmp9.i31633166 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp8.i31623165, double* null )
- to label %tmp9.i3163.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-tmp9.i3163.noexc: ; preds = %tmp8.i3162.noexc
- %tmp10.i31643167 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp9.i31633166, double* null )
- to label %invcont2120 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-invcont2120: ; preds = %tmp9.i3163.noexc
- %tmp2123 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
- to label %invcont2122 unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-invcont2122: ; preds = %invcont2120
- %tmp2125 = invoke i8* @_Znwm( i32 36 )
- to label %invcont2124 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-invcont2124: ; preds = %invcont2122
- invoke fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i( %struct.mrScene* %this, %struct.ggBRDF* null, %struct.ggString* null, %struct.ggString* null, i32 0 )
- to label %bb3743 unwind label %lpad3845
-
-bb2136: ; preds = %bb2106
- %tmp7.i3128 = call i32 @strcmp( i8* %tmp5.i161, i8* getelementptr ([11 x i8]* @.str81, i32 0, i32 0) ) nounwind readonly ; <i32> [#uses=0]
- br i1 false, label %bb2144, label %bb3336
-
-bb2144: ; preds = %bb2136
- %tmp6.i.i31173123 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit3124 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit3124: ; preds = %bb2144
- %tmp3.i30983112 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i3098.noexc unwind label %lpad3921 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i3098.noexc: ; preds = %_ZN8ggStringC1Ei.exit3124
- %tmp15.i31073113 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i3107.noexc unwind label %lpad3921 ; <i8*> [#uses=0]
-
-tmp15.i3107.noexc: ; preds = %tmp3.i3098.noexc
- br i1 false, label %invcont2147, label %bb.i3110
-
-bb.i3110: ; preds = %tmp15.i3107.noexc
- ret void
-
-invcont2147: ; preds = %tmp15.i3107.noexc
- %tmp2161 = invoke i8 @_ZNKSt9basic_iosIcSt11char_traitsIcEE4goodEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null ) zeroext
- to label %invcont2160 unwind label %lpad3921 ; <i8> [#uses=0]
-
-invcont2160: ; preds = %invcont2147
- %tmp4.i30933094 = invoke fastcc %struct.ggSpectrum* @_ZN5ggBSTI10ggSpectrumE4findERK8ggString3( %"struct.ggBSTNode<ggSpectrum>"* null, %struct.ggString* null )
- to label %invcont2164 unwind label %lpad3921 ; <%struct.ggSpectrum*> [#uses=0]
-
-invcont2164: ; preds = %invcont2160
- br i1 false, label %bb2170, label %bb2181
-
-bb2170: ; preds = %invcont2164
- ret void
-
-bb2181: ; preds = %invcont2164
- invoke fastcc void @_ZN8ggStringD1Ev( %struct.ggString* null )
- to label %bb3743 unwind label %lpad3845
-
-bb3336: ; preds = %bb2136
- br i1 false, label %bb3344, label %bb3734
-
-bb3344: ; preds = %bb3336
- %tmp6.i.i773779 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit780 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit780: ; preds = %bb3344
- %tmp6.i.i765771 = invoke i8* @_Znam( i32 12 )
- to label %_ZN8ggStringC1Ei.exit772 unwind label %lpad4025 ; <i8*> [#uses=0]
-
-_ZN8ggStringC1Ei.exit772: ; preds = %_ZN8ggStringC1Ei.exit780
- %tmp3.i746760 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i746.noexc unwind label %lpad4029 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i746.noexc: ; preds = %_ZN8ggStringC1Ei.exit772
- %tmp15.i755761 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i755.noexc unwind label %lpad4029 ; <i8*> [#uses=0]
-
-tmp15.i755.noexc: ; preds = %tmp3.i746.noexc
- br i1 false, label %invcont3348, label %bb.i758
-
-bb.i758: ; preds = %tmp15.i755.noexc
- ret void
-
-invcont3348: ; preds = %tmp15.i755.noexc
- %tmp3.i726740 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i726.noexc unwind label %lpad4029 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i726.noexc: ; preds = %invcont3348
- %tmp15.i735741 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i735.noexc unwind label %lpad4029 ; <i8*> [#uses=0]
-
-tmp15.i735.noexc: ; preds = %tmp3.i726.noexc
- br i1 false, label %bb3458, label %bb.i738
-
-bb.i738: ; preds = %tmp15.i735.noexc
- ret void
-
-bb3458: ; preds = %tmp15.i735.noexc
- br i1 false, label %bb3466, label %bb3491
-
-bb3466: ; preds = %bb3458
- %tmp3469 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, double* null )
- to label %invcont3468 unwind label %lpad4029 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-invcont3468: ; preds = %bb3466
- %tmp3471 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp3469, double* null )
- to label %invcont3470 unwind label %lpad4029 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=1]
-
-invcont3470: ; preds = %invcont3468
- %tmp3473 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERi( %"struct.std::basic_istream<char,std::char_traits<char> >"* %tmp3471, i32* null )
- to label %invcont3472 unwind label %lpad4029 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-invcont3472: ; preds = %invcont3470
- %tmp3475 = invoke i8* @_Znwm( i32 7196 )
- to label %invcont3474 unwind label %lpad4029 ; <i8*> [#uses=1]
-
-invcont3474: ; preds = %invcont3472
- invoke fastcc void @_ZN13ggSolidNoise3C1Ev( %struct.ggSolidNoise3* null )
- to label %_ZN22ggCoverageSolidTextureC1Eddi.exit unwind label %lpad4045
-
-_ZN22ggCoverageSolidTextureC1Eddi.exit: ; preds = %invcont3474
- %tmp34823483 = bitcast i8* %tmp3475 to %struct.ggBRDF* ; <%struct.ggBRDF*> [#uses=2]
- invoke fastcc void @_ZN5ggBSTI14ggSolidTextureE17InsertIntoSubtreeERK8ggStringPS0_RP9ggBSTNodeIS0_E( %"struct.ggBST<ggSolidTexture>"* null, %struct.ggString* null, %struct.ggBRDF* %tmp34823483, %"struct.ggBSTNode<ggSolidTexture>"** null )
- to label %bb3662 unwind label %lpad4029
-
-bb3491: ; preds = %bb3458
- ret void
-
-bb3662: ; preds = %_ZN22ggCoverageSolidTextureC1Eddi.exit
- invoke fastcc void @_ZN8ggStringD1Ev( %struct.ggString* null )
- to label %invcont3663 unwind label %lpad4025
-
-invcont3663: ; preds = %bb3662
- invoke fastcc void @_ZN8ggStringD1Ev( %struct.ggString* null )
- to label %bb3743 unwind label %lpad3845
-
-bb3734: ; preds = %bb3336
- ret void
-
-bb3743: ; preds = %invcont3663, %bb2181, %invcont2124, %invcont1882, %invcont1642, %bb368, %bb353, %invcont335, %bb3743.preheader
- %tex1.3 = phi %struct.ggBRDF* [ undef, %bb3743.preheader ], [ %tex1.3, %bb368 ], [ %tex1.3, %invcont1642 ], [ %tex1.3, %invcont1882 ], [ %tex1.3, %invcont2124 ], [ %tex1.3, %bb2181 ], [ %tex1.3, %invcont335 ], [ %tmp34823483, %invcont3663 ], [ %tex1.3, %bb353 ] ; <%struct.ggBRDF*> [#uses=7]
- %tmp3.i312325 = invoke %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_( %"struct.std::basic_istream<char,std::char_traits<char> >"* %surfaces, i8* null )
- to label %tmp3.i312.noexc unwind label %lpad3845 ; <%"struct.std::basic_istream<char,std::char_traits<char> >"*> [#uses=0]
-
-tmp3.i312.noexc: ; preds = %bb3743
- %tmp15.i327 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %tmp15.i.noexc326 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-tmp15.i.noexc326: ; preds = %tmp3.i312.noexc
- br i1 false, label %invcont3745, label %bb.i323
-
-bb.i323: ; preds = %tmp15.i.noexc326
- ret void
-
-invcont3745: ; preds = %tmp15.i.noexc326
- %tmp3759 = invoke i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv( %"struct.std::basic_ios<char,std::char_traits<char> >"* null )
- to label %invcont3758 unwind label %lpad3845 ; <i8*> [#uses=0]
-
-invcont3758: ; preds = %invcont3745
- %tmp5.i161 = getelementptr %"struct.ggString::StringRep"* null, i32 0, i32 2, i32 0 ; <i8*> [#uses=2]
- br i1 false, label %bb333, label %bb345
-
-lpad: ; preds = %entry
- ret void
-
-lpad3825: ; preds = %_ZN8ggStringC1Ei.exit
- ret void
-
-lpad3829: ; preds = %_ZN8ggStringC1Ei.exit96
- ret void
-
-lpad3833: ; preds = %_ZN8ggStringC1Ei.exit104
- ret void
-
-lpad3837: ; preds = %_ZN8ggStringC1Ei.exit112
- ret void
-
-lpad3841: ; preds = %_ZN8ggStringC1Ei.exit129
- ret void
-
-lpad3845: ; preds = %invcont3745, %tmp3.i312.noexc, %bb3743, %invcont3663, %bb3344, %bb2181, %bb2144, %invcont2124, %invcont2122, %invcont2120, %tmp9.i3163.noexc, %tmp8.i3162.noexc, %invcont2118, %tmp3.i3171.noexc, %invcont2116, %tmp3.i3192.noexc, %bb2114, %bb.i3255, %tmp3.i3243.noexc, %bb2061, %invcont1882, %invcont1880, %invcont1867, %invcont1865, %invcont1863, %invcont1861, %invcont1859, %invcont1857, %bb.i3475, %tmp3.i3463.noexc, %bb1855, %bb1701, %tmp3.i3714.noexc, %bb1662, %invcont1642, %invcont1640, %tmp9.i3799.noexc, %tmp8.i3798.noexc, %invcont1638, %tmp9.i3809.noexc, %tmp8.i3808.noexc, %invcont1636, %tmp3.i3817.noexc, %invcont1634, %tmp3.i3838.noexc, %bb1632, %bb368, %bb353, %tmp3.i167.noexc, %bb333, %tmp3.i.noexc, %_ZN13mrSurfaceListC1Ev.exit, %_ZN8ggStringC1Ei.exit139
- ret void
-
-lpad3849: ; preds = %invcont294
- ret void
-
-lpad3921: ; preds = %invcont2160, %invcont2147, %tmp3.i3098.noexc, %_ZN8ggStringC1Ei.exit3124
- ret void
-
-lpad4025: ; preds = %bb3662, %_ZN8ggStringC1Ei.exit780
- ret void
-
-lpad4029: ; preds = %_ZN22ggCoverageSolidTextureC1Eddi.exit, %invcont3472, %invcont3470, %invcont3468, %bb3466, %tmp3.i726.noexc, %invcont3348, %tmp3.i746.noexc, %_ZN8ggStringC1Ei.exit772
- ret void
-
-lpad4045: ; preds = %invcont3474
- ret void
-}
-
-declare fastcc void @_ZN8ggStringD1Ev(%struct.ggString*)
-
-declare i8* @_Znam(i32)
-
-declare fastcc void @_ZN8ggStringaSEPKc(%struct.ggString*, i8*)
-
-declare i32 @strcmp(i8*, i8*) nounwind readonly
-
-declare %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERi(%"struct.std::basic_istream<char,std::char_traits<char> >"*, i32*)
-
-declare i8* @_Znwm(i32)
-
-declare i8* @_ZNKSt9basic_iosIcSt11char_traitsIcEEcvPvEv(%"struct.std::basic_ios<char,std::char_traits<char> >"*)
-
-declare %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSirsERd(%"struct.std::basic_istream<char,std::char_traits<char> >"*, double*)
-
-declare %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZStrsIcSt11char_traitsIcEERSt13basic_istreamIT_T0_ES6_PS3_(%"struct.std::basic_istream<char,std::char_traits<char> >"*, i8*)
-
-declare fastcc void @_ZN13ggSolidNoise3C1Ev(%struct.ggSolidNoise3*)
-
-declare i8 @_ZNKSt9basic_iosIcSt11char_traitsIcEE4goodEv(%"struct.std::basic_ios<char,std::char_traits<char> >"*) zeroext
-
-declare fastcc %struct.ggSpectrum* @_ZN5ggBSTI10ggSpectrumE4findERK8ggString3(%"struct.ggBSTNode<ggSpectrum>"*, %struct.ggString*)
-
-declare fastcc void @_ZN5ggBSTI14ggSolidTextureE17InsertIntoSubtreeERK8ggStringPS0_RP9ggBSTNodeIS0_E(%"struct.ggBST<ggSolidTexture>"*, %struct.ggString*, %struct.ggBRDF*, %"struct.ggBSTNode<ggSolidTexture>"**)
-
-declare fastcc void @_ZN7mrScene9AddObjectEP9mrSurfaceRK8ggStringS4_i(%struct.mrScene*, %struct.ggBRDF*, %struct.ggString*, %struct.ggString*, i32)
diff --git a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
index 6615b8c..fd9c35e 100644
--- a/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-02-25-X86-64-CoalescerBug.ll
@@ -4,7 +4,7 @@
%struct.YY = type { i64 }
%struct.ZZ = type opaque
-define i8 @f(%struct.XX*** %fontMap, %struct.XX* %uen) signext {
+define signext i8 @f(%struct.XX*** %fontMap, %struct.XX* %uen) {
entry:
%tmp45 = add i16 0, 1 ; <i16> [#uses=2]
br i1 false, label %bb124, label %bb53
diff --git a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
index c6ba22e..19d49b2 100644
--- a/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
+++ b/test/CodeGen/X86/2008-03-13-TwoAddrPassCrash.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86
-define i16 @t(i32 %depth) signext nounwind {
+define signext i16 @t(i32 %depth) nounwind {
entry:
br i1 false, label %bb74, label %bb
bb: ; preds = %entry
diff --git a/test/CodeGen/X86/2008-04-16-ReMatBug.ll b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
index bfe8ef5..109069e 100644
--- a/test/CodeGen/X86/2008-04-16-ReMatBug.ll
+++ b/test/CodeGen/X86/2008-04-16-ReMatBug.ll
@@ -6,7 +6,7 @@
%struct.pthread_mutex_t = type { i32, [40 x i8] }
@iodbcdm_global_lock = external global %struct.pthread_mutex_t ; <%struct.pthread_mutex_t*> [#uses=1]
-define i16 @SQLDriversW(i8* %henv, i16 zeroext %fDir, i32* %szDrvDesc, i16 signext %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext %cbDrvAttrMax, i16* %pcbDrvAttr) signext nounwind {
+define i16 @SQLDriversW(i8* %henv, i16 zeroext %fDir, i32* %szDrvDesc, i16 signext %cbDrvDescMax, i16* %pcbDrvDesc, i32* %szDrvAttr, i16 signext %cbDrvAttrMax, i16* %pcbDrvAttr) nounwind {
entry:
%tmp12 = bitcast i8* %henv to %struct.GENV_t* ; <%struct.GENV_t*> [#uses=1]
br i1 true, label %bb28, label %bb
@@ -23,7 +23,7 @@ bb74: ; preds = %bb37
bb92: ; preds = %bb74, %bb37
%tmp95180 = shl i16 %cbDrvAttrMax, 2 ; <i16> [#uses=1]
%tmp100178 = shl i16 %cbDrvDescMax, 2 ; <i16> [#uses=1]
- %tmp113 = tail call i16 @SQLDrivers_Internal( i8* %henv, i16 zeroext %fDir, i8* null, i16 signext %tmp100178, i16* %pcbDrvDesc, i8* null, i16 signext %tmp95180, i16* %pcbDrvAttr, i8 zeroext 87 ) signext nounwind ; <i16> [#uses=1]
+ %tmp113 = tail call i16 @SQLDrivers_Internal( i8* %henv, i16 zeroext %fDir, i8* null, i16 signext %tmp100178, i16* %pcbDrvDesc, i8* null, i16 signext %tmp95180, i16* %pcbDrvAttr, i8 zeroext 87 ) nounwind ; <i16> [#uses=1]
br i1 false, label %done, label %bb137
bb137: ; preds = %bb92
ret i16 0
@@ -41,6 +41,6 @@ bb167: ; preds = %done
declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-declare i16 @SQLDrivers_Internal(i8*, i16 zeroext , i8*, i16 signext , i16*, i8*, i16 signext , i16*, i8 zeroext ) signext nounwind
+declare i16 @SQLDrivers_Internal(i8*, i16 zeroext , i8*, i16 signext , i16*, i8*, i16 signext , i16*, i8 zeroext ) nounwind
declare void @trace_SQLDriversW(i32, i32, i8*, i16 zeroext , i32*, i16 signext , i16*, i32*, i16 signext , i16*)
diff --git a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
index ac48285..77720aa 100644
--- a/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
+++ b/test/CodeGen/X86/2008-04-17-CoalescerBug.ll
@@ -75,7 +75,7 @@ bb5334: ; preds = %bb3314
bb5484: ; preds = %bb3314
ret void
bb5657: ; preds = %bb3314
- %tmp5661 = invoke i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE( %struct.wxDateTime* %this, %"struct.wxDateTime::TimeZone"* %tz ) zeroext
+ %tmp5661 = invoke zeroext i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE( %struct.wxDateTime* %this, %"struct.wxDateTime::TimeZone"* %tz )
to label %invcont5660 unwind label %lpad ; <i16> [#uses=0]
invcont5660: ; preds = %bb5657
ret void
@@ -120,7 +120,7 @@ invcont5814: ; preds = %bb448.i8694, %bb265.i8606
invoke void (%struct.wxString*, i32*, ...)* @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 %tmp58165817 )
to label %invcont5831 unwind label %lpad
invcont5831: ; preds = %invcont5814
- %tmp5862 = invoke i8 @_ZN12wxStringBase10ConcatSelfEmPKwm( %struct.wxStringBase* null, i32 0, i32* null, i32 0 ) zeroext
+ %tmp5862 = invoke zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm( %struct.wxStringBase* null, i32 0, i32* null, i32 0 )
to label %bb7834 unwind label %lpad8185 ; <i8> [#uses=0]
bb5968: ; preds = %bb3314
invoke void (%struct.wxString*, i32*, ...)* @_ZN8wxString6FormatEPKwz( %struct.wxString* noalias sret null, i32* null, i32 0 )
@@ -158,11 +158,11 @@ lpad8185: ; preds = %invcont5831
declare void @_Z10wxOnAssertPKwiPKcS0_S0_(i32*, i32, i8*, i32*, i32*)
-declare i8 @_ZN12wxStringBase10ConcatSelfEmPKwm(%struct.wxStringBase*, i32, i32*, i32) zeroext
+declare zeroext i8 @_ZN12wxStringBase10ConcatSelfEmPKwm(%struct.wxStringBase*, i32, i32*, i32)
declare %struct.tm* @gmtime_r(i32*, %struct.tm*)
-declare i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE(%struct.wxDateTime*, %"struct.wxDateTime::TimeZone"*) zeroext
+declare zeroext i16 @_ZNK10wxDateTime12GetDayOfYearERKNS_8TimeZoneE(%struct.wxDateTime*, %"struct.wxDateTime::TimeZone"*)
declare %struct.wxStringBase* @_ZN12wxStringBase6appendEmw(%struct.wxStringBase*, i32, i32)
diff --git a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
index 38d6aa6..6e9a629 100644
--- a/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
+++ b/test/CodeGen/X86/2008-04-26-Asm-Optimize-Imm.ll
@@ -1,10 +1,14 @@
-; RUN: llc < %s | grep {1 \$2 3}
+; RUN: llc < %s | FileCheck %s
; rdar://5720231
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
define void @test() nounwind {
-entry:
+; CHECK: test:
+; CHECK-NOT: ret
+; CHECK: 1 $2 3
+; CHECK: ret
+
tail call void asm sideeffect " ${0:c} $1 ${2:c} ", "imr,imr,i,~{dirflag},~{fpsr},~{flags}"( i32 1, i32 2, i32 3 ) nounwind
ret void
}
diff --git a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll b/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
deleted file mode 100644
index 236b7cd..0000000
--- a/test/CodeGen/X86/2008-06-04-MemCpyLoweringBug.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -disable-fp-elim | grep subl | grep 24
-
- %struct.argument_t = type { i8*, %struct.argument_t*, i32, %struct.ipc_type_t*, i32, void (...)*, void (...)*, void (...)*, void (...)*, void (...)*, i8*, i8*, i8*, i8*, i8*, i32, i32, i32, %struct.routine*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, %struct.argument_t*, i32, i32, i32, i32, i32, i32 }
- %struct.ipc_type_t = type { i8*, %struct.ipc_type_t*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i32, i32, i32, i32, i32, i32, %struct.ipc_type_t*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8* }
- %struct.routine = type opaque
-@"\01LC" = external constant [11 x i8] ; <[11 x i8]*> [#uses=1]
-
-define i8* @InArgMsgField(%struct.argument_t* %arg, i8* %str) nounwind {
-entry:
- %who = alloca [20 x i8] ; <[20 x i8]*> [#uses=1]
- %who1 = getelementptr [20 x i8]* %who, i32 0, i32 0 ; <i8*> [#uses=2]
- call void @llvm.memset.i32( i8* %who1, i8 0, i32 20, i32 1 )
- call void @llvm.memcpy.i32( i8* %who1, i8* getelementptr ([11 x i8]* @"\01LC", i32 0, i32 0), i32 11, i32 1 )
- unreachable
-}
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
diff --git a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll b/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
deleted file mode 100644
index ce9e389..0000000
--- a/test/CodeGen/X86/2008-08-23-X86-64AsmBug.ll
+++ /dev/null
@@ -1,59 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movd | count 1
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | grep movq
-; PR2677
-
-
- %struct.Bigint = type { %struct.Bigint*, i32, i32, i32, i32, [1 x i32] }
-
-define double @_Z7qstrtodPKcPS0_Pb(i8* %s00, i8** %se, i8* %ok) nounwind {
-entry:
- br label %bb163
-
-bb151: ; preds = %entry
- br label %bb163
-
-bb163: ; preds = %bb151, %entry
- %tmp366 = load double* null, align 8 ; <double> [#uses=1]
- %tmp368 = fmul double %tmp366, 0.000000e+00 ; <double> [#uses=1]
- %tmp368226 = bitcast double %tmp368 to i64 ; <i64> [#uses=1]
- br label %bb5.i
-
-bb5.i: ; preds = %bb5.i57.i, %bb163
- %b.0.i = phi %struct.Bigint* [ null, %bb163 ] ; <%struct.Bigint*> [#uses=1]
- %tmp3.i7.i728 = load i32* null, align 4 ; <i32> [#uses=1]
- br label %bb.i27.i
-
-bb.i27.i: ; preds = %bb.i27.i, %bb5.i
- %tmp23.i20.i = lshr i32 0, 16 ; <i32> [#uses=1]
- br label %bb5.i57.i
-
-bb5.i57.i: ; preds = %bb.i27.i
- %tmp50.i35.i = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp51.i36.i = add i32 %tmp50.i35.i, 1 ; <i32> [#uses=2]
- %tmp2.i.i37.i = shl i32 1, %tmp51.i36.i ; <i32> [#uses=2]
- %tmp4.i.i38.i = shl i32 %tmp2.i.i37.i, 2 ; <i32> [#uses=1]
- %tmp7.i.i39.i = add i32 %tmp4.i.i38.i, 28 ; <i32> [#uses=1]
- %tmp8.i.i40.i = malloc i8, i32 %tmp7.i.i39.i ; <i8*> [#uses=1]
- %tmp9.i.i41.i = bitcast i8* %tmp8.i.i40.i to %struct.Bigint* ; <%struct.Bigint*> [#uses=2]
- store i32 %tmp51.i36.i, i32* null, align 8
- store i32 %tmp2.i.i37.i, i32* null, align 4
- free %struct.Bigint* %b.0.i
- store i32 %tmp23.i20.i, i32* null, align 4
- %tmp74.i61.i = add i32 %tmp3.i7.i728, 1 ; <i32> [#uses=1]
- store i32 %tmp74.i61.i, i32* null, align 4
- br label %bb7.i
-
-bb7.i: ; preds = %bb5.i57.i
- %tmp514 = load i32* null, align 4 ; <i32> [#uses=1]
- %tmp515 = sext i32 %tmp514 to i64 ; <i64> [#uses=1]
- %tmp516 = shl i64 %tmp515, 2 ; <i64> [#uses=1]
- %tmp517 = add i64 %tmp516, 8 ; <i64> [#uses=1]
- %tmp519 = getelementptr %struct.Bigint* %tmp9.i.i41.i, i32 0, i32 3 ; <i32*> [#uses=1]
- %tmp523 = bitcast i32* %tmp519 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i64( i8* null, i8* %tmp523, i64 %tmp517, i32 1 )
- %tmp524136 = bitcast i64 %tmp368226 to double ; <double> [#uses=1]
- store double %tmp524136, double* null
- unreachable
-}
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind
diff --git a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
index c92a8f4..fc3e35e 100644
--- a/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
+++ b/test/CodeGen/X86/2008-09-25-sseregparm-1.ll
@@ -2,11 +2,11 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 | grep fld | count 2
; check 'inreg' attribute for sse_regparm
-define double @foo1() inreg nounwind {
+define inreg double @foo1() nounwind {
ret double 1.0
}
-define float @foo2() inreg nounwind {
+define inreg float @foo2() nounwind {
ret float 1.0
}
diff --git a/test/CodeGen/X86/2008-10-27-StackRealignment.ll b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
index 3d0766c..a57f716 100644
--- a/test/CodeGen/X86/2008-10-27-StackRealignment.ll
+++ b/test/CodeGen/X86/2008-10-27-StackRealignment.ll
@@ -11,12 +11,12 @@ target triple = "i386-pc-linux-gnu"
define void @foo(i32 %t) nounwind {
%tmp1210 = alloca i8, i32 32, align 4
- call void @llvm.memset.i64(i8* %tmp1210, i8 0, i64 32, i32 4)
-
+ call void @llvm.memset.p0i8.i64(i8* %tmp1210, i8 0, i64 32, i32 4, i1 false)
%x = alloca i8, i32 %t
call void @dummy(i8* %x)
ret void
}
-declare void @dummy(i8* %x)
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
+declare void @dummy(i8*)
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll b/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
deleted file mode 100644
index 7fd2e6f..0000000
--- a/test/CodeGen/X86/2008-12-05-SpillerCrash.ll
+++ /dev/null
@@ -1,237 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9.5 -mattr=+sse41 -relocation-model=pic
-
- %struct.XXActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
- %struct.XXAlphaTest = type { float, i16, i8, i8 }
- %struct.XXArrayRange = type { i8, i8, i8, i8 }
- %struct.XXBlendMode = type { i16, i16, i16, i16, %struct.ZZIColor4, i16, i16, i8, i8, i8, i8 }
- %struct.XXBBRec = type opaque
- %struct.XXBBstate = type { %struct.ZZGTransformKey, %struct.ZZGTransformKey, %struct.XXProgramLimits, %struct.XXProgramLimits, i8, i8, i8, i8, %struct.ZZSBB, %struct.ZZSBB, [4 x %struct.ZZSBB], %struct.ZZSBB, %struct.ZZSBB, %struct.ZZSBB, [8 x %struct.ZZSBB], %struct.ZZSBB }
- %struct.XXClearColor = type { double, %struct.ZZIColor4, %struct.ZZIColor4, float, i32 }
- %struct.XXClipPlane = type { i32, [6 x %struct.ZZIColor4] }
- %struct.XXColorBB = type { i16, i8, i8, [8 x i16], i8, i8, i8, i8 }
- %struct.XXColorMatrix = type { [16 x float]*, %struct.XXImagingColorScale }
- %struct.XXConfig = type { i32, float, %struct.ZZGTransformKey, %struct.ZZGTransformKey, i8, i8, i8, i8, i8, i8, i16, i32, i32, i32, %struct.XXPixelFormatInfo, %struct.XXPointLineLimits, %struct.XXPointLineLimits, %struct.XXRenderFeatures, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.XXTextureLimits, [3 x %struct.XXPipelineProgramLimits], %struct.XXFragmentProgramLimits, %struct.XXVertexProgramLimits, %struct.XXGeometryShaderLimits, %struct.XXProgramLimits, %struct.XXGeometryShaderLimits, %struct.XXVertexDescriptor*, %struct.XXVertexDescriptor*, [3 x i32], [4 x i32], [0 x i32] }
- %struct.XXContextRec = type { float, float, float, float, float, float, float, float, %struct.ZZIColor4, %struct.ZZIColor4, %struct.YYFPContext, [16 x [2 x %struct.PPStreamToken]], %struct.ZZGProcessor, %struct._YYConstants*, void (%struct.XXContextRec*, i32, i32, %struct.YYFragmentAttrib*, %struct.YYFragmentAttrib*, i32)*, %struct._YYFunction*, %struct.PPStreamToken*, void (%struct.XXContextRec*, %struct.XXVertex*)*, void (%struct.XXContextRec*, %struct.XXVertex*, %struct.XXVertex*)*, void (%struct.XXContextRec*, %struct.XXVertex*, %struct.XXVertex*, %struct.XXVertex*)*, %struct._YYFunction*, %struct._YYFunction*, %struct._YYFunction*, [4 x i32], [3 x i32], [3 x i32], float, float, float, %struct.PPStreamToken, i32, %struct.ZZSDrawable, %struct.XXFramebufferRec*, %struct.XXFramebufferRec*, %struct.XXRect, %struct.XXFormat, %struct.XXFormat, %struct.XXFormat, %struct.XXConfig*, %struct.XXBBstate, %struct.XXBBstate, %struct.XXSharedRec*, %struct.XXState*, %struct.XXPluginState*, %struct.XXVertex*, %struct.YYFragmentAttrib*, %struct.YYFragmentAttrib*, %struct.YYFragmentAttrib*, %struct.XXProgramRec*, %struct.XXPipelineProgramRec*, %struct.YYTextures, %struct.XXStippleData, i8, i16, i8, i32, i32, i32, %struct.XXQueryRec*, %struct.XXQueryRec*, %struct.XXFallback, { void (i8*, i8*, i32, i8*)* } }
- %struct.XXConvolution = type { %struct.ZZIColor4, %struct.XXImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 }
- %struct.XXCurrent16A = type { [8 x %struct.ZZIColor4], [16 x %struct.ZZIColor4], %struct.ZZIColor4, %struct.XXPointLineLimits, float, %struct.XXPointLineLimits, float, [4 x float], %struct.XXPointLineLimits, float, float, float, float, i8, i8, i8, i8 }
- %struct.XXDepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
- %struct.XXDrawableWindow = type { i32, i32, i32 }
- %struct.XXFallback = type { float*, %struct.XXRenderDispatch*, %struct.XXConfig*, i8*, i8*, i32, i32 }
- %struct.XXFenceRec = type opaque
- %struct.XXFixedFunction = type { %struct.PPStreamToken* }
- %struct.XXFogMode = type { %struct.ZZIColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
- %struct.XXFormat = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8, i32, i32, i32 }
- %struct.XXFragmentProgramLimits = type { i32, i32, i32, i16, i16, i32, i32 }
- %struct.XXFramebufferAttachment = type { i16, i16, i32, i32, i32 }
- %struct.XXFramebufferData = type { [10 x %struct.XXFramebufferAttachment], [8 x i16], i16, i16, i16, i8, i8, i32, i32 }
- %struct.XXFramebufferRec = type { %struct.XXFramebufferData*, %struct.XXPluginFramebufferData*, %struct.XXFormat, i8, i8, i8, i8 }
- %struct.XXGeometryShaderLimits = type { i32, i32, i32, i32, i32 }
- %struct.XXHintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
- %struct.XXHistogram = type { %struct.XXProgramLimits*, i32, i16, i8, i8 }
- %struct.XXImagingColorScale = type { %struct.ZZTCoord2, %struct.ZZTCoord2, %struct.ZZTCoord2, %struct.ZZTCoord2 }
- %struct.XXImagingSubset = type { %struct.XXConvolution, %struct.XXConvolution, %struct.XXConvolution, %struct.XXColorMatrix, %struct.XXMinmax, %struct.XXHistogram, %struct.XXImagingColorScale, %struct.XXImagingColorScale, %struct.XXImagingColorScale, %struct.XXImagingColorScale, i32, [0 x i32] }
- %struct.XXLight = type { %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.XXPointLineLimits, float, float, float, float, float, %struct.XXPointLineLimits, float, %struct.XXPointLineLimits, float, %struct.XXPointLineLimits, float, float, float, float, float }
- %struct.XXLightModel = type { %struct.ZZIColor4, [8 x %struct.XXLight], [2 x %struct.XXMaterial], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
- %struct.XXLightProduct = type { %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4 }
- %struct.XXLineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
- %struct.XXLogicOp = type { i16, i8, i8 }
- %struct.XXMaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
- %struct.XXMaterial = type { %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, float, float, float, float, [8 x %struct.XXLightProduct], %struct.ZZIColor4, [8 x i32] }
- %struct.XXMinmax = type { %struct.XXMinmaxTable*, i16, i8, i8, [0 x i32] }
- %struct.XXMinmaxTable = type { %struct.ZZIColor4, %struct.ZZIColor4 }
- %struct.XXMipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
- %struct.XXMultisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
- %struct.XXPipelineProgramData = type { i16, i8, i8, i32, %struct.PPStreamToken*, i64, %struct.ZZIColor4*, i32, [0 x i32] }
- %struct.XXPipelineProgramLimits = type { i32, i16, i16, i32, i16, i16, i32, i32 }
- %struct.XXPipelineProgramRec = type { %struct.XXPipelineProgramData*, %struct.PPStreamToken*, %struct.XXContextRec*, { %struct._YYFunction*, \2, \2, [20 x i32], [64 x i32], i32, i32, i32 }*, i32, i32 }
- %struct.XXPipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.ZZIColor4* }
- %struct.XXPixelFormatInfo = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
- %struct.XXPixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
- %struct.XXPixelMode = type { float, float, %struct.XXPixelStore, %struct.XXPixelTransfer, %struct.XXPixelMap, %struct.XXImagingSubset, i32, i32 }
- %struct.XXPixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
- %struct.XXPixelStore = type { %struct.XXPixelPack, %struct.XXPixelPack }
- %struct.XXPixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
- %struct.XXPluginFramebufferData = type { [10 x %struct.XXTextureRec*], i8, i8, i8, i8 }
- %struct.XXPluginProgramData = type { [3 x %struct.XXPipelineProgramRec*], %struct.XXBBRec**, i32, [0 x i32] }
- %struct.XXPluginState = type { [16 x [5 x %struct.XXTextureRec*]], [3 x %struct.XXTextureRec*], [3 x %struct.XXPipelineProgramRec*], [3 x %struct.XXPipelineProgramRec*], %struct.XXProgramRec*, %struct.XXVertexArrayRec*, [16 x %struct.XXBBRec*], %struct.XXFramebufferRec*, %struct.XXFramebufferRec* }
- %struct.XXPointLineLimits = type { float, float, float }
- %struct.XXPointMode = type { float, float, float, float, %struct.XXPointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
- %struct.XXPolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
- %struct.XXProgramData = type { i32, i32, i32, i32, %struct.PPStreamToken*, i32*, i32, i32, i32, i32, i8, i8, i8, i8, [0 x i32] }
- %struct.XXProgramLimits = type { i32, i32, i32, i32 }
- %struct.XXProgramRec = type { %struct.XXProgramData*, %struct.XXPluginProgramData*, %struct.ZZIColor4**, i32 }
- %struct.XXQueryRec = type { i32, i32, %struct.XXQueryRec* }
- %struct.XXRect = type { i32, i32, i32, i32, i32, i32 }
- %struct.XXRegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.ZZIColor4], [8 x %struct.XXRegisterCombinersPerStageState], %struct.XXRegisterCombinersFinalStageState }
- %struct.XXRegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.XXRegisterCombinersPerVariableState] }
- %struct.XXRegisterCombinersPerPortionState = type { [4 x %struct.XXRegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
- %struct.XXRegisterCombinersPerStageState = type { [2 x %struct.XXRegisterCombinersPerPortionState], [2 x %struct.ZZIColor4] }
- %struct.XXRegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
- %struct.XXRenderDispatch = type { void (%struct.XXContextRec*, i32, float)*, void (%struct.XXContextRec*, i32)*, i32 (%struct.XXContextRec*, i32, i32, i32, i32, i32, i32, i8*, i32, %struct.XXBBRec*)*, i32 (%struct.XXContextRec*, %struct.XXVertex*, i32, i32, i32, i32, i8*, i32, %struct.XXBBRec*)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32, i32, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32, float, float, i8*, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex*, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex**, i32)*, void (%struct.XXContextRec*, %struct.XXVertex**, i32, i32)*, void (%struct.XXContextRec*, %struct.XXVertex**, i32, i32)*, i8* (%struct.XXContextRec*, i32, i32*)*, void (%struct.XXContextRec*, i32, i32, i32)*, i8* (%struct.XXContextRec*, i32, i32, i32, i32, i32)*, void (%struct.XXContextRec*, i32, i32, i32, i32, i32, i8*)*, void (%struct.XXContextRec*)*, void (%struct.XXContextRec*)*, void (%struct.XXContextRec*)*, void (%struct.XXContextRec*, %struct.XXFenceRec*)*, void (%struct.XXContextRec*, i32, %struct.XXQueryRec*)*, void (%struct.XXContextRec*, %struct.XXQueryRec*)*, i32 (%struct.XXContextRec*, i32, i32, i32, i32, i32, i8*, %struct.ZZIColor4*, %struct.XXCurrent16A*)*, i32 (%struct.XXContextRec*, %struct.XXTextureRec*, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, i32 (%struct.XXContextRec*, %struct.XXTextureRec*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, %struct.XXBBRec*)*, i32 (%struct.XXContextRec*, %struct.XXTextureRec*, i32)*, i32 (%struct.XXContextRec*, %struct.XXBBRec*, i32, i32, i8*)*, void (%struct.XXContextRec*, i32)*, void (%struct.XXContextRec*)*, void (%struct.XXContextRec*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)*, i32 (%struct.XXContextRec*, %struct.XXQueryRec*)*, void (%struct.XXContextRec*)* }
- %struct.XXRenderFeatures = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
- %struct.XXSWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
- %struct.XXScissorTest = type { %struct.XXProgramLimits, i8, i8, i8, i8 }
- %struct.XXSharedData = type { }
- %struct.XXSharedRec = type { %struct.__ZZarrayelementDrawInfoListType, %struct.XXSharedData*, i32, i8, i8, i8, i8 }
- %struct.XXState = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.ZZIColor4], [128 x %struct.ZZIColor4], %struct.XXViewport, %struct.XXTransform, %struct.XXLightModel, %struct.XXActiveTextureTargets, %struct.XXAlphaTest, %struct.XXBlendMode, %struct.XXClearColor, %struct.XXColorBB, %struct.XXDepthTest, %struct.XXArrayRange, %struct.XXFogMode, %struct.XXHintMode, %struct.XXLineMode, %struct.XXLogicOp, %struct.XXMaskMode, %struct.XXPixelMode, %struct.XXPointMode, %struct.XXPolygonMode, %struct.XXScissorTest, i32, %struct.XXStencilTest, [8 x %struct.XXTextureMode], [16 x %struct.XXTextureImageMode], %struct.XXArrayRange, [8 x %struct.XXTextureCoordGen], %struct.XXClipPlane, %struct.XXMultisample, %struct.XXRegisterCombiners, %struct.XXArrayRange, %struct.XXArrayRange, [3 x %struct.XXPipelineProgramState], %struct.XXArrayRange, %struct.XXTransformFeedback, i32*, %struct.XXFixedFunction, [1 x i32] }>
- %struct.XXStencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
- %struct.XXStippleData = type { i32, i16, i16, [32 x [32 x i8]] }
- %struct.XXTextureCoordGen = type { { i16, i16, %struct.ZZIColor4, %struct.ZZIColor4 }, { i16, i16, %struct.ZZIColor4, %struct.ZZIColor4 }, { i16, i16, %struct.ZZIColor4, %struct.ZZIColor4 }, { i16, i16, %struct.ZZIColor4, %struct.ZZIColor4 }, i8, i8, i8, i8 }
- %struct.XXTextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
- %struct.XXTextureImageMode = type { float }
- %struct.XXTextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
- %struct.XXTextureLimits = type { float, float, i16, i16, i16, i16, i16, i16, i16, i16, i16, i8, i8, [16 x i16], i32 }
- %struct.XXTextureMode = type { %struct.ZZIColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
- %struct.XXTextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.ZZIColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
- %struct.XXTextureRec = type { [4 x float], %struct.XXTextureState*, %struct.XXMipmaplevel*, %struct.XXMipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
- %struct.XXTextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.XXSWRSurfaceRec*, %struct.XXTextureParamState, %struct.XXTextureGeomState, i16, i16, i8*, %struct.XXTextureLevel, [1 x [15 x %struct.XXTextureLevel]] }
- %struct.XXTransform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
- %struct.XXTransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
- %struct.XXVertex = type { %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.ZZIColor4, %struct.XXPointLineLimits, float, %struct.ZZIColor4, float, i8, i8, i8, i8, float, float, i32, i32, i32, i32, [4 x float], [2 x %struct.XXMaterial*], [2 x i32], [8 x %struct.ZZIColor4] }
- %struct.XXVertexArrayRec = type opaque
- %struct.XXVertexDescriptor = type { i8, i8, i8, i8, [0 x i32] }
- %struct.XXVertexProgramLimits = type { i16, i16, i32, i32 }
- %struct.XXViewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
- %struct.ZZGColorTable = type { i32, i32, i32, i8* }
- %struct.ZZGOperation = type { i8*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, %struct.ZZGColorTable, %struct.ZZGColorTable, %struct.ZZGColorTable }
- %struct.ZZGProcessor = type { void (%struct.XXPixelMode*, %struct.ZZGOperation*, %struct._ZZGProcessorData*, %union._ZZGFunctionKey*)*, %struct._YYFunction*, %union._ZZGFunctionKey*, %struct._ZZGProcessorData* }
- %struct.ZZGTransformKey = type { i32, i32 }
- %struct.ZZIColor4 = type { float, float, float, float }
- %struct.ZZSBB = type { i8* }
- %struct.ZZSDrawable = type { %struct.ZZSWindowRec* }
- %struct.ZZSWindowRec = type { %struct.ZZGTransformKey, %struct.ZZGTransformKey, i32, i32, %struct.ZZSDrawable, i8*, i8*, i8*, i8*, i8*, [4 x i8*], i32, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, %struct.XXDrawableWindow, i32, i32, i8*, i8* }
- %struct.ZZTCoord2 = type { float, float }
- %struct.YYFPContext = type { float, i32, i32, i32, float, [3 x float] }
- %struct.YYFragmentAttrib = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, [8 x <4 x float>] }
- %struct.YYTextures = type { [16 x %struct.XXTextureRec*] }
- %struct.PPStreamToken = type { { i16, i16, i32 } }
- %struct._ZZGProcessorData = type { void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32)*, void (i8*, i8*, i32, i32, i32, i32, i32, i32, i32)*, i8* (i32)*, void (i8*)* }
- %struct._YYConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [4096 x i8], [8 x float], [48 x float], [128 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
- %struct._YYFunction = type opaque
- %struct.__ZZarrayelementDrawInfoListType = type { i32, [40 x i8] }
- %union._ZZGFunctionKey = type opaque
-@llvm.used = appending global [1 x i8*] [ i8* bitcast (void (%struct.XXContextRec*, i32, i32, %struct.YYFragmentAttrib*, %struct.YYFragmentAttrib*, i32)* @t to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define void @t(%struct.XXContextRec* %ctx, i32 %x, i32 %y, %struct.YYFragmentAttrib* %start, %struct.YYFragmentAttrib* %deriv, i32 %num_frags) nounwind {
-entry:
- %tmp7485.i.i.i = xor <4 x i32> zeroinitializer, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
- %tmp8382.i.i.i = extractelement <4 x i32> zeroinitializer, i32 1 ; <i32> [#uses=1]
- %tmp8383.i.i.i = extractelement <4 x i32> zeroinitializer, i32 2 ; <i32> [#uses=2]
- %tmp8384.i.i.i = extractelement <4 x i32> zeroinitializer, i32 3 ; <i32> [#uses=2]
- br label %bb7551.i.i.i
-
-bb4426.i.i.i: ; preds = %bb7551.i.i.i
- %0 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8383.i.i.i, i32 3 ; <[4 x i32]*> [#uses=1]
- %1 = bitcast [4 x i32]* %0 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %2 = load <4 x i32>* %1, align 16 ; <<4 x i32>> [#uses=1]
- %3 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8384.i.i.i, i32 3 ; <[4 x i32]*> [#uses=1]
- %4 = bitcast [4 x i32]* %3 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %5 = load <4 x i32>* %4, align 16 ; <<4 x i32>> [#uses=1]
- %6 = shufflevector <4 x i32> %2, <4 x i32> %5, <4 x i32> < i32 0, i32 4, i32 1, i32 5 > ; <<4 x i32>> [#uses=1]
- %7 = bitcast <4 x i32> %6 to <2 x i64> ; <<2 x i64>> [#uses=1]
- %8 = shufflevector <2 x i64> zeroinitializer, <2 x i64> %7, <2 x i32> < i32 1, i32 3 > ; <<2 x i64>> [#uses=1]
- %9 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8382.i.i.i, i32 6 ; <float**> [#uses=1]
- %10 = load float** %9, align 4 ; <float*> [#uses=1]
- %11 = bitcast float* %10 to i8* ; <i8*> [#uses=1]
- %12 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8383.i.i.i, i32 6 ; <float**> [#uses=1]
- %13 = load float** %12, align 4 ; <float*> [#uses=1]
- %14 = bitcast float* %13 to i8* ; <i8*> [#uses=1]
- %15 = getelementptr %struct.XXMipmaplevel* null, i32 %tmp8384.i.i.i, i32 6 ; <float**> [#uses=1]
- %16 = load float** %15, align 4 ; <float*> [#uses=1]
- %17 = bitcast float* %16 to i8* ; <i8*> [#uses=1]
- %tmp7308.i.i.i = and <2 x i64> zeroinitializer, %8 ; <<2 x i64>> [#uses=1]
- %18 = bitcast <2 x i64> %tmp7308.i.i.i to <4 x i32> ; <<4 x i32>> [#uses=1]
- %19 = mul <4 x i32> %18, zeroinitializer ; <<4 x i32>> [#uses=1]
- %20 = add <4 x i32> %19, zeroinitializer ; <<4 x i32>> [#uses=3]
- %21 = load i32* null, align 4 ; <i32> [#uses=0]
- %22 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> zeroinitializer) nounwind readnone ; <<4 x float>> [#uses=1]
- %23 = fmul <4 x float> %22, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
- %tmp2114.i119.i.i = extractelement <4 x i32> %20, i32 1 ; <i32> [#uses=1]
- %24 = shl i32 %tmp2114.i119.i.i, 2 ; <i32> [#uses=1]
- %25 = getelementptr i8* %11, i32 %24 ; <i8*> [#uses=1]
- %26 = bitcast i8* %25 to i32* ; <i32*> [#uses=1]
- %27 = load i32* %26, align 4 ; <i32> [#uses=1]
- %28 = or i32 %27, -16777216 ; <i32> [#uses=1]
- %tmp1927.i120.i.i = insertelement <4 x i32> undef, i32 %28, i32 0 ; <<4 x i32>> [#uses=1]
- %29 = bitcast <4 x i32> %tmp1927.i120.i.i to <16 x i8> ; <<16 x i8>> [#uses=1]
- %30 = shufflevector <16 x i8> %29, <16 x i8> < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef >, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
- %31 = bitcast <16 x i8> %30 to <8 x i16> ; <<8 x i16>> [#uses=1]
- %32 = shufflevector <8 x i16> %31, <8 x i16> < i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef >, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
- %33 = bitcast <8 x i16> %32 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %34 = shufflevector <4 x i32> %33, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 > ; <<4 x i32>> [#uses=1]
- %35 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %34) nounwind readnone ; <<4 x float>> [#uses=1]
- %36 = fmul <4 x float> %35, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
- %tmp2113.i124.i.i = extractelement <4 x i32> %20, i32 2 ; <i32> [#uses=1]
- %37 = shl i32 %tmp2113.i124.i.i, 2 ; <i32> [#uses=1]
- %38 = getelementptr i8* %14, i32 %37 ; <i8*> [#uses=1]
- %39 = bitcast i8* %38 to i32* ; <i32*> [#uses=1]
- %40 = load i32* %39, align 4 ; <i32> [#uses=1]
- %41 = or i32 %40, -16777216 ; <i32> [#uses=1]
- %tmp1963.i125.i.i = insertelement <4 x i32> undef, i32 %41, i32 0 ; <<4 x i32>> [#uses=1]
- %42 = bitcast <4 x i32> %tmp1963.i125.i.i to <16 x i8> ; <<16 x i8>> [#uses=1]
- %43 = shufflevector <16 x i8> %42, <16 x i8> < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef >, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
- %44 = bitcast <16 x i8> %43 to <8 x i16> ; <<8 x i16>> [#uses=1]
- %45 = shufflevector <8 x i16> %44, <8 x i16> < i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef >, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
- %46 = bitcast <8 x i16> %45 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %47 = shufflevector <4 x i32> %46, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 > ; <<4 x i32>> [#uses=1]
- %48 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %47) nounwind readnone ; <<4 x float>> [#uses=1]
- %49 = fmul <4 x float> %48, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
- %tmp2112.i129.i.i = extractelement <4 x i32> %20, i32 3 ; <i32> [#uses=1]
- %50 = shl i32 %tmp2112.i129.i.i, 2 ; <i32> [#uses=1]
- %51 = getelementptr i8* %17, i32 %50 ; <i8*> [#uses=1]
- %52 = bitcast i8* %51 to i32* ; <i32*> [#uses=1]
- %53 = load i32* %52, align 4 ; <i32> [#uses=1]
- %54 = or i32 %53, -16777216 ; <i32> [#uses=1]
- %tmp1999.i130.i.i = insertelement <4 x i32> undef, i32 %54, i32 0 ; <<4 x i32>> [#uses=1]
- %55 = bitcast <4 x i32> %tmp1999.i130.i.i to <16 x i8> ; <<16 x i8>> [#uses=1]
- %56 = shufflevector <16 x i8> %55, <16 x i8> < i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef >, <16 x i32> < i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23 > ; <<16 x i8>> [#uses=1]
- %57 = bitcast <16 x i8> %56 to <8 x i16> ; <<8 x i16>> [#uses=1]
- %58 = shufflevector <8 x i16> %57, <8 x i16> < i16 0, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 undef, i16 undef >, <8 x i32> < i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11 > ; <<8 x i16>> [#uses=1]
- %59 = bitcast <8 x i16> %58 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %60 = shufflevector <4 x i32> %59, <4 x i32> undef, <4 x i32> < i32 2, i32 1, i32 0, i32 3 > ; <<4 x i32>> [#uses=1]
- %61 = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %60) nounwind readnone ; <<4 x float>> [#uses=1]
- %62 = fmul <4 x float> %61, < float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000, float 0x3F70101020000000 > ; <<4 x float>> [#uses=1]
- %63 = fmul <4 x float> %23, zeroinitializer ; <<4 x float>> [#uses=1]
- %64 = fadd <4 x float> zeroinitializer, %63 ; <<4 x float>> [#uses=1]
- %65 = fmul <4 x float> %36, zeroinitializer ; <<4 x float>> [#uses=1]
- %66 = fadd <4 x float> zeroinitializer, %65 ; <<4 x float>> [#uses=1]
- %67 = fmul <4 x float> %49, zeroinitializer ; <<4 x float>> [#uses=1]
- %68 = fadd <4 x float> zeroinitializer, %67 ; <<4 x float>> [#uses=1]
- %69 = fmul <4 x float> %62, zeroinitializer ; <<4 x float>> [#uses=1]
- %70 = fadd <4 x float> zeroinitializer, %69 ; <<4 x float>> [#uses=1]
- %tmp7452.i.i.i = bitcast <4 x float> %64 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp7454.i.i.i = and <4 x i32> %tmp7452.i.i.i, zeroinitializer ; <<4 x i32>> [#uses=1]
- %tmp7459.i.i.i = or <4 x i32> %tmp7454.i.i.i, zeroinitializer ; <<4 x i32>> [#uses=1]
- %tmp7460.i.i.i = bitcast <4 x i32> %tmp7459.i.i.i to <4 x float> ; <<4 x float>> [#uses=1]
- %tmp7468.i.i.i = bitcast <4 x float> %66 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp7470.i.i.i = and <4 x i32> %tmp7468.i.i.i, zeroinitializer ; <<4 x i32>> [#uses=1]
- %tmp7475.i.i.i = or <4 x i32> %tmp7470.i.i.i, zeroinitializer ; <<4 x i32>> [#uses=1]
- %tmp7476.i.i.i = bitcast <4 x i32> %tmp7475.i.i.i to <4 x float> ; <<4 x float>> [#uses=1]
- %tmp7479.i.i.i = bitcast <4 x float> %.279.1.i to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp7480.i.i.i = and <4 x i32> zeroinitializer, %tmp7479.i.i.i ; <<4 x i32>> [#uses=1]
- %tmp7484.i.i.i = bitcast <4 x float> %68 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp7486.i.i.i = and <4 x i32> %tmp7484.i.i.i, %tmp7485.i.i.i ; <<4 x i32>> [#uses=1]
- %tmp7491.i.i.i = or <4 x i32> %tmp7486.i.i.i, %tmp7480.i.i.i ; <<4 x i32>> [#uses=1]
- %tmp7492.i.i.i = bitcast <4 x i32> %tmp7491.i.i.i to <4 x float> ; <<4 x float>> [#uses=1]
- %tmp7495.i.i.i = bitcast <4 x float> %.380.1.i to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp7496.i.i.i = and <4 x i32> zeroinitializer, %tmp7495.i.i.i ; <<4 x i32>> [#uses=1]
- %tmp7500.i.i.i = bitcast <4 x float> %70 to <4 x i32> ; <<4 x i32>> [#uses=1]
- %tmp7502.i.i.i = and <4 x i32> %tmp7500.i.i.i, zeroinitializer ; <<4 x i32>> [#uses=1]
- %tmp7507.i.i.i = or <4 x i32> %tmp7502.i.i.i, %tmp7496.i.i.i ; <<4 x i32>> [#uses=1]
- %tmp7508.i.i.i = bitcast <4 x i32> %tmp7507.i.i.i to <4 x float> ; <<4 x float>> [#uses=1]
- %indvar.next.i.i.i = add i32 %aniso.0.i.i.i, 1 ; <i32> [#uses=1]
- br label %bb7551.i.i.i
-
-bb7551.i.i.i: ; preds = %bb4426.i.i.i, %entry
- %.077.1.i = phi <4 x float> [ undef, %entry ], [ %tmp7460.i.i.i, %bb4426.i.i.i ] ; <<4 x float>> [#uses=0]
- %.178.1.i = phi <4 x float> [ undef, %entry ], [ %tmp7476.i.i.i, %bb4426.i.i.i ] ; <<4 x float>> [#uses=0]
- %.279.1.i = phi <4 x float> [ undef, %entry ], [ %tmp7492.i.i.i, %bb4426.i.i.i ] ; <<4 x float>> [#uses=1]
- %.380.1.i = phi <4 x float> [ undef, %entry ], [ %tmp7508.i.i.i, %bb4426.i.i.i ] ; <<4 x float>> [#uses=1]
- %aniso.0.i.i.i = phi i32 [ 0, %entry ], [ %indvar.next.i.i.i, %bb4426.i.i.i ] ; <i32> [#uses=1]
- br i1 false, label %glvmInterpretFPTransformFour6.exit, label %bb4426.i.i.i
-
-glvmInterpretFPTransformFour6.exit: ; preds = %bb7551.i.i.i
- unreachable
-}
-
-declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
diff --git a/test/CodeGen/X86/2009-01-25-NoSSE.ll b/test/CodeGen/X86/2009-01-25-NoSSE.ll
index 0583ef1..8406c4a 100644
--- a/test/CodeGen/X86/2009-01-25-NoSSE.ll
+++ b/test/CodeGen/X86/2009-01-25-NoSSE.ll
@@ -3,18 +3,18 @@
target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
- %struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 }
+
+%struct.ktermios = type { i32, i32, i32, i32, i8, [19 x i8], i32, i32 }
define void @foo() nounwind {
entry:
- %termios = alloca %struct.ktermios, align 8
- %termios1 = bitcast %struct.ktermios* %termios to i8*
- call void @llvm.memset.i64(i8* %termios1, i8 0, i64 44, i32 8)
- call void @bar(%struct.ktermios* %termios) nounwind
- ret void
+ %termios = alloca %struct.ktermios, align 8
+ %termios1 = bitcast %struct.ktermios* %termios to i8*
+ call void @llvm.memset.p0i8.i64(i8* %termios1, i8 0, i64 44, i32 8, i1 false)
+ call void @bar(%struct.ktermios* %termios) nounwind
+ ret void
}
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
declare void @bar(%struct.ktermios*)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll b/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
deleted file mode 100644
index 35fac0c..0000000
--- a/test/CodeGen/X86/2009-01-29-LocalRegAllocBug.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin9.6 -regalloc=fast -disable-fp-elim
-; rdar://6538384
-
- %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
- %struct.Lit = type { i32 }
- %struct.StreamBuffer = type { %struct.FILE*, [1048576 x i8], i32, i32 }
- %struct.__sFILEX = type opaque
- %struct.__sbuf = type { i8*, i32 }
-
-declare fastcc i32 @_Z8parseIntI12StreamBufferEiRT_(%struct.StreamBuffer*)
-
-declare i8* @llvm.eh.exception() nounwind
-
-define i32 @main(i32 %argc, i8** nocapture %argv) noreturn {
-entry:
- %0 = invoke fastcc i32 @_Z8parseIntI12StreamBufferEiRT_(%struct.StreamBuffer* null)
- to label %bb1.i16.i.i unwind label %lpad.i.i ; <i32> [#uses=0]
-
-bb1.i16.i.i: ; preds = %entry
- br i1 false, label %bb.i.i.i.i, label %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i
-
-bb.i.i.i.i: ; preds = %bb1.i16.i.i
- br label %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i
-
-_ZN3vecI3LitE4pushERKS0_.exit.i.i.i: ; preds = %bb.i.i.i.i, %bb1.i16.i.i
- %lits.i.i.0.0 = phi %struct.Lit* [ null, %bb1.i16.i.i ], [ null, %bb.i.i.i.i ] ; <%struct.Lit*> [#uses=1]
- %1 = invoke fastcc i32 @_Z8parseIntI12StreamBufferEiRT_(%struct.StreamBuffer* null)
- to label %.noexc21.i.i unwind label %lpad.i.i ; <i32> [#uses=0]
-
-.noexc21.i.i: ; preds = %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i
- unreachable
-
-lpad.i.i: ; preds = %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i, %entry
- %lits.i.i.0.3 = phi %struct.Lit* [ %lits.i.i.0.0, %_ZN3vecI3LitE4pushERKS0_.exit.i.i.i ], [ null, %entry ] ; <%struct.Lit*> [#uses=1]
- %eh_ptr.i.i = call i8* @llvm.eh.exception() ; <i8*> [#uses=0]
- free %struct.Lit* %lits.i.i.0.3
- unreachable
-}
diff --git a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
index 2e148ad..d64c966 100644
--- a/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
+++ b/test/CodeGen/X86/2009-02-12-InlineAsm-nieZ-constraints.ll
@@ -1,18 +1,24 @@
-; RUN: llc < %s -march=x86 | grep {\$-81920} | count 3
-; RUN: llc < %s -march=x86 | grep {\$4294885376} | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
; ModuleID = 'shant.c'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
define void @f() nounwind {
-entry:
+; CHECK: f:
+; CHECK-NOT: ret
+; CHECK: foo $-81920
+; CHECK-NOT: ret
+; CHECK: foo $-81920
+; CHECK-NOT: ret
+; CHECK: foo $-81920
+; CHECK-NOT: ret
+; CHECK: foo $4294885376
+; CHECK: ret
+
call void asm sideeffect "foo $0", "n,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind
call void asm sideeffect "foo $0", "i,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind
call void asm sideeffect "foo $0", "e,~{dirflag},~{fpsr},~{flags}"(i32 -81920) nounwind
call void asm sideeffect "foo $0", "Z,~{dirflag},~{fpsr},~{flags}"(i64 4294885376) nounwind
- br label %return
-
-return: ; preds = %entry
ret void
}
diff --git a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll b/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
deleted file mode 100644
index aba4bfc..0000000
--- a/test/CodeGen/X86/2009-02-20-PreAllocSplit-Crash.ll
+++ /dev/null
@@ -1,71 +0,0 @@
-; RUN: llc < %s -march=x86 -mtriple=i386-apple-darwin8 -pre-alloc-split -regalloc=linearscan
-
-define i32 @main() nounwind {
-bb4.i.thread:
- br label %bb5.i4
-
-bb16: ; preds = %bb111.i
- %phitmp = add i32 %indvar.reg2mem.4, 1 ; <i32> [#uses=2]
- switch i32 %indvar.reg2mem.4, label %bb100.i [
- i32 0, label %bb5.i4
- i32 1, label %bb5.i4
- i32 2, label %bb5.i4
- i32 5, label %bb.i14.i
- i32 6, label %bb.i14.i
- i32 7, label %bb.i14.i
- ]
-
-bb5.i4: ; preds = %bb16, %bb16, %bb16, %bb4.i.thread
- br i1 false, label %bb102.i, label %bb103.i
-
-bb.i14.i: ; preds = %bb16, %bb16, %bb16
- %0 = malloc [600 x i32] ; <[600 x i32]*> [#uses=0]
- %1 = icmp eq i32 %phitmp, 7 ; <i1> [#uses=1]
- %tl.0.i = select i1 %1, float 1.000000e+02, float 1.000000e+00 ; <float> [#uses=1]
- %2 = icmp eq i32 %phitmp, 8 ; <i1> [#uses=1]
- %tu.0.i = select i1 %2, float 1.000000e+02, float 1.000000e+00 ; <float> [#uses=1]
- br label %bb30.i
-
-bb30.i: ; preds = %bb36.i, %bb.i14.i
- %i.1173.i = phi i32 [ 0, %bb.i14.i ], [ %indvar.next240.i, %bb36.i ] ; <i32> [#uses=3]
- %3 = icmp eq i32 0, %i.1173.i ; <i1> [#uses=1]
- br i1 %3, label %bb33.i, label %bb34.i
-
-bb33.i: ; preds = %bb30.i
- store float %tl.0.i, float* null, align 4
- br label %bb36.i
-
-bb34.i: ; preds = %bb30.i
- %4 = icmp eq i32 0, %i.1173.i ; <i1> [#uses=1]
- br i1 %4, label %bb35.i, label %bb36.i
-
-bb35.i: ; preds = %bb34.i
- store float %tu.0.i, float* null, align 4
- br label %bb36.i
-
-bb36.i: ; preds = %bb35.i, %bb34.i, %bb33.i
- %indvar.next240.i = add i32 %i.1173.i, 1 ; <i32> [#uses=1]
- br label %bb30.i
-
-bb100.i: ; preds = %bb16
- ret i32 0
-
-bb102.i: ; preds = %bb5.i4
- br label %bb103.i
-
-bb103.i: ; preds = %bb102.i, %bb5.i4
- %indvar.reg2mem.4 = phi i32 [ 0, %bb5.i4 ], [ 0, %bb102.i ] ; <i32> [#uses=2]
- %n.0.reg2mem.1.i = phi i32 [ 0, %bb102.i ], [ 0, %bb5.i4 ] ; <i32> [#uses=1]
- %5 = icmp eq i32 0, 0 ; <i1> [#uses=1]
- br i1 %5, label %bb111.i, label %bb108.i
-
-bb108.i: ; preds = %bb103.i
- ret i32 0
-
-bb111.i: ; preds = %bb103.i
- %6 = icmp sgt i32 %n.0.reg2mem.1.i, 7 ; <i1> [#uses=1]
- br i1 %6, label %bb16, label %bb112.i
-
-bb112.i: ; preds = %bb111.i
- unreachable
-}
diff --git a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll b/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
deleted file mode 100644
index 97bbd93..0000000
--- a/test/CodeGen/X86/2009-04-09-InlineAsmCrash.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; RUN: llc < %s
-; rdar://6774324
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin10.0"
- type <{ i32, %1 }> ; type %0
- type <{ [216 x i8] }> ; type %1
- type <{ %3, %4*, %28*, i64, i32, %6, %6, i32, i32, i32, i32, void (i8*, i32)*, i8*, %29*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, [2 x i8*], i32, %30, i32, %24, %4*, %4*, i64, i64, i32, i32, void (i32, %2*)*, i32, i32, i32, i32, i32, i32, i32, i32, %24, i64, i64, i64, i64, i64, %21, i32, i32, %21, i32, %31*, %3, %33, %34, %9*, i32, i32, %3, %3, %35, %41*, %42*, %11, i32, i32, i32, i8, i8, i8, i8, %69*, %69, %9*, %9*, [11 x %61], %3, i8*, i32, i64, i64, i32, i32, i32, i64 }> ; type %2
- type <{ %3*, %3* }> ; type %3
- type <{ %3, i32, %2*, %2*, %2*, %5*, i32, i32, %21, i64, i64, i64, i32, %22, %9*, %6, %4*, %23 }> ; type %4
- type <{ %3, %3, %4*, %4*, i32, %6, %9*, %9*, %5*, %20* }> ; type %5
- type <{ %7, i16, i8, i8, %8 }> ; type %6
- type <{ i32 }> ; type %7
- type <{ i8*, i8*, [2 x i32], i16, i8, i8, i8*, i8, i8, i8, i8, i8* }> ; type %8
- type <{ %10, %13, %15, i32, i32, i32, i32, %9*, %9*, %16*, i32, %17*, i64, i32 }> ; type %9
- type <{ i32, i32, %11 }> ; type %10
- type <{ %12 }> ; type %11
- type <{ [12 x i8] }> ; type %12
- type <{ %14 }> ; type %13
- type <{ [40 x i8] }> ; type %14
- type <{ [4 x i8] }> ; type %15
- type <{ %15, %15 }> ; type %16
- type <{ %17*, %17*, %9*, i32, %18*, %19* }> ; type %17
- type opaque ; type %18
- type <{ i32, i32, %9*, %9*, i32, i32 }> ; type %19
- type <{ %5*, %20*, %20*, %20* }> ; type %20
- type <{ %3, %3*, void (i8*, i8*)*, i8*, i8*, i64 }> ; type %21
- type <{ i32, [4 x i32], i32, i32, [128 x %3] }> ; type %22
- type <{ %24, %24, %24, %24*, %24*, %24*, %25, %26, %27, i32, i32, i8* }> ; type %23
- type <{ i64, i32, i32, i32 }> ; type %24
- type <{ i32, i32 }> ; type %25
- type <{ i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i64, i64, i64, i32, i32 }> ; type %26
- type <{ [16 x %17*], i32 }> ; type %27
- type <{ i8, i8, i8, i8, %7, %3 }> ; type %28
- type <{ i32, %11*, i8*, i8*, %11* }> ; type %29
- type <{ i32, i32, i32, i32, i64 }> ; type %30
- type <{ %32*, %3, %3, i32, i32, i32, %5* }> ; type %31
- type opaque ; type %32
- type <{ [44 x i8] }> ; type %33
- type <{ %17* }> ; type %34
- type <{ %36, %36*, i32, [4 x %40], i32, i32, i64, i32 }> ; type %35
- type <{ i8*, %0*, %37*, i64, %39, i32, %39, %6, i64, i64, i8*, i32 }> ; type %36
- type <{ i32, i32, i8, i8, i8, i8, i8, i8, i8, i8, %38 }> ; type %37
- type <{ i16, i16, i8, i8, i16, i32, i16, i16, i32, i16, i16, i32, i32, [8 x [8 x i16]], [8 x [16 x i16]], [96 x i8] }> ; type %38
- type <{ i8, i8, i8, i8, i8, i8, i8, i8 }> ; type %39
- type <{ i64 }> ; type %40
- type <{ %11, i32, i32, i32, %42*, %3, i8*, %3, %5*, %32*, i32, i32, i32, i32, i32, i32, i32, %59, %60, i64, i64, i32, %11, %9*, %9*, %9*, [11 x %61], %9*, %9*, %9*, %9*, %9*, [3 x %9*], %62*, %3, %3, i32, i32, %9*, %9*, i32, %67*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, %68*, [2 x i32], i64, i64, i32 }> ; type %41
- type <{ %43, %44, %47*, i64, i64, i64, i32, %11, %54, %46*, %46*, i32, i32, i32, i32, i32, i32, i32 }> ; type %42
- type <{ i16, i8, i8, i32, i32 }> ; type %43
- type <{ %45, i32, i32 }> ; type %44
- type <{ %46*, %46*, i64, i64 }> ; type %45
- type <{ %45, %15, i64, i8, i8, i8, i8, i16, i16 }> ; type %46
- type <{ i64*, i64, %48*, i32, i32, i32, %6, %53, i32, i64, i64*, i64*, %48*, %48*, %48*, i32 }> ; type %47
- type <{ %3, %43, i64, %49*, i32, i32, i32, i32, %48*, %48*, i64, %50*, i64, %52*, i32, i16, i16, i8, i8, i8, i8, %3, %3, i64, i32, i32, i32, i8*, i32, i8, i8, i8, i8, %3 }> ; type %48
- type <{ %3, %3, %49*, %48*, i64, i8, i8, i8, i8, i32, i8, i8, i8, i8 }> ; type %49
- type <{ i32, %51* }> ; type %50
- type <{ void (%50*)*, void (%50*)*, i32 (%50*, %52*, i32)*, i32 (%50*)*, i32 (%50*, i64, i32, i32, i32*)*, i32 (%50*, i64, i32, i64*, i32*, i32, i32, i32)*, i32 (%50*, i64, i32)*, i32 (%50*, i64, i64, i32)*, i32 (%50*, i64, i64, i32)*, i32 (%50*, i32)*, i32 (%50*)*, i8* }> ; type %51
- type <{ i32, %48* }> ; type %52
- type <{ i32, i32, i32 }> ; type %53
- type <{ %11, %55*, i32, %53, i64 }> ; type %54
- type <{ %3, i32, i32, i32, i32, i32, [64 x i8], %56 }> ; type %55
- type <{ %57, %58, %58 }> ; type %56
- type <{ i64, i64, i64, i64, i64 }> ; type %57
- type <{ i64, i64, i64, i64, i64, i64, i64, i64 }> ; type %58
- type <{ [2 x i32] }> ; type %59
- type <{ [8 x i32] }> ; type %60
- type <{ %9*, i32, i32, i32 }> ; type %61
- type <{ %11, i32, %11, i32, i32, %63*, i32, %64*, %65, i32, i32, i32, i32, %41* }> ; type %62
- type <{ %10*, i32, %15, %15 }> ; type %63
- type opaque ; type %64
- type <{ i32, %66*, %66*, %66**, %66*, %66** }> ; type %65
- type <{ %63, i32, %62*, %66*, %66* }> ; type %66
- type <{ i32, i32, [0 x %39] }> ; type %67
- type opaque ; type %68
- type <{ %69*, void (%69*, %2*)* }> ; type %69
- type <{ %70*, %2*, i8*, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i32, %71, i32, i32, i64, i64, i64, %72, i8*, i8*, %73, %4*, %79*, %81*, %39*, %84, i32, i32, i32, i8*, i32, i32, i32, i32, i32, i32, i32, i64*, i32, i64*, i8*, i32, [256 x i32], i64, i64, %86, %77*, i64, i64, %88*, %2*, %2* }> ; type %70
- type <{ %3, i64, i32, i32 }> ; type %71
- type <{ i64, i64, i64 }> ; type %72
- type <{ %73*, %73*, %73*, %73*, %74*, %75*, %76*, %70*, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, [3 x %78*], i8*, i8* }> ; type %73
- type <{ %74*, %74*, %75*, %76*, %73*, i32, i32, i32, i32, i32, i8*, i8* }> ; type %74
- type <{ %75*, %73*, %74*, %76*, i32, i32, i32, i32, %78*, i8*, i8* }> ; type %75
- type <{ %76*, %73*, %74*, %75*, i32, i32, i32, i32, i8*, i8*, %77* }> ; type %76
- type opaque ; type %77
- type <{ %78*, %75*, i8, i8, i8, i8, i16, i16, i16, i8, i8, i32, [0 x %73*] }> ; type %78
- type <{ i32, i32, i32, [20 x %80] }> ; type %79
- type <{ i64*, i8* }> ; type %80
- type <{ [256 x %39], [19 x %39], i8, i8, i8, i8, i8, i8, i8, i8, %82, i8, i8, i8, i8, i8, i8, i8, i8, %82, %83 }> ; type %81
- type <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i16, i16 }> ; type %82
- type <{ [16 x i64], i64 }> ; type %83
- type <{ %82*, %85, %85, %39*, i32 }> ; type %84
- type <{ i16, %39* }> ; type %85
- type <{ %87, i8* }> ; type %86
- type <{ i32, i32, i32, i8, i8, i16, i32, i32, i32, i32, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }> ; type %87
- type <{ i64, i64, i32, i32, i32, i32 }> ; type %88
- type <{ i32, i32, i32, i32, i32, i32, i32 }> ; type %89
-@kernel_stack_size = external global i32 ; <i32*> [#uses=1]
-
-define void @test(%0*) nounwind {
- %2 = tail call %2* asm sideeffect "mov %gs:${1:P},$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 ptrtoint (%2** getelementptr (%70* null, i32 0, i32 1) to i32)) nounwind ; <%2*> [#uses=1]
- %3 = getelementptr %2* %2, i32 0, i32 15 ; <i32*> [#uses=1]
- %4 = load i32* %3 ; <i32> [#uses=2]
- %5 = icmp eq i32 %4, 0 ; <i1> [#uses=1]
- br i1 %5, label %47, label %6
-
-; <label>:6 ; preds = %1
- %7 = load i32* @kernel_stack_size ; <i32> [#uses=1]
- %8 = add i32 %7, %4 ; <i32> [#uses=1]
- %9 = inttoptr i32 %8 to %89* ; <%89*> [#uses=12]
- %10 = tail call %2* asm sideeffect "mov %gs:${1:P},$0", "=r,i,~{dirflag},~{fpsr},~{flags}"(i32 ptrtoint (%2** getelementptr (%70* null, i32 0, i32 1) to i32)) nounwind ; <%2*> [#uses=1]
- %11 = getelementptr %2* %10, i32 0, i32 65, i32 1 ; <%36**> [#uses=1]
- %12 = load %36** %11 ; <%36*> [#uses=1]
- %13 = getelementptr %36* %12, i32 0, i32 1 ; <%0**> [#uses=1]
- %14 = load %0** %13 ; <%0*> [#uses=1]
- %15 = icmp eq %0* %14, %0 ; <i1> [#uses=1]
- br i1 %15, label %40, label %16
-
-; <label>:16 ; preds = %6
- %17 = getelementptr %0* %0, i32 0, i32 1 ; <%1*> [#uses=1]
- %18 = getelementptr %89* %9, i32 -1, i32 0 ; <i32*> [#uses=1]
- %19 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 32 ; <i8*> [#uses=1]
- %20 = bitcast i8* %19 to i32* ; <i32*> [#uses=1]
- %21 = load i32* %20 ; <i32> [#uses=1]
- store i32 %21, i32* %18
- %22 = getelementptr %89* %9, i32 -1, i32 1 ; <i32*> [#uses=1]
- %23 = ptrtoint %1* %17 to i32 ; <i32> [#uses=1]
- store i32 %23, i32* %22
- %24 = getelementptr %89* %9, i32 -1, i32 2 ; <i32*> [#uses=1]
- %25 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 24 ; <i8*> [#uses=1]
- %26 = bitcast i8* %25 to i32* ; <i32*> [#uses=1]
- %27 = load i32* %26 ; <i32> [#uses=1]
- store i32 %27, i32* %24
- %28 = getelementptr %89* %9, i32 -1, i32 3 ; <i32*> [#uses=1]
- %29 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 16 ; <i8*> [#uses=1]
- %30 = bitcast i8* %29 to i32* ; <i32*> [#uses=1]
- %31 = load i32* %30 ; <i32> [#uses=1]
- store i32 %31, i32* %28
- %32 = getelementptr %89* %9, i32 -1, i32 4 ; <i32*> [#uses=1]
- %33 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 20 ; <i8*> [#uses=1]
- %34 = bitcast i8* %33 to i32* ; <i32*> [#uses=1]
- %35 = load i32* %34 ; <i32> [#uses=1]
- store i32 %35, i32* %32
- %36 = getelementptr %89* %9, i32 -1, i32 5 ; <i32*> [#uses=1]
- %37 = getelementptr %0* %0, i32 0, i32 1, i32 0, i32 56 ; <i8*> [#uses=1]
- %38 = bitcast i8* %37 to i32* ; <i32*> [#uses=1]
- %39 = load i32* %38 ; <i32> [#uses=1]
- store i32 %39, i32* %36
- ret void
-
-; <label>:40 ; preds = %6
- %41 = getelementptr %89* %9, i32 -1, i32 0 ; <i32*> [#uses=1]
- tail call void asm sideeffect "movl %ebx, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %41) nounwind
- %42 = getelementptr %89* %9, i32 -1, i32 1 ; <i32*> [#uses=1]
- tail call void asm sideeffect "movl %esp, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %42) nounwind
- %43 = getelementptr %89* %9, i32 -1, i32 2 ; <i32*> [#uses=1]
- tail call void asm sideeffect "movl %ebp, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %43) nounwind
- %44 = getelementptr %89* %9, i32 -1, i32 3 ; <i32*> [#uses=1]
- tail call void asm sideeffect "movl %edi, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %44) nounwind
- %45 = getelementptr %89* %9, i32 -1, i32 4 ; <i32*> [#uses=1]
- tail call void asm sideeffect "movl %esi, $0", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %45) nounwind
- %46 = getelementptr %89* %9, i32 -1, i32 5 ; <i32*> [#uses=1]
- tail call void asm sideeffect "movl $$1f, $0\0A1:", "=*m,~{dirflag},~{fpsr},~{flags}"(i32* %46) nounwind
- ret void
-
-; <label>:47 ; preds = %1
- ret void
-}
diff --git a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
index 27f11cf..b1222d1 100644
--- a/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
+++ b/test/CodeGen/X86/2009-04-12-FastIselOverflowCrash.ll
@@ -2,7 +2,7 @@
; rdar://6772169
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10"
- type { i32, i1 } ; type %0
+ %0 = type { i32, i1 } ; type %0
declare %0 @llvm.sadd.with.overflow.i32(i32, i32) nounwind
diff --git a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
index ff8cf0a..3d70b586 100644
--- a/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
+++ b/test/CodeGen/X86/2009-04-13-2AddrAssert-2.ll
@@ -2,7 +2,7 @@
; rdar://6781755
; PR3934
- type { i32, i32 } ; type %0
+ %0 = type { i32, i32 } ; type %0
define void @bn_sqr_comba8(i32* nocapture %r, i32* %a) nounwind {
entry:
diff --git a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll b/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
deleted file mode 100644
index f739216..0000000
--- a/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
+++ /dev/null
@@ -1,121 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep asm-printer | grep 77
-; rdar://6802189
-
-; Test if linearscan is unfavoring registers for allocation to allow more reuse
-; of reloads from stack slots.
-
- %struct.SHA_CTX = type { i32, i32, i32, i32, i32, i32, i32, [16 x i32], i32 }
-
-define fastcc void @sha1_block_data_order(%struct.SHA_CTX* nocapture %c, i8* %p, i64 %num) nounwind {
-entry:
- br label %bb
-
-bb: ; preds = %bb, %entry
- %asmtmp511 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=3]
- %asmtmp513 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind ; <i32> [#uses=2]
- %asmtmp516 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind ; <i32> [#uses=1]
- %asmtmp517 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=2]
- %0 = xor i32 0, %asmtmp513 ; <i32> [#uses=0]
- %1 = add i32 0, %asmtmp517 ; <i32> [#uses=1]
- %2 = add i32 %1, 0 ; <i32> [#uses=1]
- %3 = add i32 %2, 0 ; <i32> [#uses=1]
- %asmtmp519 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind ; <i32> [#uses=1]
- %4 = xor i32 0, %asmtmp511 ; <i32> [#uses=1]
- %asmtmp520 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %4) nounwind ; <i32> [#uses=2]
- %5 = xor i32 0, %asmtmp516 ; <i32> [#uses=1]
- %6 = xor i32 %5, %asmtmp519 ; <i32> [#uses=1]
- %7 = add i32 %asmtmp513, -899497514 ; <i32> [#uses=1]
- %8 = add i32 %7, %asmtmp520 ; <i32> [#uses=1]
- %9 = add i32 %8, %6 ; <i32> [#uses=1]
- %10 = add i32 %9, 0 ; <i32> [#uses=1]
- %asmtmp523 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=1]
- %asmtmp525 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %3) nounwind ; <i32> [#uses=2]
- %11 = xor i32 0, %asmtmp525 ; <i32> [#uses=1]
- %12 = add i32 0, %11 ; <i32> [#uses=1]
- %13 = add i32 %12, 0 ; <i32> [#uses=2]
- %asmtmp528 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %10) nounwind ; <i32> [#uses=1]
- %14 = xor i32 0, %asmtmp520 ; <i32> [#uses=1]
- %asmtmp529 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %14) nounwind ; <i32> [#uses=1]
- %asmtmp530 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %13) nounwind ; <i32> [#uses=1]
- %15 = add i32 0, %asmtmp530 ; <i32> [#uses=1]
- %16 = xor i32 0, %asmtmp523 ; <i32> [#uses=1]
- %asmtmp532 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %16) nounwind ; <i32> [#uses=2]
- %asmtmp533 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %15) nounwind ; <i32> [#uses=1]
- %17 = xor i32 %13, %asmtmp528 ; <i32> [#uses=1]
- %18 = xor i32 %17, 0 ; <i32> [#uses=1]
- %19 = add i32 %asmtmp525, -899497514 ; <i32> [#uses=1]
- %20 = add i32 %19, %asmtmp532 ; <i32> [#uses=1]
- %21 = add i32 %20, %18 ; <i32> [#uses=1]
- %22 = add i32 %21, %asmtmp533 ; <i32> [#uses=1]
- %23 = xor i32 0, %asmtmp511 ; <i32> [#uses=1]
- %24 = xor i32 %23, 0 ; <i32> [#uses=1]
- %asmtmp535 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %24) nounwind ; <i32> [#uses=3]
- %25 = add i32 0, %asmtmp535 ; <i32> [#uses=1]
- %26 = add i32 %25, 0 ; <i32> [#uses=1]
- %27 = add i32 %26, 0 ; <i32> [#uses=1]
- %28 = xor i32 0, %asmtmp529 ; <i32> [#uses=0]
- %29 = xor i32 %22, 0 ; <i32> [#uses=1]
- %30 = xor i32 %29, 0 ; <i32> [#uses=1]
- %31 = add i32 0, %30 ; <i32> [#uses=1]
- %32 = add i32 %31, 0 ; <i32> [#uses=3]
- %asmtmp541 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=2]
- %asmtmp542 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %32) nounwind ; <i32> [#uses=1]
- %33 = add i32 0, %asmtmp541 ; <i32> [#uses=1]
- %34 = add i32 %33, 0 ; <i32> [#uses=1]
- %35 = add i32 %34, %asmtmp542 ; <i32> [#uses=1]
- %asmtmp543 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %27) nounwind ; <i32> [#uses=2]
- %36 = xor i32 0, %asmtmp535 ; <i32> [#uses=0]
- %37 = xor i32 %32, 0 ; <i32> [#uses=1]
- %38 = xor i32 %37, %asmtmp543 ; <i32> [#uses=1]
- %39 = add i32 0, %38 ; <i32> [#uses=1]
- %40 = add i32 %39, 0 ; <i32> [#uses=2]
- %asmtmp546 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %32) nounwind ; <i32> [#uses=1]
- %asmtmp547 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind ; <i32> [#uses=2]
- %41 = add i32 0, -899497514 ; <i32> [#uses=1]
- %42 = add i32 %41, %asmtmp547 ; <i32> [#uses=1]
- %43 = add i32 %42, 0 ; <i32> [#uses=1]
- %44 = add i32 %43, 0 ; <i32> [#uses=3]
- %asmtmp549 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %35) nounwind ; <i32> [#uses=2]
- %45 = xor i32 0, %asmtmp541 ; <i32> [#uses=1]
- %asmtmp550 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %45) nounwind ; <i32> [#uses=2]
- %asmtmp551 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %44) nounwind ; <i32> [#uses=1]
- %46 = xor i32 %40, %asmtmp546 ; <i32> [#uses=1]
- %47 = xor i32 %46, %asmtmp549 ; <i32> [#uses=1]
- %48 = add i32 %asmtmp543, -899497514 ; <i32> [#uses=1]
- %49 = add i32 %48, %asmtmp550 ; <i32> [#uses=1]
- %50 = add i32 %49, %47 ; <i32> [#uses=1]
- %51 = add i32 %50, %asmtmp551 ; <i32> [#uses=1]
- %asmtmp552 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %40) nounwind ; <i32> [#uses=2]
- %52 = xor i32 %44, %asmtmp549 ; <i32> [#uses=1]
- %53 = xor i32 %52, %asmtmp552 ; <i32> [#uses=1]
- %54 = add i32 0, %53 ; <i32> [#uses=1]
- %55 = add i32 %54, 0 ; <i32> [#uses=2]
- %asmtmp555 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %44) nounwind ; <i32> [#uses=2]
- %56 = xor i32 0, %asmtmp532 ; <i32> [#uses=1]
- %57 = xor i32 %56, %asmtmp547 ; <i32> [#uses=1]
- %asmtmp556 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %57) nounwind ; <i32> [#uses=1]
- %58 = add i32 0, %asmtmp556 ; <i32> [#uses=1]
- %59 = add i32 %58, 0 ; <i32> [#uses=1]
- %60 = add i32 %59, 0 ; <i32> [#uses=1]
- %asmtmp558 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %51) nounwind ; <i32> [#uses=1]
- %61 = xor i32 %asmtmp517, %asmtmp511 ; <i32> [#uses=1]
- %62 = xor i32 %61, %asmtmp535 ; <i32> [#uses=1]
- %63 = xor i32 %62, %asmtmp550 ; <i32> [#uses=1]
- %asmtmp559 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %63) nounwind ; <i32> [#uses=1]
- %64 = xor i32 %55, %asmtmp555 ; <i32> [#uses=1]
- %65 = xor i32 %64, %asmtmp558 ; <i32> [#uses=1]
- %asmtmp561 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %55) nounwind ; <i32> [#uses=1]
- %66 = add i32 %asmtmp552, -899497514 ; <i32> [#uses=1]
- %67 = add i32 %66, %65 ; <i32> [#uses=1]
- %68 = add i32 %67, %asmtmp559 ; <i32> [#uses=1]
- %69 = add i32 %68, 0 ; <i32> [#uses=1]
- %70 = add i32 %69, 0 ; <i32> [#uses=1]
- store i32 %70, i32* null, align 4
- %71 = add i32 0, %60 ; <i32> [#uses=1]
- store i32 %71, i32* null, align 4
- %72 = add i32 0, %asmtmp561 ; <i32> [#uses=1]
- store i32 %72, i32* null, align 4
- %73 = add i32 0, %asmtmp555 ; <i32> [#uses=1]
- store i32 %73, i32* null, align 4
- br label %bb
-}
diff --git a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll b/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
deleted file mode 100644
index 0a2fcdb..0000000
--- a/test/CodeGen/X86/2009-04-27-LiveIntervalsBug.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | grep cmpxchgl | not grep eax
-; PR4076
-
- type { i8, i8, i8 } ; type %0
- type { i32, i8** } ; type %1
- type { %3* } ; type %2
- type { %4 } ; type %3
- type { %5 } ; type %4
- type { %6, i32, %7 } ; type %5
- type { i8* } ; type %6
- type { i32, [12 x i8] } ; type %7
- type { %9 } ; type %8
- type { %10, %11*, i8 } ; type %9
- type { %11* } ; type %10
- type { i32, %6, i8*, %12, %13*, i8, i32, %28, %29, i32, %30, i32, i32, i32, i8*, i8*, i8, i8 } ; type %11
- type { %13* } ; type %12
- type { %14, i32, %13*, %21 } ; type %13
- type { %15, %16 } ; type %14
- type { i32 (...)** } ; type %15
- type { %17, i8* (i32)*, void (i8*)*, i8 } ; type %16
- type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %18 } ; type %17
- type { %19* } ; type %18
- type { i32, %20**, i32, %20**, i8** } ; type %19
- type { i32 (...)**, i32 } ; type %20
- type { %22, %25*, i8, i8, %17*, %26*, %27*, %27* } ; type %21
- type { i32 (...)**, i32, i32, i32, i32, i32, %23*, %24, [8 x %24], i32, %24*, %18 } ; type %22
- type { %23*, void (i32, %22*, i32)*, i32, i32 } ; type %23
- type { i8*, i32 } ; type %24
- type { i32 (...)**, %21 } ; type %25
- type { %20, i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 } ; type %26
- type { %20 } ; type %27
- type { void (%9*)*, i32 } ; type %28
- type { %15* } ; type %29
- type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8* } ; type %30
-@AtomicOps_Internalx86CPUFeatures = external global %0 ; <%0*> [#uses=1]
-internal constant [19 x i8] c"xxxxxxxxxxxxxxxxxx\00" ; <[19 x i8]*>:0 [#uses=1]
-internal constant [47 x i8] c"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\00" ; <[47 x i8]*>:1 [#uses=1]
-
-define i8** @func6(i8 zeroext, i32, i32, %1*) nounwind {
-; <label>:4
- %5 = alloca i32, align 4 ; <i32*> [#uses=2]
- %6 = alloca i32, align 4 ; <i32*> [#uses=2]
- %7 = alloca %2, align 8 ; <%2*> [#uses=3]
- %8 = alloca %8, align 8 ; <%8*> [#uses=2]
- br label %17
-
-; <label>:9 ; preds = %17
- %10 = getelementptr %1* %3, i32 %19, i32 0 ; <i32*> [#uses=1]
- %11 = load i32* %10, align 4 ; <i32> [#uses=1]
- %12 = icmp eq i32 %11, %2 ; <i1> [#uses=1]
- br i1 %12, label %13, label %16
-
-; <label>:13 ; preds = %9
- %14 = getelementptr %1* %3, i32 %19, i32 1 ; <i8***> [#uses=1]
- %15 = load i8*** %14, align 4 ; <i8**> [#uses=1]
- ret i8** %15
-
-; <label>:16 ; preds = %9
- %indvar.next13 = add i32 %18, 1 ; <i32> [#uses=1]
- br label %17
-
-; <label>:17 ; preds = %16, %4
- %18 = phi i32 [ 0, %4 ], [ %indvar.next13, %16 ] ; <i32> [#uses=2]
- %19 = add i32 %18, %1 ; <i32> [#uses=3]
- %20 = icmp sgt i32 %19, 3 ; <i1> [#uses=1]
- br i1 %20, label %21, label %9
-
-; <label>:21 ; preds = %17
- call void @func5()
- %22 = getelementptr %1* %3, i32 0, i32 0 ; <i32*> [#uses=1]
- %23 = load i32* %22, align 4 ; <i32> [#uses=1]
- %24 = icmp eq i32 %23, 0 ; <i1> [#uses=1]
- br i1 %24, label %._crit_edge, label %._crit_edge1
-
-._crit_edge1: ; preds = %._crit_edge1, %21
- %25 = phi i32 [ 0, %21 ], [ %26, %._crit_edge1 ] ; <i32> [#uses=1]
- %26 = add i32 %25, 1 ; <i32> [#uses=4]
- %27 = getelementptr %1* %3, i32 %26, i32 0 ; <i32*> [#uses=1]
- %28 = load i32* %27, align 4 ; <i32> [#uses=1]
- %29 = icmp ne i32 %28, 0 ; <i1> [#uses=1]
- %30 = icmp ne i32 %26, 4 ; <i1> [#uses=1]
- %31 = and i1 %29, %30 ; <i1> [#uses=1]
- br i1 %31, label %._crit_edge1, label %._crit_edge
-
-._crit_edge: ; preds = %._crit_edge1, %21
- %32 = phi i32 [ 0, %21 ], [ %26, %._crit_edge1 ] ; <i32> [#uses=3]
- %33 = call i8* @pthread_getspecific(i32 0) nounwind ; <i8*> [#uses=2]
- %34 = icmp ne i8* %33, null ; <i1> [#uses=1]
- %35 = icmp eq i8 %0, 0 ; <i1> [#uses=1]
- %36 = or i1 %34, %35 ; <i1> [#uses=1]
- br i1 %36, label %._crit_edge4, label %37
-
-; <label>:37 ; preds = %._crit_edge
- %38 = call i8* @func2(i32 2048) ; <i8*> [#uses=4]
- call void @llvm.memset.i32(i8* %38, i8 0, i32 2048, i32 4)
- %39 = call i32 @pthread_setspecific(i32 0, i8* %38) nounwind ; <i32> [#uses=2]
- store i32 %39, i32* %5
- store i32 0, i32* %6
- %40 = icmp eq i32 %39, 0 ; <i1> [#uses=1]
- br i1 %40, label %41, label %43
-
-; <label>:41 ; preds = %37
- %42 = getelementptr %2* %7, i32 0, i32 0 ; <%3**> [#uses=1]
- store %3* null, %3** %42, align 8
- br label %._crit_edge4
-
-; <label>:43 ; preds = %37
- %44 = call %3* @func1(i32* %5, i32* %6, i8* getelementptr ([47 x i8]* @1, i32 0, i32 0)) ; <%3*> [#uses=2]
- %45 = getelementptr %2* %7, i32 0, i32 0 ; <%3**> [#uses=1]
- store %3* %44, %3** %45, align 8
- %46 = icmp eq %3* %44, null ; <i1> [#uses=1]
- br i1 %46, label %._crit_edge4, label %47
-
-; <label>:47 ; preds = %43
- call void @func4(%8* %8, i8* getelementptr ([19 x i8]* @0, i32 0, i32 0), i32 165, %2* %7)
- call void @func3(%8* %8) noreturn
- unreachable
-
-._crit_edge4: ; preds = %43, %41, %._crit_edge
- %48 = phi i8* [ %38, %41 ], [ %33, %._crit_edge ], [ %38, %43 ] ; <i8*> [#uses=2]
- %49 = bitcast i8* %48 to i8** ; <i8**> [#uses=3]
- %50 = icmp ne i8* %48, null ; <i1> [#uses=1]
- %51 = icmp slt i32 %32, 4 ; <i1> [#uses=1]
- %52 = and i1 %50, %51 ; <i1> [#uses=1]
- br i1 %52, label %53, label %._crit_edge6
-
-; <label>:53 ; preds = %._crit_edge4
- %54 = getelementptr %1* %3, i32 %32, i32 0 ; <i32*> [#uses=1]
- %55 = call i32 asm sideeffect "lock; cmpxchgl $1,$2", "={ax},q,*m,0,~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %2, i32* %54, i32 0) nounwind ; <i32> [#uses=1]
- %56 = load i8* getelementptr (%0* @AtomicOps_Internalx86CPUFeatures, i32 0, i32 0), align 8 ; <i8> [#uses=1]
- %57 = icmp eq i8 %56, 0 ; <i1> [#uses=1]
- br i1 %57, label %._crit_edge7, label %58
-
-; <label>:58 ; preds = %53
- call void asm sideeffect "lfence", "~{dirflag},~{fpsr},~{flags},~{memory}"() nounwind
- br label %._crit_edge7
-
-._crit_edge7: ; preds = %58, %53
- %59 = icmp eq i32 %55, 0 ; <i1> [#uses=1]
- br i1 %59, label %60, label %._crit_edge6
-
-._crit_edge6: ; preds = %._crit_edge7, %._crit_edge4
- ret i8** %49
-
-; <label>:60 ; preds = %._crit_edge7
- %61 = getelementptr %1* %3, i32 %32, i32 1 ; <i8***> [#uses=1]
- store i8** %49, i8*** %61, align 4
- ret i8** %49
-}
-
-declare %3* @func1(i32* nocapture, i32* nocapture, i8*)
-
-declare void @func5()
-
-declare void @func4(%8*, i8*, i32, %2*)
-
-declare void @func3(%8*) noreturn
-
-declare i8* @pthread_getspecific(i32) nounwind
-
-declare i8* @func2(i32)
-
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
-
-declare i32 @pthread_setspecific(i32, i8*) nounwind
diff --git a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
index 6843723..2fbf7aa 100644
--- a/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
+++ b/test/CodeGen/X86/2009-04-29-LinearScanBug.ll
@@ -1,33 +1,33 @@
; RUN: llc < %s -mtriple=i386-apple-darwin10
; rdar://6837009
- type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 } ; type %0
- type { %2 } ; type %1
- type { %struct.pf_addr, %struct.pf_addr } ; type %2
- type { %struct.in6_addr } ; type %3
- type { [4 x i32] } ; type %4
- type { %struct.pfi_dynaddr*, [4 x i8] } ; type %5
- type { %struct.pfi_dynaddr*, %struct.pfi_dynaddr** } ; type %6
- type { %struct.pfr_ktable*, %struct.pfr_ktable*, %struct.pfr_ktable*, i32 } ; type %7
- type { %struct.pfr_ktable* } ; type %8
- type { i8* } ; type %9
- type { %11 } ; type %10
- type { i8*, i8*, %struct.radix_node* } ; type %11
- type { [2 x %struct.pf_rulequeue], %13, %13 } ; type %12
- type { %struct.pf_rulequeue*, %struct.pf_rule**, i32, i32, i32 } ; type %13
- type { %struct.pf_anchor*, %struct.pf_anchor*, %struct.pf_anchor*, i32 } ; type %14
- type { %struct.pfi_kif*, %struct.pfi_kif*, %struct.pfi_kif*, i32 } ; type %15
- type { %struct.ifnet*, %struct.ifnet** } ; type %16
- type { %18 } ; type %17
- type { %struct.pkthdr, %19 } ; type %18
- type { %struct.m_ext, [176 x i8] } ; type %19
- type { %struct.ifmultiaddr*, %struct.ifmultiaddr** } ; type %20
- type { i32, %22 } ; type %21
- type { i8*, [4 x i8] } ; type %22
- type { %struct.tcphdr* } ; type %23
- type { %struct.pf_ike_state } ; type %24
- type { %struct.pf_state_key*, %struct.pf_state_key*, %struct.pf_state_key*, i32 } ; type %25
- type { %struct.pf_src_node*, %struct.pf_src_node*, %struct.pf_src_node*, i32 } ; type %26
+ %0 = type { %struct.pf_state*, %struct.pf_state*, %struct.pf_state*, i32 }
+ %1 = type { %2 }
+ %2 = type { %struct.pf_addr, %struct.pf_addr }
+ %3 = type { %struct.in6_addr }
+ %4 = type { [4 x i32] }
+ %5 = type { %struct.pfi_dynaddr*, [4 x i8] }
+ %6 = type { %struct.pfi_dynaddr*, %struct.pfi_dynaddr** }
+ %7 = type { %struct.pfr_ktable*, %struct.pfr_ktable*, %struct.pfr_ktable*, i32 }
+ %8 = type { %struct.pfr_ktable* }
+ %9 = type { i8* }
+ %10 = type { %11 }
+ %11 = type { i8*, i8*, %struct.radix_node* }
+ %12 = type { [2 x %struct.pf_rulequeue], %13, %13 }
+ %13 = type { %struct.pf_rulequeue*, %struct.pf_rule**, i32, i32, i32 }
+ %14 = type { %struct.pf_anchor*, %struct.pf_anchor*, %struct.pf_anchor*, i32 }
+ %15 = type { %struct.pfi_kif*, %struct.pfi_kif*, %struct.pfi_kif*, i32 }
+ %16 = type { %struct.ifnet*, %struct.ifnet** }
+ %17 = type { %18 }
+ %18 = type { %struct.pkthdr, %19 }
+ %19 = type { %struct.m_ext, [176 x i8] }
+ %20 = type { %struct.ifmultiaddr*, %struct.ifmultiaddr** }
+ %21 = type { i32, %22 }
+ %22 = type { i8*, [4 x i8] }
+ %23 = type { %struct.tcphdr* }
+ %24 = type { %struct.pf_ike_state }
+ %25 = type { %struct.pf_state_key*, %struct.pf_state_key*, %struct.pf_state_key*, i32 }
+ %26 = type { %struct.pf_src_node*, %struct.pf_src_node*, %struct.pf_src_node*, i32 }
%struct.anon = type { %struct.pf_state*, %struct.pf_state** }
%struct.au_mask_t = type { i32, i32 }
%struct.bpf_if = type opaque
diff --git a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
index d1f9cf8..e803d6b 100644
--- a/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
+++ b/test/CodeGen/X86/2009-04-29-RegAllocAssert.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -disable-fp-elim -relocation-model=pic
; PR4099
- type { [62 x %struct.Bitvec*] } ; type %0
- type { i8* } ; type %1
- type { double } ; type %2
+ %0 = type { [62 x %struct.Bitvec*] } ; type %0
+ %1 = type { i8* } ; type %1
+ %2 = type { double } ; type %2
%struct..5sPragmaType = type { i8*, i32 }
%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
diff --git a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
index 9415732..3dcc0d4 100644
--- a/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
+++ b/test/CodeGen/X86/2009-06-04-VirtualLiveIn.ll
@@ -1,9 +1,9 @@
; RUN: llc < %s -march=x86
- type { %struct.GAP } ; type %0
- type { i16, i8, i8 } ; type %1
- type { [2 x i32], [2 x i32] } ; type %2
- type { %struct.rec* } ; type %3
+ %0 = type { %struct.GAP } ; type %0
+ %1 = type { i16, i8, i8 } ; type %1
+ %2 = type { [2 x i32], [2 x i32] } ; type %2
+ %3 = type { %struct.rec* } ; type %3
%struct.FILE_POS = type { i8, i8, i16, i32 }
%struct.FIRST_UNION = type { %struct.FILE_POS }
%struct.FOURTH_UNION = type { %struct.STYLE }
diff --git a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
index b329c91..2080c0a 100644
--- a/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
+++ b/test/CodeGen/X86/2009-08-06-branchfolder-crash.ll
@@ -87,8 +87,6 @@ for.inc: ; preds = %for.inc, %lor.end.i, %lor.rhs.i, %land.lhs.true3.i
br label %for.inc
}
-declare i32 @safe()
-
define i32 @func_35(i8 signext %p_35) nounwind readonly {
entry:
%tobool = icmp eq i8 %p_35, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
index 6b0d6d9..bf668e3 100644
--- a/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
+++ b/test/CodeGen/X86/2009-08-14-Win64MemoryIndirectArg.ll
@@ -2,8 +2,8 @@
target triple = "x86_64-mingw"
; ModuleID = 'mm.bc'
- type opaque ; type %0
- type opaque ; type %1
+ %0 = type opaque ; type %0
+ %1 = type opaque ; type %1
define internal fastcc float @computeMipmappingRho(%0* %shaderExecutionStatePtr, i32 %index, <4 x float> %texCoord, <4 x float> %texCoordDX, <4 x float> %texCoordDY) readonly {
indexCheckBlock:
diff --git a/test/CodeGen/X86/2009-09-19-earlyclobber.ll b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
index 4f44cae..66f5118 100644
--- a/test/CodeGen/X86/2009-09-19-earlyclobber.ll
+++ b/test/CodeGen/X86/2009-09-19-earlyclobber.ll
@@ -4,7 +4,7 @@
; Registers other than RAX, RCX are OK, but they must be different.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin10.0"
- type { i64, i64 } ; type %0
+ %0 = type { i64, i64 } ; type %0
define i64 @flsst(i64 %find) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll b/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
deleted file mode 100644
index 91c5440..0000000
--- a/test/CodeGen/X86/2009-10-08-MachineLICMBug.ll
+++ /dev/null
@@ -1,264 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -relocation-model=pic -stats |& grep {machine-licm} | grep 2
-; rdar://7274692
-
-%0 = type { [125 x i32] }
-%1 = type { i32 }
-%struct..5sPragmaType = type { i8*, i32 }
-%struct.AggInfo = type { i8, i8, i32, %struct.ExprList*, i32, %struct.AggInfo_col*, i32, i32, i32, %struct.AggInfo_func*, i32, i32 }
-%struct.AggInfo_col = type { %struct.Table*, i32, i32, i32, i32, %struct.Expr* }
-%struct.AggInfo_func = type { %struct.Expr*, %struct.FuncDef*, i32, i32 }
-%struct.AuxData = type { i8*, void (i8*)* }
-%struct.Bitvec = type { i32, i32, i32, %0 }
-%struct.BtCursor = type { %struct.Btree*, %struct.BtShared*, %struct.BtCursor*, %struct.BtCursor*, i32 (i8*, i32, i8*, i32, i8*)*, i8*, i32, %struct.MemPage*, i32, %struct.CellInfo, i8, i8, i8*, i64, i32, i8, i32* }
-%struct.BtLock = type { %struct.Btree*, i32, i8, %struct.BtLock* }
-%struct.BtShared = type { %struct.Pager*, %struct.sqlite3*, %struct.BtCursor*, %struct.MemPage*, i8, i8, i8, i8, i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, i8, i32, i8*, void (i8*)*, %struct.sqlite3_mutex*, %struct.BusyHandler, i32, %struct.BtShared*, %struct.BtLock*, %struct.Btree* }
-%struct.Btree = type { %struct.sqlite3*, %struct.BtShared*, i8, i8, i8, i32, %struct.Btree*, %struct.Btree* }
-%struct.BtreeMutexArray = type { i32, [11 x %struct.Btree*] }
-%struct.BusyHandler = type { i32 (i8*, i32)*, i8*, i32 }
-%struct.CellInfo = type { i8*, i64, i32, i32, i16, i16, i16, i16 }
-%struct.CollSeq = type { i8*, i8, i8, i8*, i32 (i8*, i32, i8*, i32, i8*)*, void (i8*)* }
-%struct.Column = type { i8*, %struct.Expr*, i8*, i8*, i8, i8, i8, i8 }
-%struct.Context = type { i64, i32, %struct.Fifo }
-%struct.CountCtx = type { i64 }
-%struct.Cursor = type { %struct.BtCursor*, i32, i64, i64, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i64, %struct.Btree*, i32, i8*, i64, i8*, %struct.KeyInfo*, i32, i64, %struct.sqlite3_vtab_cursor*, %struct.sqlite3_module*, i32, i32, i32*, i32*, i8* }
-%struct.Db = type { i8*, %struct.Btree*, i8, i8, i8*, void (i8*)*, %struct.Schema* }
-%struct.DbPage = type { %struct.Pager*, i32, %struct.DbPage*, %struct.DbPage*, %struct.PagerLruLink, %struct.DbPage*, i8, i8, i8, i8, i8, i16, %struct.DbPage*, %struct.DbPage*, i8* }
-%struct.Expr = type { i8, i8, i16, %struct.CollSeq*, %struct.Expr*, %struct.Expr*, %struct.ExprList*, %struct..5sPragmaType, %struct..5sPragmaType, i32, i32, %struct.AggInfo*, i32, i32, %struct.Select*, %struct.Table*, i32 }
-%struct.ExprList = type { i32, i32, i32, %struct.ExprList_item* }
-%struct.ExprList_item = type { %struct.Expr*, i8*, i8, i8, i8 }
-%struct.FILE = type { i8*, i32, i32, i16, i16, %struct..5sPragmaType, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct..5sPragmaType, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct..5sPragmaType, i32, i64 }
-%struct.FKey = type { %struct.Table*, %struct.FKey*, i8*, %struct.FKey*, i32, %struct.sColMap*, i8, i8, i8, i8 }
-%struct.Fifo = type { i32, %struct.FifoPage*, %struct.FifoPage* }
-%struct.FifoPage = type { i32, i32, i32, %struct.FifoPage*, [1 x i64] }
-%struct.FuncDef = type { i16, i8, i8, i8, i8*, %struct.FuncDef*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*, i32, %struct.Mem**)*, void (%struct.sqlite3_context*)*, [1 x i8] }
-%struct.Hash = type { i8, i8, i32, i32, %struct.HashElem*, %struct._ht* }
-%struct.HashElem = type { %struct.HashElem*, %struct.HashElem*, i8*, i8*, i32 }
-%struct.IdList = type { %struct..5sPragmaType*, i32, i32 }
-%struct.Index = type { i8*, i32, i32*, i32*, %struct.Table*, i32, i8, i8, i8*, %struct.Index*, %struct.Schema*, i8*, i8** }
-%struct.KeyInfo = type { %struct.sqlite3*, i8, i8, i8, i32, i8*, [1 x %struct.CollSeq*] }
-%struct.Mem = type { %struct.CountCtx, double, %struct.sqlite3*, i8*, i32, i16, i8, i8, void (i8*)* }
-%struct.MemPage = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i16, i16, i16, i16, i16, i16, [5 x %struct._OvflCell], %struct.BtShared*, i8*, %struct.DbPage*, i32, %struct.MemPage* }
-%struct.Module = type { %struct.sqlite3_module*, i8*, i8*, void (i8*)* }
-%struct.Op = type { i8, i8, i8, i8, i32, i32, i32, %1 }
-%struct.Pager = type { %struct.sqlite3_vfs*, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, %struct.Bitvec*, %struct.Bitvec*, i8*, i8*, i8*, i8*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.sqlite3_file*, %struct.BusyHandler*, %struct.PagerLruList, %struct.DbPage*, %struct.DbPage*, %struct.DbPage*, i64, i64, i64, i64, i64, i32, void (%struct.DbPage*, i32)*, void (%struct.DbPage*, i32)*, i32, %struct.DbPage**, i8*, [16 x i8] }
-%struct.PagerLruLink = type { %struct.DbPage*, %struct.DbPage* }
-%struct.PagerLruList = type { %struct.DbPage*, %struct.DbPage*, %struct.DbPage* }
-%struct.Schema = type { i32, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Hash, %struct.Table*, i8, i8, i16, i32, %struct.sqlite3* }
-%struct.Select = type { %struct.ExprList*, i8, i8, i8, i8, i8, i8, i8, %struct.SrcList*, %struct.Expr*, %struct.ExprList*, %struct.Expr*, %struct.ExprList*, %struct.Select*, %struct.Select*, %struct.Select*, %struct.Expr*, %struct.Expr*, i32, i32, [3 x i32] }
-%struct.SrcList = type { i16, i16, [1 x %struct.SrcList_item] }
-%struct.SrcList_item = type { i8*, i8*, i8*, %struct.Table*, %struct.Select*, i8, i8, i32, %struct.Expr*, %struct.IdList*, i64 }
-%struct.Table = type { i8*, i32, %struct.Column*, i32, %struct.Index*, i32, %struct.Select*, i32, %struct.Trigger*, %struct.FKey*, i8*, %struct.Expr*, i32, i8, i8, i8, i8, i8, i8, i8, %struct.Module*, %struct.sqlite3_vtab*, i32, i8**, %struct.Schema* }
-%struct.Trigger = type { i8*, i8*, i8, i8, %struct.Expr*, %struct.IdList*, %struct..5sPragmaType, %struct.Schema*, %struct.Schema*, %struct.TriggerStep*, %struct.Trigger* }
-%struct.TriggerStep = type { i32, i32, %struct.Trigger*, %struct.Select*, %struct..5sPragmaType, %struct.Expr*, %struct.ExprList*, %struct.IdList*, %struct.TriggerStep*, %struct.TriggerStep* }
-%struct.Vdbe = type { %struct.sqlite3*, %struct.Vdbe*, %struct.Vdbe*, i32, i32, %struct.Op*, i32, i32, i32*, %struct.Mem**, %struct.Mem*, i32, %struct.Cursor**, i32, %struct.Mem*, i8**, i32, i32, i32, %struct.Mem*, i32, i32, %struct.Fifo, i32, i32, %struct.Context*, i32, i32, i32, i32, i32, [25 x i32], i32, i32, i8**, i8*, %struct.Mem*, i8, i8, i8, i8, i8, i8, i32, i64, i32, %struct.BtreeMutexArray, i32, i8*, i32 }
-%struct.VdbeFunc = type { %struct.FuncDef*, i32, [1 x %struct.AuxData] }
-%struct._OvflCell = type { i8*, i16 }
-%struct._RuneCharClass = type { [14 x i8], i32 }
-%struct._RuneEntry = type { i32, i32, i32, i32* }
-%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
-%struct._RuneRange = type { i32, %struct._RuneEntry* }
-%struct.__sFILEX = type opaque
-%struct._ht = type { i32, %struct.HashElem* }
-%struct.callback_data = type { %struct.sqlite3*, i32, i32, %struct.FILE*, i32, i32, i32, i8*, [20 x i8], [100 x i32], [100 x i32], [20 x i8], %struct.previous_mode_data, [1024 x i8], i8* }
-%struct.previous_mode_data = type { i32, i32, i32, [100 x i32] }
-%struct.sColMap = type { i32, i8* }
-%struct.sqlite3 = type { %struct.sqlite3_vfs*, i32, %struct.Db*, i32, i32, i32, i32, i8, i8, i8, i8, i32, %struct.CollSeq*, i64, i64, i32, i32, i32, %struct.sqlite3_mutex*, %struct.sqlite3InitInfo, i32, i8**, %struct.Vdbe*, i32, void (i8*, i8*)*, i8*, void (i8*, i8*, i64)*, i8*, i8*, i32 (i8*)*, i8*, void (i8*)*, i8*, void (i8*, i32, i8*, i8*, i64)*, void (i8*, %struct.sqlite3*, i32, i8*)*, void (i8*, %struct.sqlite3*, i32, i8*)*, i8*, %struct.Mem*, i8*, i8*, %union.anon, i32 (i8*, i32, i8*, i8*, i8*, i8*)*, i8*, i32 (i8*)*, i8*, i32, %struct.Hash, %struct.Table*, %struct.sqlite3_vtab**, i32, %struct.Hash, %struct.Hash, %struct.BusyHandler, i32, [2 x %struct.Db], i8 }
-%struct.sqlite3InitInfo = type { i32, i32, i8 }
-%struct.sqlite3_context = type { %struct.FuncDef*, %struct.VdbeFunc*, %struct.Mem, %struct.Mem*, i32, %struct.CollSeq* }
-%struct.sqlite3_file = type { %struct.sqlite3_io_methods* }
-%struct.sqlite3_index_constraint = type { i32, i8, i8, i32 }
-%struct.sqlite3_index_constraint_usage = type { i32, i8 }
-%struct.sqlite3_index_info = type { i32, %struct.sqlite3_index_constraint*, i32, %struct.sqlite3_index_constraint_usage*, %struct.sqlite3_index_constraint_usage*, i32, i8*, i32, i32, double }
-%struct.sqlite3_io_methods = type { i32, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i8*, i32, i64)*, i32 (%struct.sqlite3_file*, i64)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i64*)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*, i32)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*, i32, i8*)*, i32 (%struct.sqlite3_file*)*, i32 (%struct.sqlite3_file*)* }
-%struct.sqlite3_module = type { i32, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3*, i8*, i32, i8**, %struct.sqlite3_vtab**, i8**)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_index_info*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, %struct.sqlite3_vtab_cursor**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, i32, i8*, i32, %struct.Mem**)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*)*, i32 (%struct.sqlite3_vtab_cursor*, %struct.sqlite3_context*, i32)*, i32 (%struct.sqlite3_vtab_cursor*, i64*)*, i32 (%struct.sqlite3_vtab*, i32, %struct.Mem**, i64*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*)*, i32 (%struct.sqlite3_vtab*, i32, i8*, void (%struct.sqlite3_context*, i32, %struct.Mem**)**, i8**)*, i32 (%struct.sqlite3_vtab*, i8*)* }
-%struct.sqlite3_mutex = type opaque
-%struct.sqlite3_vfs = type { i32, i32, i32, %struct.sqlite3_vfs*, i8*, i8*, i32 (%struct.sqlite3_vfs*, i8*, %struct.sqlite3_file*, i32, i32*)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i8*, i32)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i8*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*)*, void (%struct.sqlite3_vfs*, i32, i8*)*, i8* (%struct.sqlite3_vfs*, i8*, i8*)*, void (%struct.sqlite3_vfs*, i8*)*, i32 (%struct.sqlite3_vfs*, i32, i8*)*, i32 (%struct.sqlite3_vfs*, i32)*, i32 (%struct.sqlite3_vfs*, double*)* }
-%struct.sqlite3_vtab = type { %struct.sqlite3_module*, i32, i8* }
-%struct.sqlite3_vtab_cursor = type { %struct.sqlite3_vtab* }
-%union.anon = type { double }
-
-@_DefaultRuneLocale = external global %struct._RuneLocale ; <%struct._RuneLocale*> [#uses=2]
-@__stderrp = external global %struct.FILE* ; <%struct.FILE**> [#uses=1]
-@.str10 = internal constant [16 x i8] c"Out of memory!\0A\00", align 1 ; <[16 x i8]*> [#uses=1]
-@llvm.used = appending global [1 x i8*] [i8* bitcast (void (%struct.callback_data*, i8*)* @set_table_name to i8*)], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-
-define fastcc void @set_table_name(%struct.callback_data* nocapture %p, i8* %zName) nounwind ssp {
-entry:
- %0 = getelementptr inbounds %struct.callback_data* %p, i32 0, i32 7 ; <i8**> [#uses=3]
- %1 = load i8** %0, align 4 ; <i8*> [#uses=2]
- %2 = icmp eq i8* %1, null ; <i1> [#uses=1]
- br i1 %2, label %bb1, label %bb
-
-bb: ; preds = %entry
- free i8* %1
- store i8* null, i8** %0, align 4
- br label %bb1
-
-bb1: ; preds = %bb, %entry
- %3 = icmp eq i8* %zName, null ; <i1> [#uses=1]
- br i1 %3, label %return, label %bb2
-
-bb2: ; preds = %bb1
- %4 = load i8* %zName, align 1 ; <i8> [#uses=2]
- %5 = zext i8 %4 to i32 ; <i32> [#uses=2]
- %6 = icmp sgt i8 %4, -1 ; <i1> [#uses=1]
- br i1 %6, label %bb.i.i, label %bb1.i.i
-
-bb.i.i: ; preds = %bb2
- %7 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %5 ; <i32*> [#uses=1]
- %8 = load i32* %7, align 4 ; <i32> [#uses=1]
- %9 = and i32 %8, 256 ; <i32> [#uses=1]
- br label %isalpha.exit
-
-bb1.i.i: ; preds = %bb2
- %10 = tail call i32 @__maskrune(i32 %5, i32 256) nounwind ; <i32> [#uses=1]
- br label %isalpha.exit
-
-isalpha.exit: ; preds = %bb1.i.i, %bb.i.i
- %storemerge.in.in.i.i = phi i32 [ %9, %bb.i.i ], [ %10, %bb1.i.i ] ; <i32> [#uses=1]
- %storemerge.in.i.i = icmp eq i32 %storemerge.in.in.i.i, 0 ; <i1> [#uses=1]
- br i1 %storemerge.in.i.i, label %bb3, label %bb5
-
-bb3: ; preds = %isalpha.exit
- %11 = load i8* %zName, align 1 ; <i8> [#uses=2]
- %12 = icmp eq i8 %11, 95 ; <i1> [#uses=1]
- br i1 %12, label %bb5, label %bb12.preheader
-
-bb5: ; preds = %bb3, %isalpha.exit
- %.pre = load i8* %zName, align 1 ; <i8> [#uses=1]
- br label %bb12.preheader
-
-bb12.preheader: ; preds = %bb5, %bb3
- %13 = phi i8 [ %.pre, %bb5 ], [ %11, %bb3 ] ; <i8> [#uses=1]
- %needQuote.1.ph = phi i32 [ 0, %bb5 ], [ 1, %bb3 ] ; <i32> [#uses=2]
- %14 = icmp eq i8 %13, 0 ; <i1> [#uses=1]
- br i1 %14, label %bb13, label %bb7
-
-bb7: ; preds = %bb11, %bb12.preheader
- %i.011 = phi i32 [ %tmp17, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=2]
- %n.110 = phi i32 [ %26, %bb11 ], [ 0, %bb12.preheader ] ; <i32> [#uses=3]
- %needQuote.19 = phi i32 [ %needQuote.0, %bb11 ], [ %needQuote.1.ph, %bb12.preheader ] ; <i32> [#uses=2]
- %scevgep16 = getelementptr i8* %zName, i32 %i.011 ; <i8*> [#uses=2]
- %tmp17 = add i32 %i.011, 1 ; <i32> [#uses=2]
- %scevgep18 = getelementptr i8* %zName, i32 %tmp17 ; <i8*> [#uses=1]
- %15 = load i8* %scevgep16, align 1 ; <i8> [#uses=2]
- %16 = zext i8 %15 to i32 ; <i32> [#uses=2]
- %17 = icmp sgt i8 %15, -1 ; <i1> [#uses=1]
- br i1 %17, label %bb.i.i2, label %bb1.i.i3
-
-bb.i.i2: ; preds = %bb7
- %18 = getelementptr inbounds %struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5, i32 %16 ; <i32*> [#uses=1]
- %19 = load i32* %18, align 4 ; <i32> [#uses=1]
- %20 = and i32 %19, 1280 ; <i32> [#uses=1]
- br label %isalnum.exit
-
-bb1.i.i3: ; preds = %bb7
- %21 = tail call i32 @__maskrune(i32 %16, i32 1280) nounwind ; <i32> [#uses=1]
- br label %isalnum.exit
-
-isalnum.exit: ; preds = %bb1.i.i3, %bb.i.i2
- %storemerge.in.in.i.i4 = phi i32 [ %20, %bb.i.i2 ], [ %21, %bb1.i.i3 ] ; <i32> [#uses=1]
- %storemerge.in.i.i5 = icmp eq i32 %storemerge.in.in.i.i4, 0 ; <i1> [#uses=1]
- br i1 %storemerge.in.i.i5, label %bb8, label %bb11
-
-bb8: ; preds = %isalnum.exit
- %22 = load i8* %scevgep16, align 1 ; <i8> [#uses=2]
- %23 = icmp eq i8 %22, 95 ; <i1> [#uses=1]
- br i1 %23, label %bb11, label %bb9
-
-bb9: ; preds = %bb8
- %24 = icmp eq i8 %22, 39 ; <i1> [#uses=1]
- %25 = zext i1 %24 to i32 ; <i32> [#uses=1]
- %.n.1 = add i32 %n.110, %25 ; <i32> [#uses=1]
- br label %bb11
-
-bb11: ; preds = %bb9, %bb8, %isalnum.exit
- %needQuote.0 = phi i32 [ 1, %bb9 ], [ %needQuote.19, %isalnum.exit ], [ %needQuote.19, %bb8 ] ; <i32> [#uses=2]
- %n.0 = phi i32 [ %.n.1, %bb9 ], [ %n.110, %isalnum.exit ], [ %n.110, %bb8 ] ; <i32> [#uses=1]
- %26 = add nsw i32 %n.0, 1 ; <i32> [#uses=2]
- %27 = load i8* %scevgep18, align 1 ; <i8> [#uses=1]
- %28 = icmp eq i8 %27, 0 ; <i1> [#uses=1]
- br i1 %28, label %bb13, label %bb7
-
-bb13: ; preds = %bb11, %bb12.preheader
- %n.1.lcssa = phi i32 [ 0, %bb12.preheader ], [ %26, %bb11 ] ; <i32> [#uses=2]
- %needQuote.1.lcssa = phi i32 [ %needQuote.1.ph, %bb12.preheader ], [ %needQuote.0, %bb11 ] ; <i32> [#uses=1]
- %29 = add nsw i32 %n.1.lcssa, 2 ; <i32> [#uses=1]
- %30 = icmp eq i32 %needQuote.1.lcssa, 0 ; <i1> [#uses=3]
- %n.1. = select i1 %30, i32 %n.1.lcssa, i32 %29 ; <i32> [#uses=1]
- %31 = add nsw i32 %n.1., 1 ; <i32> [#uses=1]
- %32 = malloc i8, i32 %31 ; <i8*> [#uses=7]
- store i8* %32, i8** %0, align 4
- %33 = icmp eq i8* %32, null ; <i1> [#uses=1]
- br i1 %33, label %bb16, label %bb17
-
-bb16: ; preds = %bb13
- %34 = load %struct.FILE** @__stderrp, align 4 ; <%struct.FILE*> [#uses=1]
- %35 = bitcast %struct.FILE* %34 to i8* ; <i8*> [#uses=1]
- %36 = tail call i32 @"\01_fwrite$UNIX2003"(i8* getelementptr inbounds ([16 x i8]* @.str10, i32 0, i32 0), i32 1, i32 15, i8* %35) nounwind ; <i32> [#uses=0]
- tail call void @exit(i32 1) noreturn nounwind
- unreachable
-
-bb17: ; preds = %bb13
- br i1 %30, label %bb23.preheader, label %bb18
-
-bb18: ; preds = %bb17
- store i8 39, i8* %32, align 4
- br label %bb23.preheader
-
-bb23.preheader: ; preds = %bb18, %bb17
- %n.3.ph = phi i32 [ 1, %bb18 ], [ 0, %bb17 ] ; <i32> [#uses=2]
- %37 = load i8* %zName, align 1 ; <i8> [#uses=1]
- %38 = icmp eq i8 %37, 0 ; <i1> [#uses=1]
- br i1 %38, label %bb24, label %bb20
-
-bb20: ; preds = %bb22, %bb23.preheader
- %storemerge18 = phi i32 [ %tmp, %bb22 ], [ 0, %bb23.preheader ] ; <i32> [#uses=2]
- %n.37 = phi i32 [ %n.4, %bb22 ], [ %n.3.ph, %bb23.preheader ] ; <i32> [#uses=3]
- %scevgep = getelementptr i8* %zName, i32 %storemerge18 ; <i8*> [#uses=1]
- %tmp = add i32 %storemerge18, 1 ; <i32> [#uses=2]
- %scevgep15 = getelementptr i8* %zName, i32 %tmp ; <i8*> [#uses=1]
- %39 = load i8* %scevgep, align 1 ; <i8> [#uses=2]
- %40 = getelementptr inbounds i8* %32, i32 %n.37 ; <i8*> [#uses=1]
- store i8 %39, i8* %40, align 1
- %41 = add nsw i32 %n.37, 1 ; <i32> [#uses=2]
- %42 = icmp eq i8 %39, 39 ; <i1> [#uses=1]
- br i1 %42, label %bb21, label %bb22
-
-bb21: ; preds = %bb20
- %43 = getelementptr inbounds i8* %32, i32 %41 ; <i8*> [#uses=1]
- store i8 39, i8* %43, align 1
- %44 = add nsw i32 %n.37, 2 ; <i32> [#uses=1]
- br label %bb22
-
-bb22: ; preds = %bb21, %bb20
- %n.4 = phi i32 [ %44, %bb21 ], [ %41, %bb20 ] ; <i32> [#uses=2]
- %45 = load i8* %scevgep15, align 1 ; <i8> [#uses=1]
- %46 = icmp eq i8 %45, 0 ; <i1> [#uses=1]
- br i1 %46, label %bb24, label %bb20
-
-bb24: ; preds = %bb22, %bb23.preheader
- %n.3.lcssa = phi i32 [ %n.3.ph, %bb23.preheader ], [ %n.4, %bb22 ] ; <i32> [#uses=3]
- br i1 %30, label %bb26, label %bb25
-
-bb25: ; preds = %bb24
- %47 = getelementptr inbounds i8* %32, i32 %n.3.lcssa ; <i8*> [#uses=1]
- store i8 39, i8* %47, align 1
- %48 = add nsw i32 %n.3.lcssa, 1 ; <i32> [#uses=1]
- br label %bb26
-
-bb26: ; preds = %bb25, %bb24
- %n.5 = phi i32 [ %48, %bb25 ], [ %n.3.lcssa, %bb24 ] ; <i32> [#uses=1]
- %49 = getelementptr inbounds i8* %32, i32 %n.5 ; <i8*> [#uses=1]
- store i8 0, i8* %49, align 1
- ret void
-
-return: ; preds = %bb1
- ret void
-}
-
-declare i32 @"\01_fwrite$UNIX2003"(i8*, i32, i32, i8*)
-
-declare void @exit(i32) noreturn nounwind
-
-declare i32 @__maskrune(i32, i32)
diff --git a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
index d33f93e..94075e7 100644
--- a/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
+++ b/test/CodeGen/X86/2009-11-16-UnfoldMemOpBug.ll
@@ -16,7 +16,7 @@ bb1:
; CHECK: LBB0_1:
; CHECK: movaps %xmm0, (%rsp)
%tmp2 = phi i32 [ %tmp3, %bb1 ], [ 0, %entry ]
- call void @llvm.memcpy.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* getelementptr inbounds ([28 x i8]* @str, i64 0, i64 0), i64 28, i32 1, i1 false)
%tmp3 = add i32 %tmp2, 1
%tmp4 = icmp eq i32 %tmp3, %count
br i1 %tmp4, label %bb2, label %bb1
@@ -25,4 +25,4 @@ bb2:
ret void
}
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
index 1e7a418..0700323 100644
--- a/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
+++ b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
@@ -22,8 +22,11 @@ return: ; preds = %entry
define void @t2() nounwind ssp {
entry:
; CHECK: t2:
-; CHECK: movl %eax, %ecx
-; CHECK: %ecx = foo (%ecx, %eax)
+; CHECK: movl
+; CHECK: [[D2:%e.x]] = foo
+; CHECK: ([[D2]],
+; CHECK-NOT: [[D2]]
+; CHECK: )
%b = alloca i32 ; <i32*> [#uses=2]
%a = alloca i32 ; <i32*> [#uses=1]
%"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
diff --git a/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll b/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
index c5d3d16..739a27a 100644
--- a/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
+++ b/test/CodeGen/X86/2010-02-12-CoalescerBug-Impdef.ll
@@ -22,6 +22,7 @@ module asm "\09.ident\09\22GCC: (GNU) 4.5.0 20100212 (experimental) LLVM: 95975\
%0 = type { %"union gimple_statement_d"* }
%"BITMAP_WORD[]" = type [2 x i64]
+%"uchar[]" = type [1 x i8]
%"char[]" = type [4 x i8]
%"enum dom_state[]" = type [2 x i32]
%"int[]" = type [4 x i32]
@@ -61,6 +62,7 @@ module asm "\09.ident\09\22GCC: (GNU) 4.5.0 20100212 (experimental) LLVM: 95975\
%"struct gimple_seq_d" = type { %"struct gimple_seq_node_d"*, %"struct gimple_seq_node_d"*, %"struct gimple_seq_d"* }
%"struct gimple_seq_node_d" = type { %"union gimple_statement_d"*, %"struct gimple_seq_node_d"*, %"struct gimple_seq_node_d"* }
%"struct gimple_statement_base" = type { i8, i8, i16, i32, i32, i32, %"struct basic_block_def"*, %"union tree_node"* }
+%"struct phi_arg_d[]" = type [1 x %"struct phi_arg_d"]
%"struct gimple_statement_phi" = type { %"struct gimple_statement_base", i32, i32, %"union tree_node"*, %"struct phi_arg_d[]" }
%"struct htab" = type { i32 (i8*)*, i32 (i8*, i8*)*, void (i8*)*, i8**, i64, i64, i64, i32, i32, i8* (i64, i64)*, void (i8*)*, i8*, i8* (i8*, i64, i64)*, void (i8*, i8*)*, i32 }
%"struct iv" = type { %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, %"union tree_node"*, i8, i8, i32 }
@@ -78,7 +80,6 @@ module asm "\09.ident\09\22GCC: (GNU) 4.5.0 20100212 (experimental) LLVM: 95975\
%"struct object_block" = type { %"union section"*, i32, i64, %"struct VEC_rtx_gc"*, %"struct VEC_rtx_gc"* }
%"struct obstack" = type { i64, %"struct _obstack_chunk"*, i8*, i8*, i8*, i64, i32, %"struct _obstack_chunk"* (i8*, i64)*, void (i8*, %"struct _obstack_chunk"*)*, i8*, i8 }
%"struct phi_arg_d" = type { %"struct ssa_use_operand_d", %"union tree_node"*, i32 }
-%"struct phi_arg_d[]" = type [1 x %"struct phi_arg_d"]
%"struct pointer_map_t" = type opaque
%"struct pt_solution" = type { i8, %"struct bitmap_head_def"* }
%"struct rtx_def" = type { i16, i8, i8, %"union u" }
@@ -98,7 +99,6 @@ module asm "\09.ident\09\22GCC: (GNU) 4.5.0 20100212 (experimental) LLVM: 95975\
%"struct unnamed_section" = type { %"struct section_common", void (i8*)*, i8*, %"union section"* }
%"struct use_optype_d" = type { %"struct use_optype_d"*, %"struct ssa_use_operand_d" }
%"struct version_info" = type { %"union tree_node"*, %"struct iv"*, i8, i32, i8 }
-%"uchar[]" = type [1 x i8]
%"union basic_block_il_dependent" = type { %"struct gimple_bb_info"* }
%"union edge_def_insns" = type { %"struct gimple_seq_d"* }
%"union gimple_statement_d" = type { %"struct gimple_statement_phi" }
diff --git a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
index 1c7c28c..9a5958e 100644
--- a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
+++ b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s
; rdar://7842028
; Do not delete partially dead copy instructions.
diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
index bb1db59..05f581a 100644
--- a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
+++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
+; RUN: llc -O1 -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 -relocation-model=pic -disable-fp-elim < %s | FileCheck %s
; <rdar://problem/8124405>
%struct.type = type { %struct.subtype*, i32, i8, i32, i8, i32, i32, i32, i32, i32, i8, i32, i32, i32, i32, i32, [256 x i32], i32, [257 x i32], [257 x i32], i32*, i16*, i8*, i32, i32, i32, i32, i32, [256 x i8], [16 x i8], [256 x i8], [4096 x i8], [16 x i32], [18002 x i8], [18002 x i8], [6 x [258 x i8]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x [258 x i32]], [6 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32*, i32*, i32* }
diff --git a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
index be7d94c..e96da94 100644
--- a/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
+++ b/test/CodeGen/X86/2010-07-11-FPStackLoneUse.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=i486
+; RUN: llc < %s -mcpu=core2
; PR7375
;
; This function contains a block (while.cond) with a lonely RFP use that is
diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
index eaede30..1b33977 100644
--- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
+++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -combiner-alias-analysis -march=x86-64 | FileCheck %s
+; RUN: llc < %s -combiner-alias-analysis -march=x86-64 -mcpu=core2 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.4"
diff --git a/test/CodeGen/X86/2010-11-09-MOVLPS.ll b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
index 2368f3f..710cb86 100644
--- a/test/CodeGen/X86/2010-11-09-MOVLPS.ll
+++ b/test/CodeGen/X86/2010-11-09-MOVLPS.ll
@@ -5,11 +5,11 @@ target triple = "x86_64-unknown-linux-gnu"
module asm "\09.ident\09\22GCC: (GNU) 4.5.2 20100914 (prerelease) LLVM: 114628\22"
+%"int[]" = type [4 x i32]
%0 = type { %"int[]" }
%float = type float
%"float[]" = type [4 x float]
%int = type i32
-%"int[]" = type [4 x i32]
%"long unsigned int" = type i64
define void @swizzle(i8* %a, %0* %b, %0* %c) nounwind {
diff --git a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
index 07b1971..c6f4b49 100644
--- a/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
+++ b/test/CodeGen/X86/2011-04-13-SchedCmpJmp.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=yonah | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s
; Reduced from JavaScriptCore
%"class.JSC::CodeLocationCall" = type { [8 x i8] }
diff --git a/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
new file mode 100644
index 0000000..a51dad0
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-12-FastAllocSpill.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s -O0 -disable-fp-elim -relocation-model=pic -stats |& FileCheck %s
+;
+; This test should not cause any spilling with RAFast.
+;
+; CHECK: Number of copies coalesced
+; CHECK-NOT: Number of stores added
+;
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+%0 = type { i64, i64, i8*, i8* }
+%1 = type opaque
+%2 = type opaque
+%3 = type <{ i8*, i32, i32, void (%4*)*, i8*, i64 }>
+%4 = type { i8**, i32, i32, i8**, %5*, i64 }
+%5 = type { i64, i64 }
+%6 = type { i8*, i32, i32, i8*, %5* }
+
+@0 = external hidden constant %0
+
+define hidden void @f() ssp {
+bb:
+ %tmp5 = alloca i64, align 8
+ %tmp6 = alloca void ()*, align 8
+ %tmp7 = alloca %3, align 8
+ store i64 0, i64* %tmp5, align 8
+ br label %bb8
+
+bb8: ; preds = %bb23, %bb
+ %tmp15 = getelementptr inbounds %3* %tmp7, i32 0, i32 4
+ store i8* bitcast (%0* @0 to i8*), i8** %tmp15
+ %tmp16 = bitcast %3* %tmp7 to void ()*
+ store void ()* %tmp16, void ()** %tmp6, align 8
+ %tmp17 = load void ()** %tmp6, align 8
+ %tmp18 = bitcast void ()* %tmp17 to %6*
+ %tmp19 = getelementptr inbounds %6* %tmp18, i32 0, i32 3
+ %tmp20 = bitcast %6* %tmp18 to i8*
+ %tmp21 = load i8** %tmp19
+ %tmp22 = bitcast i8* %tmp21 to void (i8*)*
+ call void %tmp22(i8* %tmp20)
+ br label %bb23
+
+bb23: ; preds = %bb8
+ %tmp24 = load i64* %tmp5, align 8
+ %tmp25 = add i64 %tmp24, 1
+ store i64 %tmp25, i64* %tmp5, align 8
+ %tmp26 = icmp ult i64 %tmp25, 10
+ br i1 %tmp26, label %bb8, label %bb27
+
+bb27: ; preds = %bb23
+ ret void
+}
diff --git a/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll b/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
new file mode 100644
index 0000000..114b985
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-14-PreschedRegalias.ll
@@ -0,0 +1,18 @@
+; RUN: llc < %s -march=x86-64 -stress-sched | FileCheck %s
+; REQUIRES: asserts
+; Test interference between physreg aliases during preRAsched.
+; mul wants an operand in AL, but call clobbers it.
+
+define i8 @f(i8 %v1, i8 %v2) nounwind {
+entry:
+; CHECK: callq
+; CHECK: movb %{{.*}}, %al
+; CHECK: mulb
+; CHECK: mulb
+ %rval = tail call i8 @bar() nounwind
+ %m1 = mul i8 %v1, %v2
+ %m2 = mul i8 %m1, %rval
+ ret i8 %m2
+}
+
+declare i8 @bar()
diff --git a/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll b/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
new file mode 100644
index 0000000..445fc01
--- /dev/null
+++ b/test/CodeGen/X86/2011-06-14-mmx-inlineasm.ll
@@ -0,0 +1,45 @@
+; RUN: llc -mcpu=i686 -mattr=+mmx < %s | FileCheck %s
+; ModuleID = 'tq.c'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-macosx10.6.6"
+
+%0 = type { x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx }
+
+define i32 @pixman_fill_mmx(i32* nocapture %bits, i32 %stride, i32 %bpp, i32 %x, i32 %y, i32 %width, i32 %height, i32 %xor) nounwind ssp {
+entry:
+ %conv = zext i32 %xor to i64
+ %shl = shl nuw i64 %conv, 32
+ %or = or i64 %shl, %conv
+ %0 = bitcast i64 %or to x86_mmx
+; CHECK: movq [[MMXR:%mm[0-7],]] {{%mm[0-7]}}
+; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
+; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
+; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
+; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
+; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
+; CHECK-NEXT: movq [[MMXR]] {{%mm[0-7]}}
+ %1 = tail call %0 asm "movq\09\09$7,\09$0\0Amovq\09\09$7,\09$1\0Amovq\09\09$7,\09$2\0Amovq\09\09$7,\09$3\0Amovq\09\09$7,\09$4\0Amovq\09\09$7,\09$5\0Amovq\09\09$7,\09$6\0A", "=&y,=&y,=&y,=&y,=&y,=&y,=y,y,~{dirflag},~{fpsr},~{flags}"(x86_mmx %0) nounwind, !srcloc !0
+ %asmresult = extractvalue %0 %1, 0
+ %asmresult6 = extractvalue %0 %1, 1
+ %asmresult7 = extractvalue %0 %1, 2
+ %asmresult8 = extractvalue %0 %1, 3
+ %asmresult9 = extractvalue %0 %1, 4
+ %asmresult10 = extractvalue %0 %1, 5
+ %asmresult11 = extractvalue %0 %1, 6
+; CHECK: movq {{%mm[0-7]}},
+; CHECK-NEXT: movq {{%mm[0-7]}},
+; CHECK-NEXT: movq {{%mm[0-7]}},
+; CHECK-NEXT: movq {{%mm[0-7]}},
+; CHECK-NEXT: movq {{%mm[0-7]}},
+; CHECK-NEXT: movq {{%mm[0-7]}},
+; CHECK-NEXT: movq {{%mm[0-7]}},
+; CHECK-NEXT: movq {{%mm[0-7]}},
+ tail call void asm sideeffect "movq\09$1,\09 ($0)\0Amovq\09$2,\09 8($0)\0Amovq\09$3,\0916($0)\0Amovq\09$4,\0924($0)\0Amovq\09$5,\0932($0)\0Amovq\09$6,\0940($0)\0Amovq\09$7,\0948($0)\0Amovq\09$8,\0956($0)\0A", "r,y,y,y,y,y,y,y,y,~{memory},~{dirflag},~{fpsr},~{flags}"(i8* undef, x86_mmx %0, x86_mmx %asmresult, x86_mmx %asmresult6, x86_mmx %asmresult7, x86_mmx %asmresult8, x86_mmx %asmresult9, x86_mmx %asmresult10, x86_mmx %asmresult11) nounwind, !srcloc !1
+ tail call void @llvm.x86.mmx.emms() nounwind
+ ret i32 1
+}
+
+declare void @llvm.x86.mmx.emms() nounwind
+
+!0 = metadata !{i32 888, i32 917, i32 945, i32 973, i32 1001, i32 1029, i32 1057}
+!1 = metadata !{i32 1390, i32 1430, i32 1469, i32 1508, i32 1547, i32 1586, i32 1625, i32 1664}
diff --git a/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll b/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
new file mode 100644
index 0000000..7632034
--- /dev/null
+++ b/test/CodeGen/X86/2011-07-13-BadFrameIndexDisplacement.ll
@@ -0,0 +1,20 @@
+; RUN: llc -march=x86-64 < %s -disable-fp-elim | FileCheck %s
+
+; This test checks that we don't crash and that we don't incorrectly fold
+; a large displacement and a frame index into a single lea.
+; <rdar://problem/9763308>
+
+declare void @bar([39 x i8]*)
+define i32 @f(i64 %a, i64 %b) nounwind readnone {
+entry:
+ %stack_main = alloca [39 x i8]
+ call void @bar([39 x i8]* %stack_main)
+ %tmp6 = add i64 %a, -2147483647
+ %.sum = add i64 %tmp6, %b
+ %tmp8 = getelementptr inbounds [39 x i8]* %stack_main, i64 0, i64 %.sum
+ %tmp9 = load i8* %tmp8, align 1
+ %tmp10 = sext i8 %tmp9 to i32
+ ret i32 %tmp10
+}
+; CHECK: f:
+; CHECK: movsbl -2147483647
diff --git a/test/CodeGen/X86/4char-promote.ll b/test/CodeGen/X86/4char-promote.ll
new file mode 100644
index 0000000..386057f
--- /dev/null
+++ b/test/CodeGen/X86/4char-promote.ll
@@ -0,0 +1,17 @@
+; A test for PR9623.
+; RUN: llc -march=x86-64 -mcpu=corei7 -promote-elements < %s | FileCheck %s
+
+target triple = "x86_64-apple-darwin"
+
+; CHECK: pmulld
+; CHECK: paddd
+; CHECK: movdqa
+
+define <4 x i8> @foo(<4 x i8> %x, <4 x i8> %y) {
+entry:
+ %binop = mul <4 x i8> %x, %y
+ %binop6 = add <4 x i8> %binop, %x
+ ret <4 x i8> %binop6
+}
+
+
diff --git a/test/CodeGen/X86/GC/simple_ocaml.ll b/test/CodeGen/X86/GC/simple_ocaml.ll
deleted file mode 100644
index f765dc0..0000000
--- a/test/CodeGen/X86/GC/simple_ocaml.ll
+++ /dev/null
@@ -1,42 +0,0 @@
-; RUN: llc < %s | grep caml.*__frametable
-; RUN: llc < %s -march=x86 | grep {movl .0}
-
-%struct.obj = type { i8*, %struct.obj* }
-
-define %struct.obj* @fun(%struct.obj* %head) gc "ocaml" {
-entry:
- %gcroot.0 = alloca i8*
- %gcroot.1 = alloca i8*
-
- call void @llvm.gcroot(i8** %gcroot.0, i8* null)
- call void @llvm.gcroot(i8** %gcroot.1, i8* null)
-
- %local.0 = bitcast i8** %gcroot.0 to %struct.obj**
- %local.1 = bitcast i8** %gcroot.1 to %struct.obj**
-
- store %struct.obj* %head, %struct.obj** %local.0
- br label %bb.loop
-bb.loop:
- %t0 = load %struct.obj** %local.0
- %t1 = getelementptr %struct.obj* %t0, i32 0, i32 1
- %t2 = bitcast %struct.obj* %t0 to i8*
- %t3 = bitcast %struct.obj** %t1 to i8**
- %t4 = call i8* @llvm.gcread(i8* %t2, i8** %t3)
- %t5 = bitcast i8* %t4 to %struct.obj*
- %t6 = icmp eq %struct.obj* %t5, null
- br i1 %t6, label %bb.loop, label %bb.end
-bb.end:
- %t7 = malloc %struct.obj
- store %struct.obj* %t7, %struct.obj** %local.1
- %t8 = bitcast %struct.obj* %t7 to i8*
- %t9 = load %struct.obj** %local.0
- %t10 = getelementptr %struct.obj* %t9, i32 0, i32 1
- %t11 = bitcast %struct.obj* %t9 to i8*
- %t12 = bitcast %struct.obj** %t10 to i8**
- call void @llvm.gcwrite(i8* %t8, i8* %t11, i8** %t12)
- ret %struct.obj* %t7
-}
-
-declare void @llvm.gcroot(i8** %value, i8* %tag)
-declare void @llvm.gcwrite(i8* %value, i8* %obj, i8** %field)
-declare i8* @llvm.gcread(i8* %obj, i8** %field)
diff --git a/test/CodeGen/X86/adde-carry.ll b/test/CodeGen/X86/adde-carry.ll
index 98c4f99..e86adf4 100644
--- a/test/CodeGen/X86/adde-carry.ll
+++ b/test/CodeGen/X86/adde-carry.ll
@@ -1,5 +1,4 @@
; RUN: llc -march=x86-64 < %s | FileCheck %s -check-prefix=CHECK-64
-; RUN: llc -march=x86 < %s | FileCheck %s -check-prefix=CHECK-32
define void @a(i64* nocapture %s, i64* nocapture %t, i64 %a, i64 %b, i64 %c) nounwind {
entry:
@@ -16,11 +15,6 @@ entry:
store i64 %8, i64* %t, align 8
ret void
-; CHECK-32: addl
-; CHECK-32: adcl
-; CHECK-32: adcl $0
-; CHECK-32: adcl $0
-
; CHECK-64: addq
; CHECK-64: adcq $0
}
diff --git a/test/CodeGen/X86/allrem-moddi3.ll b/test/CodeGen/X86/allrem-moddi3.ll
new file mode 100644
index 0000000..0c3d04f
--- /dev/null
+++ b/test/CodeGen/X86/allrem-moddi3.ll
@@ -0,0 +1,19 @@
+; Test that, for a 64-bit signed rem, a libcall to allrem is made on Windows
+; unless we have libgcc.
+
+; RUN: llc < %s -mtriple i386-pc-win32 | FileCheck %s
+; RUN: llc < %s -mtriple i386-pc-cygwin | FileCheck %s -check-prefix USEMODDI
+; RUN: llc < %s -mtriple i386-pc-mingw32 | FileCheck %s -check-prefix USEMODDI
+; PR10305
+; END.
+
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind readonly {
+entry:
+ %conv4 = sext i32 %argc to i64
+ %div = srem i64 84, %conv4
+ %conv7 = trunc i64 %div to i32
+ ret i32 %conv7
+}
+
+; CHECK: allrem
+; USEMODDI: moddi3
diff --git a/test/CodeGen/X86/asm-global-imm.ll b/test/CodeGen/X86/asm-global-imm.ll
index 96da224..6c569d6 100644
--- a/test/CodeGen/X86/asm-global-imm.ll
+++ b/test/CodeGen/X86/asm-global-imm.ll
@@ -1,7 +1,4 @@
-; RUN: llc < %s -march=x86 -relocation-model=static | \
-; RUN: grep {test1 \$_GV}
-; RUN: llc < %s -march=x86 -relocation-model=static | \
-; RUN: grep {test2 _GV}
+; RUN: llc < %s -march=x86 -relocation-model=static | FileCheck %s
; PR882
target datalayout = "e-p:32:32"
@@ -10,7 +7,13 @@ target triple = "i686-apple-darwin9.0.0d2"
@str = external global [12 x i8] ; <[12 x i8]*> [#uses=1]
define void @foo() {
-entry:
+; CHECK: foo:
+; CHECK-NOT: ret
+; CHECK: test1 $_GV
+; CHECK-NOT: ret
+; CHECK: test2 _GV
+; CHECK: ret
+
tail call void asm sideeffect "test1 $0", "i,~{dirflag},~{fpsr},~{flags}"( i32* @GV )
tail call void asm sideeffect "test2 ${0:c}", "i,~{dirflag},~{fpsr},~{flags}"( i32* @GV )
ret void
diff --git a/test/CodeGen/X86/asm-label.ll b/test/CodeGen/X86/asm-label.ll
new file mode 100644
index 0000000..1fc6e2e
--- /dev/null
+++ b/test/CodeGen/X86/asm-label.ll
@@ -0,0 +1,40 @@
+; RUN: llc -mtriple=x86_64-apple-darwin10 -O0 < %s | FileCheck %s
+
+; Test that we print a label that we use. We had a bug where
+; we would print the jump, but not the label, because it was considered
+; a fall-through.
+
+; CHECK: jmp LBB0_9
+; CHECK: LBB0_9: ## %cleanup
+
+define void @foo() {
+entry:
+ br i1 undef, label %land.lhs.true, label %if.end11
+
+land.lhs.true: ; preds = %entry
+ br i1 undef, label %if.then, label %if.end11
+
+if.then: ; preds = %land.lhs.true
+ br i1 undef, label %if.then9, label %if.end
+
+if.then9: ; preds = %if.then
+ br label %cleanup
+
+if.end: ; preds = %if.then
+ br label %cleanup
+
+cleanup: ; preds = %if.end, %if.then9
+ switch i32 undef, label %unreachable [
+ i32 0, label %cleanup.cont
+ i32 1, label %if.end11
+ ]
+
+cleanup.cont: ; preds = %cleanup
+ br label %if.end11
+
+if.end11: ; preds = %cleanup.cont, %cleanup, %land.lhs.true, %entry
+ ret void
+
+unreachable: ; preds = %cleanup
+ unreachable
+}
diff --git a/test/CodeGen/X86/asm-label2.ll b/test/CodeGen/X86/asm-label2.ll
new file mode 100644
index 0000000..0b5de34
--- /dev/null
+++ b/test/CodeGen/X86/asm-label2.ll
@@ -0,0 +1,22 @@
+; RUN: llc -mtriple=x86_64-apple-darwin10 -O0 < %s | FileCheck %s
+
+; Test that we print a label that we use. We had a bug where
+; we would print the jump, but not the label, because it was considered
+; a fall-through.
+
+; CHECK: jmp LBB0_1
+; CHECK: LBB0_1:
+
+define void @foobar() {
+entry:
+ invoke void @_zed()
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ ret void
+
+lpad: ; preds = %entry
+ unreachable
+}
+
+declare void @_zed() ssp align 2
diff --git a/test/CodeGen/X86/atomic-or.ll b/test/CodeGen/X86/atomic-or.ll
new file mode 100644
index 0000000..164252d
--- /dev/null
+++ b/test/CodeGen/X86/atomic-or.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; rdar://9692967
+
+define void @t1(i64* %p, i32 %b) nounwind {
+entry:
+ %p.addr = alloca i64*, align 8
+ store i64* %p, i64** %p.addr, align 8
+ %tmp = load i64** %p.addr, align 8
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+; CHECK: t1:
+; CHECK: movl $2147483648, %eax
+; CHECK: lock
+; CHECK-NEXT: orq %r{{.*}}, (%r{{.*}})
+ %0 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %tmp, i64 2147483648)
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+ ret void
+}
+
+define void @t2(i64* %p, i32 %b) nounwind {
+entry:
+ %p.addr = alloca i64*, align 8
+ store i64* %p, i64** %p.addr, align 8
+ %tmp = load i64** %p.addr, align 8
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+; CHECK: t2:
+; CHECK-NOT: movl
+; CHECK: lock
+; CHECK-NEXT: orq $2147483644, (%r{{.*}})
+ %0 = call i64 @llvm.atomic.load.or.i64.p0i64(i64* %tmp, i64 2147483644)
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+ ret void
+}
+
+declare i64 @llvm.atomic.load.or.i64.p0i64(i64* nocapture, i64) nounwind
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
diff --git a/test/CodeGen/X86/avx-128.ll b/test/CodeGen/X86/avx-128.ll
index 2bd3b5d..57a3826 100644
--- a/test/CodeGen/X86/avx-128.ll
+++ b/test/CodeGen/X86/avx-128.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -march=x86 -mcpu=corei7 -mattr=avx | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
@z = common global <4 x float> zeroinitializer, align 16
@@ -10,3 +10,45 @@ entry:
ret void
}
+define void @fpext() nounwind uwtable {
+entry:
+ %f = alloca float, align 4
+ %d = alloca double, align 8
+ %tmp = load float* %f, align 4
+ ; CHECK: vcvtss2sd
+ %conv = fpext float %tmp to double
+ store double %conv, double* %d, align 8
+ ret void
+}
+
+; CHECK: vcvtsi2sdq (%
+define double @funcA(i64* nocapture %e) nounwind uwtable readonly ssp {
+entry:
+ %tmp1 = load i64* %e, align 8
+ %conv = sitofp i64 %tmp1 to double
+ ret double %conv
+}
+
+; CHECK: vcvtsi2sd (%
+define double @funcB(i32* nocapture %e) nounwind uwtable readonly ssp {
+entry:
+ %tmp1 = load i32* %e, align 4
+ %conv = sitofp i32 %tmp1 to double
+ ret double %conv
+}
+
+; CHECK: vcvtsi2ss (%
+define float @funcC(i32* nocapture %e) nounwind uwtable readonly ssp {
+entry:
+ %tmp1 = load i32* %e, align 4
+ %conv = sitofp i32 %tmp1 to float
+ ret float %conv
+}
+
+; CHECK: vcvtsi2ssq (%
+define float @funcD(i64* nocapture %e) nounwind uwtable readonly ssp {
+entry:
+ %tmp1 = load i64* %e, align 8
+ %conv = sitofp i64 %tmp1 to float
+ ret float %conv
+}
diff --git a/test/CodeGen/X86/avx-256-arith.ll b/test/CodeGen/X86/avx-256-arith.ll
new file mode 100644
index 0000000..5c512db
--- /dev/null
+++ b/test/CodeGen/X86/avx-256-arith.ll
@@ -0,0 +1,116 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vaddpd
+define <4 x double> @addpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %add.i = fadd <4 x double> %x, %y
+ ret <4 x double> %add.i
+}
+
+; CHECK: vaddpd LCP{{.*}}(%rip)
+define <4 x double> @addpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+ %add.i = fadd <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
+ ret <4 x double> %add.i
+}
+
+; CHECK: vaddps
+define <8 x float> @addps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %add.i = fadd <8 x float> %x, %y
+ ret <8 x float> %add.i
+}
+
+; CHECK: vaddps LCP{{.*}}(%rip)
+define <8 x float> @addps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+ %add.i = fadd <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
+ ret <8 x float> %add.i
+}
+
+; CHECK: vsubpd
+define <4 x double> @subpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %sub.i = fsub <4 x double> %x, %y
+ ret <4 x double> %sub.i
+}
+
+; CHECK: vsubpd (%
+define <4 x double> @subpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+ %tmp2 = load <4 x double>* %x, align 32
+ %sub.i = fsub <4 x double> %y, %tmp2
+ ret <4 x double> %sub.i
+}
+
+; CHECK: vsubps
+define <8 x float> @subps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %sub.i = fsub <8 x float> %x, %y
+ ret <8 x float> %sub.i
+}
+
+; CHECK: vsubps (%
+define <8 x float> @subps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+ %tmp2 = load <8 x float>* %x, align 32
+ %sub.i = fsub <8 x float> %y, %tmp2
+ ret <8 x float> %sub.i
+}
+
+; CHECK: vmulpd
+define <4 x double> @mulpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %mul.i = fmul <4 x double> %x, %y
+ ret <4 x double> %mul.i
+}
+
+; CHECK: vmulpd LCP{{.*}}(%rip)
+define <4 x double> @mulpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+ %mul.i = fmul <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
+ ret <4 x double> %mul.i
+}
+
+; CHECK: vmulps
+define <8 x float> @mulps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %mul.i = fmul <8 x float> %x, %y
+ ret <8 x float> %mul.i
+}
+
+; CHECK: vmulps LCP{{.*}}(%rip)
+define <8 x float> @mulps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+ %mul.i = fmul <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
+ ret <8 x float> %mul.i
+}
+
+; CHECK: vdivpd
+define <4 x double> @divpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %div.i = fdiv <4 x double> %x, %y
+ ret <4 x double> %div.i
+}
+
+; CHECK: vdivpd LCP{{.*}}(%rip)
+define <4 x double> @divpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+ %div.i = fdiv <4 x double> %y, <double 4.500000e+00, double 3.400000e+00, double 2.300000e+00, double 1.200000e+00>
+ ret <4 x double> %div.i
+}
+
+; CHECK: vdivps
+define <8 x float> @divps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %div.i = fdiv <8 x float> %x, %y
+ ret <8 x float> %div.i
+}
+
+; CHECK: vdivps LCP{{.*}}(%rip)
+define <8 x float> @divps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+ %div.i = fdiv <8 x float> %y, <float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000, float 4.500000e+00, float 0x400B333340000000, float 0x4002666660000000, float 0x3FF3333340000000>
+ ret <8 x float> %div.i
+}
+
diff --git a/test/CodeGen/X86/avx-256-arith.s b/test/CodeGen/X86/avx-256-arith.s
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/test/CodeGen/X86/avx-256-arith.s
diff --git a/test/CodeGen/X86/avx-256-logic.ll b/test/CodeGen/X86/avx-256-logic.ll
new file mode 100644
index 0000000..d9e5d08
--- /dev/null
+++ b/test/CodeGen/X86/avx-256-logic.ll
@@ -0,0 +1,161 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vandpd
+define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %x to <4 x i64>
+ %1 = bitcast <4 x double> %y to <4 x i64>
+ %and.i = and <4 x i64> %0, %1
+ %2 = bitcast <4 x i64> %and.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vandpd LCP{{.*}}(%rip)
+define <4 x double> @andpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %y to <4 x i64>
+ %and.i = and <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
+ %1 = bitcast <4 x i64> %and.i to <4 x double>
+ ret <4 x double> %1
+}
+
+; CHECK: vandps
+define <8 x float> @andps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %x to <8 x i32>
+ %1 = bitcast <8 x float> %y to <8 x i32>
+ %and.i = and <8 x i32> %0, %1
+ %2 = bitcast <8 x i32> %and.i to <8 x float>
+ ret <8 x float> %2
+}
+
+; CHECK: vandps LCP{{.*}}(%rip)
+define <8 x float> @andps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %y to <8 x i32>
+ %and.i = and <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
+ %1 = bitcast <8 x i32> %and.i to <8 x float>
+ ret <8 x float> %1
+}
+
+; CHECK: vxorpd
+define <4 x double> @xorpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %x to <4 x i64>
+ %1 = bitcast <4 x double> %y to <4 x i64>
+ %xor.i = xor <4 x i64> %0, %1
+ %2 = bitcast <4 x i64> %xor.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vxorpd LCP{{.*}}(%rip)
+define <4 x double> @xorpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %y to <4 x i64>
+ %xor.i = xor <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
+ %1 = bitcast <4 x i64> %xor.i to <4 x double>
+ ret <4 x double> %1
+}
+
+; CHECK: vxorps
+define <8 x float> @xorps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %x to <8 x i32>
+ %1 = bitcast <8 x float> %y to <8 x i32>
+ %xor.i = xor <8 x i32> %0, %1
+ %2 = bitcast <8 x i32> %xor.i to <8 x float>
+ ret <8 x float> %2
+}
+
+; CHECK: vxorps LCP{{.*}}(%rip)
+define <8 x float> @xorps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %y to <8 x i32>
+ %xor.i = xor <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
+ %1 = bitcast <8 x i32> %xor.i to <8 x float>
+ ret <8 x float> %1
+}
+
+; CHECK: vorpd
+define <4 x double> @orpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %x to <4 x i64>
+ %1 = bitcast <4 x double> %y to <4 x i64>
+ %or.i = or <4 x i64> %0, %1
+ %2 = bitcast <4 x i64> %or.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vorpd LCP{{.*}}(%rip)
+define <4 x double> @orpd256fold(<4 x double> %y) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %y to <4 x i64>
+ %or.i = or <4 x i64> %0, <i64 4616752568008179712, i64 4614838538166547251, i64 4612361558371493478, i64 4608083138725491507>
+ %1 = bitcast <4 x i64> %or.i to <4 x double>
+ ret <4 x double> %1
+}
+
+; CHECK: vorps
+define <8 x float> @orps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %x to <8 x i32>
+ %1 = bitcast <8 x float> %y to <8 x i32>
+ %or.i = or <8 x i32> %0, %1
+ %2 = bitcast <8 x i32> %or.i to <8 x float>
+ ret <8 x float> %2
+}
+
+; CHECK: vorps LCP{{.*}}(%rip)
+define <8 x float> @orps256fold(<8 x float> %y) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %y to <8 x i32>
+ %or.i = or <8 x i32> %0, <i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938, i32 1083179008, i32 1079613850, i32 1075000115, i32 1067030938>
+ %1 = bitcast <8 x i32> %or.i to <8 x float>
+ ret <8 x float> %1
+}
+
+; CHECK: vandnpd
+define <4 x double> @andnotpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <4 x double> %x to <4 x i64>
+ %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %1 = bitcast <4 x double> %y to <4 x i64>
+ %and.i = and <4 x i64> %1, %neg.i
+ %2 = bitcast <4 x i64> %and.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vandnpd (%
+define <4 x double> @andnotpd256fold(<4 x double> %y, <4 x double>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+ %tmp2 = load <4 x double>* %x, align 32
+ %0 = bitcast <4 x double> %y to <4 x i64>
+ %neg.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+ %1 = bitcast <4 x double> %tmp2 to <4 x i64>
+ %and.i = and <4 x i64> %1, %neg.i
+ %2 = bitcast <4 x i64> %and.i to <4 x double>
+ ret <4 x double> %2
+}
+
+; CHECK: vandnps
+define <8 x float> @andnotps256(<8 x float> %y, <8 x float> %x) nounwind uwtable readnone ssp {
+entry:
+ %0 = bitcast <8 x float> %x to <8 x i32>
+ %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = bitcast <8 x float> %y to <8 x i32>
+ %and.i = and <8 x i32> %1, %neg.i
+ %2 = bitcast <8 x i32> %and.i to <8 x float>
+ ret <8 x float> %2
+}
+
+; CHECK: vandnps (%
+define <8 x float> @andnotps256fold(<8 x float> %y, <8 x float>* nocapture %x) nounwind uwtable readonly ssp {
+entry:
+ %tmp2 = load <8 x float>* %x, align 32
+ %0 = bitcast <8 x float> %y to <8 x i32>
+ %neg.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+ %1 = bitcast <8 x float> %tmp2 to <8 x i32>
+ %and.i = and <8 x i32> %1, %neg.i
+ %2 = bitcast <8 x i32> %and.i to <8 x float>
+ ret <8 x float> %2
+}
diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll
new file mode 100644
index 0000000..5196089
--- /dev/null
+++ b/test/CodeGen/X86/avx-load-store.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+
+; CHECK: vmovaps
+; CHECK: vmovaps
+; CHECK: vmovapd
+; CHECK: vmovapd
+; CHECK: vmovaps
+; CHECK: vmovaps
+define void @test_256_load(double* nocapture %d, float* nocapture %f, <4 x i64>* nocapture %i) nounwind uwtable ssp {
+entry:
+ %0 = bitcast double* %d to <4 x double>*
+ %tmp1.i = load <4 x double>* %0, align 32
+ %1 = bitcast float* %f to <8 x float>*
+ %tmp1.i17 = load <8 x float>* %1, align 32
+ %tmp1.i16 = load <4 x i64>* %i, align 32
+ tail call void @dummy(<4 x double> %tmp1.i, <8 x float> %tmp1.i17, <4 x i64> %tmp1.i16) nounwind
+ store <4 x double> %tmp1.i, <4 x double>* %0, align 32
+ store <8 x float> %tmp1.i17, <8 x float>* %1, align 32
+ store <4 x i64> %tmp1.i16, <4 x i64>* %i, align 32
+ ret void
+}
+
+declare void @dummy(<4 x double>, <8 x float>, <4 x i64>)
+
diff --git a/test/CodeGen/X86/bswap.ll b/test/CodeGen/X86/bswap.ll
index 0a72c1c..a7540aa 100644
--- a/test/CodeGen/X86/bswap.ll
+++ b/test/CodeGen/X86/bswap.ll
@@ -1,8 +1,6 @@
; bswap should be constant folded when it is passed a constant argument
-; RUN: llc < %s -march=x86 | \
-; RUN: grep bswapl | count 3
-; RUN: llc < %s -march=x86 | grep rolw | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
@@ -11,17 +9,51 @@ declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
define i16 @W(i16 %A) {
+; CHECK: W:
+; CHECK: rolw $8, %ax
%Z = call i16 @llvm.bswap.i16( i16 %A ) ; <i16> [#uses=1]
ret i16 %Z
}
define i32 @X(i32 %A) {
+; CHECK: X:
+; CHECK: bswapl %eax
%Z = call i32 @llvm.bswap.i32( i32 %A ) ; <i32> [#uses=1]
ret i32 %Z
}
define i64 @Y(i64 %A) {
+; CHECK: Y:
+; CHECK: bswapl %eax
+; CHECK: bswapl %edx
%Z = call i64 @llvm.bswap.i64( i64 %A ) ; <i64> [#uses=1]
ret i64 %Z
}
+; rdar://9164521
+define i32 @test1(i32 %a) nounwind readnone {
+entry:
+; CHECK: test1
+; CHECK: bswapl %eax
+; CHECK: shrl $16, %eax
+ %and = lshr i32 %a, 8
+ %shr3 = and i32 %and, 255
+ %and2 = shl i32 %a, 8
+ %shl = and i32 %and2, 65280
+ %or = or i32 %shr3, %shl
+ ret i32 %or
+}
+
+define i32 @test2(i32 %a) nounwind readnone {
+entry:
+; CHECK: test2
+; CHECK: bswapl %eax
+; CHECK: sarl $16, %eax
+ %and = lshr i32 %a, 8
+ %shr4 = and i32 %and, 255
+ %and2 = shl i32 %a, 8
+ %or = or i32 %shr4, %and2
+ %sext = shl i32 %or, 16
+ %conv3 = ashr exact i32 %sext, 16
+ ret i32 %conv3
+}
diff --git a/test/CodeGen/X86/byval2.ll b/test/CodeGen/X86/byval2.ll
index 03a9f0f..196efe5 100644
--- a/test/CodeGen/X86/byval2.ll
+++ b/test/CodeGen/X86/byval2.ll
@@ -37,8 +37,8 @@ entry:
store i64 %b, i64* %tmp2, align 16
%tmp4 = getelementptr %struct.s* %d, i32 0, i32 2
store i64 %c, i64* %tmp4, align 16
- call void @f( %struct.s* %d byval)
- call void @f( %struct.s* %d byval)
+ call void @f( %struct.s* byval %d )
+ call void @f( %struct.s* byval %d )
ret void
}
diff --git a/test/CodeGen/X86/byval3.ll b/test/CodeGen/X86/byval3.ll
index 8d5bb6d..f3b125c 100644
--- a/test/CodeGen/X86/byval3.ll
+++ b/test/CodeGen/X86/byval3.ll
@@ -45,8 +45,8 @@ entry:
store i32 %a5, i32* %tmp8, align 16
%tmp10 = getelementptr %struct.s* %d, i32 0, i32 5
store i32 %a6, i32* %tmp10, align 16
- call void @f( %struct.s* %d byval)
- call void @f( %struct.s* %d byval)
+ call void @f( %struct.s* byval %d)
+ call void @f( %struct.s* byval %d)
ret void
}
diff --git a/test/CodeGen/X86/byval4.ll b/test/CodeGen/X86/byval4.ll
index ae1a79a..b7a4aa3 100644
--- a/test/CodeGen/X86/byval4.ll
+++ b/test/CodeGen/X86/byval4.ll
@@ -51,8 +51,8 @@ entry:
store i16 %a5, i16* %tmp8, align 16
%tmp10 = getelementptr %struct.s* %a, i32 0, i32 5
store i16 %a6, i16* %tmp10, align 16
- call void @f( %struct.s* %a byval )
- call void @f( %struct.s* %a byval )
+ call void @f( %struct.s* byval %a )
+ call void @f( %struct.s* byval %a )
ret void
}
diff --git a/test/CodeGen/X86/byval5.ll b/test/CodeGen/X86/byval5.ll
index a376709..dca0936 100644
--- a/test/CodeGen/X86/byval5.ll
+++ b/test/CodeGen/X86/byval5.ll
@@ -59,8 +59,8 @@ entry:
store i8 %a5, i8* %tmp8, align 8
%tmp10 = getelementptr %struct.s* %a, i32 0, i32 5
store i8 %a6, i8* %tmp10, align 8
- call void @f( %struct.s* %a byval )
- call void @f( %struct.s* %a byval )
+ call void @f( %struct.s* byval %a )
+ call void @f( %struct.s* byval %a )
ret void
}
diff --git a/test/CodeGen/X86/change-compare-stride-0.ll b/test/CodeGen/X86/change-compare-stride-0.ll
index d520a6f..3a383ee 100644
--- a/test/CodeGen/X86/change-compare-stride-0.ll
+++ b/test/CodeGen/X86/change-compare-stride-0.ll
@@ -1,11 +1,14 @@
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep {cmpl \$-478,} %t
-; RUN: not grep inc %t
-; RUN: not grep {leal 1(} %t
-; RUN: not grep {leal -1(} %t
-; RUN: grep dec %t | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
+; CHECK: borf:
+; CHECK-NOT: inc
+; CHECK-NOT: leal 1(
+; CHECK-NOT: leal -1(
+; CHECK: decl
+; CHECK-NEXT: cmpl $-478
+; CHECK: ret
+
bb4.thread:
br label %bb2.outer
diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll
index a9ddbdb..eee3b79 100644
--- a/test/CodeGen/X86/change-compare-stride-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-1.ll
@@ -1,11 +1,14 @@
-; RUN: llc < %s -march=x86-64 > %t
-; RUN: grep {cmpq \$-478,} %t
-; RUN: not grep inc %t
-; RUN: not grep {leal 1(} %t
-; RUN: not grep {leal -1(} %t
-; RUN: grep dec %t | count 1
+; RUN: llc < %s -march=x86-64 | FileCheck %s
define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
+; CHECK: borf:
+; CHECK-NOT: inc
+; CHECK-NOT: leal 1(
+; CHECK-NOT: leal -1(
+; CHECK: decq
+; CHECK-NEXT: cmpq $-478
+; CHECK: ret
+
bb4.thread:
br label %bb2.outer
diff --git a/test/CodeGen/X86/change-compare-stride-trickiness-1.ll b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
index cb63809..a3933e2 100644
--- a/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
+++ b/test/CodeGen/X86/change-compare-stride-trickiness-1.ll
@@ -1,6 +1,4 @@
-; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmp. \$10}
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin9"
+; RUN: llc -march=x86 < %s | FileCheck %s
; The comparison happens after the relevant use, so the stride can easily
; be changed. The comparison can be done in a narrower mode than the
@@ -9,6 +7,11 @@ target triple = "x86_64-apple-darwin9"
; could be made simpler.
define void @foo() nounwind {
+; CHECK: foo:
+; CHECK-NOT: ret
+; CHECK: cmpl $10
+; CHECK: ret
+
entry:
br label %loop
diff --git a/test/CodeGen/X86/coalescer-cross.ll b/test/CodeGen/X86/coalescer-cross.ll
index 976db64..3f1fec1 100644
--- a/test/CodeGen/X86/coalescer-cross.ll
+++ b/test/CodeGen/X86/coalescer-cross.ll
@@ -5,8 +5,8 @@
; CHECK: os_clock
; CHECK-NOT: movaps
- type { %struct.TValue } ; type %0
- type { %struct.L_Umaxalign, i32, %struct.Node* } ; type %1
+ %0 = type { %struct.TValue } ; type %0
+ %1 = type { %struct.L_Umaxalign, i32, %struct.Node* } ; type %1
%struct.CallInfo = type { %struct.TValue*, %struct.TValue*, %struct.TValue*, i32*, i32, i32 }
%struct.GCObject = type { %struct.lua_State }
%struct.L_Umaxalign = type { double }
diff --git a/test/CodeGen/X86/crash.ll b/test/CodeGen/X86/crash.ll
index 7c4e64c..b5b1ad4 100644
--- a/test/CodeGen/X86/crash.ll
+++ b/test/CodeGen/X86/crash.ll
@@ -215,3 +215,104 @@ bb2:
}
declare i64 @llvm.objectsize.i64(i8*, i1) nounwind readnone
+
+; PR10277
+; This test has dead code elimination caused by remat during spilling.
+; DCE causes a live interval to break into connected components.
+; One of the components is spilled.
+
+%t2 = type { i8 }
+%t9 = type { %t10 }
+%t10 = type { %t11 }
+%t11 = type { %t12 }
+%t12 = type { %t13*, %t13*, %t13* }
+%t13 = type { %t14*, %t15, %t15 }
+%t14 = type opaque
+%t15 = type { i8, i32, i32 }
+%t16 = type { %t17, i8* }
+%t17 = type { %t18 }
+%t18 = type { %t19 }
+%t19 = type { %t20*, %t20*, %t20* }
+%t20 = type { i32, i32 }
+%t21 = type { %t13* }
+
+define void @_ZNK4llvm17MipsFrameLowering12emitPrologueERNS_15MachineFunctionE() ssp align 2 {
+bb:
+ %tmp = load %t9** undef, align 4, !tbaa !0
+ %tmp2 = getelementptr inbounds %t9* %tmp, i32 0, i32 0
+ %tmp3 = getelementptr inbounds %t9* %tmp, i32 0, i32 0, i32 0, i32 0, i32 1
+ br label %bb4
+
+bb4: ; preds = %bb37, %bb
+ %tmp5 = phi i96 [ undef, %bb ], [ %tmp38, %bb37 ]
+ %tmp6 = phi i96 [ undef, %bb ], [ %tmp39, %bb37 ]
+ br i1 undef, label %bb34, label %bb7
+
+bb7: ; preds = %bb4
+ %tmp8 = load i32* undef, align 4
+ %tmp9 = and i96 %tmp6, 4294967040
+ %tmp10 = zext i32 %tmp8 to i96
+ %tmp11 = shl nuw nsw i96 %tmp10, 32
+ %tmp12 = or i96 %tmp9, %tmp11
+ %tmp13 = or i96 %tmp12, 1
+ %tmp14 = load i32* undef, align 4
+ %tmp15 = and i96 %tmp5, 4294967040
+ %tmp16 = zext i32 %tmp14 to i96
+ %tmp17 = shl nuw nsw i96 %tmp16, 32
+ %tmp18 = or i96 %tmp15, %tmp17
+ %tmp19 = or i96 %tmp18, 1
+ %tmp20 = load i8* undef, align 1
+ %tmp21 = and i8 %tmp20, 1
+ %tmp22 = icmp ne i8 %tmp21, 0
+ %tmp23 = select i1 %tmp22, i96 %tmp19, i96 %tmp13
+ %tmp24 = select i1 %tmp22, i96 %tmp13, i96 %tmp19
+ store i96 %tmp24, i96* undef, align 4
+ %tmp25 = load %t13** %tmp3, align 4
+ %tmp26 = icmp eq %t13* %tmp25, undef
+ br i1 %tmp26, label %bb28, label %bb27
+
+bb27: ; preds = %bb7
+ br label %bb29
+
+bb28: ; preds = %bb7
+ call void @_ZNSt6vectorIN4llvm11MachineMoveESaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%t10* %tmp2, %t21* byval align 4 undef, %t13* undef)
+ br label %bb29
+
+bb29: ; preds = %bb28, %bb27
+ store i96 %tmp23, i96* undef, align 4
+ %tmp30 = load %t13** %tmp3, align 4
+ br i1 false, label %bb33, label %bb31
+
+bb31: ; preds = %bb29
+ %tmp32 = getelementptr inbounds %t13* %tmp30, i32 1
+ store %t13* %tmp32, %t13** %tmp3, align 4
+ br label %bb37
+
+bb33: ; preds = %bb29
+ unreachable
+
+bb34: ; preds = %bb4
+ br i1 undef, label %bb36, label %bb35
+
+bb35: ; preds = %bb34
+ store %t13* null, %t13** %tmp3, align 4
+ br label %bb37
+
+bb36: ; preds = %bb34
+ call void @_ZNSt6vectorIN4llvm11MachineMoveESaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%t10* %tmp2, %t21* byval align 4 undef, %t13* undef)
+ br label %bb37
+
+bb37: ; preds = %bb36, %bb35, %bb31
+ %tmp38 = phi i96 [ %tmp23, %bb31 ], [ %tmp5, %bb35 ], [ %tmp5, %bb36 ]
+ %tmp39 = phi i96 [ %tmp24, %bb31 ], [ %tmp6, %bb35 ], [ %tmp6, %bb36 ]
+ %tmp40 = add i32 undef, 1
+ br label %bb4
+}
+
+declare %t14* @_ZN4llvm9MCContext16CreateTempSymbolEv(%t2*)
+
+declare void @_ZNSt6vectorIN4llvm11MachineMoveESaIS1_EE13_M_insert_auxEN9__gnu_cxx17__normal_iteratorIPS1_S3_EERKS1_(%t10*, %t21* byval align 4, %t13*)
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/CodeGen/X86/dag-rauw-cse.ll b/test/CodeGen/X86/dag-rauw-cse.ll
index edcfeb7..eca8c86 100644
--- a/test/CodeGen/X86/dag-rauw-cse.ll
+++ b/test/CodeGen/X86/dag-rauw-cse.ll
@@ -1,7 +1,11 @@
-; RUN: llc < %s -march=x86 | grep {orl \$1}
+; RUN: llc < %s -march=x86 | FileCheck %s
; PR3018
define i32 @test(i32 %A) nounwind {
+; CHECK: test:
+; CHECK-NOT: ret
+; CHECK: orl $1
+; CHECK: ret
%B = or i32 %A, 1
%C = or i32 %B, 1
%D = and i32 %C, 7057
diff --git a/test/CodeGen/X86/darwin-bzero.ll b/test/CodeGen/X86/darwin-bzero.ll
index a9573cf..3099526 100644
--- a/test/CodeGen/X86/darwin-bzero.ll
+++ b/test/CodeGen/X86/darwin-bzero.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -mtriple=i386-apple-darwin10 | grep __bzero
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
define void @foo(i8* %p, i32 %len) {
- call void @llvm.memset.i32(i8* %p, i8 0, i32 %len, i32 1)
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 %len, i32 1, i1 false)
ret void
}
diff --git a/test/CodeGen/X86/dbg-i128-const.ll b/test/CodeGen/X86/dbg-i128-const.ll
new file mode 100644
index 0000000..fb83fca
--- /dev/null
+++ b/test/CodeGen/X86/dbg-i128-const.ll
@@ -0,0 +1,26 @@
+; RUN: llc < %s | FileCheck %s
+
+; CHECK: DW_AT_const_value
+; CHECK-NEXT: 42
+
+define i128 @__foo(i128 %a, i128 %b) nounwind {
+entry:
+ tail call void @llvm.dbg.value(metadata !0, i64 0, metadata !1), !dbg !11
+ %add = add i128 %a, %b, !dbg !11
+ ret i128 %add, !dbg !11
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+!0 = metadata !{i128 42 }
+!1 = metadata !{i32 524544, metadata !2, metadata !"MAX", metadata !4, i32 29, metadata !8} ; [ DW_TAG_auto_variable ]
+!2 = metadata !{i32 524299, metadata !3, i32 26, i32 0} ; [ DW_TAG_lexical_block ]
+!3 = metadata !{i32 524334, i32 0, metadata !4, metadata !"__foo", metadata !"__foo", metadata !"__foo", metadata !4, i32 26, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i1 false} ; [ DW_TAG_subprogram ]
+!4 = metadata !{i32 524329, metadata !"foo.c", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ]
+!5 = metadata !{i32 524305, i32 0, i32 1, metadata !"foo.c", metadata !"/tmp", metadata !"clang", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!6 = metadata !{i32 524309, metadata !4, metadata !"", metadata !4, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!7 = metadata !{metadata !8, metadata !8, metadata !8}
+!8 = metadata !{i32 524310, metadata !4, metadata !"ti_int", metadata !9, i32 78, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ]
+!9 = metadata !{i32 524329, metadata !"myint.h", metadata !"/tmp", metadata !5} ; [ DW_TAG_file_type ]
+!10 = metadata !{i32 524324, metadata !4, metadata !"", metadata !4, i32 0, i64 128, i64 128, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!11 = metadata !{i32 29, i32 0, metadata !2, null}
diff --git a/test/CodeGen/X86/divide-by-constant.ll b/test/CodeGen/X86/divide-by-constant.ll
index 08e3272..87c1be5 100644
--- a/test/CodeGen/X86/divide-by-constant.ll
+++ b/test/CodeGen/X86/divide-by-constant.ll
@@ -40,7 +40,7 @@ entry:
%div = sdiv i16 %x, 33 ; <i32> [#uses=1]
ret i16 %div
; CHECK: test4:
-; CHECK: imull $1986, %eax, %eax
+; CHECK: imull $1986, %eax, %
}
define i32 @test5(i32 %A) nounwind {
diff --git a/test/CodeGen/X86/fast-isel-bail.ll b/test/CodeGen/X86/fast-isel-bail.ll
index 9072c5c..a485827 100644
--- a/test/CodeGen/X86/fast-isel-bail.ll
+++ b/test/CodeGen/X86/fast-isel-bail.ll
@@ -3,7 +3,7 @@
; This file is for regression tests for cases where FastISel needs
; to gracefully bail out and let SelectionDAGISel take over.
- type { i64, i8* } ; type %0
+ %0 = type { i64, i8* } ; type %0
declare void @bar(%0)
diff --git a/test/CodeGen/X86/fast-isel-call.ll b/test/CodeGen/X86/fast-isel-call.ll
index 2fbe4e2..3159741 100644
--- a/test/CodeGen/X86/fast-isel-call.ll
+++ b/test/CodeGen/X86/fast-isel-call.ll
@@ -14,12 +14,12 @@ BB2:
; CHECK: calll
; CHECK-NEXT: testb $1
}
-declare i1 @foo() zeroext nounwind
+declare zeroext i1 @foo() nounwind
declare void @foo2(%struct.s* byval)
define void @test2(%struct.s* %d) nounwind {
- call void @foo2(%struct.s* %d byval)
+ call void @foo2(%struct.s* byval %d )
ret void
; CHECK: test2:
; CHECK: movl (%eax)
diff --git a/test/CodeGen/X86/fast-isel-gep.ll b/test/CodeGen/X86/fast-isel-gep.ll
index 48abfd0..1a2e34e 100644
--- a/test/CodeGen/X86/fast-isel-gep.ll
+++ b/test/CodeGen/X86/fast-isel-gep.ll
@@ -24,7 +24,7 @@ define i32 @test2(i64 %t3, i32* %t1) nounwind {
%t15 = load i32* %t9 ; <i32> [#uses=1]
ret i32 %t15
; X32: test2:
-; X32: movl (%edx,%ecx,4), %eax
+; X32: movl (%edx,%ecx,4), %e
; X32: ret
; X64: test2:
diff --git a/test/CodeGen/X86/fma.ll b/test/CodeGen/X86/fma.ll
new file mode 100644
index 0000000..5deedb9
--- /dev/null
+++ b/test/CodeGen/X86/fma.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin10 | FileCheck %s
+
+; CHECK: test_f32
+; CHECK: _fmaf
+
+define float @test_f32(float %a, float %b, float %c) nounwind readnone ssp {
+entry:
+ %call = tail call float @llvm.fma.f32(float %a, float %b, float %c) nounwind readnone
+ ret float %call
+}
+
+; CHECK: test_f64
+; CHECK: _fma
+
+define double @test_f64(double %a, double %b, double %c) nounwind readnone ssp {
+entry:
+ %call = tail call double @llvm.fma.f64(double %a, double %b, double %c) nounwind readnone
+ ret double %call
+}
+
+; CHECK: test_f80
+; CHECK: _fmal
+
+define x86_fp80 @test_f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone ssp {
+entry:
+ %call = tail call x86_fp80 @llvm.fma.f80(x86_fp80 %a, x86_fp80 %b, x86_fp80 %c) nounwind readnone
+ ret x86_fp80 %call
+}
+
+declare float @llvm.fma.f32(float, float, float) nounwind readnone
+declare double @llvm.fma.f64(double, double, double) nounwind readnone
+declare x86_fp80 @llvm.fma.f80(x86_fp80, x86_fp80, x86_fp80) nounwind readnone
diff --git a/test/CodeGen/X86/fold-add.ll b/test/CodeGen/X86/fold-add.ll
index 5e80ea5..63e7d36 100644
--- a/test/CodeGen/X86/fold-add.ll
+++ b/test/CodeGen/X86/fold-add.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 | grep {cmpb \$0, (%r.\*,%r.\*)}
+; RUN: llc < %s -march=x86-64 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.6"
@@ -7,6 +7,11 @@ target triple = "x86_64-apple-darwin9.6"
@llvm.used = appending global [1 x i8*] [i8* bitcast (i32 (i32)* @longest_match to i8*)] ; <[1 x i8*]*> [#uses=0]
define fastcc i32 @longest_match(i32 %cur_match) nounwind {
+; CHECK: longest_match:
+; CHECK-NOT: ret
+; CHECK: cmpb $0, (%r{{.*}},%r{{.*}})
+; CHECK: ret
+
entry:
%0 = load i32* @prev_length, align 4 ; <i32> [#uses=3]
%1 = zext i32 %cur_match to i64 ; <i64> [#uses=1]
diff --git a/test/CodeGen/X86/fold-sext-trunc.ll b/test/CodeGen/X86/fold-sext-trunc.ll
index 2605123..b453310 100644
--- a/test/CodeGen/X86/fold-sext-trunc.ll
+++ b/test/CodeGen/X86/fold-sext-trunc.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86-64 | grep movslq | count 1
; PR4050
- type { i64 } ; type %0
+ %0 = type { i64 } ; type %0
%struct.S1 = type { i16, i32 }
@g_10 = external global %struct.S1 ; <%struct.S1*> [#uses=2]
diff --git a/test/CodeGen/X86/fp-stack-2results.ll b/test/CodeGen/X86/fp-stack-2results.ll
index 321e267..c8da9ea 100644
--- a/test/CodeGen/X86/fp-stack-2results.ll
+++ b/test/CodeGen/X86/fp-stack-2results.ll
@@ -1,12 +1,16 @@
; RUN: llc < %s -march=x86 | grep fldz
; RUN: llc < %s -march=x86-64 | grep fld1
+%0 = type { x86_fp80, x86_fp80 }
+
; This is basically this code on x86-64:
; _Complex long double test() { return 1.0; }
-define {x86_fp80, x86_fp80} @test() {
+define %0 @test() {
%A = fpext double 1.0 to x86_fp80
%B = fpext double 0.0 to x86_fp80
- ret x86_fp80 %A, x86_fp80 %B
+ %mrv = insertvalue %0 undef, x86_fp80 %A, 0
+ %mrv1 = insertvalue %0 %mrv, x86_fp80 %B, 1
+ ret %0 %mrv1
}
@@ -14,46 +18,48 @@ define {x86_fp80, x86_fp80} @test() {
; fld1
; fld %st(0)
; ret
-define {x86_fp80, x86_fp80} @test2() {
+define %0 @test2() {
%A = fpext double 1.0 to x86_fp80
- ret x86_fp80 %A, x86_fp80 %A
+ %mrv = insertvalue %0 undef, x86_fp80 %A, 0
+ %mrv1 = insertvalue %0 %mrv, x86_fp80 %A, 1
+ ret %0 %mrv1
}
; Uses both values.
define void @call1(x86_fp80 *%P1, x86_fp80 *%P2) {
- %a = call {x86_fp80,x86_fp80} @test()
- %b = getresult {x86_fp80,x86_fp80} %a, 0
+ %a = call %0 @test()
+ %b = extractvalue %0 %a, 0
store x86_fp80 %b, x86_fp80* %P1
- %c = getresult {x86_fp80,x86_fp80} %a, 1
+ %c = extractvalue %0 %a, 1
store x86_fp80 %c, x86_fp80* %P2
ret void
}
; Uses both values, requires fxch
define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) {
- %a = call {x86_fp80,x86_fp80} @test()
- %b = getresult {x86_fp80,x86_fp80} %a, 1
+ %a = call %0 @test()
+ %b = extractvalue %0 %a, 1
store x86_fp80 %b, x86_fp80* %P1
- %c = getresult {x86_fp80,x86_fp80} %a, 0
+ %c = extractvalue %0 %a, 0
store x86_fp80 %c, x86_fp80* %P2
ret void
}
; Uses ST(0), ST(1) is dead but must be popped.
define void @call3(x86_fp80 *%P1, x86_fp80 *%P2) {
- %a = call {x86_fp80,x86_fp80} @test()
- %b = getresult {x86_fp80,x86_fp80} %a, 0
+ %a = call %0 @test()
+ %b = extractvalue %0 %a, 0
store x86_fp80 %b, x86_fp80* %P1
ret void
}
; Uses ST(1), ST(0) is dead and must be popped.
define void @call4(x86_fp80 *%P1, x86_fp80 *%P2) {
- %a = call {x86_fp80,x86_fp80} @test()
+ %a = call %0 @test()
- %c = getresult {x86_fp80,x86_fp80} %a, 1
+ %c = extractvalue %0 %a, 1
store x86_fp80 %c, x86_fp80* %P2
ret void
}
diff --git a/test/CodeGen/X86/fp-stack-O0.ll b/test/CodeGen/X86/fp-stack-O0.ll
new file mode 100644
index 0000000..b9cb5d7
--- /dev/null
+++ b/test/CodeGen/X86/fp-stack-O0.ll
@@ -0,0 +1,24 @@
+; RUN: llc < %s -O0 | FileCheck %s
+target triple = "x86_64-apple-macosx"
+
+declare x86_fp80 @x1(i32) nounwind
+declare i32 @x2(x86_fp80, x86_fp80) nounwind
+
+; Keep track of the return value.
+; CHECK: test1
+; CHECK: x1
+; Pass arguments on the stack.
+; CHECK-NEXT: movq %rsp, [[RCX:%r..]]
+; Copy constant-pool value.
+; CHECK-NEXT: fldt LCPI
+; CHECK-NEXT: fstpt 16([[RCX]])
+; Copy x1 return value.
+; CHECK-NEXT: fstpt ([[RCX]])
+; CHECK-NEXT: x2
+define i32 @test1() nounwind uwtable ssp {
+entry:
+ %call = call x86_fp80 (...)* bitcast (x86_fp80 (i32)* @x1 to x86_fp80 (...)*)(i32 -1)
+ %call1 = call i32 @x2(x86_fp80 %call, x86_fp80 0xK401EFFFFFFFF00000000)
+ ret i32 %call1
+}
+
diff --git a/test/CodeGen/X86/fp-stack-ret.ll b/test/CodeGen/X86/fp-stack-ret.ll
index c83a0cb..1307f70 100644
--- a/test/CodeGen/X86/fp-stack-ret.ll
+++ b/test/CodeGen/X86/fp-stack-ret.ll
@@ -1,25 +1,40 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 > %t
-; RUN: grep fldl %t | count 1
-; RUN: not grep xmm %t
-; RUN: grep {sub.*esp} %t | count 1
+; RUN: llc < %s -mtriple=i686-apple-darwin8 -mcpu=yonah -march=x86 | FileCheck %s
; These testcases shouldn't require loading into an XMM register then storing
; to memory, then reloading into an FPStack reg.
+; CHECK: test1
+; CHECK: fldl
+; CHECK-NEXT: ret
define double @test1(double *%P) {
%A = load double* %P
ret double %A
}
-; fastcc should return a value
+; fastcc should return a value
+; CHECK: test2
+; CHECK-NOT: xmm
+; CHECK: ret
define fastcc double @test2(<2 x double> %A) {
%B = extractelement <2 x double> %A, i32 0
ret double %B
}
+; CHECK: test3
+; CHECK: sub{{.*}}%esp
+; CHECK-NOT: xmm
define fastcc double @test3(<4 x float> %A) {
%B = bitcast <4 x float> %A to <2 x double>
%C = call fastcc double @test2(<2 x double> %B)
ret double %C
}
-
+
+; Clear the stack when not using a return value.
+; CHECK: test4
+; CHECK: call
+; CHECK: fstp
+; CHECK: ret
+define void @test4(double *%P) {
+ %A = call double @test1(double *%P)
+ ret void
+}
diff --git a/test/CodeGen/X86/h-registers-2.ll b/test/CodeGen/X86/h-registers-2.ll
index 16e13f8..488444c 100644
--- a/test/CodeGen/X86/h-registers-2.ll
+++ b/test/CodeGen/X86/h-registers-2.ll
@@ -1,14 +1,19 @@
-; RUN: llc < %s -march=x86 > %t
-; RUN: grep {movzbl %\[abcd\]h,} %t | count 1
-; RUN: grep {shll \$3,} %t | count 1
+; RUN: llc < %s -march=x86 | FileCheck %s
; Use an h register, but don't omit the explicit shift for
; non-address use(s).
define i32 @foo(i8* %x, i32 %y) nounwind {
+; CHECK: foo:
+; CHECK-NOT: ret
+; CHECK: movzbl %{{[abcd]h}},
+; CHECK-NOT: ret
+; CHECK: shll $3,
+; CHECK: ret
+
%t0 = lshr i32 %y, 8 ; <i32> [#uses=1]
%t1 = and i32 %t0, 255 ; <i32> [#uses=2]
- %t2 = shl i32 %t1, 3
+ %t2 = shl i32 %t1, 3
%t3 = getelementptr i8* %x, i32 %t2 ; <i8*> [#uses=1]
store i8 77, i8* %t3, align 4
ret i32 %t2
diff --git a/test/CodeGen/X86/inline-asm-error.ll b/test/CodeGen/X86/inline-asm-error.ll
index 29c5ae5..134d6e9 100644
--- a/test/CodeGen/X86/inline-asm-error.ll
+++ b/test/CodeGen/X86/inline-asm-error.ll
@@ -5,10 +5,8 @@
; RUN: FileCheck %s < %t2
; RUN: FileCheck %s < %t3
-; The register allocator must fail on this function, and it should print the
-; inline asm in the diagnostic.
-; CHECK: LLVM ERROR: Ran out of registers during register allocation!
-; CHECK: INLINEASM <es:hello world>
+; The register allocator must fail on this function.
+; CHECK: error: ran out of registers during register allocation
define void @f(i32 %x0, i32 %x1, i32 %x2, i32 %x3, i32 %x4, i32 %x5, i32 %x6, i32 %x7, i32 %x8, i32 %x9) nounwind ssp {
entry:
diff --git a/test/CodeGen/X86/inline-asm-fpstack.ll b/test/CodeGen/X86/inline-asm-fpstack.ll
index 6348fca..8e48bbe 100644
--- a/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -26,7 +26,7 @@ define double @test2() {
; CHECK-NOT: fstp
; CHECK: ret
define void @test3(x86_fp80 %X) {
- call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X)
+ call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( x86_fp80 %X)
ret void
}
@@ -37,7 +37,7 @@ define void @test3(x86_fp80 %X) {
; CHECK-NOT: fstp
; CHECK: ret
define void @test4(double %X) {
- call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %X)
+ call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( double %X)
ret void
}
@@ -49,7 +49,7 @@ define void @test4(double %X) {
; CHECK: ret
define void @test5(double %X) {
%Y = fadd double %X, 123.0
- call void asm sideeffect "frob ", "{st(0)},~{dirflag},~{fpsr},~{flags}"( double %Y)
+ call void asm sideeffect "frob ", "{st(0)},~{st},~{dirflag},~{fpsr},~{flags}"( double %Y)
ret void
}
@@ -86,3 +86,246 @@ entry:
ret void
}
+; PR4185
+; Passing a non-killed value to asm in {st}.
+; Make sure it is duped before.
+; asm kills st(0), so we shouldn't pop anything
+; CHECK: testPR4185
+; CHECK: fld %st(0)
+; CHECK: fistpl
+; CHECK-NOT: fstp
+; CHECK: fistpl
+; CHECK-NOT: fstp
+; CHECK: ret
+; A valid alternative would be to remat the constant pool load before each
+; inline asm.
+define void @testPR4185() {
+return:
+ call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
+ call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
+ ret void
+}
+
+; Passing a non-killed value through asm in {st}.
+; Make sure it is not duped before.
+; Second asm kills st(0), so we shouldn't pop anything
+; CHECK: testPR4185b
+; CHECK-NOT: fld %st(0)
+; CHECK: fistl
+; CHECK-NOT: fstp
+; CHECK: fistpl
+; CHECK-NOT: fstp
+; CHECK: ret
+; A valid alternative would be to remat the constant pool load before each
+; inline asm.
+define void @testPR4185b() {
+return:
+ call void asm sideeffect "fistl $0", "{st}"(double 1.000000e+06)
+ call void asm sideeffect "fistpl $0", "{st},~{st}"(double 1.000000e+06)
+ ret void
+}
+
+; PR4459
+; The return value from ceil must be duped before being consumed by asm.
+; CHECK: testPR4459
+; CHECK: ceil
+; CHECK: fld %st(0)
+; CHECK-NOT: fxch
+; CHECK: fistpl
+; CHECK-NOT: fxch
+; CHECK: fstpt
+; CHECK: test
+define void @testPR4459(x86_fp80 %a) {
+entry:
+ %0 = call x86_fp80 @ceil(x86_fp80 %a)
+ call void asm sideeffect "fistpl $0", "{st},~{st}"( x86_fp80 %0)
+ call void @test3(x86_fp80 %0 )
+ ret void
+}
+declare x86_fp80 @ceil(x86_fp80)
+
+; PR4484
+; test1 leaves a value on the stack that is needed after the asm.
+; CHECK: testPR4484
+; CHECK: test1
+; CHECK-NOT: fstp
+; Load %a from stack after test1
+; CHECK: fldt
+; CHECK-NOT: fxch
+; CHECK: fistpl
+; CHECK-NOT: fstp
+; Set up call to test.
+; CHECK: fstpt
+; CHECK: test
+define void @testPR4484(x86_fp80 %a) {
+entry:
+ %0 = call x86_fp80 @test1()
+ call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %a)
+ call void @test3(x86_fp80 %0)
+ ret void
+}
+
+; PR4485
+; CHECK: testPR4485
+define void @testPR4485(x86_fp80* %a) {
+entry:
+ %0 = load x86_fp80* %a, align 16
+ %1 = fmul x86_fp80 %0, 0xK4006B400000000000000
+ %2 = fmul x86_fp80 %1, 0xK4012F424000000000000
+ tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2)
+ %3 = load x86_fp80* %a, align 16
+ %4 = fmul x86_fp80 %3, 0xK4006B400000000000000
+ %5 = fmul x86_fp80 %4, 0xK4012F424000000000000
+ tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5)
+ ret void
+}
+
+; An input argument in a fixed position is implicitly popped by the asm only if
+; the input argument is tied to an output register, or it is in the clobber list.
+; The clobber list case is tested above.
+;
+; This doesn't implicitly pop the stack:
+;
+; void fist1(long double x, int *p) {
+; asm volatile ("fistl %1" : : "t"(x), "m"(*p));
+; }
+;
+; CHECK: fist1
+; CHECK: fldt
+; CHECK: fistl (%e
+; CHECK: fstp
+; CHECK: ret
+define void @fist1(x86_fp80 %x, i32* %p) nounwind ssp {
+entry:
+ tail call void asm sideeffect "fistl $1", "{st},*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind
+ ret void
+}
+
+; Here, the input operand is tied to an output, which means that it is
+; implicitly popped (and then the output is implicitly pushed).
+;
+; long double fist2(long double x, int *p) {
+; long double y;
+; asm ("fistl %1" : "=&t"(y) : "0"(x), "m"(*p) : "memory");
+; return y;
+; }
+;
+; CHECK: fist2
+; CHECK: fldt
+; CHECK: fistl (%e
+; CHECK-NOT: fstp
+; CHECK: ret
+define x86_fp80 @fist2(x86_fp80 %x, i32* %p) nounwind ssp {
+entry:
+ %0 = tail call x86_fp80 asm "fistl $2", "=&{st},0,*m,~{memory},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, i32* %p) nounwind
+ ret x86_fp80 %0
+}
+
+; An 'f' constraint is never implicitly popped:
+;
+; void fucomp1(long double x, long double y) {
+; asm volatile ("fucomp %1" : : "t"(x), "f"(y) : "st");
+; }
+; CHECK: fucomp1
+; CHECK: fldt
+; CHECK: fldt
+; CHECK: fucomp %st
+; CHECK: fstp
+; CHECK-NOT: fstp
+; CHECK: ret
+define void @fucomp1(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
+entry:
+ tail call void asm sideeffect "fucomp $1", "{st},f,~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
+ ret void
+}
+
+; The 'u' constraint is only popped implicitly when clobbered:
+;
+; void fucomp2(long double x, long double y) {
+; asm volatile ("fucomp %1" : : "t"(x), "u"(y) : "st");
+; }
+;
+; void fucomp3(long double x, long double y) {
+; asm volatile ("fucompp %1" : : "t"(x), "u"(y) : "st", "st(1)");
+; }
+;
+; CHECK: fucomp2
+; CHECK: fldt
+; CHECK: fldt
+; CHECK: fucomp %st(1)
+; CHECK: fstp
+; CHECK-NOT: fstp
+; CHECK: ret
+;
+; CHECK: fucomp3
+; CHECK: fldt
+; CHECK: fldt
+; CHECK: fucompp %st(1)
+; CHECK-NOT: fstp
+; CHECK: ret
+define void @fucomp2(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
+entry:
+ tail call void asm sideeffect "fucomp $1", "{st},{st(1)},~{st},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
+ ret void
+}
+define void @fucomp3(x86_fp80 %x, x86_fp80 %y) nounwind ssp {
+entry:
+ tail call void asm sideeffect "fucompp $1", "{st},{st(1)},~{st},~{st(1)},~{dirflag},~{fpsr},~{flags}"(x86_fp80 %x, x86_fp80 %y) nounwind
+ ret void
+}
+
+; One input, two outputs, one dead output.
+%complex = type { float, float }
+; CHECK: sincos1
+; CHECK: flds
+; CHECK-NOT: fxch
+; CHECK: sincos
+; CHECK-NOT: fstp
+; CHECK: fstp %st(1)
+; CHECK-NOT: fstp
+; CHECK: ret
+define float @sincos1(float %x) nounwind ssp {
+entry:
+ %0 = tail call %complex asm "sincos", "={st},={st(1)},0,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
+ %asmresult = extractvalue %complex %0, 0
+ ret float %asmresult
+}
+
+; Same thing, swapped output operands.
+; CHECK: sincos2
+; CHECK: flds
+; CHECK-NOT: fxch
+; CHECK: sincos
+; CHECK-NOT: fstp
+; CHECK: fstp %st(1)
+; CHECK-NOT: fstp
+; CHECK: ret
+define float @sincos2(float %x) nounwind ssp {
+entry:
+ %0 = tail call %complex asm "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
+ %asmresult = extractvalue %complex %0, 1
+ ret float %asmresult
+}
+
+; Clobber st(0) after it was live-out/dead from the previous asm.
+; CHECK: sincos3
+; Load x, make a copy for the second asm.
+; CHECK: flds
+; CHECK: fld %st(0)
+; CHECK: sincos
+; Discard dead result in st(0), bring x to the top.
+; CHECK: fstp %st(0)
+; CHECK: fxch
+; x is now in st(0) for the second asm
+; CHECK: sincos
+; Discard both results.
+; CHECK: fstp
+; CHECK: fstp
+; CHECK: ret
+define float @sincos3(float %x) nounwind ssp {
+entry:
+ %0 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
+ %1 = tail call %complex asm sideeffect "sincos", "={st(1)},={st},1,~{dirflag},~{fpsr},~{flags}"(float %x) nounwind
+ %asmresult = extractvalue %complex %0, 0
+ ret float %asmresult
+}
diff --git a/test/CodeGen/X86/inline-asm-fpstack2.ll b/test/CodeGen/X86/inline-asm-fpstack2.ll
deleted file mode 100644
index 78037e0..0000000
--- a/test/CodeGen/X86/inline-asm-fpstack2.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
-; PR4185
-
-; Passing a non-killed value to asm in {st}.
-; Make sure it is duped before.
-; asm kills st(0), so we shouldn't pop anything
-; CHECK: fld %st(0)
-; CHECK: fistpl
-; CHECK-NOT: fstp
-; CHECK: fistpl
-; CHECK-NOT: fstp
-; CHECK: ret
-define void @test() {
-return:
- call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06)
- call void asm sideeffect "fistpl $0", "{st}"(double 1.000000e+06)
- ret void
-}
-
-; A valid alternative would be to remat the constant pool load before each
-; inline asm.
diff --git a/test/CodeGen/X86/inline-asm-fpstack3.ll b/test/CodeGen/X86/inline-asm-fpstack3.ll
deleted file mode 100644
index a609681..0000000
--- a/test/CodeGen/X86/inline-asm-fpstack3.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
-; PR4459
-
-; The return value from ceil must be duped before being consumed by asm.
-; CHECK: ceil
-; CHECK: fld %st(0)
-; CHECK-NOT: fxch
-; CHECK: fistpl
-; CHECK-NOT: fxch
-; CHECK: fstpt
-; CHECK: test
-define void @test2(x86_fp80 %a) {
-entry:
- %0 = call x86_fp80 @ceil(x86_fp80 %a)
- call void asm sideeffect "fistpl $0", "{st}"( x86_fp80 %0)
- call void @test(x86_fp80 %0 )
- ret void
-}
-declare x86_fp80 @ceil(x86_fp80)
-declare void @test(x86_fp80)
diff --git a/test/CodeGen/X86/inline-asm-fpstack4.ll b/test/CodeGen/X86/inline-asm-fpstack4.ll
deleted file mode 100644
index ec572b4..0000000
--- a/test/CodeGen/X86/inline-asm-fpstack4.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86 | FileCheck %s
-; PR4484
-
-; ceil leaves a value on the stack that is needed after the asm.
-; CHECK: ceil
-; CHECK-NOT: fstp
-; Load %a from stack after ceil
-; CHECK: fldt
-; CHECK-NOT: fxch
-; CHECK: fistpl
-; CHECK-NOT: fstp
-; Set up call to test.
-; CHECK: fstpt
-; CHECK: test
-define void @test2(x86_fp80 %a) {
-entry:
- %0 = call x86_fp80 @ceil()
- call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %a)
- call void @test(x86_fp80 %0)
- ret void
-}
-
-declare x86_fp80 @ceil()
-declare void @test(x86_fp80)
diff --git a/test/CodeGen/X86/inline-asm-fpstack5.ll b/test/CodeGen/X86/inline-asm-fpstack5.ll
deleted file mode 100644
index 8b219cf..0000000
--- a/test/CodeGen/X86/inline-asm-fpstack5.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llc < %s -march=x86
-; PR4485
-
-define void @test(x86_fp80* %a) {
-entry:
- %0 = load x86_fp80* %a, align 16
- %1 = fmul x86_fp80 %0, 0xK4006B400000000000000
- %2 = fmul x86_fp80 %1, 0xK4012F424000000000000
- tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %2)
- %3 = load x86_fp80* %a, align 16
- %4 = fmul x86_fp80 %3, 0xK4006B400000000000000
- %5 = fmul x86_fp80 %4, 0xK4012F424000000000000
- tail call void asm sideeffect "fistpl $0", "{st},~{st}"(x86_fp80 %5)
- ret void
-}
diff --git a/test/CodeGen/X86/inline-asm-mrv.ll b/test/CodeGen/X86/inline-asm-mrv.ll
index 78d7e77..733205d 100644
--- a/test/CodeGen/X86/inline-asm-mrv.ll
+++ b/test/CodeGen/X86/inline-asm-mrv.ll
@@ -11,7 +11,7 @@ define i32 @test1(i8* %v, i8* %blk2, i8* %blk1, i32 %stride, i32 %h) nounwind {
%tmp12 = sext i32 %stride to i64 ; <i64> [#uses=1]
%mrv = call {i32, i8*, i8*} asm sideeffect "$0 $1 $2 $3 $4 $5 $6",
"=r,=r,=r,r,r,r,r"( i64 %tmp12, i32 %h, i8* %blk1, i8* %blk2 ) nounwind
- %tmp6 = getresult {i32, i8*, i8*} %mrv, 0
+ %tmp6 = extractvalue {i32, i8*, i8*} %mrv, 0
%tmp7 = call i32 asm sideeffect "set $0",
"=r,~{dirflag},~{fpsr},~{flags}"( ) nounwind
ret i32 %tmp7
@@ -19,16 +19,16 @@ define i32 @test1(i8* %v, i8* %blk2, i8* %blk1, i32 %stride, i32 %h) nounwind {
define <4 x float> @test2() nounwind {
%mrv = call {<4 x float>, <4 x float>} asm "set $0, $1", "=x,=x"()
- %a = getresult {<4 x float>, <4 x float>} %mrv, 0
- %b = getresult {<4 x float>, <4 x float>} %mrv, 1
+ %a = extractvalue {<4 x float>, <4 x float>} %mrv, 0
+ %b = extractvalue {<4 x float>, <4 x float>} %mrv, 1
%c = fadd <4 x float> %a, %b
ret <4 x float> %c
}
define <4 x i32> @test3() nounwind {
%mrv = call {<4 x i32>, <4 x i32>} asm "set $0, $1", "=x,=x"()
- %a = getresult {<4 x i32>, <4 x i32>} %mrv, 0
- %b = getresult {<4 x i32>, <4 x i32>} %mrv, 1
+ %a = extractvalue {<4 x i32>, <4 x i32>} %mrv, 0
+ %b = extractvalue {<4 x i32>, <4 x i32>} %mrv, 1
%c = add <4 x i32> %a, %b
ret <4 x i32> %c
}
diff --git a/test/CodeGen/X86/inline-asm-q-regs.ll b/test/CodeGen/X86/inline-asm-q-regs.ll
index ab44206..1c8e2f9 100644
--- a/test/CodeGen/X86/inline-asm-q-regs.ll
+++ b/test/CodeGen/X86/inline-asm-q-regs.ll
@@ -1,10 +1,22 @@
; RUN: llc < %s -march=x86-64
; rdar://7066579
- type { i64, i64, i64, i64, i64 } ; type %0
+ %0 = type { i64, i64, i64, i64, i64 } ; type %0
-define void @t() nounwind {
+define void @test1() nounwind {
entry:
%asmtmp = call %0 asm sideeffect "mov %cr0, $0 \0Amov %cr2, $1 \0Amov %cr3, $2 \0Amov %cr4, $3 \0Amov %cr8, $0 \0A", "=q,=q,=q,=q,=q,~{dirflag},~{fpsr},~{flags}"() nounwind ; <%0> [#uses=0]
ret void
}
+
+; PR9602
+define void @test2(float %tmp) nounwind {
+ call void asm sideeffect "$0", "q"(float %tmp) nounwind
+ call void asm sideeffect "$0", "Q"(float %tmp) nounwind
+ ret void
+}
+
+define void @test3(double %tmp) nounwind {
+ call void asm sideeffect "$0", "q"(double %tmp) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/inline-asm.ll b/test/CodeGen/X86/inline-asm.ll
index c66d7a8..eef6c2f 100644
--- a/test/CodeGen/X86/inline-asm.ll
+++ b/test/CodeGen/X86/inline-asm.ll
@@ -23,3 +23,23 @@ define void @test4() nounwind {
tail call void asm sideeffect "bork $0", "J"(i32 37) nounwind
ret void
}
+
+; rdar://9738585
+define i32 @test5() nounwind {
+entry:
+ %0 = tail call i32 asm "test", "=l,~{dirflag},~{fpsr},~{flags}"() nounwind
+ ret i32 0
+}
+
+; rdar://9777108 PR10352
+define void @test6(i1 zeroext %desired) nounwind {
+entry:
+ tail call void asm sideeffect "foo $0", "q,~{dirflag},~{fpsr},~{flags}"(i1 %desired) nounwind
+ ret void
+}
+
+define void @test7(i1 zeroext %desired, i32* %p) nounwind {
+entry:
+ %0 = tail call i8 asm sideeffect "xchg $0, $1", "=r,*m,0,~{memory},~{dirflag},~{fpsr},~{flags}"(i32* %p, i1 %desired) nounwind
+ ret void
+}
diff --git a/test/CodeGen/X86/isel-sink.ll b/test/CodeGen/X86/isel-sink.ll
index 0f94b23..d275533 100644
--- a/test/CodeGen/X86/isel-sink.ll
+++ b/test/CodeGen/X86/isel-sink.ll
@@ -1,8 +1,14 @@
-; RUN: llc < %s -march=x86 | not grep lea
-; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin8 | \
-; RUN: grep {movl \$4, (.*,.*,4)}
+; RUN: llc < %s -march=x86 | FileCheck %s
define i32 @test(i32* %X, i32 %B) {
+; CHECK: test:
+; CHECK-NOT: ret
+; CHECK-NOT: lea
+; CHECK: mov{{.}} $4, ({{.*}},{{.*}},4)
+; CHECK: ret
+; CHECK: mov{{.}} ({{.*}},{{.*}},4),
+; CHECK: ret
+
; This gep should be sunk out of this block into the load/store users.
%P = getelementptr i32* %X, i32 %B
%G = icmp ult i32 %B, 1234
@@ -14,5 +20,3 @@ F:
%V = load i32* %P
ret i32 %V
}
-
-
diff --git a/test/CodeGen/X86/longlong-deadload.ll b/test/CodeGen/X86/longlong-deadload.ll
index 9a4c8f2..db91961e 100644
--- a/test/CodeGen/X86/longlong-deadload.ll
+++ b/test/CodeGen/X86/longlong-deadload.ll
@@ -1,8 +1,11 @@
-; RUN: llc < %s -march=x86 | not grep '4{(%...)}
+; RUN: llc < %s -march=x86 | FileCheck %s
; This should not load or store the top part of *P.
define void @test(i64* %P) nounwind {
-entry:
+; CHECK: test:
+; CHECK: movl 4(%esp), %[[REGISTER:.*]]
+; CHECK-NOT: 4(%[[REGISTER]])
+; CHECK: ret
%tmp1 = load i64* %P, align 8 ; <i64> [#uses=1]
%tmp2 = xor i64 %tmp1, 1 ; <i64> [#uses=1]
store i64 %tmp2, i64* %P, align 8
diff --git a/test/CodeGen/X86/loop-strength-reduce2.ll b/test/CodeGen/X86/loop-strength-reduce2.ll
index 9b53adb..689ee1c 100644
--- a/test/CodeGen/X86/loop-strength-reduce2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce2.ll
@@ -1,6 +1,7 @@
-; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | grep {\$pb} | grep mov
+; RUN: llc < %s -mtriple=i686-apple-darwin -relocation-model=pic | FileCheck %s
;
; Make sure the PIC label flags2-"L1$pb" is not moved up to the preheader.
+; CHECK: mov{{.}} {{.*}}$pb
@flags2 = internal global [8193 x i8] zeroinitializer, align 32 ; <[8193 x i8]*> [#uses=1]
diff --git a/test/CodeGen/X86/lsr-nonaffine.ll b/test/CodeGen/X86/lsr-nonaffine.ll
index b0d3064..d0d2bbd 100644
--- a/test/CodeGen/X86/lsr-nonaffine.ll
+++ b/test/CodeGen/X86/lsr-nonaffine.ll
@@ -1,23 +1,30 @@
-; RUN: llc -march=x86-64 < %s | FileCheck %s
+; RUN: llc -asm-verbose=false -march=x86-64 -mtriple=x86_64-apple-darwin -o - < %s | FileCheck %s
-; LSR should compute the correct starting values for this loop. Note that
-; it's not necessarily LSR's job to compute loop exit expressions; that's
-; indvars' job.
-; CHECK: movl $12
-; CHECK: movl $42
+; LSR should leave non-affine expressions alone because it currently
+; doesn't know how to do anything with them, and when it tries, it
+; gets SCEVExpander's current expansion for them, which is suboptimal.
-define i32 @real_symmetric_eigen(i32 %n) nounwind {
-while.body127: ; preds = %while.cond122
- br label %while.cond141
+; CHECK: xorl %eax, %eax
+; CHECK-NEXT: align
+; CHECK-NEXT: BB0_1:
+; CHECK-NEXT: movq %rax, (%rdx)
+; CHECK-NEXT: addq %rsi, %rax
+; CHECK-NEXT: cmpq %rdi, %rax
+; CHECK-NEXT: jl
+; CHECK-NEXT: imulq %rax, %rax
+; CHECK-NEXT: ret
+define i64 @foo(i64 %n, i64 %s, i64* %p) nounwind {
+entry:
+ br label %loop
-while.cond141: ; preds = %while.cond141, %while.body127
- %0 = phi i32 [ 7, %while.body127 ], [ %indvar.next67, %while.cond141 ] ; <i32> [#uses=3]
- %indvar.next67 = add i32 %0, 1 ; <i32> [#uses=1]
- %t = icmp slt i32 %indvar.next67, %n
- br i1 %t, label %if.then171, label %while.cond141
+loop:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
+ volatile store i64 %i, i64* %p
+ %i.next = add i64 %i, %s
+ %c = icmp slt i64 %i.next, %n
+ br i1 %c, label %loop, label %exit
-if.then171: ; preds = %while.cond141
- %mul150 = mul i32 %0, %0 ; <i32> [#uses=1]
- %add174 = add i32 %mul150, %0 ; <i32> [#uses=1]
- ret i32 %add174
+exit:
+ %mul = mul i64 %i.next, %i.next
+ ret i64 %mul
}
diff --git a/test/CodeGen/X86/lsr-redundant-addressing.ll b/test/CodeGen/X86/lsr-redundant-addressing.ll
index aaa1426..cb0ac8b 100644
--- a/test/CodeGen/X86/lsr-redundant-addressing.ll
+++ b/test/CodeGen/X86/lsr-redundant-addressing.ll
@@ -1,4 +1,4 @@
-; RUN: llc -march=x86-64 < %s | fgrep {addq $-16,} | count 1
+; RUN: llc -march=x86-64 < %s | FileCheck %s
; rdar://9081094
; LSR shouldn't create lots of redundant address computations.
@@ -10,6 +10,12 @@
@isa = external hidden unnamed_addr constant [13 x %1], align 32
define void @main_bb.i() nounwind {
+; CHECK: main_bb.i:
+; CHECK-NOT: ret
+; CHECK: addq $-16,
+; CHECK-NOT: ret
+; CHECK: ret
+
bb:
br label %bb38
diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll
index 4770519..1f87089 100644
--- a/test/CodeGen/X86/lsr-reuse-trunc.ll
+++ b/test/CodeGen/X86/lsr-reuse-trunc.ll
@@ -5,8 +5,9 @@
; stick with indexing here.
; CHECK: movaps (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
-; CHECK: movaps
-; CHECK: [[X3]], (%{{rdi|rcx}},%rax,4)
+; CHECK: cvtdq2ps
+; CHECK: orps {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]]
+; CHECK: movaps [[X4]], (%{{rdi|rcx}},%rax,4)
; CHECK: addq $4, %rax
; CHECK: cmpl %eax, (%{{rdx|r8}})
; CHECK-NEXT: jg
diff --git a/test/CodeGen/X86/mem-promote-integers.ll b/test/CodeGen/X86/mem-promote-integers.ll
new file mode 100644
index 0000000..80103d1
--- /dev/null
+++ b/test/CodeGen/X86/mem-promote-integers.ll
@@ -0,0 +1,391 @@
+; Test the basic functionality of integer element promotions of different types.
+; This test checks passing of arguments, loading and storing to memory, and

+; basic arithmetic.
+; RUN: llc -march=x86 -promote-elements < %s
+; RUN: llc -march=x86-64 -promote-elements < %s
+
+define <1 x i8> @test_1xi8(<1 x i8> %x, <1 x i8>* %b) {
+ %bb = load <1 x i8>* %b
+ %tt = xor <1 x i8> %x, %bb
+ store <1 x i8> %tt, <1 x i8>* %b
+ br label %next
+
+next:
+ ret <1 x i8> %tt
+}
+
+
+define <1 x i16> @test_1xi16(<1 x i16> %x, <1 x i16>* %b) {
+ %bb = load <1 x i16>* %b
+ %tt = xor <1 x i16> %x, %bb
+ store <1 x i16> %tt, <1 x i16>* %b
+ br label %next
+
+next:
+ ret <1 x i16> %tt
+}
+
+
+define <1 x i32> @test_1xi32(<1 x i32> %x, <1 x i32>* %b) {
+ %bb = load <1 x i32>* %b
+ %tt = xor <1 x i32> %x, %bb
+ store <1 x i32> %tt, <1 x i32>* %b
+ br label %next
+
+next:
+ ret <1 x i32> %tt
+}
+
+
+define <1 x i64> @test_1xi64(<1 x i64> %x, <1 x i64>* %b) {
+ %bb = load <1 x i64>* %b
+ %tt = xor <1 x i64> %x, %bb
+ store <1 x i64> %tt, <1 x i64>* %b
+ br label %next
+
+next:
+ ret <1 x i64> %tt
+}
+
+
+define <1 x i128> @test_1xi128(<1 x i128> %x, <1 x i128>* %b) {
+ %bb = load <1 x i128>* %b
+ %tt = xor <1 x i128> %x, %bb
+ store <1 x i128> %tt, <1 x i128>* %b
+ br label %next
+
+next:
+ ret <1 x i128> %tt
+}
+
+
+define <1 x i256> @test_1xi256(<1 x i256> %x, <1 x i256>* %b) {
+ %bb = load <1 x i256>* %b
+ %tt = xor <1 x i256> %x, %bb
+ store <1 x i256> %tt, <1 x i256>* %b
+ br label %next
+
+next:
+ ret <1 x i256> %tt
+}
+
+
+define <1 x i512> @test_1xi512(<1 x i512> %x, <1 x i512>* %b) {
+ %bb = load <1 x i512>* %b
+ %tt = xor <1 x i512> %x, %bb
+ store <1 x i512> %tt, <1 x i512>* %b
+ br label %next
+
+next:
+ ret <1 x i512> %tt
+}
+
+
+define <2 x i8> @test_2xi8(<2 x i8> %x, <2 x i8>* %b) {
+ %bb = load <2 x i8>* %b
+ %tt = xor <2 x i8> %x, %bb
+ store <2 x i8> %tt, <2 x i8>* %b
+ br label %next
+
+next:
+ ret <2 x i8> %tt
+}
+
+
+define <2 x i16> @test_2xi16(<2 x i16> %x, <2 x i16>* %b) {
+ %bb = load <2 x i16>* %b
+ %tt = xor <2 x i16> %x, %bb
+ store <2 x i16> %tt, <2 x i16>* %b
+ br label %next
+
+next:
+ ret <2 x i16> %tt
+}
+
+
+define <2 x i32> @test_2xi32(<2 x i32> %x, <2 x i32>* %b) {
+ %bb = load <2 x i32>* %b
+ %tt = xor <2 x i32> %x, %bb
+ store <2 x i32> %tt, <2 x i32>* %b
+ br label %next
+
+next:
+ ret <2 x i32> %tt
+}
+
+
+define <2 x i64> @test_2xi64(<2 x i64> %x, <2 x i64>* %b) {
+ %bb = load <2 x i64>* %b
+ %tt = xor <2 x i64> %x, %bb
+ store <2 x i64> %tt, <2 x i64>* %b
+ br label %next
+
+next:
+ ret <2 x i64> %tt
+}
+
+
+define <2 x i128> @test_2xi128(<2 x i128> %x, <2 x i128>* %b) {
+ %bb = load <2 x i128>* %b
+ %tt = xor <2 x i128> %x, %bb
+ store <2 x i128> %tt, <2 x i128>* %b
+ br label %next
+
+next:
+ ret <2 x i128> %tt
+}
+
+
+define <2 x i256> @test_2xi256(<2 x i256> %x, <2 x i256>* %b) {
+ %bb = load <2 x i256>* %b
+ %tt = xor <2 x i256> %x, %bb
+ store <2 x i256> %tt, <2 x i256>* %b
+ br label %next
+
+next:
+ ret <2 x i256> %tt
+}
+
+
+define <2 x i512> @test_2xi512(<2 x i512> %x, <2 x i512>* %b) {
+ %bb = load <2 x i512>* %b
+ %tt = xor <2 x i512> %x, %bb
+ store <2 x i512> %tt, <2 x i512>* %b
+ br label %next
+
+next:
+ ret <2 x i512> %tt
+}
+
+
+define <3 x i8> @test_3xi8(<3 x i8> %x, <3 x i8>* %b) {
+ %bb = load <3 x i8>* %b
+ %tt = xor <3 x i8> %x, %bb
+ store <3 x i8> %tt, <3 x i8>* %b
+ br label %next
+
+next:
+ ret <3 x i8> %tt
+}
+
+
+define <3 x i16> @test_3xi16(<3 x i16> %x, <3 x i16>* %b) {
+ %bb = load <3 x i16>* %b
+ %tt = xor <3 x i16> %x, %bb
+ store <3 x i16> %tt, <3 x i16>* %b
+ br label %next
+
+next:
+ ret <3 x i16> %tt
+}
+
+
+define <3 x i32> @test_3xi32(<3 x i32> %x, <3 x i32>* %b) {
+ %bb = load <3 x i32>* %b
+ %tt = xor <3 x i32> %x, %bb
+ store <3 x i32> %tt, <3 x i32>* %b
+ br label %next
+
+next:
+ ret <3 x i32> %tt
+}
+
+
+define <3 x i64> @test_3xi64(<3 x i64> %x, <3 x i64>* %b) {
+ %bb = load <3 x i64>* %b
+ %tt = xor <3 x i64> %x, %bb
+ store <3 x i64> %tt, <3 x i64>* %b
+ br label %next
+
+next:
+ ret <3 x i64> %tt
+}
+
+
+define <3 x i128> @test_3xi128(<3 x i128> %x, <3 x i128>* %b) {
+ %bb = load <3 x i128>* %b
+ %tt = xor <3 x i128> %x, %bb
+ store <3 x i128> %tt, <3 x i128>* %b
+ br label %next
+
+next:
+ ret <3 x i128> %tt
+}
+
+
+define <3 x i256> @test_3xi256(<3 x i256> %x, <3 x i256>* %b) {
+ %bb = load <3 x i256>* %b
+ %tt = xor <3 x i256> %x, %bb
+ store <3 x i256> %tt, <3 x i256>* %b
+ br label %next
+
+next:
+ ret <3 x i256> %tt
+}
+
+
+define <3 x i512> @test_3xi512(<3 x i512> %x, <3 x i512>* %b) {
+ %bb = load <3 x i512>* %b
+ %tt = xor <3 x i512> %x, %bb
+ store <3 x i512> %tt, <3 x i512>* %b
+ br label %next
+
+next:
+ ret <3 x i512> %tt
+}
+
+
+define <4 x i8> @test_4xi8(<4 x i8> %x, <4 x i8>* %b) {
+ %bb = load <4 x i8>* %b
+ %tt = xor <4 x i8> %x, %bb
+ store <4 x i8> %tt, <4 x i8>* %b
+ br label %next
+
+next:
+ ret <4 x i8> %tt
+}
+
+
+define <4 x i16> @test_4xi16(<4 x i16> %x, <4 x i16>* %b) {
+ %bb = load <4 x i16>* %b
+ %tt = xor <4 x i16> %x, %bb
+ store <4 x i16> %tt, <4 x i16>* %b
+ br label %next
+
+next:
+ ret <4 x i16> %tt
+}
+
+
+define <4 x i32> @test_4xi32(<4 x i32> %x, <4 x i32>* %b) {
+ %bb = load <4 x i32>* %b
+ %tt = xor <4 x i32> %x, %bb
+ store <4 x i32> %tt, <4 x i32>* %b
+ br label %next
+
+next:
+ ret <4 x i32> %tt
+}
+
+
+define <4 x i64> @test_4xi64(<4 x i64> %x, <4 x i64>* %b) {
+ %bb = load <4 x i64>* %b
+ %tt = xor <4 x i64> %x, %bb
+ store <4 x i64> %tt, <4 x i64>* %b
+ br label %next
+
+next:
+ ret <4 x i64> %tt
+}
+
+
+define <4 x i128> @test_4xi128(<4 x i128> %x, <4 x i128>* %b) {
+ %bb = load <4 x i128>* %b
+ %tt = xor <4 x i128> %x, %bb
+ store <4 x i128> %tt, <4 x i128>* %b
+ br label %next
+
+next:
+ ret <4 x i128> %tt
+}
+
+
+define <4 x i256> @test_4xi256(<4 x i256> %x, <4 x i256>* %b) {
+ %bb = load <4 x i256>* %b
+ %tt = xor <4 x i256> %x, %bb
+ store <4 x i256> %tt, <4 x i256>* %b
+ br label %next
+
+next:
+ ret <4 x i256> %tt
+}
+
+
+define <4 x i512> @test_4xi512(<4 x i512> %x, <4 x i512>* %b) {
+ %bb = load <4 x i512>* %b
+ %tt = xor <4 x i512> %x, %bb
+ store <4 x i512> %tt, <4 x i512>* %b
+ br label %next
+
+next:
+ ret <4 x i512> %tt
+}
+
+
+define <5 x i8> @test_5xi8(<5 x i8> %x, <5 x i8>* %b) {
+ %bb = load <5 x i8>* %b
+ %tt = xor <5 x i8> %x, %bb
+ store <5 x i8> %tt, <5 x i8>* %b
+ br label %next
+
+next:
+ ret <5 x i8> %tt
+}
+
+
+define <5 x i16> @test_5xi16(<5 x i16> %x, <5 x i16>* %b) {
+ %bb = load <5 x i16>* %b
+ %tt = xor <5 x i16> %x, %bb
+ store <5 x i16> %tt, <5 x i16>* %b
+ br label %next
+
+next:
+ ret <5 x i16> %tt
+}
+
+
+define <5 x i32> @test_5xi32(<5 x i32> %x, <5 x i32>* %b) {
+ %bb = load <5 x i32>* %b
+ %tt = xor <5 x i32> %x, %bb
+ store <5 x i32> %tt, <5 x i32>* %b
+ br label %next
+
+next:
+ ret <5 x i32> %tt
+}
+
+
+define <5 x i64> @test_5xi64(<5 x i64> %x, <5 x i64>* %b) {
+ %bb = load <5 x i64>* %b
+ %tt = xor <5 x i64> %x, %bb
+ store <5 x i64> %tt, <5 x i64>* %b
+ br label %next
+
+next:
+ ret <5 x i64> %tt
+}
+
+
+define <5 x i128> @test_5xi128(<5 x i128> %x, <5 x i128>* %b) {
+ %bb = load <5 x i128>* %b
+ %tt = xor <5 x i128> %x, %bb
+ store <5 x i128> %tt, <5 x i128>* %b
+ br label %next
+
+next:
+ ret <5 x i128> %tt
+}
+
+
+define <5 x i256> @test_5xi256(<5 x i256> %x, <5 x i256>* %b) {
+ %bb = load <5 x i256>* %b
+ %tt = xor <5 x i256> %x, %bb
+ store <5 x i256> %tt, <5 x i256>* %b
+ br label %next
+
+next:
+ ret <5 x i256> %tt
+}
+
+
+define <5 x i512> @test_5xi512(<5 x i512> %x, <5 x i512>* %b) {
+ %bb = load <5 x i512>* %b
+ %tt = xor <5 x i512> %x, %bb
+ store <5 x i512> %tt, <5 x i512>* %b
+ br label %next
+
+next:
+ ret <5 x i512> %tt
+}
+
+
diff --git a/test/CodeGen/X86/membarrier.ll b/test/CodeGen/X86/membarrier.ll
new file mode 100644
index 0000000..42f8ef5
--- /dev/null
+++ b/test/CodeGen/X86/membarrier.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 -mattr=-sse -O0
+; PR9675
+
+define i32 @t() {
+entry:
+ %i = alloca i32, align 4
+ store i32 1, i32* %i, align 4
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+ %0 = call i32 @llvm.atomic.load.sub.i32.p0i32(i32* %i, i32 1)
+ call void @llvm.memory.barrier(i1 true, i1 true, i1 true, i1 true, i1 true)
+ ret i32 0
+}
+
+declare i32 @llvm.atomic.load.sub.i32.p0i32(i32* nocapture, i32) nounwind
+declare void @llvm.memory.barrier(i1, i1, i1, i1, i1) nounwind
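The @llvm.memory.barrier and @llvm.atomic.load.sub intrinsics above are the legacy atomics interface. As a hedged sketch only, the same sequence written against the instruction-based atomics that superseded them (assuming a full, sequentially consistent barrier is what the five all-true flags request) would look like:

      ; atomicrmw with seq_cst ordering subsumes both surrounding barriers
      %old = atomicrmw sub i32* %i, i32 1 seq_cst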
diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll
index 17cd8e8..eae2e70 100644
--- a/test/CodeGen/X86/memcpy-2.ll
+++ b/test/CodeGen/X86/memcpy-2.ll
@@ -38,7 +38,7 @@ entry:
; X86-64: movq $0
%tmp1 = alloca [25 x i8]
%tmp2 = bitcast [25 x i8]* %tmp1 to i8*
- call void @llvm.memcpy.i32( i8* %tmp2, i8* getelementptr ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1 ) nounwind
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([25 x i8]* @.str, i32 0, i32 0), i32 25, i32 1, i1 false)
unreachable
}
@@ -72,7 +72,7 @@ entry:
; X86-64: movaps %xmm0, (%rdi)
%tmp2 = bitcast %struct.s0* %a to i8* ; <i8*> [#uses=1]
%tmp3 = bitcast %struct.s0* %b to i8* ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16)
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
ret void
}
@@ -115,7 +115,7 @@ entry:
; X86-64: movq %rax, (%rdi)
%tmp2 = bitcast %struct.s0* %a to i8* ; <i8*> [#uses=1]
%tmp3 = bitcast %struct.s0* %b to i8* ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8)
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8, i1 false)
ret void
}
@@ -160,8 +160,8 @@ entry:
; X86-64: movl $2021161080
%tmp1 = alloca [30 x i8]
%tmp2 = bitcast [30 x i8]* %tmp1 to i8*
- call void @llvm.memcpy.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* getelementptr inbounds ([30 x i8]* @.str2, i32 0, i32 0), i32 30, i32 1, i1 false)
unreachable
}
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
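The memcpy hunks above, and the memset hunks below, apply one pattern: the fixed-signature @llvm.memcpy.i32 / @llvm.memset.i32 intrinsics give way to the overloaded, pointer-type-parameterized forms, which add a trailing i1 isvolatile operand. Call-site sketch with placeholder operands:

    ; before:
    ;   call void @llvm.memcpy.i32(i8* %dst, i8* %src, i32 %len, i32 1)
    ; after (i1 false = not volatile):
      call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 %len, i32 1, i1 false)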
diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll
index 72342cb..f43b0bf 100644
--- a/test/CodeGen/X86/memcpy.ll
+++ b/test/CodeGen/X86/memcpy.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s -check-prefix=LINUX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=DARWIN
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2 | FileCheck %s -check-prefix=LINUX
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=DARWIN
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memmove-4.ll b/test/CodeGen/X86/memmove-4.ll
deleted file mode 100644
index 027db1f..0000000
--- a/test/CodeGen/X86/memmove-4.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s | not grep call
-
-target triple = "i686-pc-linux-gnu"
-
-define void @a(i8* %a, i8* %b) nounwind {
- %tmp2 = bitcast i8* %a to i8*
- %tmp3 = bitcast i8* %b to i8*
- tail call void @llvm.memmove.i32( i8* %tmp2, i8* %tmp3, i32 12, i32 4 )
- ret void
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll
index 993583b..b2bd72b 100644
--- a/test/CodeGen/X86/memset-2.ll
+++ b/test/CodeGen/X86/memset-2.ll
@@ -6,7 +6,7 @@ define fastcc void @t1() nounwind {
entry:
; CHECK: t1:
; CHECK: calll _memset
- call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind
+ call void @llvm.memset.p0i8.i32(i8* null, i8 0, i32 188, i32 1, i1 false)
unreachable
}
@@ -14,7 +14,7 @@ define fastcc void @t2(i8 signext %c) nounwind {
entry:
; CHECK: t2:
; CHECK: calll _memset
- call void @llvm.memset.i32( i8* undef, i8 %c, i32 76, i32 1 ) nounwind
+ call void @llvm.memset.p0i8.i32(i8* undef, i8 %c, i32 76, i32 1, i1 false)
unreachable
}
diff --git a/test/CodeGen/X86/memset-3.ll b/test/CodeGen/X86/memset-3.ll
index 9b20ad5..29febfa 100644
--- a/test/CodeGen/X86/memset-3.ll
+++ b/test/CodeGen/X86/memset-3.ll
@@ -5,8 +5,8 @@ define void @t() nounwind ssp {
entry:
%buf = alloca [512 x i8], align 1
%ptr = getelementptr inbounds [512 x i8]* %buf, i32 0, i32 0
- call void @llvm.memset.i32(i8* %ptr, i8 undef, i32 512, i32 1)
+ call void @llvm.memset.p0i8.i32(i8* %ptr, i8 undef, i32 512, i32 1, i1 false)
unreachable
}
-declare void @llvm.memset.i32(i8* nocapture, i8, i32, i32) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memset.ll b/test/CodeGen/X86/memset.ll
index cf7464d0..72b3e0f 100644
--- a/test/CodeGen/X86/memset.ll
+++ b/test/CodeGen/X86/memset.ll
@@ -8,11 +8,11 @@ entry:
%up_mvd = alloca [8 x %struct.x] ; <[8 x %struct.x]*> [#uses=2]
%up_mvd116 = getelementptr [8 x %struct.x]* %up_mvd, i32 0, i32 0 ; <%struct.x*> [#uses=1]
%tmp110117 = bitcast [8 x %struct.x]* %up_mvd to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.i64( i8* %tmp110117, i8 0, i64 32, i32 8 )
+ call void @llvm.memset.p0i8.i64(i8* %tmp110117, i8 0, i64 32, i32 8, i1 false)
call void @foo( %struct.x* %up_mvd116 ) nounwind
ret void
}
declare void @foo(%struct.x*)
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll
index 3f069b4..e20fce1 100644
--- a/test/CodeGen/X86/memset64-on-x86-32.ll
+++ b/test/CodeGen/X86/memset64-on-x86-32.ll
@@ -4,9 +4,8 @@
define void @bork() nounwind {
entry:
- call void @llvm.memset.i64( i8* null, i8 0, i64 80, i32 4 )
- ret void
+ call void @llvm.memset.p0i8.i64(i8* null, i8 0, i64 80, i32 4, i1 false)
+ ret void
}
-declare void @llvm.memset.i64(i8*, i8, i64, i32) nounwind
-
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/mmx-shuffle.ll b/test/CodeGen/X86/mmx-shuffle.ll
index 9f7501e..869f32b 100644
--- a/test/CodeGen/X86/mmx-shuffle.ll
+++ b/test/CodeGen/X86/mmx-shuffle.ll
@@ -5,12 +5,12 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
target triple = "i686-pc-linux-gnu"
%struct.DrawHelper = type { void (i32, %struct.QT_FT_Span*, i8*)*, void (i32, %struct.QT_FT_Span*, i8*)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i8*, i32, i32, i32)*, void (%struct.QRasterBuffer*, i32, i32, i32, i32, i32)* }
%struct.QBasicAtomic = type { i32 }
- %struct.QClipData = type { i32, "struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 }
- "struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* }
+ %struct.QClipData = type { i32, %"struct.QClipData::ClipLine"*, i32, i32, %struct.QT_FT_Span*, i32, i32, i32, i32 }
+ %"struct.QClipData::ClipLine" = type { i32, %struct.QT_FT_Span* }
%struct.QRasterBuffer = type { %struct.QRect, %struct.QRegion, %struct.QClipData*, %struct.QClipData*, i8, i32, i32, %struct.DrawHelper*, i32, i32, i32, i8* }
%struct.QRect = type { i32, i32, i32, i32 }
- %struct.QRegion = type { "struct.QRegion::QRegionData"* }
- "struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* }
+ %struct.QRegion = type { %"struct.QRegion::QRegionData"* }
+ %"struct.QRegion::QRegionData" = type { %struct.QBasicAtomic, %struct._XRegion*, i8*, %struct.QRegionPrivate* }
%struct.QRegionPrivate = type opaque
%struct.QT_FT_Span = type { i16, i16, i16, i8 }
%struct._XRegion = type opaque
diff --git a/test/CodeGen/X86/muloti.ll b/test/CodeGen/X86/muloti.ll
new file mode 100644
index 0000000..2f0986e
--- /dev/null
+++ b/test/CodeGen/X86/muloti.ll
@@ -0,0 +1,81 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+%0 = type { i64, i64 }
+%1 = type { i128, i1 }
+
+define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+; CHECK: x
+entry:
+ %tmp16 = zext i64 %a.coerce0 to i128
+ %tmp11 = zext i64 %a.coerce1 to i128
+ %tmp12 = shl nuw i128 %tmp11, 64
+ %ins14 = or i128 %tmp12, %tmp16
+ %tmp6 = zext i64 %b.coerce0 to i128
+ %tmp3 = zext i64 %b.coerce1 to i128
+ %tmp4 = shl nuw i128 %tmp3, 64
+ %ins = or i128 %tmp4, %tmp6
+ %0 = tail call %1 @llvm.smul.with.overflow.i128(i128 %ins14, i128 %ins)
+; CHECK: callq ___muloti4
+ %1 = extractvalue %1 %0, 0
+ %2 = extractvalue %1 %0, 1
+ br i1 %2, label %overflow, label %nooverflow
+
+overflow: ; preds = %entry
+ tail call void @llvm.trap()
+ unreachable
+
+nooverflow: ; preds = %entry
+ %tmp20 = trunc i128 %1 to i64
+ %tmp21 = insertvalue %0 undef, i64 %tmp20, 0
+ %tmp22 = lshr i128 %1, 64
+ %tmp23 = trunc i128 %tmp22 to i64
+ %tmp24 = insertvalue %0 %tmp21, i64 %tmp23, 1
+ ret %0 %tmp24
+}
+
+define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+entry:
+; CHECK: foo
+ %retval = alloca i128, align 16
+ %coerce = alloca i128, align 16
+ %a.addr = alloca i128, align 16
+ %coerce1 = alloca i128, align 16
+ %b.addr = alloca i128, align 16
+ %0 = bitcast i128* %coerce to %0*
+ %1 = getelementptr %0* %0, i32 0, i32 0
+ store i64 %a.coerce0, i64* %1
+ %2 = getelementptr %0* %0, i32 0, i32 1
+ store i64 %a.coerce1, i64* %2
+ %a = load i128* %coerce, align 16
+ store i128 %a, i128* %a.addr, align 16
+ %3 = bitcast i128* %coerce1 to %0*
+ %4 = getelementptr %0* %3, i32 0, i32 0
+ store i64 %b.coerce0, i64* %4
+ %5 = getelementptr %0* %3, i32 0, i32 1
+ store i64 %b.coerce1, i64* %5
+ %b = load i128* %coerce1, align 16
+ store i128 %b, i128* %b.addr, align 16
+ %tmp = load i128* %a.addr, align 16
+ %tmp2 = load i128* %b.addr, align 16
+ %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2)
+; CHECK: cmov
+; CHECK: divti3
+ %7 = extractvalue %1 %6, 0
+ %8 = extractvalue %1 %6, 1
+ br i1 %8, label %overflow, label %nooverflow
+
+overflow: ; preds = %entry
+ call void @llvm.trap()
+ unreachable
+
+nooverflow: ; preds = %entry
+ store i128 %7, i128* %retval
+ %9 = bitcast i128* %retval to %0*
+ %10 = load %0* %9, align 1
+ ret %0 %10
+}
+
+declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone
+
+declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
+
+declare void @llvm.trap() nounwind
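For reference, the *.with.overflow intrinsics that muloti.ll exercises return a {value, overflow} pair; a minimal sketch at i32 width (the names here are illustrative, not from the test):

    declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)

      %r   = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
      %val = extractvalue {i32, i1} %r, 0   ; low bits of the product
      %ovf = extractvalue {i32, i1} %r, 1   ; set if the product overflowed
      br i1 %ovf, label %overflow, label %ok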
diff --git a/test/CodeGen/X86/multiple-return-values-cross-block.ll b/test/CodeGen/X86/multiple-return-values-cross-block.ll
index e9837d0..b0cb061 100644
--- a/test/CodeGen/X86/multiple-return-values-cross-block.ll
+++ b/test/CodeGen/X86/multiple-return-values-cross-block.ll
@@ -4,12 +4,12 @@ declare {x86_fp80, x86_fp80} @test()
define void @call2(x86_fp80 *%P1, x86_fp80 *%P2) {
%a = call {x86_fp80,x86_fp80} @test()
- %b = getresult {x86_fp80,x86_fp80} %a, 1
+ %b = extractvalue {x86_fp80,x86_fp80} %a, 1
store x86_fp80 %b, x86_fp80* %P1
br label %L
L:
- %c = getresult {x86_fp80,x86_fp80} %a, 0
+ %c = extractvalue {x86_fp80,x86_fp80} %a, 0
store x86_fp80 %c, x86_fp80* %P2
ret void
}
diff --git a/test/CodeGen/X86/multiple-return-values.ll b/test/CodeGen/X86/multiple-return-values.ll
deleted file mode 100644
index 018d997..0000000
--- a/test/CodeGen/X86/multiple-return-values.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc < %s -march=x86
-
-define {i64, float} @bar(i64 %a, float %b) {
- %y = add i64 %a, 7
- %z = fadd float %b, 7.0
- ret i64 %y, float %z
-}
-
-define i64 @foo() {
- %M = call {i64, float} @bar(i64 21, float 21.0)
- %N = getresult {i64, float} %M, 0
- %O = getresult {i64, float} %M, 1
- %P = fptosi float %O to i64
- %Q = add i64 %P, %N
- ret i64 %Q
-}
diff --git a/test/CodeGen/X86/non-lazy-bind.ll b/test/CodeGen/X86/non-lazy-bind.ll
new file mode 100644
index 0000000..f729658
--- /dev/null
+++ b/test/CodeGen/X86/non-lazy-bind.ll
@@ -0,0 +1,27 @@
+; RUN: llc -mtriple=x86_64-apple-darwin < %s | FileCheck %s
+
+declare void @lazy() nonlazybind
+declare void @not()
+
+; CHECK: foo:
+; CHECK: callq _not
+; CHECK: callq *_lazy@GOTPCREL(%rip)
+define void @foo() nounwind {
+ call void @not()
+ call void @lazy()
+ ret void
+}
+
+; CHECK: tail_call_regular:
+; CHECK: jmp _not
+define void @tail_call_regular() nounwind {
+ tail call void @not()
+ ret void
+}
+
+; CHECK: tail_call_eager:
+; CHECK: jmpq *_lazy@GOTPCREL(%rip)
+define void @tail_call_eager() nounwind {
+ tail call void @lazy()
+ ret void
+}
diff --git a/test/CodeGen/X86/opt-ext-uses.ll b/test/CodeGen/X86/opt-ext-uses.ll
index fa2aef5..72fb38b 100644
--- a/test/CodeGen/X86/opt-ext-uses.ll
+++ b/test/CodeGen/X86/opt-ext-uses.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=x86 | grep movw | count 1
-define i16 @t() signext {
+define signext i16 @t() {
entry:
%tmp180 = load i16* null, align 2 ; <i16> [#uses=3]
%tmp180181 = sext i16 %tmp180 to i32 ; <i32> [#uses=1]
diff --git a/test/CodeGen/X86/optimize-max-0.ll b/test/CodeGen/X86/optimize-max-0.ll
index 162c7a5..981a16a 100644
--- a/test/CodeGen/X86/optimize-max-0.ll
+++ b/test/CodeGen/X86/optimize-max-0.ll
@@ -8,454 +8,454 @@ target triple = "i386-apple-darwin9"
define void @foo(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
entry:
- %0 = mul i32 %x, %w ; <i32> [#uses=2]
- %1 = mul i32 %x, %w ; <i32> [#uses=1]
- %2 = sdiv i32 %1, 4 ; <i32> [#uses=1]
- %.sum2 = add i32 %2, %0 ; <i32> [#uses=2]
- %cond = icmp eq i32 %d, 1 ; <i1> [#uses=1]
- br i1 %cond, label %bb29, label %bb10.preheader
-
-bb10.preheader: ; preds = %entry
- %3 = icmp sgt i32 %x, 0 ; <i1> [#uses=1]
- br i1 %3, label %bb.nph9, label %bb18.loopexit
-
-bb.nph7: ; preds = %bb7.preheader
- %4 = mul i32 %y.08, %w ; <i32> [#uses=1]
- %5 = mul i32 %y.08, %s ; <i32> [#uses=1]
- %6 = add i32 %5, 1 ; <i32> [#uses=1]
- %tmp8 = icmp sgt i32 1, %w ; <i1> [#uses=1]
- %smax9 = select i1 %tmp8, i32 1, i32 %w ; <i32> [#uses=1]
- br label %bb6
-
-bb6: ; preds = %bb7, %bb.nph7
- %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ] ; <i32> [#uses=3]
- %7 = add i32 %x.06, %4 ; <i32> [#uses=1]
- %8 = shl i32 %x.06, 1 ; <i32> [#uses=1]
- %9 = add i32 %6, %8 ; <i32> [#uses=1]
- %10 = getelementptr i8* %r, i32 %9 ; <i8*> [#uses=1]
- %11 = load i8* %10, align 1 ; <i8> [#uses=1]
- %12 = getelementptr i8* %j, i32 %7 ; <i8*> [#uses=1]
- store i8 %11, i8* %12, align 1
- br label %bb7
-
-bb7: ; preds = %bb6
- %indvar.next7 = add i32 %x.06, 1 ; <i32> [#uses=2]
- %exitcond10 = icmp ne i32 %indvar.next7, %smax9 ; <i1> [#uses=1]
- br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
-
-bb7.bb9_crit_edge: ; preds = %bb7
- br label %bb9
-
-bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge
- br label %bb10
-
-bb10: ; preds = %bb9
- %indvar.next11 = add i32 %y.08, 1 ; <i32> [#uses=2]
- %exitcond12 = icmp ne i32 %indvar.next11, %x ; <i1> [#uses=1]
- br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
-
-bb10.bb18.loopexit_crit_edge: ; preds = %bb10
- br label %bb10.bb18.loopexit_crit_edge.split
-
-bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
- br label %bb18.loopexit
-
-bb.nph9: ; preds = %bb10.preheader
- %13 = icmp sgt i32 %w, 0 ; <i1> [#uses=1]
- br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
-
-bb.nph9.split: ; preds = %bb.nph9
- br label %bb7.preheader
-
-bb7.preheader: ; preds = %bb.nph9.split, %bb10
- %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ] ; <i32> [#uses=3]
- br i1 true, label %bb.nph7, label %bb9
-
-bb.nph5: ; preds = %bb18.loopexit
- %14 = sdiv i32 %w, 2 ; <i32> [#uses=1]
- %15 = icmp slt i32 %w, 2 ; <i1> [#uses=1]
- %16 = sdiv i32 %x, 2 ; <i32> [#uses=2]
- br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
-
-bb.nph5.split: ; preds = %bb.nph5
- %tmp2 = icmp sgt i32 1, %16 ; <i1> [#uses=1]
- %smax3 = select i1 %tmp2, i32 1, i32 %16 ; <i32> [#uses=1]
- br label %bb13
-
-bb13: ; preds = %bb18, %bb.nph5.split
- %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ] ; <i32> [#uses=4]
- %17 = mul i32 %14, %y.14 ; <i32> [#uses=2]
- %18 = shl i32 %y.14, 1 ; <i32> [#uses=1]
- %19 = srem i32 %y.14, 2 ; <i32> [#uses=1]
- %20 = add i32 %19, %18 ; <i32> [#uses=1]
- %21 = mul i32 %20, %s ; <i32> [#uses=2]
- br i1 true, label %bb.nph3, label %bb17
-
-bb.nph3: ; preds = %bb13
- %22 = add i32 %17, %0 ; <i32> [#uses=1]
- %23 = add i32 %17, %.sum2 ; <i32> [#uses=1]
- %24 = sdiv i32 %w, 2 ; <i32> [#uses=2]
- %tmp = icmp sgt i32 1, %24 ; <i1> [#uses=1]
- %smax = select i1 %tmp, i32 1, i32 %24 ; <i32> [#uses=1]
- br label %bb14
-
-bb14: ; preds = %bb15, %bb.nph3
- %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ] ; <i32> [#uses=5]
- %25 = shl i32 %x.12, 2 ; <i32> [#uses=1]
- %26 = add i32 %25, %21 ; <i32> [#uses=1]
- %27 = getelementptr i8* %r, i32 %26 ; <i8*> [#uses=1]
- %28 = load i8* %27, align 1 ; <i8> [#uses=1]
- %.sum = add i32 %22, %x.12 ; <i32> [#uses=1]
- %29 = getelementptr i8* %j, i32 %.sum ; <i8*> [#uses=1]
- store i8 %28, i8* %29, align 1
- %30 = shl i32 %x.12, 2 ; <i32> [#uses=1]
- %31 = or i32 %30, 2 ; <i32> [#uses=1]
- %32 = add i32 %31, %21 ; <i32> [#uses=1]
- %33 = getelementptr i8* %r, i32 %32 ; <i8*> [#uses=1]
- %34 = load i8* %33, align 1 ; <i8> [#uses=1]
- %.sum6 = add i32 %23, %x.12 ; <i32> [#uses=1]
- %35 = getelementptr i8* %j, i32 %.sum6 ; <i8*> [#uses=1]
- store i8 %34, i8* %35, align 1
- br label %bb15
-
-bb15: ; preds = %bb14
- %indvar.next = add i32 %x.12, 1 ; <i32> [#uses=2]
- %exitcond = icmp ne i32 %indvar.next, %smax ; <i1> [#uses=1]
- br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
-
-bb15.bb17_crit_edge: ; preds = %bb15
- br label %bb17
-
-bb17: ; preds = %bb15.bb17_crit_edge, %bb13
- br label %bb18
-
-bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
- %36 = icmp slt i32 %x, 2 ; <i1> [#uses=1]
- br i1 %36, label %bb20, label %bb.nph5
-
-bb18: ; preds = %bb17
- %indvar.next1 = add i32 %y.14, 1 ; <i32> [#uses=2]
- %exitcond4 = icmp ne i32 %indvar.next1, %smax3 ; <i1> [#uses=1]
- br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
-
-bb18.bb20_crit_edge: ; preds = %bb18
- br label %bb18.bb20_crit_edge.split
-
-bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5
- br label %bb20
-
-bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
- switch i32 %d, label %return [
- i32 3, label %bb22
- i32 1, label %bb29
- ]
-
-bb22: ; preds = %bb20
- %37 = mul i32 %x, %w ; <i32> [#uses=1]
- %38 = sdiv i32 %37, 4 ; <i32> [#uses=1]
- %.sum3 = add i32 %38, %.sum2 ; <i32> [#uses=2]
- %39 = add i32 %x, 15 ; <i32> [#uses=1]
- %40 = and i32 %39, -16 ; <i32> [#uses=1]
- %41 = add i32 %w, 15 ; <i32> [#uses=1]
- %42 = and i32 %41, -16 ; <i32> [#uses=1]
- %43 = mul i32 %40, %s ; <i32> [#uses=1]
- %44 = icmp sgt i32 %x, 0 ; <i1> [#uses=1]
- br i1 %44, label %bb.nph, label %bb26
-
-bb.nph: ; preds = %bb22
- br label %bb23
-
-bb23: ; preds = %bb24, %bb.nph
- %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ] ; <i32> [#uses=3]
- %45 = mul i32 %y.21, %42 ; <i32> [#uses=1]
- %.sum1 = add i32 %45, %43 ; <i32> [#uses=1]
- %46 = getelementptr i8* %r, i32 %.sum1 ; <i8*> [#uses=1]
- %47 = mul i32 %y.21, %w ; <i32> [#uses=1]
- %.sum5 = add i32 %47, %.sum3 ; <i32> [#uses=1]
- %48 = getelementptr i8* %j, i32 %.sum5 ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
- br label %bb24
-
-bb24: ; preds = %bb23
- %indvar.next5 = add i32 %y.21, 1 ; <i32> [#uses=2]
- %exitcond6 = icmp ne i32 %indvar.next5, %x ; <i1> [#uses=1]
- br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
-
-bb24.bb26_crit_edge: ; preds = %bb24
- br label %bb26
-
-bb26: ; preds = %bb24.bb26_crit_edge, %bb22
- %49 = mul i32 %x, %w ; <i32> [#uses=1]
- %.sum4 = add i32 %.sum3, %49 ; <i32> [#uses=1]
- %50 = getelementptr i8* %j, i32 %.sum4 ; <i8*> [#uses=1]
- %51 = mul i32 %x, %w ; <i32> [#uses=1]
- %52 = sdiv i32 %51, 2 ; <i32> [#uses=1]
- tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
- ret void
-
-bb29: ; preds = %bb20, %entry
- %53 = add i32 %w, 15 ; <i32> [#uses=1]
- %54 = and i32 %53, -16 ; <i32> [#uses=1]
- %55 = icmp sgt i32 %x, 0 ; <i1> [#uses=1]
- br i1 %55, label %bb.nph11, label %bb33
-
-bb.nph11: ; preds = %bb29
- br label %bb30
-
-bb30: ; preds = %bb31, %bb.nph11
- %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ] ; <i32> [#uses=3]
- %56 = mul i32 %y.310, %54 ; <i32> [#uses=1]
- %57 = getelementptr i8* %r, i32 %56 ; <i8*> [#uses=1]
- %58 = mul i32 %y.310, %w ; <i32> [#uses=1]
- %59 = getelementptr i8* %j, i32 %58 ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
- br label %bb31
-
-bb31: ; preds = %bb30
- %indvar.next13 = add i32 %y.310, 1 ; <i32> [#uses=2]
- %exitcond14 = icmp ne i32 %indvar.next13, %x ; <i1> [#uses=1]
- br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
-
-bb31.bb33_crit_edge: ; preds = %bb31
- br label %bb33
-
-bb33: ; preds = %bb31.bb33_crit_edge, %bb29
- %60 = mul i32 %x, %w ; <i32> [#uses=1]
- %61 = getelementptr i8* %j, i32 %60 ; <i8*> [#uses=1]
- %62 = mul i32 %x, %w ; <i32> [#uses=1]
- %63 = sdiv i32 %62, 2 ; <i32> [#uses=1]
- tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
- ret void
-
-return: ; preds = %bb20
- ret void
+ %0 = mul i32 %x, %w
+ %1 = mul i32 %x, %w
+ %2 = sdiv i32 %1, 4
+ %.sum2 = add i32 %2, %0
+ %cond = icmp eq i32 %d, 1
+ br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader: ; preds = %entry
+ %3 = icmp sgt i32 %x, 0
+ br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7: ; preds = %bb7.preheader
+ %4 = mul i32 %y.08, %w
+ %5 = mul i32 %y.08, %s
+ %6 = add i32 %5, 1
+ %tmp8 = icmp sgt i32 1, %w
+ %smax9 = select i1 %tmp8, i32 1, i32 %w
+ br label %bb6
+
+bb6: ; preds = %bb7, %bb.nph7
+ %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
+ %7 = add i32 %x.06, %4
+ %8 = shl i32 %x.06, 1
+ %9 = add i32 %6, %8
+ %10 = getelementptr i8* %r, i32 %9
+ %11 = load i8* %10, align 1
+ %12 = getelementptr i8* %j, i32 %7
+ store i8 %11, i8* %12, align 1
+ br label %bb7
+
+bb7: ; preds = %bb6
+ %indvar.next7 = add i32 %x.06, 1
+ %exitcond10 = icmp ne i32 %indvar.next7, %smax9
+ br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge: ; preds = %bb7
+ br label %bb9
+
+bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge
+ br label %bb10
+
+bb10: ; preds = %bb9
+ %indvar.next11 = add i32 %y.08, 1
+ %exitcond12 = icmp ne i32 %indvar.next11, %x
+ br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge: ; preds = %bb10
+ br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+ br label %bb18.loopexit
+
+bb.nph9: ; preds = %bb10.preheader
+ %13 = icmp sgt i32 %w, 0
+ br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split: ; preds = %bb.nph9
+ br label %bb7.preheader
+
+bb7.preheader: ; preds = %bb.nph9.split, %bb10
+ %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
+ br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5: ; preds = %bb18.loopexit
+ %14 = sdiv i32 %w, 2
+ %15 = icmp slt i32 %w, 2
+ %16 = sdiv i32 %x, 2
+ br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split: ; preds = %bb.nph5
+ %tmp2 = icmp sgt i32 1, %16
+ %smax3 = select i1 %tmp2, i32 1, i32 %16
+ br label %bb13
+
+bb13: ; preds = %bb18, %bb.nph5.split
+ %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
+ %17 = mul i32 %14, %y.14
+ %18 = shl i32 %y.14, 1
+ %19 = srem i32 %y.14, 2
+ %20 = add i32 %19, %18
+ %21 = mul i32 %20, %s
+ br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3: ; preds = %bb13
+ %22 = add i32 %17, %0
+ %23 = add i32 %17, %.sum2
+ %24 = sdiv i32 %w, 2
+ %tmp = icmp sgt i32 1, %24
+ %smax = select i1 %tmp, i32 1, i32 %24
+ br label %bb14
+
+bb14: ; preds = %bb15, %bb.nph3
+ %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
+ %25 = shl i32 %x.12, 2
+ %26 = add i32 %25, %21
+ %27 = getelementptr i8* %r, i32 %26
+ %28 = load i8* %27, align 1
+ %.sum = add i32 %22, %x.12
+ %29 = getelementptr i8* %j, i32 %.sum
+ store i8 %28, i8* %29, align 1
+ %30 = shl i32 %x.12, 2
+ %31 = or i32 %30, 2
+ %32 = add i32 %31, %21
+ %33 = getelementptr i8* %r, i32 %32
+ %34 = load i8* %33, align 1
+ %.sum6 = add i32 %23, %x.12
+ %35 = getelementptr i8* %j, i32 %.sum6
+ store i8 %34, i8* %35, align 1
+ br label %bb15
+
+bb15: ; preds = %bb14
+ %indvar.next = add i32 %x.12, 1
+ %exitcond = icmp ne i32 %indvar.next, %smax
+ br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge: ; preds = %bb15
+ br label %bb17
+
+bb17: ; preds = %bb15.bb17_crit_edge, %bb13
+ br label %bb18
+
+bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+ %36 = icmp slt i32 %x, 2
+ br i1 %36, label %bb20, label %bb.nph5
+
+bb18: ; preds = %bb17
+ %indvar.next1 = add i32 %y.14, 1
+ %exitcond4 = icmp ne i32 %indvar.next1, %smax3
+ br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge: ; preds = %bb18
+ br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5
+ br label %bb20
+
+bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+ switch i32 %d, label %return [
+ i32 3, label %bb22
+ i32 1, label %bb29
+ ]
+
+bb22: ; preds = %bb20
+ %37 = mul i32 %x, %w
+ %38 = sdiv i32 %37, 4
+ %.sum3 = add i32 %38, %.sum2
+ %39 = add i32 %x, 15
+ %40 = and i32 %39, -16
+ %41 = add i32 %w, 15
+ %42 = and i32 %41, -16
+ %43 = mul i32 %40, %s
+ %44 = icmp sgt i32 %x, 0
+ br i1 %44, label %bb.nph, label %bb26
+
+bb.nph: ; preds = %bb22
+ br label %bb23
+
+bb23: ; preds = %bb24, %bb.nph
+ %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
+ %45 = mul i32 %y.21, %42
+ %.sum1 = add i32 %45, %43
+ %46 = getelementptr i8* %r, i32 %.sum1
+ %47 = mul i32 %y.21, %w
+ %.sum5 = add i32 %47, %.sum3
+ %48 = getelementptr i8* %j, i32 %.sum5
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false)
+ br label %bb24
+
+bb24: ; preds = %bb23
+ %indvar.next5 = add i32 %y.21, 1
+ %exitcond6 = icmp ne i32 %indvar.next5, %x
+ br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge: ; preds = %bb24
+ br label %bb26
+
+bb26: ; preds = %bb24.bb26_crit_edge, %bb22
+ %49 = mul i32 %x, %w
+ %.sum4 = add i32 %.sum3, %49
+ %50 = getelementptr i8* %j, i32 %.sum4
+ %51 = mul i32 %x, %w
+ %52 = sdiv i32 %51, 2
+ tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false)
+ ret void
+
+bb29: ; preds = %bb20, %entry
+ %53 = add i32 %w, 15
+ %54 = and i32 %53, -16
+ %55 = icmp sgt i32 %x, 0
+ br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11: ; preds = %bb29
+ br label %bb30
+
+bb30: ; preds = %bb31, %bb.nph11
+ %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
+ %56 = mul i32 %y.310, %54
+ %57 = getelementptr i8* %r, i32 %56
+ %58 = mul i32 %y.310, %w
+ %59 = getelementptr i8* %j, i32 %58
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false)
+ br label %bb31
+
+bb31: ; preds = %bb30
+ %indvar.next13 = add i32 %y.310, 1
+ %exitcond14 = icmp ne i32 %indvar.next13, %x
+ br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge: ; preds = %bb31
+ br label %bb33
+
+bb33: ; preds = %bb31.bb33_crit_edge, %bb29
+ %60 = mul i32 %x, %w
+ %61 = getelementptr i8* %j, i32 %60
+ %62 = mul i32 %x, %w
+ %63 = sdiv i32 %62, 2
+ tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false)
+ ret void
+
+return: ; preds = %bb20
+ ret void
}
define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
entry:
- %0 = mul i32 %x, %w ; <i32> [#uses=2]
- %1 = mul i32 %x, %w ; <i32> [#uses=1]
- %2 = udiv i32 %1, 4 ; <i32> [#uses=1]
- %.sum2 = add i32 %2, %0 ; <i32> [#uses=2]
- %cond = icmp eq i32 %d, 1 ; <i1> [#uses=1]
- br i1 %cond, label %bb29, label %bb10.preheader
-
-bb10.preheader: ; preds = %entry
- %3 = icmp ne i32 %x, 0 ; <i1> [#uses=1]
- br i1 %3, label %bb.nph9, label %bb18.loopexit
-
-bb.nph7: ; preds = %bb7.preheader
- %4 = mul i32 %y.08, %w ; <i32> [#uses=1]
- %5 = mul i32 %y.08, %s ; <i32> [#uses=1]
- %6 = add i32 %5, 1 ; <i32> [#uses=1]
- %tmp8 = icmp ugt i32 1, %w ; <i1> [#uses=1]
- %smax9 = select i1 %tmp8, i32 1, i32 %w ; <i32> [#uses=1]
- br label %bb6
-
-bb6: ; preds = %bb7, %bb.nph7
- %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ] ; <i32> [#uses=3]
- %7 = add i32 %x.06, %4 ; <i32> [#uses=1]
- %8 = shl i32 %x.06, 1 ; <i32> [#uses=1]
- %9 = add i32 %6, %8 ; <i32> [#uses=1]
- %10 = getelementptr i8* %r, i32 %9 ; <i8*> [#uses=1]
- %11 = load i8* %10, align 1 ; <i8> [#uses=1]
- %12 = getelementptr i8* %j, i32 %7 ; <i8*> [#uses=1]
- store i8 %11, i8* %12, align 1
- br label %bb7
-
-bb7: ; preds = %bb6
- %indvar.next7 = add i32 %x.06, 1 ; <i32> [#uses=2]
- %exitcond10 = icmp ne i32 %indvar.next7, %smax9 ; <i1> [#uses=1]
- br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
-
-bb7.bb9_crit_edge: ; preds = %bb7
- br label %bb9
-
-bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge
- br label %bb10
-
-bb10: ; preds = %bb9
- %indvar.next11 = add i32 %y.08, 1 ; <i32> [#uses=2]
- %exitcond12 = icmp ne i32 %indvar.next11, %x ; <i1> [#uses=1]
- br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
-
-bb10.bb18.loopexit_crit_edge: ; preds = %bb10
- br label %bb10.bb18.loopexit_crit_edge.split
-
-bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
- br label %bb18.loopexit
-
-bb.nph9: ; preds = %bb10.preheader
- %13 = icmp ugt i32 %w, 0 ; <i1> [#uses=1]
- br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
-
-bb.nph9.split: ; preds = %bb.nph9
- br label %bb7.preheader
-
-bb7.preheader: ; preds = %bb.nph9.split, %bb10
- %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ] ; <i32> [#uses=3]
- br i1 true, label %bb.nph7, label %bb9
-
-bb.nph5: ; preds = %bb18.loopexit
- %14 = udiv i32 %w, 2 ; <i32> [#uses=1]
- %15 = icmp ult i32 %w, 2 ; <i1> [#uses=1]
- %16 = udiv i32 %x, 2 ; <i32> [#uses=2]
- br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
-
-bb.nph5.split: ; preds = %bb.nph5
- %tmp2 = icmp ugt i32 1, %16 ; <i1> [#uses=1]
- %smax3 = select i1 %tmp2, i32 1, i32 %16 ; <i32> [#uses=1]
- br label %bb13
-
-bb13: ; preds = %bb18, %bb.nph5.split
- %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ] ; <i32> [#uses=4]
- %17 = mul i32 %14, %y.14 ; <i32> [#uses=2]
- %18 = shl i32 %y.14, 1 ; <i32> [#uses=1]
- %19 = urem i32 %y.14, 2 ; <i32> [#uses=1]
- %20 = add i32 %19, %18 ; <i32> [#uses=1]
- %21 = mul i32 %20, %s ; <i32> [#uses=2]
- br i1 true, label %bb.nph3, label %bb17
-
-bb.nph3: ; preds = %bb13
- %22 = add i32 %17, %0 ; <i32> [#uses=1]
- %23 = add i32 %17, %.sum2 ; <i32> [#uses=1]
- %24 = udiv i32 %w, 2 ; <i32> [#uses=2]
- %tmp = icmp ugt i32 1, %24 ; <i1> [#uses=1]
- %smax = select i1 %tmp, i32 1, i32 %24 ; <i32> [#uses=1]
- br label %bb14
-
-bb14: ; preds = %bb15, %bb.nph3
- %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ] ; <i32> [#uses=5]
- %25 = shl i32 %x.12, 2 ; <i32> [#uses=1]
- %26 = add i32 %25, %21 ; <i32> [#uses=1]
- %27 = getelementptr i8* %r, i32 %26 ; <i8*> [#uses=1]
- %28 = load i8* %27, align 1 ; <i8> [#uses=1]
- %.sum = add i32 %22, %x.12 ; <i32> [#uses=1]
- %29 = getelementptr i8* %j, i32 %.sum ; <i8*> [#uses=1]
- store i8 %28, i8* %29, align 1
- %30 = shl i32 %x.12, 2 ; <i32> [#uses=1]
- %31 = or i32 %30, 2 ; <i32> [#uses=1]
- %32 = add i32 %31, %21 ; <i32> [#uses=1]
- %33 = getelementptr i8* %r, i32 %32 ; <i8*> [#uses=1]
- %34 = load i8* %33, align 1 ; <i8> [#uses=1]
- %.sum6 = add i32 %23, %x.12 ; <i32> [#uses=1]
- %35 = getelementptr i8* %j, i32 %.sum6 ; <i8*> [#uses=1]
- store i8 %34, i8* %35, align 1
- br label %bb15
-
-bb15: ; preds = %bb14
- %indvar.next = add i32 %x.12, 1 ; <i32> [#uses=2]
- %exitcond = icmp ne i32 %indvar.next, %smax ; <i1> [#uses=1]
- br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
-
-bb15.bb17_crit_edge: ; preds = %bb15
- br label %bb17
-
-bb17: ; preds = %bb15.bb17_crit_edge, %bb13
- br label %bb18
-
-bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
- %36 = icmp ult i32 %x, 2 ; <i1> [#uses=1]
- br i1 %36, label %bb20, label %bb.nph5
-
-bb18: ; preds = %bb17
- %indvar.next1 = add i32 %y.14, 1 ; <i32> [#uses=2]
- %exitcond4 = icmp ne i32 %indvar.next1, %smax3 ; <i1> [#uses=1]
- br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
-
-bb18.bb20_crit_edge: ; preds = %bb18
- br label %bb18.bb20_crit_edge.split
-
-bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5
- br label %bb20
-
-bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
- switch i32 %d, label %return [
- i32 3, label %bb22
- i32 1, label %bb29
- ]
-
-bb22: ; preds = %bb20
- %37 = mul i32 %x, %w ; <i32> [#uses=1]
- %38 = udiv i32 %37, 4 ; <i32> [#uses=1]
- %.sum3 = add i32 %38, %.sum2 ; <i32> [#uses=2]
- %39 = add i32 %x, 15 ; <i32> [#uses=1]
- %40 = and i32 %39, -16 ; <i32> [#uses=1]
- %41 = add i32 %w, 15 ; <i32> [#uses=1]
- %42 = and i32 %41, -16 ; <i32> [#uses=1]
- %43 = mul i32 %40, %s ; <i32> [#uses=1]
- %44 = icmp ugt i32 %x, 0 ; <i1> [#uses=1]
- br i1 %44, label %bb.nph, label %bb26
-
-bb.nph: ; preds = %bb22
- br label %bb23
-
-bb23: ; preds = %bb24, %bb.nph
- %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ] ; <i32> [#uses=3]
- %45 = mul i32 %y.21, %42 ; <i32> [#uses=1]
- %.sum1 = add i32 %45, %43 ; <i32> [#uses=1]
- %46 = getelementptr i8* %r, i32 %.sum1 ; <i8*> [#uses=1]
- %47 = mul i32 %y.21, %w ; <i32> [#uses=1]
- %.sum5 = add i32 %47, %.sum3 ; <i32> [#uses=1]
- %48 = getelementptr i8* %j, i32 %.sum5 ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32(i8* %48, i8* %46, i32 %w, i32 1)
- br label %bb24
-
-bb24: ; preds = %bb23
- %indvar.next5 = add i32 %y.21, 1 ; <i32> [#uses=2]
- %exitcond6 = icmp ne i32 %indvar.next5, %x ; <i1> [#uses=1]
- br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
-
-bb24.bb26_crit_edge: ; preds = %bb24
- br label %bb26
-
-bb26: ; preds = %bb24.bb26_crit_edge, %bb22
- %49 = mul i32 %x, %w ; <i32> [#uses=1]
- %.sum4 = add i32 %.sum3, %49 ; <i32> [#uses=1]
- %50 = getelementptr i8* %j, i32 %.sum4 ; <i8*> [#uses=1]
- %51 = mul i32 %x, %w ; <i32> [#uses=1]
- %52 = udiv i32 %51, 2 ; <i32> [#uses=1]
- tail call void @llvm.memset.i32(i8* %50, i8 -128, i32 %52, i32 1)
- ret void
-
-bb29: ; preds = %bb20, %entry
- %53 = add i32 %w, 15 ; <i32> [#uses=1]
- %54 = and i32 %53, -16 ; <i32> [#uses=1]
- %55 = icmp ugt i32 %x, 0 ; <i1> [#uses=1]
- br i1 %55, label %bb.nph11, label %bb33
-
-bb.nph11: ; preds = %bb29
- br label %bb30
-
-bb30: ; preds = %bb31, %bb.nph11
- %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ] ; <i32> [#uses=3]
- %56 = mul i32 %y.310, %54 ; <i32> [#uses=1]
- %57 = getelementptr i8* %r, i32 %56 ; <i8*> [#uses=1]
- %58 = mul i32 %y.310, %w ; <i32> [#uses=1]
- %59 = getelementptr i8* %j, i32 %58 ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i32(i8* %59, i8* %57, i32 %w, i32 1)
- br label %bb31
-
-bb31: ; preds = %bb30
- %indvar.next13 = add i32 %y.310, 1 ; <i32> [#uses=2]
- %exitcond14 = icmp ne i32 %indvar.next13, %x ; <i1> [#uses=1]
- br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
-
-bb31.bb33_crit_edge: ; preds = %bb31
- br label %bb33
-
-bb33: ; preds = %bb31.bb33_crit_edge, %bb29
- %60 = mul i32 %x, %w ; <i32> [#uses=1]
- %61 = getelementptr i8* %j, i32 %60 ; <i8*> [#uses=1]
- %62 = mul i32 %x, %w ; <i32> [#uses=1]
- %63 = udiv i32 %62, 2 ; <i32> [#uses=1]
- tail call void @llvm.memset.i32(i8* %61, i8 -128, i32 %63, i32 1)
- ret void
-
-return: ; preds = %bb20
- ret void
+ %0 = mul i32 %x, %w
+ %1 = mul i32 %x, %w
+ %2 = udiv i32 %1, 4
+ %.sum2 = add i32 %2, %0
+ %cond = icmp eq i32 %d, 1
+ br i1 %cond, label %bb29, label %bb10.preheader
+
+bb10.preheader: ; preds = %entry
+ %3 = icmp ne i32 %x, 0
+ br i1 %3, label %bb.nph9, label %bb18.loopexit
+
+bb.nph7: ; preds = %bb7.preheader
+ %4 = mul i32 %y.08, %w
+ %5 = mul i32 %y.08, %s
+ %6 = add i32 %5, 1
+ %tmp8 = icmp ugt i32 1, %w
+ %smax9 = select i1 %tmp8, i32 1, i32 %w
+ br label %bb6
+
+bb6: ; preds = %bb7, %bb.nph7
+ %x.06 = phi i32 [ 0, %bb.nph7 ], [ %indvar.next7, %bb7 ]
+ %7 = add i32 %x.06, %4
+ %8 = shl i32 %x.06, 1
+ %9 = add i32 %6, %8
+ %10 = getelementptr i8* %r, i32 %9
+ %11 = load i8* %10, align 1
+ %12 = getelementptr i8* %j, i32 %7
+ store i8 %11, i8* %12, align 1
+ br label %bb7
+
+bb7: ; preds = %bb6
+ %indvar.next7 = add i32 %x.06, 1
+ %exitcond10 = icmp ne i32 %indvar.next7, %smax9
+ br i1 %exitcond10, label %bb6, label %bb7.bb9_crit_edge
+
+bb7.bb9_crit_edge: ; preds = %bb7
+ br label %bb9
+
+bb9: ; preds = %bb7.preheader, %bb7.bb9_crit_edge
+ br label %bb10
+
+bb10: ; preds = %bb9
+ %indvar.next11 = add i32 %y.08, 1
+ %exitcond12 = icmp ne i32 %indvar.next11, %x
+ br i1 %exitcond12, label %bb7.preheader, label %bb10.bb18.loopexit_crit_edge
+
+bb10.bb18.loopexit_crit_edge: ; preds = %bb10
+ br label %bb10.bb18.loopexit_crit_edge.split
+
+bb10.bb18.loopexit_crit_edge.split: ; preds = %bb.nph9, %bb10.bb18.loopexit_crit_edge
+ br label %bb18.loopexit
+
+bb.nph9: ; preds = %bb10.preheader
+ %13 = icmp ugt i32 %w, 0
+ br i1 %13, label %bb.nph9.split, label %bb10.bb18.loopexit_crit_edge.split
+
+bb.nph9.split: ; preds = %bb.nph9
+ br label %bb7.preheader
+
+bb7.preheader: ; preds = %bb.nph9.split, %bb10
+ %y.08 = phi i32 [ 0, %bb.nph9.split ], [ %indvar.next11, %bb10 ]
+ br i1 true, label %bb.nph7, label %bb9
+
+bb.nph5: ; preds = %bb18.loopexit
+ %14 = udiv i32 %w, 2
+ %15 = icmp ult i32 %w, 2
+ %16 = udiv i32 %x, 2
+ br i1 %15, label %bb18.bb20_crit_edge.split, label %bb.nph5.split
+
+bb.nph5.split: ; preds = %bb.nph5
+ %tmp2 = icmp ugt i32 1, %16
+ %smax3 = select i1 %tmp2, i32 1, i32 %16
+ br label %bb13
+
+bb13: ; preds = %bb18, %bb.nph5.split
+ %y.14 = phi i32 [ 0, %bb.nph5.split ], [ %indvar.next1, %bb18 ]
+ %17 = mul i32 %14, %y.14
+ %18 = shl i32 %y.14, 1
+ %19 = urem i32 %y.14, 2
+ %20 = add i32 %19, %18
+ %21 = mul i32 %20, %s
+ br i1 true, label %bb.nph3, label %bb17
+
+bb.nph3: ; preds = %bb13
+ %22 = add i32 %17, %0
+ %23 = add i32 %17, %.sum2
+ %24 = udiv i32 %w, 2
+ %tmp = icmp ugt i32 1, %24
+ %smax = select i1 %tmp, i32 1, i32 %24
+ br label %bb14
+
+bb14: ; preds = %bb15, %bb.nph3
+ %x.12 = phi i32 [ 0, %bb.nph3 ], [ %indvar.next, %bb15 ]
+ %25 = shl i32 %x.12, 2
+ %26 = add i32 %25, %21
+ %27 = getelementptr i8* %r, i32 %26
+ %28 = load i8* %27, align 1
+ %.sum = add i32 %22, %x.12
+ %29 = getelementptr i8* %j, i32 %.sum
+ store i8 %28, i8* %29, align 1
+ %30 = shl i32 %x.12, 2
+ %31 = or i32 %30, 2
+ %32 = add i32 %31, %21
+ %33 = getelementptr i8* %r, i32 %32
+ %34 = load i8* %33, align 1
+ %.sum6 = add i32 %23, %x.12
+ %35 = getelementptr i8* %j, i32 %.sum6
+ store i8 %34, i8* %35, align 1
+ br label %bb15
+
+bb15: ; preds = %bb14
+ %indvar.next = add i32 %x.12, 1
+ %exitcond = icmp ne i32 %indvar.next, %smax
+ br i1 %exitcond, label %bb14, label %bb15.bb17_crit_edge
+
+bb15.bb17_crit_edge: ; preds = %bb15
+ br label %bb17
+
+bb17: ; preds = %bb15.bb17_crit_edge, %bb13
+ br label %bb18
+
+bb18.loopexit: ; preds = %bb10.bb18.loopexit_crit_edge.split, %bb10.preheader
+ %36 = icmp ult i32 %x, 2
+ br i1 %36, label %bb20, label %bb.nph5
+
+bb18: ; preds = %bb17
+ %indvar.next1 = add i32 %y.14, 1
+ %exitcond4 = icmp ne i32 %indvar.next1, %smax3
+ br i1 %exitcond4, label %bb13, label %bb18.bb20_crit_edge
+
+bb18.bb20_crit_edge: ; preds = %bb18
+ br label %bb18.bb20_crit_edge.split
+
+bb18.bb20_crit_edge.split: ; preds = %bb18.bb20_crit_edge, %bb.nph5
+ br label %bb20
+
+bb20: ; preds = %bb18.bb20_crit_edge.split, %bb18.loopexit
+ switch i32 %d, label %return [
+ i32 3, label %bb22
+ i32 1, label %bb29
+ ]
+
+bb22: ; preds = %bb20
+ %37 = mul i32 %x, %w
+ %38 = udiv i32 %37, 4
+ %.sum3 = add i32 %38, %.sum2
+ %39 = add i32 %x, 15
+ %40 = and i32 %39, -16
+ %41 = add i32 %w, 15
+ %42 = and i32 %41, -16
+ %43 = mul i32 %40, %s
+ %44 = icmp ugt i32 %x, 0
+ br i1 %44, label %bb.nph, label %bb26
+
+bb.nph: ; preds = %bb22
+ br label %bb23
+
+bb23: ; preds = %bb24, %bb.nph
+ %y.21 = phi i32 [ 0, %bb.nph ], [ %indvar.next5, %bb24 ]
+ %45 = mul i32 %y.21, %42
+ %.sum1 = add i32 %45, %43
+ %46 = getelementptr i8* %r, i32 %.sum1
+ %47 = mul i32 %y.21, %w
+ %.sum5 = add i32 %47, %.sum3
+ %48 = getelementptr i8* %j, i32 %.sum5
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %48, i8* %46, i32 %w, i32 1, i1 false)
+ br label %bb24
+
+bb24: ; preds = %bb23
+ %indvar.next5 = add i32 %y.21, 1
+ %exitcond6 = icmp ne i32 %indvar.next5, %x
+ br i1 %exitcond6, label %bb23, label %bb24.bb26_crit_edge
+
+bb24.bb26_crit_edge: ; preds = %bb24
+ br label %bb26
+
+bb26: ; preds = %bb24.bb26_crit_edge, %bb22
+ %49 = mul i32 %x, %w
+ %.sum4 = add i32 %.sum3, %49
+ %50 = getelementptr i8* %j, i32 %.sum4
+ %51 = mul i32 %x, %w
+ %52 = udiv i32 %51, 2
+ tail call void @llvm.memset.p0i8.i32(i8* %50, i8 -128, i32 %52, i32 1, i1 false)
+ ret void
+
+bb29: ; preds = %bb20, %entry
+ %53 = add i32 %w, 15
+ %54 = and i32 %53, -16
+ %55 = icmp ugt i32 %x, 0
+ br i1 %55, label %bb.nph11, label %bb33
+
+bb.nph11: ; preds = %bb29
+ br label %bb30
+
+bb30: ; preds = %bb31, %bb.nph11
+ %y.310 = phi i32 [ 0, %bb.nph11 ], [ %indvar.next13, %bb31 ]
+ %56 = mul i32 %y.310, %54
+ %57 = getelementptr i8* %r, i32 %56
+ %58 = mul i32 %y.310, %w
+ %59 = getelementptr i8* %j, i32 %58
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %59, i8* %57, i32 %w, i32 1, i1 false)
+ br label %bb31
+
+bb31: ; preds = %bb30
+ %indvar.next13 = add i32 %y.310, 1
+ %exitcond14 = icmp ne i32 %indvar.next13, %x
+ br i1 %exitcond14, label %bb30, label %bb31.bb33_crit_edge
+
+bb31.bb33_crit_edge: ; preds = %bb31
+ br label %bb33
+
+bb33: ; preds = %bb31.bb33_crit_edge, %bb29
+ %60 = mul i32 %x, %w
+ %61 = getelementptr i8* %j, i32 %60
+ %62 = mul i32 %x, %w
+ %63 = udiv i32 %62, 2
+ tail call void @llvm.memset.p0i8.i32(i8* %61, i8 -128, i32 %63, i32 1, i1 false)
+ ret void
+
+return: ; preds = %bb20
+ ret void
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/CodeGen/X86/peep-test-3.ll b/test/CodeGen/X86/peep-test-3.ll
index a34a978..528c4bc 100644
--- a/test/CodeGen/X86/peep-test-3.ll
+++ b/test/CodeGen/X86/peep-test-3.ll
@@ -9,7 +9,7 @@ entry:
%0 = ptrtoint float* %A to i32 ; <i32> [#uses=1]
%1 = and i32 %0, 3 ; <i32> [#uses=1]
%2 = xor i32 %IA, 1 ; <i32> [#uses=1]
-; CHECK: orl %ecx, %edx
+; CHECK: orl %e
; CHECK-NEXT: je
%3 = or i32 %2, %1 ; <i32> [#uses=1]
%4 = icmp eq i32 %3, 0 ; <i1> [#uses=1]
diff --git a/test/CodeGen/X86/personality.ll b/test/CodeGen/X86/personality.ll
index e952a9b..d3d8e3f 100644
--- a/test/CodeGen/X86/personality.ll
+++ b/test/CodeGen/X86/personality.ll
@@ -4,40 +4,41 @@
define void @_Z1fv() {
entry:
- invoke void @_Z1gv( )
- to label %return unwind label %unwind
+ invoke void @_Z1gv()
+ to label %return unwind label %unwind
-unwind: ; preds = %entry
- br i1 false, label %eh_then, label %cleanup20
+unwind: ; preds = %entry
+ br i1 false, label %eh_then, label %cleanup20
-eh_then: ; preds = %unwind
- invoke void @__cxa_end_catch( )
- to label %return unwind label %unwind10
+eh_then: ; preds = %unwind
+ invoke void @__cxa_end_catch()
+ to label %return unwind label %unwind10
-unwind10: ; preds = %eh_then
- %eh_select13 = tail call i64 (i8*, i8*, ...)* @llvm.eh.selector.i64( i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1 ) ; <i32> [#uses=2]
- %tmp18 = icmp slt i64 %eh_select13, 0 ; <i1> [#uses=1]
- br i1 %tmp18, label %filter, label %cleanup20
+unwind10: ; preds = %eh_then
+ %upgraded.eh_select13 = tail call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* null, i8* bitcast (void ()* @__gxx_personality_v0 to i8*), i32 1)
+ %upgraded.eh_select131 = sext i32 %upgraded.eh_select13 to i64
+ %tmp18 = icmp slt i64 %upgraded.eh_select131, 0
+ br i1 %tmp18, label %filter, label %cleanup20
-filter: ; preds = %unwind10
- unreachable
+filter: ; preds = %unwind10
+ unreachable
-cleanup20: ; preds = %unwind10, %unwind
- %eh_selector.0 = phi i64 [ 0, %unwind ], [ %eh_select13, %unwind10 ] ; <i32> [#uses=0]
- ret void
+cleanup20: ; preds = %unwind10, %unwind
+ %eh_selector.0 = phi i64 [ 0, %unwind ], [ %upgraded.eh_select131, %unwind10 ]
+ ret void
-return: ; preds = %eh_then, %entry
- ret void
+return: ; preds = %eh_then, %entry
+ ret void
}
declare void @_Z1gv()
-declare i64 @llvm.eh.selector.i64(i8*, i8*, ...)
-
declare void @__gxx_personality_v0()
declare void @__cxa_end_catch()
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
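+; Note (editorial, reading the diff above): the removed target-specific
+; @llvm.eh.selector.i64 is now expressed as the generic i32 @llvm.eh.selector
+; plus an explicit sext of the selector value to i64.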
+
; X64: zPLR
; X64: .byte 155
; X64-NEXT: .long ___gxx_personality_v0@GOTPCREL+4
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index b6761e3..8c16dc6 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -1,11 +1,14 @@
-; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false | grep -F .text._Z3fooILi1EEvi,"axG",@progbits,_Z3fooILi1EEvi,comdat
-; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -relocation-model=pic -mtriple=i386-linux-gnu -asm-verbose=false \
+; RUN: | FileCheck %s --check-prefix=CHECK-LINUX
+; RUN: llc < %s -relocation-model=pic -mtriple=i686-apple-darwin -asm-verbose=false \
+; RUN: | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | not grep 'lJTI'
; rdar://6971437
; rdar://7738756
declare void @_Z3bari(i32)
+; CHECK-LINUX: .text._Z3fooILi1EEvi,"axG",@progbits,_Z3fooILi1EEvi,comdat
define linkonce void @_Z3fooILi1EEvi(i32 %Y) nounwind {
entry:
; CHECK: L0$pb
diff --git a/test/CodeGen/X86/pr1505b.ll b/test/CodeGen/X86/pr1505b.ll
index 6a08dae..945ec4c 100644
--- a/test/CodeGen/X86/pr1505b.ll
+++ b/test/CodeGen/X86/pr1505b.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -mcpu=i486 | grep fstpl | count 5
-; RUN: llc < %s -mcpu=i486 | grep fstps | count 2
+; RUN: llc < %s -mcpu=i486 | FileCheck %s
; PR1505
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
@@ -30,19 +29,41 @@ declare void @_ZNSt8ios_base4InitC1Ev(%"struct.std::ctype_base"*)
declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
+; CHECK: main
define i32 @main() {
entry:
+; CHECK: flds
%tmp6 = volatile load float* @a ; <float> [#uses=1]
+; CHECK: fstps (%esp)
+; CHECK: tanf
%tmp9 = tail call float @tanf( float %tmp6 ) ; <float> [#uses=1]
+; Spill returned value:
+; CHECK: fstp
+
+; CHECK: fldl
%tmp12 = volatile load double* @b ; <double> [#uses=1]
+; CHECK: fstpl (%esp)
+; CHECK: tan
%tmp13 = tail call double @tan( double %tmp12 ) ; <double> [#uses=1]
+; Spill returned value:
+; CHECK: fstp
%tmp1314 = fptrunc double %tmp13 to float ; <float> [#uses=1]
%tmp16 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
%tmp1920 = fpext float %tmp9 to double ; <double> [#uses=1]
+; Reload:
+; CHECK: fld
+; CHECK: fstpl
+; CHECK: ZNSolsEd
%tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
%tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
+; Reload:
+; CHECK: fld
+; CHECK: fstps
+; CHECK: ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
%tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
%tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1]
+; CHECK: fstpl
+; CHECK: ZNSolsEd
%tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
%tmp51 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp42 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
ret i32 0
diff --git a/test/CodeGen/X86/pr2182.ll b/test/CodeGen/X86/pr2182.ll
index f97663c..2a8bb35 100644
--- a/test/CodeGen/X86/pr2182.ll
+++ b/test/CodeGen/X86/pr2182.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {addl \$3, (%eax)} | count 4
+; RUN: llc < %s | FileCheck %s
; PR2182
target datalayout =
@@ -7,18 +7,25 @@ target triple = "i386-apple-darwin8"
@x = weak global i32 0 ; <i32*> [#uses=8]
define void @loop_2() nounwind {
-entry:
- %tmp = volatile load i32* @x, align 4 ; <i32> [#uses=1]
- %tmp1 = add i32 %tmp, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1, i32* @x, align 4
- %tmp.1 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
- %tmp1.1 = add i32 %tmp.1, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.1, i32* @x, align 4
- %tmp.2 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
- %tmp1.2 = add i32 %tmp.2, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.2, i32* @x, align 4
- %tmp.3 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
- %tmp1.3 = add i32 %tmp.3, 3 ; <i32> [#uses=1]
- volatile store i32 %tmp1.3, i32* @x, align 4
- ret void
+; CHECK: loop_2:
+; CHECK-NOT: ret
+; CHECK: addl $3, (%{{.*}})
+; CHECK-NEXT: addl $3, (%{{.*}})
+; CHECK-NEXT: addl $3, (%{{.*}})
+; CHECK-NEXT: addl $3, (%{{.*}})
+; CHECK-NEXT: ret
+
+ %tmp = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp1 = add i32 %tmp, 3 ; <i32> [#uses=1]
+ volatile store i32 %tmp1, i32* @x, align 4
+ %tmp.1 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp1.1 = add i32 %tmp.1, 3 ; <i32> [#uses=1]
+ volatile store i32 %tmp1.1, i32* @x, align 4
+ %tmp.2 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp1.2 = add i32 %tmp.2, 3 ; <i32> [#uses=1]
+ volatile store i32 %tmp1.2, i32* @x, align 4
+ %tmp.3 = volatile load i32* @x, align 4 ; <i32> [#uses=1]
+ %tmp1.3 = add i32 %tmp.3, 3 ; <i32> [#uses=1]
+ volatile store i32 %tmp1.3, i32* @x, align 4
+ ret void
}
diff --git a/test/CodeGen/X86/pr2623.ll b/test/CodeGen/X86/pr2623.ll
deleted file mode 100644
index 5d0eb5d..0000000
--- a/test/CodeGen/X86/pr2623.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: llc < %s
-; PR2623
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-unknown-freebsd7.0"
- %.objc_id = type { %.objc_id }*
- %.objc_selector = type { i8*, i8* }*
-@.objc_sel_ptr = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr13 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr14 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr15 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr16 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr17 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr18 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr19 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr20 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-@.objc_sel_ptr21 = external constant %.objc_selector ; <%.objc_selector*> [#uses=1]
-
-@.objc_untyped_selector_alias = alias internal %.objc_selector* @.objc_sel_ptr15 ; <%.objc_selector*> [#uses=0]
-@.objc_untyped_selector_alias1 = alias internal %.objc_selector* @.objc_sel_ptr ; <%.objc_selector*> [#uses=0]
-@.objc_untyped_selector_alias2 = alias internal %.objc_selector* @.objc_sel_ptr17 ; <%.objc_selector*> [#uses=0]
-@.objc_untyped_selector_alias3 = alias internal %.objc_selector* @.objc_sel_ptr16 ; <%.objc_selector*> [#uses=0]
-@.objc_untyped_selector_alias4 = alias internal %.objc_selector* @.objc_sel_ptr13 ; <%.objc_selector*> [#uses=0]
-@.objc_untyped_selector_alias7 = alias internal %.objc_selector* @.objc_sel_ptr14 ; <%.objc_selector*> [#uses=0]
-@getRange = alias internal %.objc_selector* @.objc_sel_ptr18 ; <%.objc_selector*> [#uses=0]
-@"valueWithRange:" = alias internal %.objc_selector* @.objc_sel_ptr21 ; <%.objc_selector*> [#uses=0]
-@rangeValue = alias internal %.objc_selector* @.objc_sel_ptr20 ; <%.objc_selector*> [#uses=0]
-@"printRange:" = alias internal %.objc_selector* @.objc_sel_ptr19 ; <%.objc_selector*> [#uses=0]
-
-define void @"._objc_method_SmalltalkTool()-run"(i8* %self, %.objc_selector %_cmd) {
-entry:
- br i1 false, label %small_int_messagerangeValue, label %real_object_messagerangeValue
-
-small_int_messagerangeValue: ; preds = %entry
- br label %Continue
-
-real_object_messagerangeValue: ; preds = %entry
- br label %Continue
-
-Continue: ; preds = %real_object_messagerangeValue, %small_int_messagerangeValue
- %rangeValue = phi { i32, i32 } [ undef, %small_int_messagerangeValue ], [ undef, %real_object_messagerangeValue ] ; <{ i32, i32 }> [#uses=1]
- call void (%.objc_id, %.objc_selector, ...)* null( %.objc_id null, %.objc_selector null, { i32, i32 } %rangeValue )
- ret void
-}
diff --git a/test/CodeGen/X86/pr3216.ll b/test/CodeGen/X86/pr3216.ll
index 38c9f32..63676d9 100644
--- a/test/CodeGen/X86/pr3216.ll
+++ b/test/CodeGen/X86/pr3216.ll
@@ -1,14 +1,18 @@
-; RUN: llc < %s -march=x86 | grep {sar. \$5}
+; RUN: llc < %s -march=x86 | FileCheck %s
@foo = global i8 127
define i32 @main() nounwind {
-entry:
- %tmp = load i8* @foo
- %bf.lo = lshr i8 %tmp, 5
- %bf.lo.cleared = and i8 %bf.lo, 7
- %0 = shl i8 %bf.lo.cleared, 5
- %bf.val.sext = ashr i8 %0, 5
- %conv = sext i8 %bf.val.sext to i32
- ret i32 %conv
+; CHECK: main:
+; CHECK-NOT: ret
+; CHECK: sar{{.}} $5
+; CHECK: ret
+
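+; A sketch of why a single shift suffices: the lshr/and/shl/ashr sequence
+; below extracts the signed 3-bit field in bits 5-7 of the byte, and the
+; shifts should fold to the one arithmetic shift right by 5 that the sar
+; check above requires.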
+ %tmp = load i8* @foo
+ %bf.lo = lshr i8 %tmp, 5
+ %bf.lo.cleared = and i8 %bf.lo, 7
+ %1 = shl i8 %bf.lo.cleared, 5
+ %bf.val.sext = ashr i8 %1, 5
+ %conv = sext i8 %bf.val.sext to i32
+ ret i32 %conv
}
diff --git a/test/CodeGen/X86/pr3317.ll b/test/CodeGen/X86/pr3317.ll
index 9d6626b..d83daf0 100644
--- a/test/CodeGen/X86/pr3317.ll
+++ b/test/CodeGen/X86/pr3317.ll
@@ -1,6 +1,7 @@
; RUN: llc < %s -march=x86
; PR3317
+%VT = type [0 x i32 (...)*]
%ArraySInt16 = type { %JavaObject, i8*, [0 x i16] }
%ArraySInt8 = type { %JavaObject, i8*, [0 x i8] }
%Attribut = type { %ArraySInt16*, i32, i32 }
@@ -14,7 +15,6 @@
%JavaObject = type { %VT*, %JavaCommonClass*, i8* }
%TaskClassMirror = type { i32, i8* }
%UTF8 = type { %JavaObject, i8*, [0 x i16] }
- %VT = type [0 x i32 (...)*]
declare void @jnjvmNullPointerException()
diff --git a/test/CodeGen/X86/pre-split1.ll b/test/CodeGen/X86/pre-split1.ll
deleted file mode 100644
index b55bf57..0000000
--- a/test/CodeGen/X86/pre-split1.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
-; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
-; XFAIL: *
-
-define void @test(double* %P, i32 %cond) nounwind {
-entry:
- %0 = load double* %P, align 8 ; <double> [#uses=1]
- %1 = fadd double %0, 4.000000e+00 ; <double> [#uses=2]
- %2 = icmp eq i32 %cond, 0 ; <i1> [#uses=1]
- br i1 %2, label %bb1, label %bb
-
-bb: ; preds = %entry
- %3 = fadd double %1, 4.000000e+00 ; <double> [#uses=1]
- br label %bb1
-
-bb1: ; preds = %bb, %entry
- %A.0 = phi double [ %3, %bb ], [ %1, %entry ] ; <double> [#uses=1]
- %4 = fmul double %A.0, 4.000000e+00 ; <double> [#uses=1]
- %5 = tail call i32 (...)* @bar() nounwind ; <i32> [#uses=0]
- store double %4, double* %P, align 8
- ret void
-}
-
-declare i32 @bar(...)
diff --git a/test/CodeGen/X86/pre-split10.ll b/test/CodeGen/X86/pre-split10.ll
deleted file mode 100644
index 83c6450..0000000
--- a/test/CodeGen/X86/pre-split10.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
-
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
- br label %bb14.i
-
-bb14.i: ; preds = %bb14.i, %entry
- %i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %0, %bb14.i ] ; <i32> [#uses=1]
- %0 = add i32 %i8.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
- %1 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %2 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %3 = fadd double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %exitcond75.i = icmp eq i32 %0, 32 ; <i1> [#uses=1]
- br i1 %exitcond75.i, label %bb24.i, label %bb14.i
-
-bb24.i: ; preds = %bb14.i
- %4 = fdiv double 0.000000e+00, 0.000000e+00 ; <double> [#uses=1]
- %5 = fdiv double %1, 0.000000e+00 ; <double> [#uses=1]
- %6 = fdiv double %2, 0.000000e+00 ; <double> [#uses=1]
- %7 = fdiv double %3, 0.000000e+00 ; <double> [#uses=1]
- br label %bb31.i
-
-bb31.i: ; preds = %bb31.i, %bb24.i
- %tmp.0.reg2mem.0.i = phi i32 [ 0, %bb24.i ], [ %indvar.next64.i, %bb31.i ] ; <i32> [#uses=1]
- %indvar.next64.i = add i32 %tmp.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
- %exitcond65.i = icmp eq i32 %indvar.next64.i, 64 ; <i1> [#uses=1]
- br i1 %exitcond65.i, label %bb33.i, label %bb31.i
-
-bb33.i: ; preds = %bb31.i
- br label %bb35.preheader.i
-
-bb5.i.i: ; preds = %bb35.preheader.i
- %8 = call double @floor(double 0.000000e+00) nounwind readnone ; <double> [#uses=0]
- br label %bb7.i.i
-
-bb7.i.i: ; preds = %bb35.preheader.i, %bb5.i.i
- br label %bb35.preheader.i
-
-bb35.preheader.i: ; preds = %bb7.i.i, %bb33.i
- %9 = fsub double 0.000000e+00, %4 ; <double> [#uses=1]
- store double %9, double* null, align 8
- %10 = fsub double 0.000000e+00, %5 ; <double> [#uses=1]
- store double %10, double* null, align 8
- %11 = fsub double 0.000000e+00, %6 ; <double> [#uses=1]
- store double %11, double* null, align 8
- %12 = fsub double 0.000000e+00, %7 ; <double> [#uses=1]
- store double %12, double* null, align 8
- br i1 false, label %bb7.i.i, label %bb5.i.i
-}
-
-declare double @floor(double) nounwind readnone
diff --git a/test/CodeGen/X86/pre-split11.ll b/test/CodeGen/X86/pre-split11.ll
deleted file mode 100644
index 3d549f9..0000000
--- a/test/CodeGen/X86/pre-split11.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | FileCheck %s
-
-@.str = private constant [28 x i8] c"\0A\0ADOUBLE D = %f\0A\00", align 1 ; <[28 x i8]*> [#uses=1]
-@.str1 = private constant [37 x i8] c"double to long l1 = %ld\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
-@.str2 = private constant [35 x i8] c"double to uint ui1 = %u\09\09(0x%x)\0A\00", align 8 ; <[35 x i8]*> [#uses=1]
-@.str3 = private constant [37 x i8] c"double to ulong ul1 = %lu\09\09(0x%lx)\0A\00", align 8 ; <[37 x i8]*> [#uses=1]
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind ssp {
-; CHECK: movsd %xmm0, (%rsp)
-entry:
- %0 = icmp sgt i32 %argc, 4 ; <i1> [#uses=1]
- br i1 %0, label %bb, label %bb2
-
-bb: ; preds = %entry
- %1 = getelementptr inbounds i8** %argv, i64 4 ; <i8**> [#uses=1]
- %2 = load i8** %1, align 8 ; <i8*> [#uses=1]
- %3 = tail call double @atof(i8* %2) nounwind ; <double> [#uses=1]
- br label %bb2
-
-bb2: ; preds = %bb, %entry
- %storemerge = phi double [ %3, %bb ], [ 2.000000e+00, %entry ] ; <double> [#uses=4]
- %4 = fptoui double %storemerge to i32 ; <i32> [#uses=2]
- %5 = fptoui double %storemerge to i64 ; <i64> [#uses=2]
- %6 = fptosi double %storemerge to i64 ; <i64> [#uses=2]
- %7 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([28 x i8]* @.str, i64 0, i64 0), double %storemerge) nounwind ; <i32> [#uses=0]
- %8 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str1, i64 0, i64 0), i64 %6, i64 %6) nounwind ; <i32> [#uses=0]
- %9 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([35 x i8]* @.str2, i64 0, i64 0), i32 %4, i32 %4) nounwind ; <i32> [#uses=0]
- %10 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([37 x i8]* @.str3, i64 0, i64 0), i64 %5, i64 %5) nounwind ; <i32> [#uses=0]
- ret i32 0
-}
-
-declare double @atof(i8* nocapture) nounwind readonly
-
-declare i32 @printf(i8* nocapture, ...) nounwind
diff --git a/test/CodeGen/X86/pre-split2.ll b/test/CodeGen/X86/pre-split2.ll
deleted file mode 100644
index 670737b..0000000
--- a/test/CodeGen/X86/pre-split2.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -stats -regalloc=linearscan |& \
-; RUN: grep {pre-alloc-split} | count 2
-
-define i32 @t(i32 %arg) {
-entry:
- br label %bb6
-
-.noexc6: ; preds = %bb6
- %0 = and i32 %2, -8 ; <i32> [#uses=1]
- tail call void @llvm.memmove.i32(i8* %3, i8* null, i32 %0, i32 1) nounwind
- store double %1, double* null, align 8
- br label %bb6
-
-bb6: ; preds = %.noexc6, %entry
- %1 = uitofp i32 %arg to double ; <double> [#uses=1]
- %2 = sub i32 0, 0 ; <i32> [#uses=1]
- %3 = invoke i8* @_Znwm(i32 0)
- to label %.noexc6 unwind label %lpad32 ; <i8*> [#uses=1]
-
-lpad32: ; preds = %bb6
- unreachable
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
-
-declare i8* @_Znwm(i32)
diff --git a/test/CodeGen/X86/pre-split3.ll b/test/CodeGen/X86/pre-split3.ll
deleted file mode 100644
index 0c49a91..0000000
--- a/test/CodeGen/X86/pre-split3.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
-; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
-
-define i32 @t(i32 %arg) {
-entry:
- br label %bb6
-
-.noexc6: ; preds = %bb6
- %0 = and i32 %2, -8 ; <i32> [#uses=1]
- tail call void @llvm.memmove.i32(i8* %3, i8* null, i32 %0, i32 1) nounwind
- store double %1, double* null, align 8
- br label %bb6
-
-bb6: ; preds = %.noexc6, %entry
- %1 = uitofp i32 %arg to double ; <double> [#uses=1]
- %2 = sub i32 0, 0 ; <i32> [#uses=1]
- %3 = invoke i8* @_Znwm(i32 0)
- to label %.noexc6 unwind label %lpad32 ; <i8*> [#uses=1]
-
-lpad32: ; preds = %bb6
- unreachable
-}
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
-
-declare i8* @_Znwm(i32)
diff --git a/test/CodeGen/X86/pre-split4.ll b/test/CodeGen/X86/pre-split4.ll
deleted file mode 100644
index 37d1ac6..0000000
--- a/test/CodeGen/X86/pre-split4.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
-; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 2
-
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
- br label %bb
-
-bb: ; preds = %bb, %entry
- %k.0.reg2mem.0 = phi double [ 1.000000e+00, %entry ], [ %6, %bb ] ; <double> [#uses=2]
- %Flint.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %5, %bb ] ; <double> [#uses=1]
- %twoThrd.0.reg2mem.0 = phi double [ 0.000000e+00, %entry ], [ %1, %bb ] ; <double> [#uses=1]
- %0 = tail call double @llvm.pow.f64(double 0x3FE5555555555555, double 0.000000e+00) ; <double> [#uses=1]
- %1 = fadd double %0, %twoThrd.0.reg2mem.0 ; <double> [#uses=1]
- %2 = tail call double @sin(double %k.0.reg2mem.0) nounwind readonly ; <double> [#uses=1]
- %3 = fmul double 0.000000e+00, %2 ; <double> [#uses=1]
- %4 = fdiv double 1.000000e+00, %3 ; <double> [#uses=1]
- store double %Flint.0.reg2mem.0, double* null
- store double %twoThrd.0.reg2mem.0, double* null
- %5 = fadd double %4, %Flint.0.reg2mem.0 ; <double> [#uses=1]
- %6 = fadd double %k.0.reg2mem.0, 1.000000e+00 ; <double> [#uses=1]
- br label %bb
-}
-
-declare double @llvm.pow.f64(double, double) nounwind readonly
-
-declare double @sin(double) nounwind readonly
diff --git a/test/CodeGen/X86/pre-split5.ll b/test/CodeGen/X86/pre-split5.ll
deleted file mode 100644
index 9f41f24..0000000
--- a/test/CodeGen/X86/pre-split5.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
-
-target triple = "i386-apple-darwin9.5"
- %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
- %struct.__sFILEX = type opaque
- %struct.__sbuf = type { i8*, i32 }
-@"\01LC1" = external constant [48 x i8] ; <[48 x i8]*> [#uses=1]
-
-define i32 @main() nounwind {
-entry:
- br label %bb5.us
-
-bb5.us: ; preds = %bb8.split, %bb5.us, %entry
- %i.0.reg2mem.0.ph = phi i32 [ 0, %entry ], [ %indvar.next53, %bb8.split ], [ %i.0.reg2mem.0.ph, %bb5.us ] ; <i32> [#uses=2]
- %j.0.reg2mem.0.us = phi i32 [ %indvar.next47, %bb5.us ], [ 0, %bb8.split ], [ 0, %entry ] ; <i32> [#uses=1]
- %indvar.next47 = add i32 %j.0.reg2mem.0.us, 1 ; <i32> [#uses=2]
- %exitcond48 = icmp eq i32 %indvar.next47, 256 ; <i1> [#uses=1]
- br i1 %exitcond48, label %bb8.split, label %bb5.us
-
-bb8.split: ; preds = %bb5.us
- %indvar.next53 = add i32 %i.0.reg2mem.0.ph, 1 ; <i32> [#uses=2]
- %exitcond54 = icmp eq i32 %indvar.next53, 256 ; <i1> [#uses=1]
- br i1 %exitcond54, label %bb11, label %bb5.us
-
-bb11: ; preds = %bb11, %bb8.split
- %i.1.reg2mem.0 = phi i32 [ %indvar.next44, %bb11 ], [ 0, %bb8.split ] ; <i32> [#uses=1]
- %indvar.next44 = add i32 %i.1.reg2mem.0, 1 ; <i32> [#uses=2]
- %exitcond45 = icmp eq i32 %indvar.next44, 63 ; <i1> [#uses=1]
- br i1 %exitcond45, label %bb14, label %bb11
-
-bb14: ; preds = %bb14, %bb11
- %indvar = phi i32 [ %indvar.next40, %bb14 ], [ 0, %bb11 ] ; <i32> [#uses=1]
- %indvar.next40 = add i32 %indvar, 1 ; <i32> [#uses=2]
- %exitcond41 = icmp eq i32 %indvar.next40, 32768 ; <i1> [#uses=1]
- br i1 %exitcond41, label %bb28, label %bb14
-
-bb28: ; preds = %bb14
- %0 = fdiv double 2.550000e+02, 0.000000e+00 ; <double> [#uses=1]
- br label %bb30
-
-bb30: ; preds = %bb36, %bb28
- %m.1.reg2mem.0 = phi i32 [ %m.0, %bb36 ], [ 0, %bb28 ] ; <i32> [#uses=1]
- %1 = fmul double 0.000000e+00, %0 ; <double> [#uses=1]
- %2 = fptosi double %1 to i32 ; <i32> [#uses=1]
- br i1 false, label %bb36, label %bb35
-
-bb35: ; preds = %bb30
- %3 = tail call i32 (%struct.FILE*, i8*, ...)* @fprintf(%struct.FILE* null, i8* getelementptr ([48 x i8]* @"\01LC1", i32 0, i32 0), i32 0, i32 0, i32 0, i32 %2) nounwind ; <i32> [#uses=0]
- br label %bb36
-
-bb36: ; preds = %bb35, %bb30
- %m.0 = phi i32 [ 0, %bb35 ], [ %m.1.reg2mem.0, %bb30 ] ; <i32> [#uses=1]
- br label %bb30
-}
-
-declare i32 @fprintf(%struct.FILE*, i8*, ...) nounwind
diff --git a/test/CodeGen/X86/pre-split6.ll b/test/CodeGen/X86/pre-split6.ll
deleted file mode 100644
index d8f274d..0000000
--- a/test/CodeGen/X86/pre-split6.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 -pre-alloc-split -regalloc=linearscan | grep {divsd 24} | count 1
-
-@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
-
-declare double @sin(double) nounwind readonly
-
-declare double @asin(double) nounwind readonly
-
-define fastcc void @trace_line(i32 %line) nounwind {
-entry:
- %.b3 = load i1* @current_surfaces.b ; <i1> [#uses=1]
- br i1 %.b3, label %bb.nph, label %return
-
-bb.nph: ; preds = %entry
- %0 = load double* null, align 8 ; <double> [#uses=1]
- %1 = load double* null, align 8 ; <double> [#uses=2]
- %2 = fcmp une double %0, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %2, label %bb9.i, label %bb13.i
-
-bb9.i: ; preds = %bb.nph
- %3 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
- %4 = fdiv double 1.000000e+00, %1 ; <double> [#uses=1]
- %5 = fmul double %4, 0.000000e+00 ; <double> [#uses=1]
- %6 = tail call double @asin(double %5) nounwind readonly ; <double> [#uses=0]
- unreachable
-
-bb13.i: ; preds = %bb.nph
- %7 = fdiv double 1.000000e+00, %1 ; <double> [#uses=1]
- %8 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
- %9 = fmul double %7, %8 ; <double> [#uses=1]
- %10 = tail call double @asin(double %9) nounwind readonly ; <double> [#uses=0]
- unreachable
-
-return: ; preds = %entry
- ret void
-}
diff --git a/test/CodeGen/X86/pre-split7.ll b/test/CodeGen/X86/pre-split7.ll
deleted file mode 100644
index 8c93faa..0000000
--- a/test/CodeGen/X86/pre-split7.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan
-
-@object_distance = external global double, align 8 ; <double*> [#uses=1]
-@axis_slope_angle = external global double, align 8 ; <double*> [#uses=1]
-@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
-
-declare double @sin(double) nounwind readonly
-
-declare double @asin(double) nounwind readonly
-
-declare double @tan(double) nounwind readonly
-
-define fastcc void @trace_line(i32 %line) nounwind {
-entry:
- %.b3 = load i1* @current_surfaces.b ; <i1> [#uses=1]
- br i1 %.b3, label %bb, label %return
-
-bb: ; preds = %bb, %entry
- %0 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
- %1 = fadd double 0.000000e+00, %0 ; <double> [#uses=2]
- %2 = tail call double @asin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
- %3 = fsub double %1, %2 ; <double> [#uses=2]
- store double %3, double* @axis_slope_angle, align 8
- %4 = fdiv double %1, 2.000000e+00 ; <double> [#uses=1]
- %5 = tail call double @sin(double %4) nounwind readonly ; <double> [#uses=1]
- %6 = fmul double 0.000000e+00, %5 ; <double> [#uses=1]
- %7 = tail call double @tan(double %3) nounwind readonly ; <double> [#uses=0]
- %8 = fadd double 0.000000e+00, %6 ; <double> [#uses=1]
- store double %8, double* @object_distance, align 8
- br label %bb
-
-return: ; preds = %entry
- ret void
-}
diff --git a/test/CodeGen/X86/pre-split8.ll b/test/CodeGen/X86/pre-split8.ll
deleted file mode 100644
index 7e6ad6e1..0000000
--- a/test/CodeGen/X86/pre-split8.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
-; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
-
-@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
-
-declare double @asin(double) nounwind readonly
-
-declare double @tan(double) nounwind readonly
-
-define fastcc void @trace_line(i32 %line) nounwind {
-entry:
- %.b3 = load i1* @current_surfaces.b ; <i1> [#uses=1]
- br i1 %.b3, label %bb, label %return
-
-bb: ; preds = %bb9.i, %entry
- %.rle4 = phi double [ %7, %bb9.i ], [ 0.000000e+00, %entry ] ; <double> [#uses=1]
- %0 = load double* null, align 8 ; <double> [#uses=3]
- %1 = fcmp une double %0, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %1, label %bb9.i, label %bb13.i
-
-bb9.i: ; preds = %bb
- %2 = fsub double %.rle4, %0 ; <double> [#uses=0]
- %3 = tail call double @asin(double %.rle4) nounwind readonly ; <double> [#uses=0]
- %4 = fmul double 0.000000e+00, %0 ; <double> [#uses=1]
- %5 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
- %6 = fmul double %4, 0.000000e+00 ; <double> [#uses=1]
- %7 = fadd double %6, 0.000000e+00 ; <double> [#uses=1]
- br i1 false, label %return, label %bb
-
-bb13.i: ; preds = %bb
- unreachable
-
-return: ; preds = %bb9.i, %entry
- ret void
-}
diff --git a/test/CodeGen/X86/pre-split9.ll b/test/CodeGen/X86/pre-split9.ll
deleted file mode 100644
index 951e6fb..0000000
--- a/test/CodeGen/X86/pre-split9.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 -pre-alloc-split -regalloc=linearscan -stats |& \
-; RUN: grep {pre-alloc-split} | grep {Number of intervals split} | grep 1
-
-@current_surfaces.b = external global i1 ; <i1*> [#uses=1]
-
-declare double @sin(double) nounwind readonly
-
-declare double @asin(double) nounwind readonly
-
-declare double @tan(double) nounwind readonly
-
-define fastcc void @trace_line(i32 %line) nounwind {
-entry:
- %.b3 = load i1* @current_surfaces.b ; <i1> [#uses=1]
- br i1 %.b3, label %bb, label %return
-
-bb: ; preds = %bb9.i, %entry
- %.rle4 = phi double [ %8, %bb9.i ], [ 0.000000e+00, %entry ] ; <double> [#uses=1]
- %0 = load double* null, align 8 ; <double> [#uses=3]
- %1 = fcmp une double %0, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %1, label %bb9.i, label %bb13.i
-
-bb9.i: ; preds = %bb
- %2 = fsub double %.rle4, %0 ; <double> [#uses=0]
- %3 = tail call double @asin(double %.rle4) nounwind readonly ; <double> [#uses=0]
- %4 = tail call double @sin(double 0.000000e+00) nounwind readonly ; <double> [#uses=1]
- %5 = fmul double %4, %0 ; <double> [#uses=1]
- %6 = tail call double @tan(double 0.000000e+00) nounwind readonly ; <double> [#uses=0]
- %7 = fmul double %5, 0.000000e+00 ; <double> [#uses=1]
- %8 = fadd double %7, 0.000000e+00 ; <double> [#uses=1]
- br i1 false, label %return, label %bb
-
-bb13.i: ; preds = %bb
- unreachable
-
-return: ; preds = %bb9.i, %entry
- ret void
-}
diff --git a/test/CodeGen/X86/prefetch.ll b/test/CodeGen/X86/prefetch.ll
index 48d2673..ebe11a5 100644
--- a/test/CodeGen/X86/prefetch.ll
+++ b/test/CodeGen/X86/prefetch.ll
@@ -6,11 +6,11 @@ entry:
; CHECK: prefetcht1
; CHECK: prefetcht0
; CHECK: prefetchnta
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3 )
- tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
ret void
}
-declare void @llvm.prefetch(i8*, i32, i32) nounwind
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
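+; The new fourth operand selects the cache: i32 1 is the data cache and
+; i32 0 the instruction cache.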
diff --git a/test/CodeGen/X86/private.ll b/test/CodeGen/X86/private.ll
index f52f8c7..484afc9 100644
--- a/test/CodeGen/X86/private.ll
+++ b/test/CodeGen/X86/private.ll
@@ -5,8 +5,6 @@
; RUN: llc < %s -mtriple=x86_64-pc-linux | grep .Lbaz:
; RUN: llc < %s -mtriple=x86_64-pc-linux | grep movl.*\.Lbaz
-declare void @foo()
-
define private void @foo() {
ret void
}
diff --git a/test/CodeGen/X86/promote-i16.ll b/test/CodeGen/X86/promote-i16.ll
index 101bb29..3c91d74 100644
--- a/test/CodeGen/X86/promote-i16.ll
+++ b/test/CodeGen/X86/promote-i16.ll
@@ -3,9 +3,19 @@
define signext i16 @foo(i16 signext %x) nounwind {
entry:
; CHECK: foo:
-; CHECK: movzwl 4(%esp), %eax
+; CHECK-NOT: movzwl
+; CHECK: movswl 4(%esp), %eax
; CHECK: xorl $21998, %eax
-; CHECK: movswl %ax, %eax
%0 = xor i16 %x, 21998
ret i16 %0
}
+
+define signext i16 @bar(i16 signext %x) nounwind {
+entry:
+; CHECK: bar:
+; CHECK-NOT: movzwl
+; CHECK: movswl 4(%esp), %eax
+; CHECK: xorl $-10770, %eax
+ %0 = xor i16 %x, 54766
+ ret i16 %0
+}
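+; Note: the i16 constant 54766 is -10770 when interpreted as signed, which
+; is why the sign-extended lowering above checks for the xorl $-10770
+; immediate.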
diff --git a/test/CodeGen/X86/promote-trunc.ll b/test/CodeGen/X86/promote-trunc.ll
new file mode 100644
index 0000000..4211d82
--- /dev/null
+++ b/test/CodeGen/X86/promote-trunc.ll
@@ -0,0 +1,11 @@
+; RUN: llc -promote-elements < %s -march=x86-64
+
+define <4 x i8> @func_8_64() {
+ %F = load <4 x i64>* undef
+ %G = trunc <4 x i64> %F to <4 x i8>
+ %H = load <4 x i64>* undef
+ %Y = trunc <4 x i64> %H to <4 x i8>
+ %T = add <4 x i8> %Y, %G
+ ret <4 x i8> %T
+}
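+; No FileCheck lines here: the test only verifies that lowering the
+; <4 x i64> to <4 x i8> truncations under -promote-elements does not
+; crash llc.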
+
diff --git a/test/CodeGen/X86/reghinting.ll b/test/CodeGen/X86/reghinting.ll
new file mode 100644
index 0000000..87f65ed
--- /dev/null
+++ b/test/CodeGen/X86/reghinting.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=x86_64-apple-macosx | FileCheck %s
+; PR10221
+
+;; The registers %x and %y must both spill across the finit call.
+;; Check that they are spilled early enough that no copies are needed for the
+;; fadd and fpext.
+
+; CHECK: pr10221
+; CHECK-NOT: movaps
+; CHECK: movss
+; CHECK-NEXT: movss
+; CHECK-NEXT: addss
+; CHECK-NEXT: cvtss2sd
+; CHECK-NEXT: finit
+
+define i32 @pr10221(float %x, float %y, i8** nocapture %_retval) nounwind uwtable ssp {
+entry:
+ %add = fadd float %x, %y
+ %conv = fpext float %add to double
+ %call = tail call i32 @finit(double %conv) nounwind
+ %tobool = icmp eq i32 %call, 0
+ br i1 %tobool, label %return, label %if.end
+
+if.end: ; preds = %entry
+ tail call void @foo(float %x, float %y) nounwind
+ br label %return
+
+return: ; preds = %entry, %if.end
+ %retval.0 = phi i32 [ 0, %if.end ], [ 5, %entry ]
+ ret i32 %retval.0
+}
+
+declare i32 @finit(double)
+
+declare void @foo(float, float)
diff --git a/test/CodeGen/X86/sdiv-exact.ll b/test/CodeGen/X86/sdiv-exact.ll
new file mode 100644
index 0000000..48bb883
--- /dev/null
+++ b/test/CodeGen/X86/sdiv-exact.ll
@@ -0,0 +1,18 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+define i32 @test1(i32 %x) {
+ %div = sdiv exact i32 %x, 25
+ ret i32 %div
+; CHECK: test1:
+; CHECK: imull $-1030792151, 4(%esp)
+; CHECK-NEXT: ret
+}
+
+define i32 @test2(i32 %x) {
+ %div = sdiv exact i32 %x, 24
+ ret i32 %div
+; CHECK: test2:
+; CHECK: sarl $3
+; CHECK-NEXT: imull $-1431655765
+; CHECK-NEXT: ret
+}
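+; Why these constants work: an exact division can be replaced by a multiply
+; with the divisor's multiplicative inverse modulo 2^32. For 25,
+; -1030792151 = 0xC28F5C29 and 25 * 0xC28F5C29 = 0x1300000001 == 1 (mod 2^32).
+; For 24 = 8 * 3, the power of two becomes the sarl $3 and the remaining
+; factor is inverted the same way: 3 * 0xAAAAAAAB (-1431655765) = 0x200000001.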
diff --git a/test/CodeGen/X86/sext-trunc.ll b/test/CodeGen/X86/sext-trunc.ll
index 2eaf425..22b3791 100644
--- a/test/CodeGen/X86/sext-trunc.ll
+++ b/test/CodeGen/X86/sext-trunc.ll
@@ -3,7 +3,7 @@
; RUN: not grep movz %t
; RUN: not grep and %t
-define i8 @foo(i16 signext %x) signext nounwind {
+define signext i8 @foo(i16 signext %x) nounwind {
%retval56 = trunc i16 %x to i8
ret i8 %retval56
}
diff --git a/test/CodeGen/X86/shift-codegen.ll b/test/CodeGen/X86/shift-codegen.ll
index 4cba183..7d961e8 100644
--- a/test/CodeGen/X86/shift-codegen.ll
+++ b/test/CodeGen/X86/shift-codegen.ll
@@ -1,5 +1,4 @@
-; RUN: llc < %s -relocation-model=static -march=x86 | \
-; RUN: grep {shll \$3} | count 2
+; RUN: llc < %s -relocation-model=static -march=x86 | FileCheck %s
; This should produce two shll instructions, not leas.
@@ -9,19 +8,31 @@ target triple = "i686-apple-darwin8"
define void @fn1() {
-entry:
- %tmp = load i32* @Y ; <i32> [#uses=1]
- %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
- %tmp2 = load i32* @X ; <i32> [#uses=1]
- %tmp3 = or i32 %tmp1, %tmp2 ; <i32> [#uses=1]
- store i32 %tmp3, i32* @X
- ret void
+; CHECK: fn1:
+; CHECK-NOT: ret
+; CHECK-NOT: lea
+; CHECK: shll $3
+; CHECK-NOT: lea
+; CHECK: ret
+
+ %tmp = load i32* @Y ; <i32> [#uses=1]
+ %tmp1 = shl i32 %tmp, 3 ; <i32> [#uses=1]
+ %tmp2 = load i32* @X ; <i32> [#uses=1]
+ %tmp3 = or i32 %tmp1, %tmp2 ; <i32> [#uses=1]
+ store i32 %tmp3, i32* @X
+ ret void
}
define i32 @fn2(i32 %X, i32 %Y) {
-entry:
- %tmp2 = shl i32 %Y, 3 ; <i32> [#uses=1]
- %tmp4 = or i32 %tmp2, %X ; <i32> [#uses=1]
- ret i32 %tmp4
+; CHECK: fn2:
+; CHECK-NOT: ret
+; CHECK-NOT: lea
+; CHECK: shll $3
+; CHECK-NOT: lea
+; CHECK: ret
+
+ %tmp2 = shl i32 %Y, 3 ; <i32> [#uses=1]
+ %tmp4 = or i32 %tmp2, %X ; <i32> [#uses=1]
+ ret i32 %tmp4
}
diff --git a/test/CodeGen/X86/shl_undef.ll b/test/CodeGen/X86/shl_undef.ll
new file mode 100644
index 0000000..54b74cc
--- /dev/null
+++ b/test/CodeGen/X86/shl_undef.ll
@@ -0,0 +1,53 @@
+; RUN: llc < %s -O1 -mtriple=i386-apple-darwin | FileCheck %s
+;
+; Interesting test case where %tmp1220 = xor i32 %tmp862, %tmp592 and
+; %tmp1676 = xor i32 %tmp1634, %tmp1530 have zero demanded bits after
+; the DAGCombiner optimization pass. These are changed to undef and in turn
+; the successor shl(s) become shl undef, 1. This pattern then matches
+; shl x, 1 -> add x, x. add undef, undef doesn't guarantee that the low
+; order bit is zero, so the transformation is incorrect.
+;
+; See rdar://9453156 and rdar://9487392.
+;
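+; For example: each use of undef may take a different value, so
+; "add undef, undef" can evaluate to an odd number (say 1 + 2 = 3), while
+; "shl x, 1" always produces a value with a zero low bit.
+;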
+
+; CHECK-NOT: shl
+define i32 @foo(i8* %a0, i32* %a2) nounwind {
+entry:
+ %tmp0 = alloca i8
+ %tmp1 = alloca i32
+ store i8 1, i8* %tmp0
+ %tmp921.i7845 = load i8* %a0, align 1
+ %tmp309 = xor i8 %tmp921.i7845, 104
+ %tmp592 = zext i8 %tmp309 to i32
+ %tmp862 = xor i32 1293461297, %tmp592
+ %tmp1220 = xor i32 %tmp862, %tmp592
+ %tmp1506 = shl i32 %tmp1220, 1
+ %tmp1530 = sub i32 %tmp592, %tmp1506
+ %tmp1557 = sub i32 %tmp1530, 542767629
+ %tmp1607 = and i32 %tmp1557, 1
+ store i32 %tmp1607, i32* %tmp1
+ %tmp1634 = and i32 %tmp1607, 2080309246
+ %tmp1676 = xor i32 %tmp1634, %tmp1530
+ %tmp1618 = shl i32 %tmp1676, 1
+ %tmp1645 = sub i32 %tmp862, %tmp1618
+ %tmp1697 = and i32 %tmp1645, 1
+ store i32 %tmp1697, i32* %a2
+ ret i32 %tmp1607
+}
+
+; CHECK-NOT: shl
+; shl undef, 0 -> undef
+define i32 @foo2_undef() nounwind {
+entry:
+ %tmp2 = shl i32 undef, 0
+ ret i32 %tmp2
+}
+
+; CHECK-NOT: shl
+; shl undef, x -> 0
+define i32 @foo1_undef(i32* %a0) nounwind {
+entry:
+ %tmp1 = load i32* %a0, align 1
+ %tmp2 = shl i32 undef, %tmp1
+ ret i32 %tmp2
+}
diff --git a/test/CodeGen/X86/sibcall-byval.ll b/test/CodeGen/X86/sibcall-byval.ll
new file mode 100644
index 0000000..c335f30
--- /dev/null
+++ b/test/CodeGen/X86/sibcall-byval.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s -check-prefix=32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s -check-prefix=64
+
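+; The jmp (rather than call) checks verify that the calls below are emitted
+; as sibling calls even though they forward a byval struct argument.
+;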
+%struct.p = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+
+define i32 @f(%struct.p* byval align 4 %q) nounwind ssp {
+entry:
+; 32: _f:
+; 32: jmp L_g$stub
+
+; 64: _f:
+; 64: jmp _g
+ %call = tail call i32 @g(%struct.p* byval align 4 %q) nounwind
+ ret i32 %call
+}
+
+declare i32 @g(%struct.p* byval align 4)
+
+define i32 @h(%struct.p* byval align 4 %q, i32 %r) nounwind ssp {
+entry:
+; 32: _h:
+; 32: jmp L_i$stub
+
+; 64: _h:
+; 64: jmp _i
+
+ %call = tail call i32 @i(%struct.p* byval align 4 %q, i32 %r) nounwind
+ ret i32 %call
+}
+
+declare i32 @i(%struct.p* byval align 4, i32)
diff --git a/test/CodeGen/X86/sibcall.ll b/test/CodeGen/X86/sibcall.ll
index 4a98efb..a9a5420 100644
--- a/test/CodeGen/X86/sibcall.ll
+++ b/test/CodeGen/X86/sibcall.ll
@@ -312,8 +312,6 @@ entry:
ret void
}
-declare void @foo()
-
; If caller / callee calling convention mismatch then check if the return
; values are returned in the same registers.
; rdar://7874780
diff --git a/test/CodeGen/X86/sse1.ll b/test/CodeGen/X86/sse1.ll
index 73f88ae..9b2e05b 100644
--- a/test/CodeGen/X86/sse1.ll
+++ b/test/CodeGen/X86/sse1.ll
@@ -1,6 +1,6 @@
; Tests for SSE1 and below, without SSE2+.
; RUN: llc < %s -march=x86 -mcpu=pentium3 -O3 | FileCheck %s
-; RUN: llc < %s -march=x86-64 -mcpu=pentium3 -O3 | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mattr=-sse2,+sse -O3 | FileCheck %s
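+; (pentium3 has no 64-bit mode, so the x86-64 run requests SSE1 without SSE2
+; through -mattr instead of -mcpu)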
define <8 x i16> @test1(<8 x i32> %a) nounwind {
; CHECK: test1
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 8c2e58d..8b3a317 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -1,6 +1,6 @@
-; These are tests for SSE3 codegen. Yonah has SSE3 and earlier but not SSSE3+.
+; These are tests for SSE3 codegen.
-; RUN: llc < %s -march=x86-64 -mcpu=yonah -mtriple=i686-apple-darwin9 -O3 \
+; RUN: llc < %s -march=x86-64 -mcpu=nocona -mtriple=i686-apple-darwin9 -O3 \
; RUN: | FileCheck %s --check-prefix=X64
; Test for v8xi16 lowering where we extract the first element of the vector and
@@ -169,10 +169,10 @@ define internal void @t10() nounwind {
; X64: t10:
; X64: pextrw $4, [[X0:%xmm[0-9]+]], %eax
; X64: unpcklpd [[X1:%xmm[0-9]+]]
-; X64: pshuflw $8, [[X1]], [[X1]]
-; X64: pinsrw $2, %eax, [[X1]]
+; X64: pshuflw $8, [[X1]], [[X2:%xmm[0-9]+]]
+; X64: pinsrw $2, %eax, [[X2]]
; X64: pextrw $6, [[X0]], %eax
-; X64: pinsrw $3, %eax, [[X1]]
+; X64: pinsrw $3, %eax, [[X2]]
}
diff --git a/test/CodeGen/X86/switch-bt.ll b/test/CodeGen/X86/switch-bt.ll
index 9f491d4..8e39342 100644
--- a/test/CodeGen/X86/switch-bt.ll
+++ b/test/CodeGen/X86/switch-bt.ll
@@ -79,3 +79,23 @@ if.end: ; preds = %entry
}
declare void @bar()
+
+define void @test3(i32 %x) nounwind {
+; CHECK: test3:
+; CHECK: cmpl $5
+; CHECK: ja
+; CHECK: cmpl $4
+; CHECK: jne
+ switch i32 %x, label %if.end [
+ i32 0, label %if.then
+ i32 1, label %if.then
+ i32 2, label %if.then
+ i32 3, label %if.then
+ i32 5, label %if.then
+ ]
+if.then:
+ tail call void @bar() nounwind
+ ret void
+if.end:
+ ret void
+}
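; NOTE (illustrative aside, not part of the patch): with cases 0-5 where all
; but 4 share a destination, no jump table or bit test is needed. The CHECK
; lines pin the expected lowering: "cmpl $5; ja" sends x > 5 to the default,
; then a single "cmpl $4; jne" separates the lone default case from the five
; cases that branch to %if.then.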
diff --git a/test/CodeGen/X86/tail-dup-addr.ll b/test/CodeGen/X86/tail-dup-addr.ll
new file mode 100644
index 0000000..c5a105c
--- /dev/null
+++ b/test/CodeGen/X86/tail-dup-addr.ll
@@ -0,0 +1,28 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+
+; Test that we don't drop a block that has its address taken.
+
+; CHECK: Ltmp1: ## Block address taken
+; CHECK: Ltmp2: ## Block address taken
+
+@a = common global i32 0, align 4
+@p = common global i8* null, align 8
+
+define void @foo() noreturn nounwind uwtable ssp {
+entry:
+ %tmp = load i32* @a, align 4
+ %foo = icmp eq i32 0, %tmp
+ br i1 %foo, label %sw.bb, label %sw.default
+
+sw.bb: ; preds = %entry
+ store i8* blockaddress(@foo, %sw.bb1), i8** @p, align 8
+ br label %sw.bb1
+
+sw.bb1: ; preds = %sw.default, %sw.bb, %entry
+ store i8* blockaddress(@foo, %sw.default), i8** @p, align 8
+ br label %sw.default
+
+sw.default: ; preds = %sw.bb1, %entry
+ store i8* blockaddress(@foo, %sw.bb1), i8** @p, align 8
+ br label %sw.bb1
+}
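; NOTE (illustrative aside, not part of the patch): a block whose address
; escapes via blockaddress can be reached through data, so tail duplication
; must not delete it or its label. Any store of the blockaddress pins it:
;
;   store i8* blockaddress(@foo, %sw.bb1), i8** @p   ; keeps the Ltmp label alive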
diff --git a/test/CodeGen/X86/tail-threshold.ll b/test/CodeGen/X86/tail-threshold.ll
new file mode 100644
index 0000000..f2296a0
--- /dev/null
+++ b/test/CodeGen/X86/tail-threshold.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mtriple=x86_64-pc-linux-gnu -tail-merge-threshold 2 < %s | FileCheck %s
+
+; Test that we still do some merging if a block has more than
+; tail-merge-threshold predecessors.
+
+; CHECK: callq bar
+; CHECK: callq bar
+; CHECK: callq bar
+; CHECK-NOT: callq
+
+declare void @bar()
+
+define void @foo(i32 %xxx) {
+entry:
+ switch i32 %xxx, label %bb4 [
+ i32 0, label %bb0
+ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ ]
+
+bb0:
+ call void @bar()
+ br label %bb5
+
+bb1:
+ call void @bar()
+ br label %bb5
+
+bb2:
+ call void @bar()
+ br label %bb5
+
+bb3:
+ call void @bar()
+ br label %bb5
+
+bb4:
+ call void @bar()
+ br label %bb5
+
+bb5:
+ ret void
+}
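; NOTE (illustrative aside, not part of the patch): -tail-merge-threshold
; bounds how many predecessors tail merging considers at once. With it forced
; down to 2 here, the five identical "call @bar; br %bb5" blocks are still
; expected to shrink, but only to the three "callq bar" copies counted by the
; CHECK lines, not all the way down to a single shared tail.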
diff --git a/test/CodeGen/X86/tailcallbyval.ll b/test/CodeGen/X86/tailcallbyval.ll
index 7002560..03d6f94 100644
--- a/test/CodeGen/X86/tailcallbyval.ll
+++ b/test/CodeGen/X86/tailcallbyval.ll
@@ -13,6 +13,6 @@ entry:
define fastcc i32 @tailcaller(%struct.s* byval %a) nounwind {
entry:
- %tmp4 = tail call fastcc i32 @tailcallee(%struct.s* %a byval)
+ %tmp4 = tail call fastcc i32 @tailcallee(%struct.s* byval %a )
ret i32 %tmp4
}
diff --git a/test/CodeGen/X86/tailcallbyval64.ll b/test/CodeGen/X86/tailcallbyval64.ll
index 1b1efe7..7ecf379 100644
--- a/test/CodeGen/X86/tailcallbyval64.ll
+++ b/test/CodeGen/X86/tailcallbyval64.ll
@@ -37,6 +37,6 @@ define fastcc i64 @tailcaller(i64 %b, %struct.s* byval %a) {
entry:
%tmp2 = getelementptr %struct.s* %a, i32 0, i32 1
%tmp3 = load i64* %tmp2, align 8
- %tmp4 = tail call fastcc i64 @tailcallee(%struct.s* %a byval, i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
+ %tmp4 = tail call fastcc i64 @tailcallee(%struct.s* byval %a , i64 %tmp3, i64 %b, i64 7, i64 13, i64 17)
ret i64 %tmp4
}
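; NOTE (illustrative aside, not part of the patch): the two tailcallbyval
; fixes are purely syntactic. Parameter attributes on call arguments are now
; written before the value, matching how declarations spell them:
;
;   %t = tail call fastcc i32 @tailcallee(%struct.s* %a byval)   ; old form
;   %t = tail call fastcc i32 @tailcallee(%struct.s* byval %a)   ; new form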
diff --git a/test/CodeGen/X86/testl-commute.ll b/test/CodeGen/X86/testl-commute.ll
index 3d5f672..0e6f636 100644
--- a/test/CodeGen/X86/testl-commute.ll
+++ b/test/CodeGen/X86/testl-commute.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | grep {testl.*\(%r.i\), %} | count 3
+; RUN: llc < %s | FileCheck %s
; rdar://5671654
; The loads should fold into the testl instructions, no matter how
; the inputs are commuted.
@@ -7,6 +7,11 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-darwin7"
define i32 @test(i32* %P, i32* %G) nounwind {
+; CHECK: test:
+; CHECK-NOT: ret
+; CHECK: testl (%{{.*}}), %{{.*}}
+; CHECK: ret
+
entry:
%0 = load i32* %P, align 4 ; <i32> [#uses=3]
%1 = load i32* %G, align 4 ; <i32> [#uses=1]
@@ -23,6 +28,11 @@ bb1: ; preds = %entry
}
define i32 @test2(i32* %P, i32* %G) nounwind {
+; CHECK: test2:
+; CHECK-NOT: ret
+; CHECK: testl (%{{.*}}), %{{.*}}
+; CHECK: ret
+
entry:
%0 = load i32* %P, align 4 ; <i32> [#uses=3]
%1 = load i32* %G, align 4 ; <i32> [#uses=1]
@@ -37,7 +47,13 @@ bb: ; preds = %entry
bb1: ; preds = %entry
ret i32 %0
}
+
define i32 @test3(i32* %P, i32* %G) nounwind {
+; CHECK: test3:
+; CHECK-NOT: ret
+; CHECK: testl (%{{.*}}), %{{.*}}
+; CHECK: ret
+
entry:
%0 = load i32* %P, align 4 ; <i32> [#uses=3]
%1 = load i32* %G, align 4 ; <i32> [#uses=1]
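; NOTE (illustrative aside, not part of the patch): part of the patch-wide
; grep-to-FileCheck migration. Instead of piping llc output through
; "grep {testl.*(%r.i), %} | count 3", each function now carries an ordered
; CHECK block; the leading "CHECK-NOT: ret" pins the testl match inside that
; function's body, before its return.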
diff --git a/test/CodeGen/X86/tlv-1.ll b/test/CodeGen/X86/tlv-1.ll
index 42940f1..5773260 100644
--- a/test/CodeGen/X86/tlv-1.ll
+++ b/test/CodeGen/X86/tlv-1.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple x86_64-apple-darwin -mcpu=core2 | FileCheck %s
%struct.A = type { [48 x i8], i32, i32, i32 }
diff --git a/test/CodeGen/X86/trunc-to-bool.ll b/test/CodeGen/X86/trunc-to-bool.ll
index 6062084..92b6859 100644
--- a/test/CodeGen/X86/trunc-to-bool.ll
+++ b/test/CodeGen/X86/trunc-to-bool.ll
@@ -3,7 +3,7 @@
; value and as the operand of a branch.
; RUN: llc < %s -march=x86 | FileCheck %s
-define i1 @test1(i32 %X) zeroext nounwind {
+define zeroext i1 @test1(i32 %X) nounwind {
%Y = trunc i32 %X to i1
ret i1 %Y
}
diff --git a/test/CodeGen/X86/twoaddr-remat.ll b/test/CodeGen/X86/twoaddr-remat.ll
deleted file mode 100644
index 4940c78..0000000
--- a/test/CodeGen/X86/twoaddr-remat.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; RUN: llc < %s -march=x86 | grep 59796 | count 3
-
- %Args = type %Value*
- %Exec = type opaque*
- %Identifier = type opaque*
- %JSFunction = type %Value (%Exec, %Scope, %Value, %Args)
- %PropertyNameArray = type opaque*
- %Scope = type opaque*
- %Value = type opaque*
-
-declare i1 @X1(%Exec) readonly
-
-declare %Value @X2(%Exec)
-
-declare i32 @X3(%Exec, %Value)
-
-declare %Value @X4(i32) readnone
-
-define internal %Value @fast3bitlookup(%Exec %exec, %Scope %scope, %Value %this, %Args %args) nounwind {
-prologue:
- %eh_check = tail call i1 @X1( %Exec %exec ) readonly ; <i1> [#uses=1]
- br i1 %eh_check, label %exception, label %no_exception
-
-exception: ; preds = %no_exception, %prologue
- %rethrow_result = tail call %Value @X2( %Exec %exec ) ; <%Value> [#uses=1]
- ret %Value %rethrow_result
-
-no_exception: ; preds = %prologue
- %args_intptr = bitcast %Args %args to i32* ; <i32*> [#uses=1]
- %argc_val = load i32* %args_intptr ; <i32> [#uses=1]
- %cmpParamArgc = icmp sgt i32 %argc_val, 0 ; <i1> [#uses=1]
- %arg_ptr = getelementptr %Args %args, i32 1 ; <%Args> [#uses=1]
- %arg_val = load %Args %arg_ptr ; <%Value> [#uses=1]
- %ext_arg_val = select i1 %cmpParamArgc, %Value %arg_val, %Value inttoptr (i32 5 to %Value) ; <%Value> [#uses=1]
- %toInt325 = tail call i32 @X3( %Exec %exec, %Value %ext_arg_val ) ; <i32> [#uses=3]
- %eh_check6 = tail call i1 @X1( %Exec %exec ) readonly ; <i1> [#uses=1]
- br i1 %eh_check6, label %exception, label %no_exception7
-
-no_exception7: ; preds = %no_exception
- %shl_tmp_result = shl i32 %toInt325, 1 ; <i32> [#uses=1]
- %rhs_masked13 = and i32 %shl_tmp_result, 14 ; <i32> [#uses=1]
- %ashr_tmp_result = lshr i32 59796, %rhs_masked13 ; <i32> [#uses=1]
- %and_tmp_result15 = and i32 %ashr_tmp_result, 3 ; <i32> [#uses=1]
- %ashr_tmp_result3283 = lshr i32 %toInt325, 2 ; <i32> [#uses=1]
- %rhs_masked38 = and i32 %ashr_tmp_result3283, 14 ; <i32> [#uses=1]
- %ashr_tmp_result39 = lshr i32 59796, %rhs_masked38 ; <i32> [#uses=1]
- %and_tmp_result41 = and i32 %ashr_tmp_result39, 3 ; <i32> [#uses=1]
- %addconv = add i32 %and_tmp_result15, %and_tmp_result41 ; <i32> [#uses=1]
- %ashr_tmp_result6181 = lshr i32 %toInt325, 5 ; <i32> [#uses=1]
- %rhs_masked67 = and i32 %ashr_tmp_result6181, 6 ; <i32> [#uses=1]
- %ashr_tmp_result68 = lshr i32 59796, %rhs_masked67 ; <i32> [#uses=1]
- %and_tmp_result70 = and i32 %ashr_tmp_result68, 3 ; <i32> [#uses=1]
- %addconv82 = add i32 %addconv, %and_tmp_result70 ; <i32> [#uses=3]
- %rangetmp = add i32 %addconv82, 536870912 ; <i32> [#uses=1]
- %rangecmp = icmp ult i32 %rangetmp, 1073741824 ; <i1> [#uses=1]
- br i1 %rangecmp, label %NumberLiteralIntFast, label %NumberLiteralIntSlow
-
-NumberLiteralIntFast: ; preds = %no_exception7
- %imm_shift = shl i32 %addconv82, 2 ; <i32> [#uses=1]
- %imm_or = or i32 %imm_shift, 3 ; <i32> [#uses=1]
- %imm_val = inttoptr i32 %imm_or to %Value ; <%Value> [#uses=1]
- ret %Value %imm_val
-
-NumberLiteralIntSlow: ; preds = %no_exception7
- %toVal = call %Value @X4( i32 %addconv82 ) ; <%Value> [#uses=1]
- ret %Value %toVal
-}
diff --git a/test/CodeGen/X86/umul-with-overflow.ll b/test/CodeGen/X86/umul-with-overflow.ll
index 84fcbc7..e5858de 100644
--- a/test/CodeGen/X86/umul-with-overflow.ll
+++ b/test/CodeGen/X86/umul-with-overflow.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -march=x86 | FileCheck %s
declare {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
-define i1 @a(i32 %x) zeroext nounwind {
+define zeroext i1 @a(i32 %x) nounwind {
%res = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 3)
%obil = extractvalue {i32, i1} %res, 1
ret i1 %obil
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index 9f70489..d8fffbe 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -10,9 +10,17 @@ entry:
%String2Loc = alloca [31 x i8], align 1
br label %bb
-bb:
+bb: ; preds = %bb, %entry
%String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0
- call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1, i1 false)
+ br label %bb
+
+return: ; No predecessors!
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
; I386: calll {{_?}}memcpy
; CORE2: movabsq
@@ -20,13 +28,6 @@ bb:
; CORE2: movabsq
; COREI7: movups _.str3
- br label %bb
-
-return:
- ret void
-}
-
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
; CORE2: .section
; CORE2: .align 3
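; NOTE (illustrative aside, not part of the patch): the memcpy rewrite in
; this test reflects the move from fixed-signature memory intrinsics to the
; overloaded, address-space-qualified forms; the suffix spells the pointer
; types and the trailing i1 marks a volatile transfer. The llvm.memset
; update further down follows the same pattern.
;
;   declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture,
;                                           i64, i32, i1) nounwind
;   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %d, i8* %s, i64 %n, i32 1, i1 false)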
diff --git a/test/CodeGen/X86/undef-label.ll b/test/CodeGen/X86/undef-label.ll
new file mode 100644
index 0000000..1afd935
--- /dev/null
+++ b/test/CodeGen/X86/undef-label.ll
@@ -0,0 +1,19 @@
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+
+; This is a case where we would incorrectly conclude that LBB0_1 could only
+; be reached via fall through and would therefore omit the label.
+
+; CHECK: jne .LBB0_1
+; CHECK-NEXT: jnp .LBB0_3
+; CHECK-NEXT: .LBB0_1:
+
+define void @xyz() {
+entry:
+ br i1 fcmp oeq (double fsub (double undef, double undef), double 0.000000e+00), label %bar, label %foo
+
+foo:
+ br i1 fcmp ogt (double fdiv (double fsub (double fmul (double undef, double undef), double fsub (double undef, double undef)), double fmul (double undef, double undef)), double 1.0), label %foo, label %bar
+
+bar:
+ ret void
+}
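; NOTE (illustrative aside, not part of the patch): x86 compares doubles with
; ucomisd, which reports "unordered" (a NaN operand) through PF, so an
; "fcmp oeq" branch lowers to the jne/jnp pair the CHECK lines expect.
; .LBB0_1 is both the jne's target and the jnp's fall-through successor,
; which is why its label must still be printed.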
diff --git a/test/CodeGen/X86/variable-sized-darwin-bzero.ll b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
index 4817db2..1e86d75 100644
--- a/test/CodeGen/X86/variable-sized-darwin-bzero.ll
+++ b/test/CodeGen/X86/variable-sized-darwin-bzero.ll
@@ -1,8 +1,8 @@
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin10 | grep __bzero
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-
define void @foo(i8* %p, i64 %n) {
- call void @llvm.memset.i64(i8* %p, i8 0, i64 %n, i32 4)
+ call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 %n, i32 4, i1 false)
ret void
}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/CodeGen/X86/vec_insert-2.ll b/test/CodeGen/X86/vec_insert-2.ll
index b08044b..dee91fd 100644
--- a/test/CodeGen/X86/vec_insert-2.ll
+++ b/test/CodeGen/X86/vec_insert-2.ll
@@ -1,25 +1,42 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep {\$36,} | count 2
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep shufps | count 2
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep pinsrw | count 1
-; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | grep movhpd | count 1
-; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | grep unpcklpd | count 1
+; RUN: llc < %s -march=x86 -mattr=+sse2,-sse41 | FileCheck --check-prefix=X32 %s
+; RUN: llc < %s -march=x86-64 -mattr=+sse2,-sse41 | FileCheck --check-prefix=X64 %s
define <4 x float> @t1(float %s, <4 x float> %tmp) nounwind {
- %tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
- ret <4 x float> %tmp1
+; X32: t1:
+; X32: shufps $36
+; X32: ret
+
+ %tmp1 = insertelement <4 x float> %tmp, float %s, i32 3
+ ret <4 x float> %tmp1
}
define <4 x i32> @t2(i32 %s, <4 x i32> %tmp) nounwind {
- %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 3
- ret <4 x i32> %tmp1
+; X32: t2:
+; X32: shufps $36
+; X32: ret
+
+ %tmp1 = insertelement <4 x i32> %tmp, i32 %s, i32 3
+ ret <4 x i32> %tmp1
}
define <2 x double> @t3(double %s, <2 x double> %tmp) nounwind {
- %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
- ret <2 x double> %tmp1
+; X32: t3:
+; X32: movhpd
+; X32: ret
+
+; X64: t3:
+; X64: unpcklpd
+; X64: ret
+
+ %tmp1 = insertelement <2 x double> %tmp, double %s, i32 1
+ ret <2 x double> %tmp1
}
define <8 x i16> @t4(i16 %s, <8 x i16> %tmp) nounwind {
- %tmp1 = insertelement <8 x i16> %tmp, i16 %s, i32 5
- ret <8 x i16> %tmp1
+; X32: t4:
+; X32: pinsrw
+; X32: ret
+
+ %tmp1 = insertelement <8 x i16> %tmp, i16 %s, i32 5
+ ret <8 x i16> %tmp1
}
diff --git a/test/CodeGen/X86/vec_set-A.ll b/test/CodeGen/X86/vec_set-A.ll
index f05eecf..92dda4c 100644
--- a/test/CodeGen/X86/vec_set-A.ll
+++ b/test/CodeGen/X86/vec_set-A.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -march=x86 -mattr=+sse2 | grep {movl.*\$1, %}
+; RUN: llc < %s -march=x86 -mattr=+sse2 | FileCheck %s
+; CHECK: movl $1, %{{.*}}
define <2 x i64> @test1() nounwind {
entry:
ret <2 x i64> < i64 1, i64 0 >
diff --git a/test/CodeGen/X86/vector.ll b/test/CodeGen/X86/vector.ll
index 3fff849..46b0e18 100644
--- a/test/CodeGen/X86/vector.ll
+++ b/test/CodeGen/X86/vector.ll
@@ -1,6 +1,6 @@
; Test that vectors are scalarized/lowered correctly.
; RUN: llc < %s -march=x86 -mcpu=i386 > %t
-; RUN: llc < %s -march=x86 -mcpu=yonah > %t
+; RUN: llc < %s -march=x86 -mcpu=yonah >> %t
%d8 = type <8 x double>
%f1 = type <1 x float>
diff --git a/test/CodeGen/X86/x86-64-malloc.ll b/test/CodeGen/X86/x86-64-malloc.ll
deleted file mode 100644
index 4aa0ec3..0000000
--- a/test/CodeGen/X86/x86-64-malloc.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-win32 | FileCheck %s
-; CHECK: shll $3, {{%edi|%ecx}}
-; PR3829
-; The generated code should multiply by 3 (sizeof i8*) as an i32,
-; not as an i64!
-
-define i8** @test(i32 %sz) {
- %sub = add i32 %sz, 536870911 ; <i32> [#uses=1]
- %call = malloc i8*, i32 %sub ; <i8**> [#uses=1]
- ret i8** %call
-}
diff --git a/test/CodeGen/X86/x86-64-shortint.ll b/test/CodeGen/X86/x86-64-shortint.ll
index 7f96543..cbf6588 100644
--- a/test/CodeGen/X86/x86-64-shortint.ll
+++ b/test/CodeGen/X86/x86-64-shortint.ll
@@ -5,7 +5,7 @@ target triple = "x86_64-apple-darwin8"
define void @bar(i16 zeroext %A) {
- tail call void @foo( i16 %A signext )
+ tail call void @foo( i16 signext %A )
ret void
}
declare void @foo(i16 signext )
diff --git a/test/CodeGen/X86/zext-fold.ll b/test/CodeGen/X86/zext-fold.ll
new file mode 100644
index 0000000..b3f5cdb
--- /dev/null
+++ b/test/CodeGen/X86/zext-fold.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+;; Simple case
+define i32 @test1(i8 %x) nounwind readnone {
+ %A = and i8 %x, -32
+ %B = zext i8 %A to i32
+ ret i32 %B
+}
+; CHECK: test1
+; CHECK: movzbl
+; CHECK-NEXT: andl {{.*}}224
+
+;; Multiple uses of %x but easily extensible.
+define i32 @test2(i8 %x) nounwind readnone {
+ %A = and i8 %x, -32
+ %B = zext i8 %A to i32
+ %C = or i8 %x, 63
+ %D = zext i8 %C to i32
+ %E = add i32 %B, %D
+ ret i32 %E
+}
+; CHECK: test2
+; CHECK: movzbl
+; CHECK: orl $63
+; CHECK: andl $224
+
+declare void @use(i32, i8)
+
+;; Multiple uses of %x where we shouldn't extend the load.
+define void @test3(i8 %x) nounwind readnone {
+ %A = and i8 %x, -32
+ %B = zext i8 %A to i32
+ call void @use(i32 %B, i8 %x)
+ ret void
+}
+; CHECK: test3
+; CHECK: movzbl 16(%esp), [[REGISTER:%e[a-z]{2}]]
+; CHECK-NEXT: movl [[REGISTER]], 4(%esp)
+; CHECK-NEXT: andl $224, [[REGISTER]]
+; CHECK-NEXT: movl [[REGISTER]], (%esp)
+; CHECK-NEXT: call{{.*}}use
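; NOTE (illustrative aside, not part of the patch): the fold under test
; reassociates the mask past the extension, zext(and(x, C)) ->
; and(zext(x), C), so the byte value is fetched with one movzbl and masked
; in 32 bits. The constant is simply reprinted unsigned: -32 as an i8 is
; 0xE0, hence the "andl $224" the CHECK lines look for.
;
;   %A = and i8 %x, -32
;   %B = zext i8 %A to i32    ; == and i32 (zext i8 %x to i32), 224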
diff --git a/test/CodeGen/XCore/private.ll b/test/CodeGen/XCore/private.ll
index c595a6d..537d63b 100644
--- a/test/CodeGen/XCore/private.ll
+++ b/test/CodeGen/XCore/private.ll
@@ -6,8 +6,6 @@
; RUN: grep .Lbaz: %t
; RUN: grep ldw.*\.Lbaz %t
-declare void @foo()
-
define private void @foo() {
ret void
}
diff --git a/test/DebugInfo/X86/earlydup-crash.ll b/test/DebugInfo/X86/earlydup-crash.ll
new file mode 100644
index 0000000..5bd0c7e
--- /dev/null
+++ b/test/DebugInfo/X86/earlydup-crash.ll
@@ -0,0 +1,85 @@
+; RUN: llc %s -mtriple=i386-apple-macosx10.6.7 -o /dev/null
+
+; This used to crash because early dup was not ignoring debug instructions.
+
+%struct.cpp_dir = type { %struct.cpp_dir*, i8*, i32, i8, i8**, i8*, i8* (i8*, %struct.cpp_dir*)*, i64, i32, i8 }
+
+declare void @llvm.dbg.value(metadata, i64, metadata) nounwind readnone
+
+define internal i8* @framework_construct_pathname(i8* %fname, %struct.cpp_dir* %dir) nounwind ssp {
+entry:
+ br i1 undef, label %bb33, label %bb
+
+bb: ; preds = %entry
+ %tmp = icmp eq i32 undef, 0
+ %tmp1 = add i32 0, 11
+ call void @llvm.dbg.value(metadata !{i32 %tmp1}, i64 0, metadata !0)
+ br i1 undef, label %bb18, label %bb31.preheader
+
+bb31.preheader: ; preds = %bb19, %bb
+ %tmp2 = getelementptr inbounds i8* %fname, i32 0
+ br label %bb31
+
+bb18: ; preds = %bb
+ %tmp3 = icmp eq i32 undef, 0
+ br i1 %tmp3, label %bb19, label %bb33
+
+bb19: ; preds = %bb18
+ call void @foobar(i32 0)
+ br label %bb31.preheader
+
+bb22: ; preds = %bb31
+ %tmp4 = add i32 0, %tmp1
+ call void @foobar(i32 %tmp4)
+ br i1 undef, label %bb33, label %bb31
+
+bb31: ; preds = %bb22, %bb31.preheader
+ br i1 false, label %bb33, label %bb22
+
+bb33: ; preds = %bb31, %bb22, %bb18, %entry
+ ret i8* undef
+}
+
+declare void @foobar(i32)
+
+!0 = metadata !{i32 590080, metadata !1, metadata !"frname_len", metadata !3, i32 517, metadata !38, i32 0} ; [ DW_TAG_auto_variable ]
+!1 = metadata !{i32 589835, metadata !2, i32 515, i32 0, metadata !3, i32 19} ; [ DW_TAG_lexical_block ]
+!2 = metadata !{i32 589870, i32 0, metadata !3, metadata !"framework_construct_pathname", metadata !"framework_construct_pathname", metadata !"", metadata !3, i32 515, metadata !5, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 true, i8* (i8*, %struct.cpp_dir*)* @framework_construct_pathname} ; [ DW_TAG_subprogram ]
+!3 = metadata !{i32 589865, metadata !"darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config", metadata !4} ; [ DW_TAG_file_type ]
+!4 = metadata !{i32 589841, i32 0, i32 1, metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/config/darwin-c.c", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc", metadata !"4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", i1 true, i1 true, metadata !"", i32 0} ; [ DW_TAG_compile_unit ]
+!5 = metadata !{i32 589845, metadata !3, metadata !"", metadata !3, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !6, i32 0, null} ; [ DW_TAG_subroutine_type ]
+!6 = metadata !{metadata !7, metadata !9, metadata !11}
+!7 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !8} ; [ DW_TAG_pointer_type ]
+!8 = metadata !{i32 589860, metadata !3, metadata !"char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 6} ; [ DW_TAG_base_type ]
+!9 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !10} ; [ DW_TAG_pointer_type ]
+!10 = metadata !{i32 589862, metadata !3, metadata !"", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, metadata !8} ; [ DW_TAG_const_type ]
+!11 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !12} ; [ DW_TAG_pointer_type ]
+!12 = metadata !{i32 589846, metadata !13, metadata !"cpp_dir", metadata !13, i32 45, i64 0, i64 0, i64 0, i32 0, metadata !14} ; [ DW_TAG_typedef ]
+!13 = metadata !{i32 589865, metadata !"cpplib.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/gcc/../../llvm-gcc-4.2/gcc/../libcpp/include", metadata !4} ; [ DW_TAG_file_type ]
+!14 = metadata !{i32 589843, metadata !3, metadata !"cpp_dir", metadata !13, i32 43, i64 352, i64 32, i64 0, i32 0, null, metadata !15, i32 0, null} ; [ DW_TAG_structure_type ]
+!15 = metadata !{metadata !16, metadata !18, metadata !19, metadata !21, metadata !23, metadata !25, metadata !27, metadata !29, metadata !33, metadata !36}
+!16 = metadata !{i32 589837, metadata !14, metadata !"next", metadata !13, i32 572, i64 32, i64 32, i64 0, i32 0, metadata !17} ; [ DW_TAG_member ]
+!17 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !14} ; [ DW_TAG_pointer_type ]
+!18 = metadata !{i32 589837, metadata !14, metadata !"name", metadata !13, i32 575, i64 32, i64 32, i64 32, i32 0, metadata !7} ; [ DW_TAG_member ]
+!19 = metadata !{i32 589837, metadata !14, metadata !"len", metadata !13, i32 576, i64 32, i64 32, i64 64, i32 0, metadata !20} ; [ DW_TAG_member ]
+!20 = metadata !{i32 589860, metadata !3, metadata !"unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!21 = metadata !{i32 589837, metadata !14, metadata !"sysp", metadata !13, i32 580, i64 8, i64 8, i64 96, i32 0, metadata !22} ; [ DW_TAG_member ]
+!22 = metadata !{i32 589860, metadata !3, metadata !"unsigned char", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 8} ; [ DW_TAG_base_type ]
+!23 = metadata !{i32 589837, metadata !14, metadata !"name_map", metadata !13, i32 584, i64 32, i64 32, i64 128, i32 0, metadata !24} ; [ DW_TAG_member ]
+!24 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ]
+!25 = metadata !{i32 589837, metadata !14, metadata !"header_map", metadata !13, i32 590, i64 32, i64 32, i64 160, i32 0, metadata !26} ; [ DW_TAG_member ]
+!26 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, null} ; [ DW_TAG_pointer_type ]
+!27 = metadata !{i32 589837, metadata !14, metadata !"construct", metadata !13, i32 597, i64 32, i64 32, i64 192, i32 0, metadata !28} ; [ DW_TAG_member ]
+!28 = metadata !{i32 589839, metadata !3, metadata !"", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, metadata !5} ; [ DW_TAG_pointer_type ]
+!29 = metadata !{i32 589837, metadata !14, metadata !"ino", metadata !13, i32 601, i64 64, i64 64, i64 224, i32 0, metadata !30} ; [ DW_TAG_member ]
+!30 = metadata !{i32 589846, metadata !31, metadata !"ino_t", metadata !31, i32 141, i64 0, i64 0, i64 0, i32 0, metadata !32} ; [ DW_TAG_typedef ]
+!31 = metadata !{i32 589865, metadata !"types.h", metadata !"/usr/include/sys", metadata !4} ; [ DW_TAG_file_type ]
+!32 = metadata !{i32 589860, metadata !3, metadata !"long long unsigned int", metadata !3, i32 0, i64 64, i64 64, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
+!33 = metadata !{i32 589837, metadata !14, metadata !"dev", metadata !13, i32 602, i64 32, i64 32, i64 288, i32 0, metadata !34} ; [ DW_TAG_member ]
+!34 = metadata !{i32 589846, metadata !31, metadata !"dev_t", metadata !31, i32 107, i64 0, i64 0, i64 0, i32 0, metadata !35} ; [ DW_TAG_typedef ]
+!35 = metadata !{i32 589860, metadata !3, metadata !"int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ]
+!36 = metadata !{i32 589837, metadata !14, metadata !"user_supplied_p", metadata !13, i32 605, i64 8, i64 8, i64 320, i32 0, metadata !37} ; [ DW_TAG_member ]
+!37 = metadata !{i32 589860, metadata !3, metadata !"_Bool", metadata !3, i32 0, i64 8, i64 8, i64 0, i32 0, i32 2} ; [ DW_TAG_base_type ]
+!38 = metadata !{i32 589846, metadata !39, metadata !"size_t", metadata !39, i32 326, i64 0, i64 0, i64 0, i32 0, metadata !40} ; [ DW_TAG_typedef ]
+!39 = metadata !{i32 589865, metadata !"stddef.h", metadata !"/Users/espindola/llvm/build-llvm-gcc/./prev-gcc/include", metadata !4} ; [ DW_TAG_file_type ]
+!40 = metadata !{i32 589860, metadata !3, metadata !"long unsigned int", metadata !3, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7} ; [ DW_TAG_base_type ]
diff --git a/test/DebugInfo/pr9951.ll b/test/DebugInfo/X86/pr9951.ll
index 7716cd7..7716cd7 100644
--- a/test/DebugInfo/pr9951.ll
+++ b/test/DebugInfo/X86/pr9951.ll
diff --git a/test/ExecutionEngine/test-malloc.ll b/test/ExecutionEngine/test-malloc.ll
deleted file mode 100644
index b3400df..0000000
--- a/test/ExecutionEngine/test-malloc.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: lli %s > /dev/null
-
-define i32 @main() {
- %X = malloc i32 ; <i32*> [#uses=1]
- %Y = malloc i32, i32 100 ; <i32*> [#uses=1]
- %u = add i32 1, 2 ; <i32> [#uses=1]
- %Z = malloc i32, i32 %u ; <i32*> [#uses=1]
- free i32* %X
- free i32* %Y
- free i32* %Z
- ret i32 0
-}
-
diff --git a/test/Feature/alignment.ll b/test/Feature/alignment.ll
index ef35a13..f6dbe33 100644
--- a/test/Feature/alignment.ll
+++ b/test/Feature/alignment.ll
@@ -10,15 +10,6 @@ define i32* @test() align 32 {
%Z = alloca i32 ; <i32*> [#uses=0]
ret i32* %X
}
-
-define i32* @test2() {
- %X = malloc i32, align 4 ; <i32*> [#uses=1]
- %Y = malloc i32, i32 42, align 16 ; <i32*> [#uses=0]
- %Z = malloc i32 ; <i32*> [#uses=0]
- %T = malloc i32, align 256 ; <i32*> [#uses=0]
- ret i32* %X
-}
-
define void @test3() alignstack(16) {
ret void
}
diff --git a/test/Feature/calltest.ll b/test/Feature/calltest.ll
index feafd3c..dcdb1a0 100644
--- a/test/Feature/calltest.ll
+++ b/test/Feature/calltest.ll
@@ -4,8 +4,6 @@
%FunTy = type i32 (i32)
-declare i32 @test(i32) ; Test forward declaration merging
-
define void @invoke(%FunTy* %x) {
%foo = call i32 %x( i32 123 ) ; <i32> [#uses=0]
%foo2 = tail call i32 %x( i32 123 ) ; <i32> [#uses=0]
diff --git a/test/Feature/forwardreftest.ll b/test/Feature/forwardreftest.ll
index 26d214a..6ca1be7 100644
--- a/test/Feature/forwardreftest.ll
+++ b/test/Feature/forwardreftest.ll
@@ -4,9 +4,9 @@
%myty = type i32
%myfn = type float (i32,double,i32,i16)
-type i32(%myfn*)
-type i32(i32)
-type i32(i32(i32)*)
+%0 = type i32(%myfn*)
+%1 = type i32(i32)
+%2 = type i32(i32(i32)*)
%thisfuncty = type i32 (i32) *
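; NOTE (illustrative aside, not part of the patch): after the type-system
; rewrite, a bare top-level "type ..." statement no longer parses; every
; unnamed type is bound to an explicit number that later types can name:
;
;   %0 = type i32 (%myfn*)
;   %1 = type i32 (i32)
;   %pair = type { %0, %1 }    ; %pair is a hypothetical user of %0 and %1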
diff --git a/test/Feature/globalredefinition.ll b/test/Feature/globalredefinition.ll
deleted file mode 100644
index 42e2d1a..0000000
--- a/test/Feature/globalredefinition.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; Test forward references and redefinitions of globals
-
-@A = global i32* @B ; <i32**> [#uses=0]
-@B = global i32 7 ; <i32*> [#uses=1]
-
-declare void @X()
-
-declare void @X()
-
-define void @X() {
- ret void
-}
-
-declare void @X()
diff --git a/test/Feature/globalvars.ll b/test/Feature/globalvars.ll
index 9a23775..dad1cf3 100644
--- a/test/Feature/globalvars.ll
+++ b/test/Feature/globalvars.ll
@@ -3,7 +3,7 @@
; RUN: diff %t1.ll %t2.ll
@MyVar = external global i32 ; <i32*> [#uses=1]
-@MyIntList = external global { \2*, i32 } ; <{ \2*, i32 }*> [#uses=1]
+@MyIntList = external global { i32*, i32 } ; <{ \2*, i32 }*> [#uses=1]
external global i32 ; <i32*>:0 [#uses=0]
@AConst = constant i32 123 ; <i32*> [#uses=0]
@AString = constant [4 x i8] c"test" ; <[4 x i8]*> [#uses=0]
@@ -11,7 +11,7 @@ external global i32 ; <i32*>:0 [#uses=0]
define i32 @foo(i32 %blah) {
store i32 5, i32* @MyVar
- %idx = getelementptr { \2*, i32 }* @MyIntList, i64 0, i32 1 ; <i32*> [#uses=1]
+ %idx = getelementptr { i32*, i32 }* @MyIntList, i64 0, i32 1 ; <i32*> [#uses=1]
store i32 12, i32* %idx
ret i32 %blah
}
diff --git a/test/Feature/noalias-ret.ll b/test/Feature/noalias-ret.ll
deleted file mode 100644
index d88452b..0000000
--- a/test/Feature/noalias-ret.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llvm-as < %s
-
-define noalias i8* @_Znwj(i32 %x) nounwind {
- %A = malloc i8, i32 %x
- ret i8* %A
-}
diff --git a/test/Feature/opaquetypes.ll b/test/Feature/opaquetypes.ll
deleted file mode 100644
index 6539c1a..0000000
--- a/test/Feature/opaquetypes.ll
+++ /dev/null
@@ -1,55 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; This test case is used to test opaque type processing, forward references,
-; and recursive types. Oh my.
-;
-
-%SQ1 = type { i32 }
-%SQ2 = type { %ITy }
-%ITy = type i32
-
-
-%CCC = type { \2* }
-%BBB = type { \2*, \2 * }
-%AAA = type { \2*, {\2*}, [12x{\2*}], {[1x{\2*}]} }
-
-; Test numbered types
-type %CCC
-type %BBB
-%Composite = type { %0, %1 }
-
-; Test simple opaque type resolution...
-%intty = type i32
-
-; Perform a simple forward reference...
-%ty1 = type { %ty2, i32 }
-%ty2 = type float
-
-; Do a recursive type...
-%list = type { %list * }
-%listp = type { %listp } *
-
-; Do two mutually recursive types...
-%TyA = type { %ty2, %TyB * }
-%TyB = type { double, %TyA * }
-
-; A complex recursive type...
-%Y = type { {%Y*}, %Y* }
-%Z = type { { %Z * }, [12x%Z] *, {{{ %Z * }}} }
-
-; More ridiculous test cases...
-%A = type [ 123x %A*]
-%M = type %M (%M, %M) *
-%P = type %P*
-
-; Recursive ptrs
-%u = type %v*
-%v = type %u*
-
-; Test the parser for unnamed recursive types...
-%P1 = type \1 *
-%Y1 = type { { \3 * }, \2 * }
-%Z1 = type { { \3 * }, [12x\3] *, { { { \5 * } } } }
-
diff --git a/test/Feature/paramattrs.ll b/test/Feature/paramattrs.ll
index 3bee617..9860f5a 100644
--- a/test/Feature/paramattrs.ll
+++ b/test/Feature/paramattrs.ll
@@ -2,11 +2,11 @@
; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
; RUN: diff %t1.ll %t2.ll
-%ZFunTy = type i32(i8 zeroext)
-%SFunTy = type i32(i8 signext)
+%ZFunTy = type i32(i8)
+%SFunTy = type i32(i8)
-declare i16 @"test"(i16 signext %arg) signext
-declare i8 @"test2" (i16 zeroext %a2) zeroext
+declare signext i16 @"test"(i16 signext %arg)
+declare zeroext i8 @"test2" (i16 zeroext %a2)
declare i32 @"test3"(i32* noalias %p)
@@ -14,9 +14,11 @@ declare void @exit(i32) noreturn nounwind
define i32 @main(i32 inreg %argc, i8 ** inreg %argv) nounwind {
%val = trunc i32 %argc to i16
- %res1 = call i16 (i16 signext) signext *@test(i16 signext %val) signext
+ %res1 = call signext i16 (i16 ) *@test(i16 signext %val)
%two = add i16 %res1, %res1
- %res2 = call i8 @test2(i16 %two zeroext) zeroext
+ %res2 = call zeroext i8 @test2(i16 zeroext %two )
%retVal = sext i16 %two to i32
ret i32 %retVal
}
+
+declare void @function_to_resolve_eagerly() nonlazybind
diff --git a/test/Feature/testmemory.ll b/test/Feature/testmemory.ll
deleted file mode 100644
index a9019f0..0000000
--- a/test/Feature/testmemory.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
- %complexty = type { i32, { [4 x i8*], float }, double }
- %struct = type { i32, { float, { i8 } }, i64 }
-
-define i32 @main() {
- call i32 @testfunction( i64 0, i64 1 ) ; <i32>:1 [#uses=0]
- ret i32 0
-}
-
-define i32 @testfunction(i64 %i0, i64 %j0) {
- %array0 = malloc [4 x i8] ; <[4 x i8]*> [#uses=2]
- %size = add i32 2, 2 ; <i32> [#uses=1]
- %array1 = malloc i8, i32 4 ; <i8*> [#uses=1]
- %array2 = malloc i8, i32 %size ; <i8*> [#uses=1]
- %idx = getelementptr [4 x i8]* %array0, i64 0, i64 2 ; <i8*> [#uses=1]
- store i8 123, i8* %idx
- free [4 x i8]* %array0
- free i8* %array1
- free i8* %array2
- %aa = alloca %complexty, i32 5 ; <%complexty*> [#uses=1]
- %idx2 = getelementptr %complexty* %aa, i64 %i0, i32 1, i32 0, i64 %j0 ; <i8**> [#uses=1]
- store i8* null, i8** %idx2
- %ptr = alloca i32 ; <i32*> [#uses=2]
- store i32 3, i32* %ptr
- %val = load i32* %ptr ; <i32> [#uses=0]
- %sptr = alloca %struct ; <%struct*> [#uses=1]
- %ubsptr = getelementptr %struct* %sptr, i64 0, i32 1, i32 1 ; <{ i8 }*> [#uses=1]
- %idx3 = getelementptr { i8 }* %ubsptr, i64 0, i32 0 ; <i8*> [#uses=1]
- store i8 4, i8* %idx3
- ret i32 3
-}
-
diff --git a/test/Feature/testtype.ll b/test/Feature/testtype.ll
index 124aa09..cdeb5a0 100644
--- a/test/Feature/testtype.ll
+++ b/test/Feature/testtype.ll
@@ -7,13 +7,13 @@
%inners = type { float, { i8 } }
%struct = type { i32, %inners, i64 }
-%fwdref = type { %fwd* }
%fwd = type %fwdref*
+%fwdref = type { %fwd* }
; same as above with unnamed types
-type { %1* }
-type %0*
+%1 = type %0*
%test = type %1
+%0 = type { %1* }
%test2 = type [2 x i32]
;%x = type %undefined*
diff --git a/test/Feature/weak_constant.ll b/test/Feature/weak_constant.ll
index 9025aaa..fba7f12 100644
--- a/test/Feature/weak_constant.ll
+++ b/test/Feature/weak_constant.ll
@@ -4,7 +4,7 @@
; RUN: grep 7 %t | count 1
; RUN: grep 9 %t | count 1
- type { i32, i32 } ; type %0
+ %0 = type { i32, i32 } ; type %0
@a = weak constant i32 undef ; <i32*> [#uses=1]
@b = weak constant i32 5 ; <i32*> [#uses=1]
@c = weak constant %0 { i32 7, i32 9 } ; <%0*> [#uses=1]
diff --git a/test/FrontendC/2008-07-29-EHLabel.ll b/test/FrontendC/2008-07-29-EHLabel.ll
deleted file mode 100644
index 186eafa..0000000
--- a/test/FrontendC/2008-07-29-EHLabel.ll
+++ /dev/null
@@ -1,282 +0,0 @@
-; RUN: llc -disable-cfi %s -o - | %llvmgcc -xassembler -c -o /dev/null -
-; PR2609
- %struct..0._11 = type { i32 }
- %struct..1__pthread_mutex_s = type { i32, i32, i32, i32, i32, %struct..0._11 }
- %struct.pthread_attr_t = type { i32, [32 x i8] }
- %struct.pthread_mutex_t = type { %struct..1__pthread_mutex_s }
- %"struct.std::__ctype_abstract_base<wchar_t>" = type { %"struct.std::locale::facet" }
- %"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__ctype_abstract_base<wchar_t>"*, %"struct.std::__ctype_abstract_base<wchar_t>"* }
- %"struct.std::basic_istream<char,std::char_traits<char> >" = type { i32 (...)**, i32, %"struct.std::basic_ios<char,std::char_traits<char> >" }
- %"struct.std::basic_istream<char,std::char_traits<char> >::sentry" = type { i8 }
- %"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
- %"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
- %"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i16*, i8, [256 x i8], [256 x i8], i8 }
- %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" }
- %"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
- %"struct.std::ios_base::_Words" = type { i8*, i32 }
- %"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
- %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
- %"struct.std::locale::facet" = type { i32 (...)**, i32 }
-
-@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once ; <i32 (i32*, void ()*)*> [#uses=0]
-@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific ; <i8* (i32)*> [#uses=0]
-@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific ; <i32 (i32, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create ; <i32 (i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
-@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i32)* @pthread_cancel ; <i32 (i32)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock ; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
-@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %struct..0._11*)* @pthread_mutex_init ; <i32 (%struct.pthread_mutex_t*, %struct..0._11*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create ; <i32 (i32*, void (i8*)*)*> [#uses=0]
-@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete ; <i32 (i32)*> [#uses=0]
-@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%struct..0._11*)* @pthread_mutexattr_init ; <i32 (%struct..0._11*)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%struct..0._11*, i32)* @pthread_mutexattr_settype ; <i32 (%struct..0._11*, i32)*> [#uses=0]
-@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%struct..0._11*)* @pthread_mutexattr_destroy ; <i32 (%struct..0._11*)*> [#uses=0]
-
-define %"struct.std::basic_istream<char,std::char_traits<char> >"* @_ZNSi7getlineEPcic(%"struct.std::basic_istream<char,std::char_traits<char> >"* %this, i8* %__s, i32 %__n, i8 signext %__delim) {
-entry:
- %__cerb = alloca %"struct.std::basic_istream<char,std::char_traits<char> >::sentry" ; <%"struct.std::basic_istream<char,std::char_traits<char> >::sentry"*> [#uses=2]
- getelementptr %"struct.std::basic_istream<char,std::char_traits<char> >"* %this, i32 0, i32 1 ; <i32*>:0 [#uses=7]
- store i32 0, i32* %0, align 4
- call void @_ZNSi6sentryC1ERSib( %"struct.std::basic_istream<char,std::char_traits<char> >::sentry"* %__cerb, %"struct.std::basic_istream<char,std::char_traits<char> >"* %this, i8 zeroext 1 )
- getelementptr %"struct.std::basic_istream<char,std::char_traits<char> >::sentry"* %__cerb, i32 0, i32 0 ; <i8*>:1 [#uses=1]
- load i8* %1, align 8 ; <i8>:2 [#uses=1]
- %toBool = icmp eq i8 %2, 0 ; <i1> [#uses=1]
- br i1 %toBool, label %bb162, label %bb
-
-bb: ; preds = %entry
- zext i8 %__delim to i32 ; <i32>:3 [#uses=1]
- getelementptr %"struct.std::basic_istream<char,std::char_traits<char> >"* %this, i32 0, i32 0 ; <i32 (...)***>:4 [#uses=1]
- load i32 (...)*** %4, align 4 ; <i32 (...)**>:5 [#uses=1]
- getelementptr i32 (...)** %5, i32 -3 ; <i32 (...)**>:6 [#uses=1]
- bitcast i32 (...)** %6 to i32* ; <i32*>:7 [#uses=1]
- load i32* %7, align 4 ; <i32>:8 [#uses=1]
- bitcast %"struct.std::basic_istream<char,std::char_traits<char> >"* %this to i8* ; <i8*>:9 [#uses=1]
- %ctg2186 = getelementptr i8* %9, i32 %8 ; <i8*> [#uses=1]
- bitcast i8* %ctg2186 to %"struct.std::basic_ios<char,std::char_traits<char> >"* ; <%"struct.std::basic_ios<char,std::char_traits<char> >"*>:10 [#uses=1]
- getelementptr %"struct.std::basic_ios<char,std::char_traits<char> >"* %10, i32 0, i32 4 ; <%"struct.std::basic_streambuf<char,std::char_traits<char> >"**>:11 [#uses=1]
- load %"struct.std::basic_streambuf<char,std::char_traits<char> >"** %11, align 4 ; <%"struct.std::basic_streambuf<char,std::char_traits<char> >"*>:12 [#uses=9]
- getelementptr %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12, i32 0, i32 2 ; <i8**>:13 [#uses=10]
- load i8** %13, align 4 ; <i8*>:14 [#uses=2]
- getelementptr %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12, i32 0, i32 3 ; <i8**>:15 [#uses=6]
- load i8** %15, align 4 ; <i8*>:16 [#uses=1]
- icmp ult i8* %14, %16 ; <i1>:17 [#uses=1]
- br i1 %17, label %bb81, label %bb82
-
-bb81: ; preds = %bb
- load i8* %14, align 1 ; <i8>:18 [#uses=1]
- zext i8 %18 to i32 ; <i32>:19 [#uses=1]
- %.pre = getelementptr %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12, i32 0, i32 0 ; <i32 (...)***> [#uses=1]
- br label %bb119.preheader
-
-bb82: ; preds = %bb
- getelementptr %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12, i32 0, i32 0 ; <i32 (...)***>:20 [#uses=2]
- load i32 (...)*** %20, align 4 ; <i32 (...)**>:21 [#uses=1]
- getelementptr i32 (...)** %21, i32 9 ; <i32 (...)**>:22 [#uses=1]
- load i32 (...)** %22, align 4 ; <i32 (...)*>:23 [#uses=1]
- bitcast i32 (...)* %23 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)* ; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:24 [#uses=1]
- invoke i32 %24( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
- to label %bb119.preheader unwind label %lpad ; <i32>:25 [#uses=1]
-
-bb119.preheader: ; preds = %bb82, %bb81
- %.pre-phi = phi i32 (...)*** [ %.pre, %bb81 ], [ %20, %bb82 ] ; <i32 (...)***> [#uses=4]
- %__c79.0.ph = phi i32 [ %19, %bb81 ], [ %25, %bb82 ] ; <i32> [#uses=1]
- sext i8 %__delim to i32 ; <i32>:26 [#uses=1]
- br label %bb119
-
-bb84: ; preds = %bb119
- sub i32 %__n, %82 ; <i32>:27 [#uses=1]
- add i32 %27, -1 ; <i32>:28 [#uses=2]
- load i8** %15, align 4 ; <i8*>:29 [#uses=1]
- ptrtoint i8* %29 to i32 ; <i32>:30 [#uses=1]
- load i8** %13, align 4 ; <i8*>:31 [#uses=3]
- ptrtoint i8* %31 to i32 ; <i32>:32 [#uses=2]
- sub i32 %30, %32 ; <i32>:33 [#uses=2]
- icmp slt i32 %28, %33 ; <i1>:34 [#uses=1]
- select i1 %34, i32 %28, i32 %33 ; <i32>:35 [#uses=3]
- icmp sgt i32 %35, 1 ; <i1>:36 [#uses=1]
- br i1 %36, label %bb90, label %bb99
-
-bb90: ; preds = %bb84
- call i8* @memchr( i8* %31, i32 %26, i32 %35 ) nounwind readonly ; <i8*>:37 [#uses=2]
- icmp eq i8* %37, null ; <i1>:38 [#uses=1]
- br i1 %38, label %bb93, label %bb92
-
-bb92: ; preds = %bb90
- ptrtoint i8* %37 to i32 ; <i32>:39 [#uses=1]
- sub i32 %39, %32 ; <i32>:40 [#uses=1]
- br label %bb93
-
-bb93: ; preds = %bb92, %bb90
- %__size.0 = phi i32 [ %40, %bb92 ], [ %35, %bb90 ] ; <i32> [#uses=4]
- call void @llvm.memcpy.i32( i8* %__s_addr.0, i8* %31, i32 %__size.0, i32 1 )
- getelementptr i8* %__s_addr.0, i32 %__size.0 ; <i8*>:41 [#uses=3]
- load i8** %13, align 4 ; <i8*>:42 [#uses=1]
- getelementptr i8* %42, i32 %__size.0 ; <i8*>:43 [#uses=1]
- store i8* %43, i8** %13, align 4
- load i32* %0, align 4 ; <i32>:44 [#uses=1]
- add i32 %44, %__size.0 ; <i32>:45 [#uses=1]
- store i32 %45, i32* %0, align 4
- load i8** %13, align 4 ; <i8*>:46 [#uses=2]
- load i8** %15, align 4 ; <i8*>:47 [#uses=1]
- icmp ult i8* %46, %47 ; <i1>:48 [#uses=1]
- br i1 %48, label %bb95, label %bb96
-
-bb95: ; preds = %bb93
- load i8* %46, align 1 ; <i8>:49 [#uses=1]
- zext i8 %49 to i32 ; <i32>:50 [#uses=1]
- br label %bb119
-
-bb96: ; preds = %bb93
- load i32 (...)*** %.pre-phi, align 4 ; <i32 (...)**>:51 [#uses=1]
- getelementptr i32 (...)** %51, i32 9 ; <i32 (...)**>:52 [#uses=1]
- load i32 (...)** %52, align 4 ; <i32 (...)*>:53 [#uses=1]
- bitcast i32 (...)* %53 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)* ; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:54 [#uses=1]
- invoke i32 %54( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
- to label %bb119 unwind label %lpad ; <i32>:55 [#uses=1]
-
-bb99: ; preds = %bb84
- trunc i32 %__c79.0 to i8 ; <i8>:56 [#uses=1]
- store i8 %56, i8* %__s_addr.0, align 1
- getelementptr i8* %__s_addr.0, i32 1 ; <i8*>:57 [#uses=5]
- load i32* %0, align 4 ; <i32>:58 [#uses=1]
- add i32 %58, 1 ; <i32>:59 [#uses=1]
- store i32 %59, i32* %0, align 4
- load i8** %13, align 4 ; <i8*>:60 [#uses=3]
- load i8** %15, align 4 ; <i8*>:61 [#uses=1]
- icmp ult i8* %60, %61 ; <i1>:62 [#uses=1]
- br i1 %62, label %bb101, label %bb102
-
-bb101: ; preds = %bb99
- load i8* %60, align 1 ; <i8>:63 [#uses=1]
- zext i8 %63 to i32 ; <i32>:64 [#uses=1]
- getelementptr i8* %60, i32 1 ; <i8*>:65 [#uses=1]
- store i8* %65, i8** %13, align 4
- br label %bb104
-
-bb102: ; preds = %bb99
- load i32 (...)*** %.pre-phi, align 4 ; <i32 (...)**>:66 [#uses=1]
- getelementptr i32 (...)** %66, i32 10 ; <i32 (...)**>:67 [#uses=1]
- load i32 (...)** %67, align 4 ; <i32 (...)*>:68 [#uses=1]
- bitcast i32 (...)* %68 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)* ; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:69 [#uses=1]
- invoke i32 %69( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
- to label %bb104 unwind label %lpad ; <i32>:70 [#uses=1]
-
-bb104: ; preds = %bb102, %bb101
- %__ret44.0 = phi i32 [ %64, %bb101 ], [ %70, %bb102 ] ; <i32> [#uses=1]
- icmp eq i32 %__ret44.0, -1 ; <i1>:71 [#uses=1]
- br i1 %71, label %bb119, label %bb112
-
-bb112: ; preds = %bb104
- load i8** %13, align 4 ; <i8*>:72 [#uses=2]
- load i8** %15, align 4 ; <i8*>:73 [#uses=1]
- icmp ult i8* %72, %73 ; <i1>:74 [#uses=1]
- br i1 %74, label %bb114, label %bb115
-
-bb114: ; preds = %bb112
- load i8* %72, align 1 ; <i8>:75 [#uses=1]
- zext i8 %75 to i32 ; <i32>:76 [#uses=1]
- br label %bb119
-
-bb115: ; preds = %bb112
- load i32 (...)*** %.pre-phi, align 4 ; <i32 (...)**>:77 [#uses=1]
- getelementptr i32 (...)** %77, i32 9 ; <i32 (...)**>:78 [#uses=1]
- load i32 (...)** %78, align 4 ; <i32 (...)*>:79 [#uses=1]
- bitcast i32 (...)* %79 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)* ; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:80 [#uses=1]
- invoke i32 %80( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
- to label %bb119 unwind label %lpad ; <i32>:81 [#uses=1]
-
-bb119: ; preds = %bb115, %bb114, %bb104, %bb96, %bb95, %bb119.preheader
- %__c79.0 = phi i32 [ %__c79.0.ph, %bb119.preheader ], [ %50, %bb95 ], [ %76, %bb114 ], [ %55, %bb96 ], [ -1, %bb104 ], [ %81, %bb115 ] ; <i32> [#uses=3]
- %__s_addr.0 = phi i8* [ %__s, %bb119.preheader ], [ %41, %bb95 ], [ %57, %bb114 ], [ %41, %bb96 ], [ %57, %bb104 ], [ %57, %bb115 ] ; <i8*> [#uses=5]
- load i32* %0, align 4 ; <i32>:82 [#uses=2]
- add i32 %82, 1 ; <i32>:83 [#uses=2]
- %.not = icmp sge i32 %83, %__n ; <i1> [#uses=1]
- icmp eq i32 %__c79.0, -1 ; <i1>:84 [#uses=3]
- icmp eq i32 %__c79.0, %3 ; <i1>:85 [#uses=2]
- %or.cond = or i1 %84, %85 ; <i1> [#uses=1]
- %or.cond188 = or i1 %or.cond, %.not ; <i1> [#uses=1]
- br i1 %or.cond188, label %bb141, label %bb84
-
-bb141: ; preds = %bb119
- %.not194 = xor i1 %85, true ; <i1> [#uses=1]
- %brmerge = or i1 %84, %.not194 ; <i1> [#uses=1]
- %.mux = select i1 %84, i32 2, i32 4 ; <i32> [#uses=0]
- br i1 %brmerge, label %bb162, label %bb146
-
-bb146: ; preds = %bb141
- store i32 %83, i32* %0, align 4
- load i8** %13, align 4 ; <i8*>:86 [#uses=2]
- load i8** %15, align 4 ; <i8*>:87 [#uses=1]
- icmp ult i8* %86, %87 ; <i1>:88 [#uses=1]
- br i1 %88, label %bb148, label %bb149
-
-bb148: ; preds = %bb146
- getelementptr i8* %86, i32 1 ; <i8*>:89 [#uses=1]
- store i8* %89, i8** %13, align 4
- ret %"struct.std::basic_istream<char,std::char_traits<char> >"* %this
-
-bb149: ; preds = %bb146
- load i32 (...)*** %.pre-phi, align 4 ; <i32 (...)**>:90 [#uses=1]
- getelementptr i32 (...)** %90, i32 10 ; <i32 (...)**>:91 [#uses=1]
- load i32 (...)** %91, align 4 ; <i32 (...)*>:92 [#uses=1]
- bitcast i32 (...)* %92 to i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)* ; <i32 (%"struct.std::basic_streambuf<char,std::char_traits<char> >"*)*>:93 [#uses=1]
- invoke i32 %93( %"struct.std::basic_streambuf<char,std::char_traits<char> >"* %12 )
- to label %bb162 unwind label %lpad ; <i32>:94 [#uses=0]
-
-bb162: ; preds = %bb149, %bb141, %entry
- ret %"struct.std::basic_istream<char,std::char_traits<char> >"* %this
-
-lpad: ; preds = %bb149, %bb115, %bb102, %bb96, %bb82
- %__s_addr.1 = phi i8* [ %__s, %bb82 ], [ %__s_addr.0, %bb149 ], [ %41, %bb96 ], [ %57, %bb102 ], [ %57, %bb115 ] ; <i8*> [#uses=0]
- call void @__cxa_rethrow( ) noreturn
- unreachable
-}
-
-declare i8* @__cxa_begin_catch(i8*) nounwind
-
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
-declare void @__cxa_rethrow() noreturn
-
-declare void @__cxa_end_catch()
-
-declare i32 @__gxx_personality_v0(...)
-
-declare void @_ZNSi6sentryC1ERSib(%"struct.std::basic_istream<char,std::char_traits<char> >::sentry"*, %"struct.std::basic_istream<char,std::char_traits<char> >"*, i8 zeroext )
-
-declare i8* @memchr(i8*, i32, i32) nounwind readonly
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
-declare void @_ZNSt9basic_iosIcSt11char_traitsIcEE5clearESt12_Ios_Iostate(%"struct.std::basic_ios<char,std::char_traits<char> >"*, i32)
-
-declare extern_weak i32 @pthread_once(i32*, void ()*)
-
-declare extern_weak i8* @pthread_getspecific(i32)
-
-declare extern_weak i32 @pthread_setspecific(i32, i8*)
-
-declare extern_weak i32 @pthread_create(i32*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
-
-declare extern_weak i32 @pthread_cancel(i32)
-
-declare extern_weak i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
-
-declare extern_weak i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %struct..0._11*)
-
-declare extern_weak i32 @pthread_key_create(i32*, void (i8*)*)
-
-declare extern_weak i32 @pthread_key_delete(i32)
-
-declare extern_weak i32 @pthread_mutexattr_init(%struct..0._11*)
-
-declare extern_weak i32 @pthread_mutexattr_settype(%struct..0._11*, i32)
-
-declare extern_weak i32 @pthread_mutexattr_destroy(%struct..0._11*)
diff --git a/test/FrontendC/2010-11-16-asmblock.c b/test/FrontendC/2010-11-16-asmblock.c
index c264223..2d97681 100644
--- a/test/FrontendC/2010-11-16-asmblock.c
+++ b/test/FrontendC/2010-11-16-asmblock.c
@@ -5,9 +5,9 @@
void foo()
{
-// CHECK: %0 = call i32 asm sideeffect "", "={ecx}"() nounwind
-// CHECK: %asmtmp = call i32 asm sideeffect alignstack "sall $$3, $0", "={ecx},{ecx},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %0) nounwind
-// CHECK: store i32 %asmtmp, i32* %"%ecx"
+// CHECK: %0 = call i32 asm sideeffect "", "={ecx}"() nounwind
+// CHECK: %1 = call i32 asm sideeffect alignstack "sall $$3, $0", "={ecx},{ecx},~{dirflag},~{fpsr},~{flags},~{memory}"(i32 %0) nounwind
+// CHECK: store i32 %1, i32* %"%ecx"
__asm {
sal ecx, 3;
add esi, ecx;
diff --git a/test/FrontendC/ARM/inline-asm-multichar.c b/test/FrontendC/ARM/inline-asm-multichar.c
index 7e2eeef..bd88390 100644
--- a/test/FrontendC/ARM/inline-asm-multichar.c
+++ b/test/FrontendC/ARM/inline-asm-multichar.c
@@ -1,11 +1,11 @@
-// RUN: %llvmgcc -S -march=armv7a %s
+// RUN: %llvmgcc -S -march=armv7a %s | FileCheck %s
// XFAIL: *
// XTARGET: arm
int t1() {
static float k = 1.0f;
-CHECK: call void asm sideeffect "flds s15, $0 \0A", "*^Uv,~{s15}"
+ // CHECK: "flds s15, $0 \0A", "*^Uv,~{s15}"
__asm__ volatile ("flds s15, %[k] \n" :: [k] "Uv,m" (k) : "s15");
return 0;
}
diff --git a/test/FrontendC/asm-reg-var-local.c b/test/FrontendC/asm-reg-var-local.c
index 22bd43c..e0be10c 100644
--- a/test/FrontendC/asm-reg-var-local.c
+++ b/test/FrontendC/asm-reg-var-local.c
@@ -10,23 +10,23 @@ int foo() {
// CHECK: store i32 5, i32* %a, align 4
asm volatile("; %0 This asm defines rsi" : "=r"(a));
-// CHECK: %asmtmp = call i32 asm sideeffect "; $0 This asm defines rsi", "={rsi}
-// CHECK: store i32 %asmtmp, i32* %a
+// CHECK: %1 = call i32 asm sideeffect "; $0 This asm defines rsi", "={rsi}
+// CHECK: store i32 %1, i32* %a
a = 42;
// CHECK: store i32 42, i32* %a, align 4
asm volatile("; %0 This asm uses rsi" : : "r"(a));
-// CHECK: %1 = load i32* %a, align 4
-// CHECK: call void asm sideeffect "", "{rsi}"(i32 %1) nounwind
-// CHECK: %2 = call i32 asm sideeffect "", "={rsi}"() nounwind
-// CHECK: call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 %2)
+// CHECK: %2 = load i32* %a, align 4
+// CHECK: call void asm sideeffect "", "{rsi}"(i32 %2) nounwind
+// CHECK: %3 = call i32 asm sideeffect "", "={rsi}"() nounwind
+// CHECK: call void asm sideeffect "; $0 This asm uses rsi", "{rsi},~{dirflag},~{fpsr},~{flags}"(i32 %3)
return a;
-// CHECK: %3 = load i32* %a, align 4
-// CHECK: call void asm sideeffect "", "{rsi}"(i32 %3) nounwind
-// CHECK: %4 = call i32 asm sideeffect "", "={rsi}"() nounwind
-// CHECK: store i32 %4, i32* %0, align 4
-// CHECK: %5 = load i32* %0, align 4
-// CHECK: store i32 %5, i32* %retval, align 4
+// CHECK: %4 = load i32* %a, align 4
+// CHECK: call void asm sideeffect "", "{rsi}"(i32 %4) nounwind
+// CHECK: %5 = call i32 asm sideeffect "", "={rsi}"() nounwind
+// CHECK: store i32 %5, i32* %0, align 4
+// CHECK: %6 = load i32* %0, align 4
+// CHECK: store i32 %6, i32* %retval, align 4
}
diff --git a/test/FrontendC/mmx-inline-asm.c b/test/FrontendC/mmx-inline-asm.c
index b66137c..5c09a41 100644
--- a/test/FrontendC/mmx-inline-asm.c
+++ b/test/FrontendC/mmx-inline-asm.c
@@ -5,7 +5,7 @@
#include <mmintrin.h>
#include <stdint.h>
-// CHECK: type { x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx }
+// CHECK: { x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx, x86_mmx }
void foo(__m64 vfill) {
__m64 v1, v2, v3, v4, v5, v6, v7;
diff --git a/test/Integer/BitArith.ll b/test/Integer/BitArith.ll
deleted file mode 100644
index 350a984..0000000
--- a/test/Integer/BitArith.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-declare void @"foo"(i31 %i, i63 %j, i10 %k)
-
-
-; foo test basic arith operations
-define void @"foo"(i31 %i, i63 %j, i10 %k)
-begin
- %t1 = trunc i63 %j to i31
- %t2 = add i31 %t1, %i
- %t20 = add i31 3, %t1
- %t3 = zext i31 %i to i63
- %t4 = sub i63 %t3, %j
- %t40 = sub i63 %j, -100
- %t5 = mul i10 %k, 7
- %t6 = sdiv i63 %j, -2
- %t7 = udiv i63 %j, %t3
- %t8 = urem i10 %k, 10
- %t9 = srem i10 %k, -10
- ret void
-end
-
diff --git a/test/Integer/BitBit.ll b/test/Integer/BitBit.ll
deleted file mode 100644
index 420bbe5..0000000
--- a/test/Integer/BitBit.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-declare void @"foo"(i31 %i, i33 %j)
-
-
-; foo test basic bitwise operations
-define void @"foo"(i31 %i, i33 %j)
-begin
- %t1 = trunc i33 %j to i31
- %t2 = and i31 %t1, %i
- %t3 = sext i31 %i to i33
- %t4 = or i33 %t3, %j
- %t5 = xor i31 %t2, 7
- %t6 = shl i31 %i, 2
- %t7 = trunc i31 %i to i8
- %t8 = shl i8 %t7, 3
- %t9 = lshr i33 %j, 31
- %t7z = zext i8 %t7 to i33
- %t10 = ashr i33 %j, %t7z
- ret void
-end
-
diff --git a/test/Integer/BitCast.ll b/test/Integer/BitCast.ll
deleted file mode 100644
index 0bef023..0000000
--- a/test/Integer/BitCast.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-declare void @"foo"(i31 %i, i1280 %j, i1 %k, float %f)
-
-
-; foo test basic arith operations
-define void @"foo"(i31 %i, i1280 %j, i1 %k, float %f)
-begin
- %t1 = trunc i1280 %j to i31
- %t2 = trunc i31 %t1 to i1
-
- %t3 = zext i31 %i to i1280
- %t4 = sext i31 %i to i1280
-
- %t5 = fptoui float 0x400921FA00000000 to i31
- %t6 = uitofp i31 %t5 to double
-
- %t7 = fptosi double 0xC0934A456D5CFAAD to i28
- %t8 = sitofp i8 -1 to double
- %t9 = uitofp i8 255 to double
-
- ret void
-end
-
diff --git a/test/Integer/BitIcmp.ll b/test/Integer/BitIcmp.ll
deleted file mode 100644
index c224612..0000000
--- a/test/Integer/BitIcmp.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i55 @"simpleIcmp"(i55 %i0, i55 %j0)
-begin
- %t1 = icmp eq i55 %i0, %j0
- %t2 = icmp ne i55 %i0, %j0
- %t3 = icmp ult i55 %i0, %j0
- %t4 = icmp sgt i55 %i0, %j0
- %t5 = icmp ule i55 %i0, %j0
- %t6 = icmp sge i55 %i0, %j0
-
- %t7 = icmp eq i55 %i0, 1098765432
- %t8 = icmp ne i55 %i0, -31415926
-
- %t9 = icmp ult i55 10000, %j0
- %t10 = icmp sgt i55 -10000, %j0
-
- ret i55 %i0
-end
-
-define i31 @"phitest"(i12 %i)
-begin
-
-HasArg:
- %n1 = add i12 1, %i
- br label %Continue
-
-Continue:
- %n = phi i12 [%n1, %HasArg], [%next, %Continue]
- %next = add i12 1, %n
- br label %Continue
-end
-
-define i18 @"select"(i18 %i)
-begin
- %t = icmp sgt i18 %i, 100
- %k = select i1 %t, i18 %i, i18 999
- ret i18 %k
-end
-
diff --git a/test/Integer/BitMem.ll b/test/Integer/BitMem.ll
deleted file mode 100644
index 2c093bc..0000000
--- a/test/Integer/BitMem.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-declare void @"foo"()
-
-
-; foo test basic arith operations
-define void @"foo"() {
- %t1 = malloc i31, i32 4
- %t2 = malloc i31, i32 7, align 1024
- %t3 = malloc [4 x i15]
-
- %idx = getelementptr [4 x i15]* %t3, i64 0, i64 2
- store i15 -123, i15* %idx
-
- free [4 x i15]* %t3
- free i31* %t2
- free i31* %t1
-
- %t4 = alloca i12, i32 100
- free i12* %t4
-
- %t5 = alloca i31
- store i31 -123, i31* %t5
-
- free i31* %t5
- ret void
-}
diff --git a/test/Integer/BitMisc.ll b/test/Integer/BitMisc.ll
deleted file mode 100644
index 8ce4d4a..0000000
--- a/test/Integer/BitMisc.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-@MyVar = external global i19
-@MyIntList = external global { i39 *, i19 }
- external global i19 ; i19*:0
-
-@AConst = constant i19 -123
-
-@AString = constant [4 x i8] c"test"
-
-@ZeroInit = global { [100 x i19 ], [40 x float ] } { [100 x i19] zeroinitializer,
- [40 x float] zeroinitializer }
-
-
-define i19 @"foo"(i19 %blah)
-begin
- store i19 5, i19* @MyVar
- %idx = getelementptr { i39 *, i19 } * @MyIntList, i64 0, i32 1
- store i19 12, i19* %idx
- ret i19 %blah
-end
diff --git a/test/Integer/alignment_bt.ll b/test/Integer/alignment_bt.ll
deleted file mode 100644
index 3a9d051..0000000
--- a/test/Integer/alignment_bt.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-@X = global i19 4, align 16
-
-define i19 *@test() align 32 {
- %X = alloca i19, align 4
- %Y = alloca i51, i32 42, align 16
- %Z = alloca i32, align 1
- ret i19 *%X
-}
-
-define i19 *@test2() {
- %X = malloc i19, align 4
- %Y = malloc i51, i32 42, align 16
- %Z = malloc i32, align 1
- ret i19 *%X
-}
-
-
diff --git a/test/Integer/cfgstructures_bt.ll b/test/Integer/cfgstructures_bt.ll
deleted file mode 100644
index 09aec1f..0000000
--- a/test/Integer/cfgstructures_bt.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-;; This is an irreducible flow graph
-
-
-define void @"irreducible"(i1 %cond)
-begin
- br i1 %cond, label %X, label %Y
-
-X:
- br label %Y
-Y:
- br label %X
-end
-
-;; This is a pair of loops that share the same header
-
-define void @"sharedheader"(i1 %cond)
-begin
- br label %A
-A:
- br i1 %cond, label %X, label %Y
-
-X:
- br label %A
-Y:
- br label %A
-end
-
-;; This is a simple nested loop
-define void @"nested"(i1 %cond1, i1 %cond2, i1 %cond3)
-begin
- br label %Loop1
-
-Loop1:
- br label %Loop2
-
-Loop2:
- br label %Loop3
-
-Loop3:
- br i1 %cond3, label %Loop3, label %L3Exit
-
-L3Exit:
- br i1 %cond2, label %Loop2, label %L2Exit
-
-L2Exit:
- br i1 %cond1, label %Loop1, label %L1Exit
-
-L1Exit:
- ret void
-end
-
diff --git a/test/Integer/forwardreftest_bt.ll b/test/Integer/forwardreftest_bt.ll
deleted file mode 100644
index 5d73eff..0000000
--- a/test/Integer/forwardreftest_bt.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
- %myty = type i55
- %myfn = type float (i55,double,i55,i16)
- type i55(%myfn*)
- type i55(i55)
- type i55(i55(i55)*)
-
- %thisfuncty = type i55 (i55) *
-
-declare void @F(%thisfuncty, %thisfuncty, %thisfuncty)
-
-; This function always returns zero
-define i55 @zarro(i55 %Func)
-begin
-Startup:
- add i55 0, 10
- ret i55 0
-end
-
-define i55 @test(i55)
-begin
- call void @F(%thisfuncty @zarro, %thisfuncty @test, %thisfuncty @foozball)
- ret i55 0
-end
-
-define i55 @foozball(i55)
-begin
- ret i55 0
-end
-
diff --git a/test/Integer/globalredefinition_bt.ll b/test/Integer/globalredefinition_bt.ll
deleted file mode 100644
index b369b2a..0000000
--- a/test/Integer/globalredefinition_bt.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; Test forward references and redefinitions of globals
-
-@A = global i17* @B
-@B = global i17 7
-
-declare void @X()
-
-declare void @X()
-
-define void @X() {
- ret void
-}
-
-declare void @X()
diff --git a/test/Integer/globalvars_bt.ll b/test/Integer/globalvars_bt.ll
deleted file mode 100644
index 5c43185..0000000
--- a/test/Integer/globalvars_bt.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-
-@MyVar = external global i27
-@MyIntList = external global { \2 *, i27 }
- external global i27 ; i27*:0
-
-@AConst = constant i27 123
-
-@AString = constant [4 x i8] c"test"
-
-@ZeroInit = global { [100 x i27 ], [40 x float ] } { [100 x i27] zeroinitializer,
- [40 x float] zeroinitializer }
-
-
-define i27 @"foo"(i27 %blah)
-begin
- store i27 5, i27 *@MyVar
- %idx = getelementptr { \2 *, i27 } * @MyIntList, i64 0, i32 1
- store i27 12, i27* %idx
- ret i27 %blah
-end
-
diff --git a/test/Integer/indirectcall2_bt.ll b/test/Integer/indirectcall2_bt.ll
deleted file mode 100644
index 5b7c68d..0000000
--- a/test/Integer/indirectcall2_bt.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i63 @"test"(i63 %X)
-begin
- ret i63 %X
-end
-
-define i63 @"fib"(i63 %n)
-begin
- %T = icmp ult i63 %n, 2 ; {i1}:0
- br i1 %T, label %BaseCase, label %RecurseCase
-
-RecurseCase:
- %result = call i63 @test(i63 %n)
- br label %BaseCase
-
-BaseCase:
- %X = phi i63 [1, %0], [2, %RecurseCase]
- ret i63 %X
-end
-
diff --git a/test/Integer/indirectcall_bt.ll b/test/Integer/indirectcall_bt.ll
deleted file mode 100644
index d586fca..0000000
--- a/test/Integer/indirectcall_bt.ll
+++ /dev/null
@@ -1,52 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-declare i32 @"atoi"(i8 *)
-
-define i63 @"fib"(i63 %n)
-begin
- icmp ult i63 %n, 2 ; {i1}:1
- br i1 %1, label %BaseCase, label %RecurseCase
-
-BaseCase:
- ret i63 1
-
-RecurseCase:
- %n2 = sub i63 %n, 2
- %n1 = sub i63 %n, 1
- %f2 = call i63(i63) * @fib(i63 %n2)
- %f1 = call i63(i63) * @fib(i63 %n1)
- %result = add i63 %f2, %f1
- ret i63 %result
-end
-
-define i63 @"realmain"(i32 %argc, i8 ** %argv)
-begin
- icmp eq i32 %argc, 2 ; {i1}:1
- br i1 %1, label %HasArg, label %Continue
-HasArg:
- ; %n1 = atoi(argv[1])
- %n1 = add i32 1, 1
- br label %Continue
-
-Continue:
- %n = phi i32 [%n1, %HasArg], [1, %0]
- %N = sext i32 %n to i63
- %F = call i63(i63) *@fib(i63 %N)
- ret i63 %F
-end
-
-define i63 @"trampoline"(i63 %n, i63(i63)* %fibfunc)
-begin
- %F = call i63(i63) *%fibfunc(i63 %n)
- ret i63 %F
-end
-
-define i32 @"main"()
-begin
- %Result = call i63 @trampoline(i63 10, i63(i63) *@fib)
- %Result2 = trunc i63 %Result to i32
- ret i32 %Result2
-end
diff --git a/test/Integer/opaquetypes_bt.ll b/test/Integer/opaquetypes_bt.ll
deleted file mode 100644
index 5771342..0000000
--- a/test/Integer/opaquetypes_bt.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-; This test case is used to test opaque type processing, forward references,
-; and recursive types. Oh my.
-;
-
-%SQ1 = type { i31 }
-%SQ2 = type { %ITy }
-%ITy = type i31
-
-
-%CCC = type { \2* }
-%BBB = type { \2*, \2 * }
-%AAA = type { \2*, {\2*}, [12x{\2*}], {[1x{\2*}]} }
-
-; Test numbered types
-type %CCC
-type %BBB
-%Composite = type { %0, %1 }
-
-; Test simple opaque type resolution...
-%i31ty = type i31
-
-; Perform a simple forward reference...
-%ty1 = type { %ty2, i31 }
-%ty2 = type float
-
-; Do a recursive type...
-%list = type { %list * }
-%listp = type { %listp } *
-
-; Do two mutually recursive types...
-%TyA = type { %ty2, %TyB * }
-%TyB = type { double, %TyA * }
-
-; A complex recursive type...
-%Y = type { {%Y*}, %Y* }
-%Z = type { { %Z * }, [12x%Z] *, {{{ %Z * }}} }
-
-; More ridiculous test cases...
-%A = type [ 123x %A*]
-%M = type %M (%M, %M) *
-%P = type %P*
-
-; Recursive ptrs
-%u = type %v*
-%v = type %u*
-
-; Test the parser for unnamed recursive types...
-%P1 = type \1 *
-%Y1 = type { { \3 * }, \2 * }
-%Z1 = type { { \3 * }, [12x\3] *, { { { \5 * } } } }
-
-
-
-
diff --git a/test/Integer/paramattrs_bt.ll b/test/Integer/paramattrs_bt.ll
deleted file mode 100644
index 47ef753..0000000
--- a/test/Integer/paramattrs_bt.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: llvm-as < %s | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-%ZFunTy = type i33(i8 zeroext)
-%SFunTy = type i33(i8 signext)
-
-declare i16 @"test"(i16 signext %arg) signext
-declare i8 @"test2" (i16 zeroext %a2) zeroext
-
-
-define i33 @main(i33 %argc, i8 **%argv) {
- %val = trunc i33 %argc to i16
- %res = call i16 (i16 signext) signext *@test(i16 signext %val) signext
- %two = add i16 %res, %res
- %res2 = call i8 @test2(i16 %two zeroext) zeroext
- %retVal = sext i16 %two to i33
- ret i33 %retVal
-}
diff --git a/test/Integer/prototype_bt.ll b/test/Integer/prototype_bt.ll
deleted file mode 100644
index 2236e8b..0000000
--- a/test/Integer/prototype_bt.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-declare i31 @"bar"(i31 %in)
-
-define i31 @"foo"(i31 %blah)
-begin
- %xx = call i31 @bar(i31 %blah)
- ret i31 %xx
-end
-
diff --git a/test/Integer/recursivetype_bt.ll b/test/Integer/recursivetype_bt.ll
deleted file mode 100644
index d5ce3f5..0000000
--- a/test/Integer/recursivetype_bt.ll
+++ /dev/null
@@ -1,108 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-; This file contains the output from the following compiled C code:
-; typedef struct list {
-; struct list *Next;
-; i32 Data;
-; } list;
-;
-; // Iterative insert fn
-; void InsertIntoListTail(list **L, i32 Data) {
-; while (*L)
-; L = &(*L)->Next;
-; *L = (list*)malloc(sizeof(list));
-; (*L)->Data = Data;
-; (*L)->Next = 0;
-; }
-;
-; // Recursive list search fn
-; list *FindData(list *L, i32 Data) {
-; if (L == 0) return 0;
-; if (L->Data == Data) return L;
-; return FindData(L->Next, Data);
-; }
-;
-; void DoListStuff() {
-; list *MyList = 0;
-; InsertIntoListTail(&MyList, 100);
-; InsertIntoListTail(&MyList, 12);
-; InsertIntoListTail(&MyList, 42);
-; InsertIntoListTail(&MyList, 1123);
-; InsertIntoListTail(&MyList, 1213);
-;
-; if (FindData(MyList, 75)) foundIt();
-; if (FindData(MyList, 42)) foundIt();
-; if (FindData(MyList, 700)) foundIt();
-; }
-
-%list = type { %list*, i36 }
-
-declare i8 *@"malloc"(i32)
-
-;;**********************
-;;**********************
-
-define void @"InsertIntoListTail"(%list** %L, i36 %Data)
-begin
-bb1:
- %reg116 = load %list** %L ;;<%list*>
- %cast1004 = inttoptr i64 0 to %list* ;;<%list*>
- %cond1000 = icmp eq %list* %reg116, %cast1004 ;;<i1>
- br i1 %cond1000, label %bb3, label %bb2
-
-bb2:
- %reg117 = phi %list** [ %reg118, %bb2 ], [ %L, %bb1 ] ;;<%list**>
- %cast1010 = bitcast %list** %reg117 to %list*** ;;<%list***>
- %reg118 = load %list*** %cast1010 ;;<%list**>
- %reg109 = load %list** %reg118 ;;<%list*>
- %cast1005 = inttoptr i64 0 to %list* ;;<%list*>
- %cond1001 = icmp ne %list* %reg109, %cast1005 ;;<i1>
- br i1 %cond1001, label %bb2, label %bb3
-
-bb3:
- %reg119 = phi %list** [ %reg118, %bb2 ], [ %L, %bb1 ] ;;<%list**>
- %cast1006 = bitcast %list** %reg119 to i8** ;;<i8**>
- %reg111 = call i8* @malloc(i32 16) ;;<i8*>
- store i8* %reg111, i8** %cast1006 ;;<void>
- %reg112 = ptrtoint i8* %reg111 to i64
- %reg1002 = add i64 %reg112, 8
- %reg1005 = inttoptr i64 %reg1002 to i8* ;;<i8*>
- %cast1008 = bitcast i8* %reg1005 to i36* ;;<i36*>
- store i36 %Data, i36* %cast1008 ;;<void>
- %cast1003 = inttoptr i64 0 to i64* ;;<i64*>
- %cast1009 = bitcast i8* %reg111 to i64** ;;<i64**>
- store i64* %cast1003, i64** %cast1009 ;;<void>
- ret void
-end
-
-define %list* @"FindData"(%list* %L, i36 %Data)
-begin
-bb1:
- br label %bb2
-
-bb2:
- %reg115 = phi %list* [ %reg116, %bb6 ], [ %L, %bb1 ] ;;<%list*>
- %cast1014 = inttoptr i64 0 to %list* ;;<%list*>
- %cond1011 = icmp ne %list* %reg115, %cast1014 ;;<i1>
- br i1 %cond1011, label %bb4, label %bb3
-
-bb3:
- ret %list* null
-
-bb4:
- %idx = getelementptr %list* %reg115, i64 0, i32 1 ;;<i36>
- %reg111 = load i36* %idx
- %cond1013 = icmp ne i36 %reg111, %Data ;;<i1>
- br i1 %cond1013, label %bb6, label %bb5
-
-bb5:
- ret %list* %reg115
-
-bb6:
- %idx2 = getelementptr %list* %reg115, i64 0, i32 0 ;;<%list*>
- %reg116 = load %list** %idx2
- br label %bb2
-end
diff --git a/test/Integer/simplecalltest_bt.ll b/test/Integer/simplecalltest_bt.ll
deleted file mode 100644
index 45dc0f1..0000000
--- a/test/Integer/simplecalltest_bt.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-%FunTy = type i31(i31)
-
-
-define void @"invoke"(%FunTy *%x)
-begin
- %foo = call %FunTy* %x(i31 123)
- ret void
-end
-
-define i31 @"main"(i31 %argc, i8 **%argv, i8 **%envp)
-begin
- %retval = call i31 (i31) *@test(i31 %argc)
- %two = add i31 %retval, %retval
- %retval2 = call i31 @test(i31 %argc)
-
- %two2 = add i31 %two, %retval2
- call void @invoke (%FunTy* @test)
- ret i31 %two2
-end
-
-define i31 @"test"(i31 %i0)
-begin
- ret i31 %i0
-end
diff --git a/test/Integer/small_bt.ll b/test/Integer/small_bt.ll
deleted file mode 100644
index 00fcace..0000000
--- a/test/Integer/small_bt.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-%x = type i19
-
-
-define i19 @"foo"(i19 %in)
-begin
-label:
- ret i19 2
-end
-
diff --git a/test/Integer/testalloca_bt.ll b/test/Integer/testalloca_bt.ll
deleted file mode 100644
index e8e73c5..0000000
--- a/test/Integer/testalloca_bt.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-%inners = type {float, {i8 } }
-%struct = type { i33 , {float, {i8 } } , i64 }
-
-
-define i33 @testfunction(i33 %i0, i33 %j0)
-begin
- alloca i8, i32 5
- %ptr = alloca i33 ; yields {i33*}:ptr
- store i33 3, i33* %ptr ; yields {void}
- %val = load i33* %ptr ; yields {i33}:val = i33 %3
-
- %sptr = alloca %struct ; yields {%struct*}:sptr
- %nsptr = getelementptr %struct * %sptr, i64 0, i32 1 ; yields {inners*}:nsptr
- %ubsptr = getelementptr %inners * %nsptr, i64 0, i32 1 ; yields {{i8}*}:ubsptr
- %idx = getelementptr {i8} * %ubsptr, i64 0, i32 0
- store i8 4, i8* %idx
-
- %fptr = getelementptr %struct * %sptr, i64 0, i32 1, i32 0 ; yields {float*}:fptr
- store float 4.0, float * %fptr
-
- ret i33 3
-end
-
diff --git a/test/Integer/testarith_bt.ll b/test/Integer/testarith_bt.ll
deleted file mode 100644
index 0820399..0000000
--- a/test/Integer/testarith_bt.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i31 @"simpleArith"(i31 %i0, i31 %j0)
-begin
- %t1 = add i31 %i0, %j0
- %t2 = sub i31 %i0, %j0
- %t3 = mul i31 %t1, %t2
- %t4 = udiv i31 %t1, %t2
- %t5 = sdiv i31 %t1, %t2
- %t6 = urem i31 %t1, %t2
- %t7 = srem i31 %t1, %t2
- %t8 = shl i31 %t1, 9
- %t9 = lshr i31 %t1, 9
- %t10= ashr i31 %t1, 9
- %f1 = sitofp i31 %t1 to float
- %f2 = fdiv float 4.0, %f1
- ret i31 %t3
-end
diff --git a/test/Integer/testconstants_bt.ll b/test/Integer/testconstants_bt.ll
deleted file mode 100644
index 8ca49cf..0000000
--- a/test/Integer/testconstants_bt.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-@somestr = constant [11x i8] c"hello world"
-@array = constant [2 x i55] [ i55 12, i55 52 ]
- constant { i55, i55 } { i55 4, i55 3 }
-
-
-define [2 x i55]* @testfunction(i55 %i0, i55 %j0)
-begin
- ret [2x i55]* @array
-end
-
-define i8* @otherfunc(i55, double)
-begin
- %somestr = getelementptr [11x i8]* @somestr, i64 0, i64 0
- ret i8* %somestr
-end
-
-define i8* @yetanotherfunc(i55, double)
-begin
- ret i8* null ; Test null
-end
-
-define i55 @negativeUnsigned() {
- ret i55 -1
-}
-
-define i55 @largeSigned() {
- ret i55 3900000000
-}
diff --git a/test/Integer/testicmp_bt.ll b/test/Integer/testicmp_bt.ll
deleted file mode 100644
index 40a2465..0000000
--- a/test/Integer/testicmp_bt.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i31 @"simpleIcmp"(i31 %i0, i31 %j0)
-begin
- %t1 = icmp eq i31 %i0, %j0
- %t2 = icmp ne i31 %i0, %j0
- %t3 = icmp ult i31 %i0, %j0
- %t4 = icmp sgt i31 %i0, %j0
- %t5 = icmp ule i31 %i0, %j0
- %t6 = icmp sge i31 %i0, %j0
-
- %t7 = icmp eq i31 %i0, 1098765432
- %t8 = icmp ne i31 %i0, -31415926
-
- %t9 = icmp ult i31 10000, %j0
- %t10 = icmp sgt i31 -10000, %j0
-
-
- ret i31 %i0
-end
diff --git a/test/Integer/testlogical_bt.ll b/test/Integer/testlogical_bt.ll
deleted file mode 100644
index a2c927d..0000000
--- a/test/Integer/testlogical_bt.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i31 @"simpleAdd"(i31 %i0, i31 %j0)
-begin
- %t1 = xor i31 %i0, %j0
- %t2 = or i31 %i0, %j0
- %t3 = and i31 %t1, %t2
- ret i31 %t3
-end
-
diff --git a/test/Integer/testlogical_new_bt.ll b/test/Integer/testlogical_new_bt.ll
deleted file mode 100644
index 49a26dc..0000000
--- a/test/Integer/testlogical_new_bt.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-define i31 @"simpleAdd"(i31 %i0, i31 %j0)
-begin
- %t1 = xor i31 %i0, %j0
- %t2 = or i31 %i0, %j0
- %t3 = and i31 %t1, %t2
- %t4 = shl i31 %i0, 2
- %t5 = ashr i31 %i0, 2
- %t6 = lshr i31 %j0, 22
- ret i31 %t3
-end
diff --git a/test/Integer/testmemory_bt.ll b/test/Integer/testmemory_bt.ll
deleted file mode 100644
index e503c56..0000000
--- a/test/Integer/testmemory_bt.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-%struct = type { i31 , {float, {i9 } } , i64 }
-%complexty = type {i31, {[4 x i9 *], float}, double}
-
-
-define i31 @"main"()
-begin
- call i31 @testfunction(i64 0, i64 1)
- ret i31 0
-end
-
-define i31 @"testfunction"(i64 %i0, i64 %j0)
-begin
- %array0 = malloc [4 x i9] ; yields {[4 x i9]*}:array0
- %size = add i32 2, 2 ; yields {i31}:size = i31 %4
- %array1 = malloc i9, i32 4 ; yields {i9*}:array1
- %array2 = malloc i9, i32 %size ; yields {i9*}:array2
-
- %idx = getelementptr [4 x i9]* %array0, i64 0, i64 2
- store i9 123, i9* %idx
- free [4x i9]* %array0
- free i9* %array1
- free i9* %array2
-
-
- %aa = alloca %complexty, i32 5
- %idx2 = getelementptr %complexty* %aa, i64 %i0, i32 1, i32 0, i64 %j0
- store i9 *null, i9** %idx2
-
- %ptr = alloca i31 ; yields {i31*}:ptr
- store i31 3, i31* %ptr ; yields {void}
- %val = load i31* %ptr ; yields {i31}:val = i31 %3
-
- %sptr = alloca %struct ; yields {%struct*}:sptr
- %ubsptr = getelementptr %struct * %sptr, i64 0, i32 1, i32 1 ; yields {{i9}*}:ubsptr
- %idx3 = getelementptr {i9} * %ubsptr, i64 0, i32 0
- store i9 4, i9* %idx3
-
- ret i31 3
-end
-
diff --git a/test/Integer/testswitch_bt.ll b/test/Integer/testswitch_bt.ll
deleted file mode 100644
index bf7cdc5..0000000
--- a/test/Integer/testswitch_bt.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
- %i35 = type i35
-
-
-define i35 @"squared"(%i35 %i0)
-begin
- switch i35 %i0, label %Default [
- i35 1, label %Case1
- i35 2, label %Case2
- i35 4, label %Case4 ]
-
-Default:
- ret i35 -1 ; Unrecognized input value
-
-Case1:
- ret i35 1
-Case2:
- ret i35 4
-Case4:
- ret i35 16
-end
diff --git a/test/Integer/testvarargs_bt.ll b/test/Integer/testvarargs_bt.ll
deleted file mode 100644
index 3227d14..0000000
--- a/test/Integer/testvarargs_bt.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
-; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
-; RUN: diff %t1.ll %t2.ll
-
-
-declare i31 @"printf"(i8*, ...) ;; Prototype for: i32 __builtin_printf(const char*, ...)
-
-define i31 @"testvarar"()
-begin
- call i31(i8*, ...) *@printf(i8 * null, i31 12, i8 42)
- ret i31 %1
-end
-
-
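All of the deleted test/Integer round-trip tests above use legacy pre-rewrite
syntax that this commit drops rather than ports: begin/end function bodies,
the malloc/free instructions, and \2-style type up-references. As a minimal
sketch, assuming only the current brace-based function syntax (everything
else here appears in the deleted files), the same assemble/disassemble
round-trip idea for an arbitrary-width integer would look like:

  ; RUN: llvm-as %s -o - | llvm-dis > %t1.ll
  ; RUN: llvm-as %t1.ll -o - | llvm-dis > %t2.ll
  ; RUN: diff %t1.ll %t2.ll
  ; Round-trip a function using a non-power-of-two integer width.
  define i31 @simpleAdd(i31 %i0, i31 %j0) {
    %t1 = xor i31 %i0, %j0
    %t2 = or i31 %i0, %j0
    %t3 = and i31 %t1, %t2
    ret i31 %t3
  }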
diff --git a/test/Linker/2003-01-30-LinkerRename.ll b/test/Linker/2003-01-30-LinkerRename.ll
index af0e643..cc34634 100644
--- a/test/Linker/2003-01-30-LinkerRename.ll
+++ b/test/Linker/2003-01-30-LinkerRename.ll
@@ -3,7 +3,7 @@
; RUN: echo {define internal i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc
; RUN: llvm-as %s -o %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep @foo() | grep -v internal
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {@foo()} | grep -v internal
define i32 @foo() { ret i32 0 }
diff --git a/test/Linker/2003-01-30-LinkerTypeRename.ll b/test/Linker/2003-01-30-LinkerTypeRename.ll
index 67a0626..6cd2406 100644
--- a/test/Linker/2003-01-30-LinkerTypeRename.ll
+++ b/test/Linker/2003-01-30-LinkerTypeRename.ll
@@ -5,5 +5,6 @@
; RUN: llvm-as < %s > %t.2.bc
; RUN: llvm-link %t.1.bc %t.2.bc -S | grep {%Ty } | not grep opaque
-%Ty = type i32
+%Ty = type {i32}
+@GV = global %Ty* null
\ No newline at end of file
diff --git a/test/Linker/2003-05-31-LinkerRename.ll b/test/Linker/2003-05-31-LinkerRename.ll
index 498fc14..80e0a69 100644
--- a/test/Linker/2003-05-31-LinkerRename.ll
+++ b/test/Linker/2003-05-31-LinkerRename.ll
@@ -6,7 +6,7 @@
; RUN: echo { define internal i32 @foo() \{ ret i32 7 \} } | llvm-as > %t.1.bc
; RUN: llvm-as < %s > %t.2.bc
-; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep @foo(
+; RUN: llvm-link %t.1.bc %t.2.bc -S | grep internal | not grep {@foo(}
declare i32 @foo()
diff --git a/test/Linker/2003-08-23-GlobalVarLinking.ll b/test/Linker/2003-08-23-GlobalVarLinking.ll
index c3f61f8..8acbbd2 100644
--- a/test/Linker/2003-08-23-GlobalVarLinking.ll
+++ b/test/Linker/2003-08-23-GlobalVarLinking.ll
@@ -1,5 +1,5 @@
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo {@S = external global \{ i32, opaque* \} declare void @F(opaque*)}\
+; RUN: echo {%T1 = type opaque %T2 = type opaque @S = external global \{ i32, %T1* \} declare void @F(%T2*)}\
; RUN: | llvm-as > %t.out2.bc
; RUN: llvm-link %t.out1.bc %t.out2.bc -S | not grep opaque
diff --git a/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll b/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll
index ea82075..d810dba 100644
--- a/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll
+++ b/test/Linker/2003-08-23-RecursiveOpaqueTypeResolve.ll
@@ -5,5 +5,6 @@
; RUN: echo "%M = type { %M*, i32* }" | llvm-as > %t.out2.bc
; RUN: llvm-link %t.out1.bc %t.out2.bc
-%M = type { %M*, opaque* }
+%T1 = type opaque
+%M = type { %M*, %T1* }
diff --git a/test/Linker/2003-08-28-TypeResolvesGlobal.ll b/test/Linker/2003-08-28-TypeResolvesGlobal.ll
index 80b6162..53ae581 100644
--- a/test/Linker/2003-08-28-TypeResolvesGlobal.ll
+++ b/test/Linker/2003-08-28-TypeResolvesGlobal.ll
@@ -1,5 +1,5 @@
; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo "%M = type i32" | llvm-as > %t.out2.bc
+; RUN: echo "%M = type { i32} " | llvm-as > %t.out2.bc
; RUN: llvm-link %t.out2.bc %t.out1.bc
%M = type opaque
diff --git a/test/Linker/2003-10-21-ConflictingTypesTolerance.ll b/test/Linker/2003-10-21-ConflictingTypesTolerance.ll
deleted file mode 100644
index 7cdf7ad..0000000
--- a/test/Linker/2003-10-21-ConflictingTypesTolerance.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-; RUN: llvm-as < %s > %t.out1.bc
-; RUN: echo { %M = type \[8 x i32\] external global %M } | llvm-as > %t.out2.bc
-; RUN: llvm-link %t.out1.bc %t.out2.bc -S | grep %M | grep \\{
-%M = type { i32 }
-
-
diff --git a/test/Linker/inlineasm.ll b/test/Linker/inlineasm.ll
new file mode 100644
index 0000000..d77f3a7
--- /dev/null
+++ b/test/Linker/inlineasm.ll
@@ -0,0 +1,17 @@
+; RUN: echo > %t.ll
+; RUN: llvm-link %t.ll %s -S
+
+; ModuleID = 'bitfield-access-2.o'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-macosx10.6.8"
+
+; rdar://9776316 - type remapping needed for inline asm blobs.
+
+%T = type { [18 x i32], [4 x i8*] }
+
+define void @f(%T* %x) nounwind ssp {
+entry:
+call void asm sideeffect "", "=*m"(%T* %x) nounwind
+unreachable
+}
+
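The new inlineasm.ll test pins down the remapping issue its comment cites:
when llvm-link merges modules, named struct types may be renamed, and that
remapping has to reach the types of inline asm call operands too. As a
hypothetical illustration (not part of this commit; the conflicting %T and
the %T.0 name are assumptions based on the linker's usual .N renaming), a
destination module with a different %T would see the linked copy of @f
rewritten in terms of the renamed incoming type:

  %T = type { i32 }                        ; pre-existing, conflicting type
  %T.0 = type { [18 x i32], [4 x i8*] }    ; incoming type after renaming
  define void @f(%T.0* %x) nounwind ssp {
  entry:
    ; The asm operand type must be remapped along with everything else.
    call void asm sideeffect "", "=*m"(%T.0* %x) nounwind
    unreachable
  }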
diff --git a/test/Linker/testlink1.ll b/test/Linker/testlink1.ll
index 4a94025..a874637 100644
--- a/test/Linker/testlink1.ll
+++ b/test/Linker/testlink1.ll
@@ -1,42 +1,95 @@
; RUN: llvm-as < %s > %t.bc
; RUN: llvm-as < %p/testlink2.ll > %t2.bc
-; RUN: llvm-link %t.bc %t2.bc
+; RUN: llvm-link %t.bc %t2.bc -S | FileCheck %s
+
+; CHECK: %Ty2 = type { %Ty1* }
+; CHECK: %Ty1 = type { %Ty2* }
+%Ty1 = type opaque
+%Ty2 = type { %Ty1* }
+
+; CHECK: %intlist = type { %intlist*, i32 }
+%intlist = type { %intlist*, i32 }
+
+; The uses of intlist in the other file should be remapped.
+; CHECK-NOT: {{%intlist.[0-9]}}
+
+%Struct1 = type opaque
+@S1GV = external global %Struct1*
+
+
+@GVTy1 = external global %Ty1*
+@GVTy2 = global %Ty2* null
+
+
+; This should stay the same
+; CHECK: @MyIntList = global %intlist { %intlist* null, i32 17 }
+@MyIntList = global %intlist { %intlist* null, i32 17 }
+
+
+; Nothing to link here.
+
+; CHECK: @0 = external global i32
+@0 = external global i32
+; CHECK: @Inte = global i32 1
+@Inte = global i32 1
+
+; Intern1 is intern in both files, rename testlink2's.
+; CHECK: @Intern1 = internal constant i32 42
+@Intern1 = internal constant i32 42
+
+; This should get renamed since there is a definition that is non-internal in
+; the other module.
+; CHECK: @Intern2{{[0-9]+}} = internal constant i32 792
+@Intern2 = internal constant i32 792
+
+
+; CHECK: @MyVarPtr = linkonce global { i32* } { i32* @MyVar }
+@MyVarPtr = linkonce global { i32* } { i32* @MyVar }
+
+; CHECK: @MyVar = global i32 4
+@MyVar = external global i32
+
+; Take value from other module.
+; CHECK: @AConst = constant i32 1234
+@AConst = linkonce constant i32 123
+
+; Renamed version of Intern1.
+; CHECK: @Intern1{{[0-9]+}} = internal constant i32 52
+
+
+; Globals linked from testlink2.
+; CHECK: @Intern2 = constant i32 12345
+
+; CHECK: @MyIntListPtr = constant
+; CHECK: @1 = constant i32 412
-@MyVar = external global i32 ; <i32*> [#uses=3]
-@MyIntList = global { \2*, i32 } { { \2*, i32 }* null, i32 17 } ; <{ \2*, i32 }*> [#uses=1]
-external global i32 ; <i32*>:0 [#uses=0]
-@Inte = global i32 1 ; <i32*> [#uses=0]
-@AConst = linkonce constant i32 123 ; <i32*> [#uses=0]
-@Intern1 = internal constant i32 42 ; <i32*> [#uses=0]
-@Intern2 = internal constant i32 792 ; <i32*> [#uses=0]
-@MyVarPtr = linkonce global { i32* } { i32* @MyVar } ; <{ i32* }*> [#uses=0]
declare i32 @foo(i32)
declare void @print(i32)
define void @main() {
- %v1 = load i32* @MyVar ; <i32> [#uses=1]
- call void @print( i32 %v1 )
- %idx = getelementptr { \2*, i32 }* @MyIntList, i64 0, i32 1 ; <i32*> [#uses=2]
- %v2 = load i32* %idx ; <i32> [#uses=1]
- call void @print( i32 %v2 )
- call i32 @foo( i32 5 ) ; <i32>:1 [#uses=0]
- %v3 = load i32* @MyVar ; <i32> [#uses=1]
- call void @print( i32 %v3 )
- %v4 = load i32* %idx ; <i32> [#uses=1]
- call void @print( i32 %v4 )
- ret void
+ %v1 = load i32* @MyVar
+ call void @print(i32 %v1)
+ %idx = getelementptr %intlist* @MyIntList, i64 0, i32 1
+ %v2 = load i32* %idx
+ call void @print(i32 %v2)
+ %1 = call i32 @foo(i32 5)
+ %v3 = load i32* @MyVar
+ call void @print(i32 %v3)
+ %v4 = load i32* %idx
+ call void @print(i32 %v4)
+ ret void
}
define internal void @testintern() {
- ret void
+ ret void
}
define internal void @Testintern() {
- ret void
+ ret void
}
define void @testIntern() {
- ret void
+ ret void
}
diff --git a/test/Linker/testlink2.ll b/test/Linker/testlink2.ll
index d243e3c..1798e31 100644
--- a/test/Linker/testlink2.ll
+++ b/test/Linker/testlink2.ll
@@ -2,40 +2,54 @@
;
; RUN: true
-@MyVar = global i32 4 ; <i32*> [#uses=2]
-@MyIntList = external global { \2*, i32 } ; <{ \2*, i32 }*> [#uses=2]
-@AConst = constant i32 123 ; <i32*> [#uses=0]
+%intlist = type { %intlist*, i32 }
+
+
+%Ty1 = type { %Ty2* }
+%Ty2 = type opaque
+
+@GVTy1 = global %Ty1* null
+@GVTy2 = external global %Ty2*
+
+
+@MyVar = global i32 4
+@MyIntList = external global %intlist
+@AConst = constant i32 1234
;; Intern in both testlink[12].ll
-@Intern1 = internal constant i32 52 ; <i32*> [#uses=0]
+@Intern1 = internal constant i32 52
;; Intern in one but not in other
-@Intern2 = constant i32 12345 ; <i32*> [#uses=0]
+@Intern2 = constant i32 12345
+
+@MyIntListPtr = constant { %intlist* } { %intlist* @MyIntList }
+@MyVarPtr = linkonce global { i32* } { i32* @MyVar }
+@0 = constant i32 412
-@MyIntListPtr = constant { { \2*, i32 }* } { { \2*, i32 }* @MyIntList } ; <{ { \2*, i32 }* }*> [#uses=0]
-@MyVarPtr = linkonce global { i32* } { i32* @MyVar } ; <{ i32* }*> [#uses=0]
-constant i32 412 ; <i32*>:0 [#uses=1]
+; Provides definition of Struct1 and of S1GV.
+%Struct1 = type { i32 }
+@S1GV = global %Struct1* null
define i32 @foo(i32 %blah) {
- store i32 %blah, i32* @MyVar
- %idx = getelementptr { \2*, i32 }* @MyIntList, i64 0, i32 1 ; <i32*> [#uses=1]
- store i32 12, i32* %idx
- %ack = load i32* @0 ; <i32> [#uses=1]
- %fzo = add i32 %ack, %blah ; <i32> [#uses=1]
- ret i32 %fzo
+ store i32 %blah, i32* @MyVar
+ %idx = getelementptr %intlist* @MyIntList, i64 0, i32 1
+ store i32 12, i32* %idx
+ %ack = load i32* @0
+ %fzo = add i32 %ack, %blah
+ ret i32 %fzo
}
declare void @unimp(float, double)
define internal void @testintern() {
- ret void
+ ret void
}
define void @Testintern() {
- ret void
+ ret void
}
define internal void @testIntern() {
- ret void
+ ret void
}
diff --git a/test/Linker/unnamed-addr1-a.ll b/test/Linker/unnamed-addr1-a.ll
index 1ddac9c..e9c03ee 100644
--- a/test/Linker/unnamed-addr1-a.ll
+++ b/test/Linker/unnamed-addr1-a.ll
@@ -10,15 +10,15 @@
@c = common unnamed_addr global i32 0
; CHECK: @c = common unnamed_addr global i32 0
@d = external global i32
-; CHECK: @d = global i32 42
+; CHECK: @d = unnamed_addr global i32 42
@e = external unnamed_addr global i32
; CHECK: @e = unnamed_addr global i32 42
@f = weak global i32 42
-; CHECK: @f = global i32 42
+; CHECK: @f = unnamed_addr global i32 42
; Other file has non-unnamed_addr definition
@g = common unnamed_addr global i32 0
-; CHECK: @g = common global i32 0
+; CHECK: @g = common unnamed_addr global i32 0
@h = external global i32
; CHECK: @h = global i32 42
@i = external unnamed_addr global i32
diff --git a/test/MC/ARM/arm-arithmetic-aliases.s b/test/MC/ARM/arm-arithmetic-aliases.s
new file mode 100644
index 0000000..9895cfc
--- /dev/null
+++ b/test/MC/ARM/arm-arithmetic-aliases.s
@@ -0,0 +1,126 @@
+@ RUN: llvm-mc -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+
+foo:
+@ CHECK: foo
+
+sub r2, r2, #6
+sub r2, #6
+sub r2, r2, r3
+sub r2, r3
+
+@ CHECK: sub r2, r2, #6 @ encoding: [0x06,0x20,0x42,0xe2]
+@ CHECK: sub r2, r2, #6 @ encoding: [0x06,0x20,0x42,0xe2]
+@ CHECK: sub r2, r2, r3 @ encoding: [0x03,0x20,0x42,0xe0]
+@ CHECK: sub r2, r2, r3 @ encoding: [0x03,0x20,0x42,0xe0]
+
+add r2, r2, #6
+add r2, #6
+add r2, r2, r3
+add r2, r3
+
+@ CHECK: add r2, r2, #6 @ encoding: [0x06,0x20,0x82,0xe2]
+@ CHECK: add r2, r2, #6 @ encoding: [0x06,0x20,0x82,0xe2]
+@ CHECK: add r2, r2, r3 @ encoding: [0x03,0x20,0x82,0xe0]
+@ CHECK: add r2, r2, r3 @ encoding: [0x03,0x20,0x82,0xe0]
+
+and r2, r2, #6
+and r2, #6
+and r2, r2, r3
+and r2, r3
+
+@ CHECK: and r2, r2, #6 @ encoding: [0x06,0x20,0x02,0xe2]
+@ CHECK: and r2, r2, #6 @ encoding: [0x06,0x20,0x02,0xe2]
+@ CHECK: and r2, r2, r3 @ encoding: [0x03,0x20,0x02,0xe0]
+@ CHECK: and r2, r2, r3 @ encoding: [0x03,0x20,0x02,0xe0]
+
+orr r2, r2, #6
+orr r2, #6
+orr r2, r2, r3
+orr r2, r3
+
+@ CHECK: orr r2, r2, #6 @ encoding: [0x06,0x20,0x82,0xe3]
+@ CHECK: orr r2, r2, #6 @ encoding: [0x06,0x20,0x82,0xe3]
+@ CHECK: orr r2, r2, r3 @ encoding: [0x03,0x20,0x82,0xe1]
+@ CHECK: orr r2, r2, r3 @ encoding: [0x03,0x20,0x82,0xe1]
+
+eor r2, r2, #6
+eor r2, #6
+eor r2, r2, r3
+eor r2, r3
+
+@ CHECK: eor r2, r2, #6 @ encoding: [0x06,0x20,0x22,0xe2]
+@ CHECK: eor r2, r2, #6 @ encoding: [0x06,0x20,0x22,0xe2]
+@ CHECK: eor r2, r2, r3 @ encoding: [0x03,0x20,0x22,0xe0]
+@ CHECK: eor r2, r2, r3 @ encoding: [0x03,0x20,0x22,0xe0]
+
+bic r2, r2, #6
+bic r2, #6
+bic r2, r2, r3
+bic r2, r3
+
+@ CHECK: bic r2, r2, #6 @ encoding: [0x06,0x20,0xc2,0xe3]
+@ CHECK: bic r2, r2, #6 @ encoding: [0x06,0x20,0xc2,0xe3]
+@ CHECK: bic r2, r2, r3 @ encoding: [0x03,0x20,0xc2,0xe1]
+@ CHECK: bic r2, r2, r3 @ encoding: [0x03,0x20,0xc2,0xe1]
+
+
+@ Also check that we handle the predicate and cc_out operands.
+subseq r2, r2, #6
+subseq r2, #6
+subseq r2, r2, r3
+subseq r2, r3
+
+@ CHECK: subseq r2, r2, #6 @ encoding: [0x06,0x20,0x52,0x02]
+@ CHECK: subseq r2, r2, #6 @ encoding: [0x06,0x20,0x52,0x02]
+@ CHECK: subseq r2, r2, r3 @ encoding: [0x03,0x20,0x52,0x00]
+@ CHECK: subseq r2, r2, r3 @ encoding: [0x03,0x20,0x52,0x00]
+
+addseq r2, r2, #6
+addseq r2, #6
+addseq r2, r2, r3
+addseq r2, r3
+
+@ CHECK: addseq r2, r2, #6 @ encoding: [0x06,0x20,0x92,0x02]
+@ CHECK: addseq r2, r2, #6 @ encoding: [0x06,0x20,0x92,0x02]
+@ CHECK: addseq r2, r2, r3 @ encoding: [0x03,0x20,0x92,0x00]
+@ CHECK: addseq r2, r2, r3 @ encoding: [0x03,0x20,0x92,0x00]
+
+andseq r2, r2, #6
+andseq r2, #6
+andseq r2, r2, r3
+andseq r2, r3
+
+@ CHECK: andseq r2, r2, #6 @ encoding: [0x06,0x20,0x12,0x02]
+@ CHECK: andseq r2, r2, #6 @ encoding: [0x06,0x20,0x12,0x02]
+@ CHECK: andseq r2, r2, r3 @ encoding: [0x03,0x20,0x12,0x00]
+@ CHECK: andseq r2, r2, r3 @ encoding: [0x03,0x20,0x12,0x00]
+
+orrseq r2, r2, #6
+orrseq r2, #6
+orrseq r2, r2, r3
+orrseq r2, r3
+
+@ CHECK: orrseq r2, r2, #6 @ encoding: [0x06,0x20,0x92,0x03]
+@ CHECK: orrseq r2, r2, #6 @ encoding: [0x06,0x20,0x92,0x03]
+@ CHECK: orrseq r2, r2, r3 @ encoding: [0x03,0x20,0x92,0x01]
+@ CHECK: orrseq r2, r2, r3 @ encoding: [0x03,0x20,0x92,0x01]
+
+eorseq r2, r2, #6
+eorseq r2, #6
+eorseq r2, r2, r3
+eorseq r2, r3
+
+@ CHECK: eorseq r2, r2, #6 @ encoding: [0x06,0x20,0x32,0x02]
+@ CHECK: eorseq r2, r2, #6 @ encoding: [0x06,0x20,0x32,0x02]
+@ CHECK: eorseq r2, r2, r3 @ encoding: [0x03,0x20,0x32,0x00]
+@ CHECK: eorseq r2, r2, r3 @ encoding: [0x03,0x20,0x32,0x00]
+
+bicseq r2, r2, #6
+bicseq r2, #6
+bicseq r2, r2, r3
+bicseq r2, r3
+
+@ CHECK: bicseq r2, r2, #6 @ encoding: [0x06,0x20,0xd2,0x03]
+@ CHECK: bicseq r2, r2, #6 @ encoding: [0x06,0x20,0xd2,0x03]
+@ CHECK: bicseq r2, r2, r3 @ encoding: [0x03,0x20,0xd2,0x01]
+@ CHECK: bicseq r2, r2, r3 @ encoding: [0x03,0x20,0xd2,0x01]
diff --git a/test/MC/ARM/arm_instructions.s b/test/MC/ARM/arm_instructions.s
index f789441..650fcd2 100644
--- a/test/MC/ARM/arm_instructions.s
+++ b/test/MC/ARM/arm_instructions.s
@@ -20,68 +20,28 @@
@ CHECK: encoding: [0xa0,0x0d,0xe1,0xf2]
vqdmull.s32 q8, d17, d16
-@ CHECK: ldmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
-@ CHECK: ldmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe9]
-@ CHECK: ldmda r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x12,0xe8]
-@ CHECK: ldmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x12,0xe9]
- ldmia r2, {r1,r3-r6,sp}
- ldmib r2, {r1,r3-r6,sp}
- ldmda r2, {r1,r3-r6,sp}
- ldmdb r2, {r1,r3-r6,sp}
-
-@ CHECK: stmia r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe8]
-@ CHECK: stmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe9]
-@ CHECK: stmda r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe8]
-@ CHECK: stmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe9]
- stmia r2, {r1,r3-r6,sp}
- stmib r2, {r1,r3-r6,sp}
- stmda r2, {r1,r3-r6,sp}
- stmdb r2, {r1,r3-r6,sp}
-
-@ CHECK: ldmia r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe8]
-@ CHECK: ldmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe9]
-@ CHECK: ldmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe8]
-@ CHECK: ldmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe9]
- ldmia r2!, {r1,r3-r6,sp}
- ldmib r2!, {r1,r3-r6,sp}
- ldmda r2!, {r1,r3-r6,sp}
- ldmdb r2!, {r1,r3-r6,sp}
-
-@ CHECK: stmia r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xa2,0xe8]
-@ CHECK: stmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xa2,0xe9]
-@ CHECK: stmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x22,0xe8]
-@ CHECK: stmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x22,0xe9]
- stmia r2!, {r1,r3-r6,sp}
- stmib r2!, {r1,r3-r6,sp}
- stmda r2!, {r1,r3-r6,sp}
- stmdb r2!, {r1,r3-r6,sp}
-
@ CHECK: and r1, r2, r3 @ encoding: [0x03,0x10,0x02,0xe0]
and r1,r2,r3
-@ FIXME: This is wrong, we are dropping the 's' for now.
-@ CHECK-FIXME: ands r1, r2, r3 @ encoding: [0x03,0x10,0x12,0xe0]
+@ CHECK: ands r1, r2, r3 @ encoding: [0x03,0x10,0x12,0xe0]
ands r1,r2,r3
@ CHECK: eor r1, r2, r3 @ encoding: [0x03,0x10,0x22,0xe0]
eor r1,r2,r3
-@ FIXME: This is wrong, we are dropping the 's' for now.
-@ CHECK-FIXME: eors r1, r2, r3 @ encoding: [0x03,0x10,0x32,0xe0]
+@ CHECK: eors r1, r2, r3 @ encoding: [0x03,0x10,0x32,0xe0]
eors r1,r2,r3
@ CHECK: sub r1, r2, r3 @ encoding: [0x03,0x10,0x42,0xe0]
sub r1,r2,r3
-@ FIXME: This is wrong, we are dropping the 's' for now.
-@ CHECK-FIXME: subs r1, r2, r3 @ encoding: [0x03,0x10,0x52,0xe0]
+@ CHECK: subs r1, r2, r3 @ encoding: [0x03,0x10,0x52,0xe0]
subs r1,r2,r3
@ CHECK: add r1, r2, r3 @ encoding: [0x03,0x10,0x82,0xe0]
add r1,r2,r3
-@ FIXME: This is wrong, we are dropping the 's' for now.
-@ CHECK-FIXME: adds r1, r2, r3 @ encoding: [0x03,0x10,0x92,0xe0]
+@ CHECK: adds r1, r2, r3 @ encoding: [0x03,0x10,0x92,0xe0]
adds r1,r2,r3
@ CHECK: adc r1, r2, r3 @ encoding: [0x03,0x10,0xa2,0xe0]
@@ -93,15 +53,13 @@
@ CHECK: orr r1, r2, r3 @ encoding: [0x03,0x10,0x82,0xe1]
orr r1,r2,r3
-@ FIXME: This is wrong, we are dropping the 's' for now.
-@ CHECK-FIXME: orrs r1, r2, r3 @ encoding: [0x03,0x10,0x92,0xe1]
+@ CHECK: orrs r1, r2, r3 @ encoding: [0x03,0x10,0x92,0xe1]
orrs r1,r2,r3
@ CHECK: bic r1, r2, r3 @ encoding: [0x03,0x10,0xc2,0xe1]
bic r1,r2,r3
-@ FIXME: This is wrong, we are dropping the 's' for now.
-@ CHECK-FIXME: bics r1, r2, r3 @ encoding: [0x03,0x10,0xd2,0xe1]
+@ CHECK: bics r1, r2, r3 @ encoding: [0x03,0x10,0xd2,0xe1]
bics r1,r2,r3
@ CHECK: mov r1, r2 @ encoding: [0x02,0x10,0xa0,0xe1]
@@ -110,8 +68,7 @@
@ CHECK: mvn r1, r2 @ encoding: [0x02,0x10,0xe0,0xe1]
mvn r1,r2
-@ FIXME: This is wrong, we are dropping the 's' for now.
-@ CHECK-FIXME: mvns r1, r2 @ encoding: [0x02,0x10,0xf0,0xe1]
+@ CHECK: mvns r1, r2 @ encoding: [0x02,0x10,0xf0,0xe1]
mvns r1,r2
@ CHECK: rsb r1, r2, r3 @ encoding: [0x03,0x10,0x62,0xe0]
@@ -120,37 +77,22 @@
@ CHECK: rsc r1, r2, r3 @ encoding: [0x03,0x10,0xe2,0xe0]
rsc r1,r2,r3
-@ FIXME: This is broken, CCOut operands don't work correctly when their presence
-@ may depend on flags.
-@ CHECK-FIXME:: mlas r1, r2, r3, r4 @ encoding: [0x92,0x43,0x31,0xe0]
-@ mlas r1,r2,r3,r4
-
@ CHECK: bfi r0, r0, #5, #7 @ encoding: [0x90,0x02,0xcb,0xe7]
bfi r0, r0, #5, #7
@ CHECK: bkpt #10 @ encoding: [0x7a,0x00,0x20,0xe1]
bkpt #10
-@ CHECK: isb @ encoding: [0x6f,0xf0,0x7f,0xf5]
- isb
@ CHECK: mrs r8, cpsr @ encoding: [0x00,0x80,0x0f,0xe1]
mrs r8, cpsr
-@ CHECK: mcr p7, #1, r5, c1, c1, #4 @ encoding: [0x91,0x57,0x21,0xee]
- mcr p7, #1, r5, c1, c1, #4
@ CHECK: mrc p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xee]
mrc p14, #0, r1, c1, c2, #4
-@ CHECK: mcrr p7, #1, r5, r4, c1 @ encoding: [0x11,0x57,0x44,0xec]
- mcrr p7, #1, r5, r4, c1
@ CHECK: mrrc p7, #1, r5, r4, c1 @ encoding: [0x11,0x57,0x54,0xec]
mrrc p7, #1, r5, r4, c1
-@ CHECK: mcr2 p7, #1, r5, c1, c1, #4 @ encoding: [0x91,0x57,0x21,0xfe]
- mcr2 p7, #1, r5, c1, c1, #4
@ CHECK: mrc2 p14, #0, r1, c1, c2, #4 @ encoding: [0x92,0x1e,0x11,0xfe]
mrc2 p14, #0, r1, c1, c2, #4
-@ CHECK: mcrr2 p7, #1, r5, r4, c1 @ encoding: [0x11,0x57,0x44,0xfc]
- mcrr2 p7, #1, r5, r4, c1
@ CHECK: mrrc2 p7, #1, r5, r4, c1 @ encoding: [0x11,0x57,0x54,0xfc]
mrrc2 p7, #1, r5, r4, c1
@@ -159,12 +101,6 @@
@ CHECK: cdp2 p7, #1, c1, c1, c1, #4 @ encoding: [0x81,0x17,0x11,0xfe]
cdp2 p7, #1, c1, c1, c1, #4
-@ CHECK: clrex @ encoding: [0x1f,0xf0,0x7f,0xf5]
- clrex
-
-@ CHECK: clz r9, r0 @ encoding: [0x10,0x9f,0x6f,0xe1]
- clz r9, r0
-
@ CHECK: qadd r1, r2, r3 @ encoding: [0x52,0x10,0x03,0xe1]
qadd r1, r2, r3
@@ -189,54 +125,6 @@
@ CHECK: nop @ encoding: [0x00,0xf0,0x20,0xe3]
nop
-@ CHECK: dmb sy @ encoding: [0x5f,0xf0,0x7f,0xf5]
- dmb sy
-
-@ CHECK: dmb st @ encoding: [0x5e,0xf0,0x7f,0xf5]
- dmb st
-
-@ CHECK: dmb ish @ encoding: [0x5b,0xf0,0x7f,0xf5]
- dmb ish
-
-@ CHECK: dmb ishst @ encoding: [0x5a,0xf0,0x7f,0xf5]
- dmb ishst
-
-@ CHECK: dmb nsh @ encoding: [0x57,0xf0,0x7f,0xf5]
- dmb nsh
-
-@ CHECK: dmb nshst @ encoding: [0x56,0xf0,0x7f,0xf5]
- dmb nshst
-
-@ CHECK: dmb osh @ encoding: [0x53,0xf0,0x7f,0xf5]
- dmb osh
-
-@ CHECK: dmb oshst @ encoding: [0x52,0xf0,0x7f,0xf5]
- dmb oshst
-
-@ CHECK: dsb sy @ encoding: [0x4f,0xf0,0x7f,0xf5]
- dsb sy
-
-@ CHECK: dsb st @ encoding: [0x4e,0xf0,0x7f,0xf5]
- dsb st
-
-@ CHECK: dsb ish @ encoding: [0x4b,0xf0,0x7f,0xf5]
- dsb ish
-
-@ CHECK: dsb ishst @ encoding: [0x4a,0xf0,0x7f,0xf5]
- dsb ishst
-
-@ CHECK: dsb nsh @ encoding: [0x47,0xf0,0x7f,0xf5]
- dsb nsh
-
-@ CHECK: dsb nshst @ encoding: [0x46,0xf0,0x7f,0xf5]
- dsb nshst
-
-@ CHECK: dsb osh @ encoding: [0x43,0xf0,0x7f,0xf5]
- dsb osh
-
-@ CHECK: dsb oshst @ encoding: [0x42,0xf0,0x7f,0xf5]
- dsb oshst
-
@ CHECK: cpsie aif @ encoding: [0xc0,0x01,0x08,0xf1]
cpsie aif
diff --git a/test/MC/ARM/basic-arm-instructions.s b/test/MC/ARM/basic-arm-instructions.s
new file mode 100644
index 0000000..0b728bc
--- /dev/null
+++ b/test/MC/ARM/basic-arm-instructions.s
@@ -0,0 +1,698 @@
+@ RUN: llvm-mc -triple=armv7-apple-darwin -show-encoding < %s | FileCheck %s
+ .syntax unified
+ .globl _func
+
+@ Check that the assembler can handle the documented syntax from the ARM ARM.
+@ For complex constructs like shifter operands, check more thoroughly for them
+@ once then spot check that following instructions accept the form generally.
+@ This gives us good coverage while keeping the overall size of the test
+@ more reasonable.
+
+_func:
+@ CHECK: _func
+
+@------------------------------------------------------------------------------
+@ ADC (immediate)
+@------------------------------------------------------------------------------
+ adc r1, r2, #0xf
+ adc r1, r2, #0xf0
+ adc r1, r2, #0xf00
+ adc r1, r2, #0xf000
+ adc r1, r2, #0xf0000
+ adc r1, r2, #0xf00000
+ adc r1, r2, #0xf000000
+ adc r1, r2, #0xf0000000
+ adc r1, r2, #0xf000000f
+ adcs r1, r2, #0xf00
+ adcseq r1, r2, #0xf00
+ adceq r1, r2, #0xf00
+
+@ CHECK: adc r1, r2, #15 @ encoding: [0x0f,0x10,0xa2,0xe2]
+@ CHECK: adc r1, r2, #240 @ encoding: [0xf0,0x10,0xa2,0xe2]
+@ CHECK: adc r1, r2, #3840 @ encoding: [0x0f,0x1c,0xa2,0xe2]
+@ CHECK: adc r1, r2, #61440 @ encoding: [0x0f,0x1a,0xa2,0xe2]
+@ CHECK: adc r1, r2, #983040 @ encoding: [0x0f,0x18,0xa2,0xe2]
+@ CHECK: adc r1, r2, #15728640 @ encoding: [0x0f,0x16,0xa2,0xe2]
+@ CHECK: adc r1, r2, #251658240 @ encoding: [0x0f,0x14,0xa2,0xe2]
+@ CHECK: adc r1, r2, #4026531840 @ encoding: [0x0f,0x12,0xa2,0xe2]
+@ CHECK: adc r1, r2, #4026531855 @ encoding: [0xff,0x12,0xa2,0xe2]
+
+@ CHECK: adcs r1, r2, #3840 @ encoding: [0x0f,0x1c,0xb2,0xe2]
+@ CHECK: adcseq r1, r2, #3840 @ encoding: [0x0f,0x1c,0xb2,0x02]
+@ CHECK: adceq r1, r2, #3840 @ encoding: [0x0f,0x1c,0xa2,0x02]
+
+@------------------------------------------------------------------------------
+@ ADC (register)
+@ ADC (shifted register)
+@------------------------------------------------------------------------------
+ adc r4, r5, r6
+ @ Constant shifts
+ adc r4, r5, r6, lsl #1
+ adc r4, r5, r6, lsl #31
+ adc r4, r5, r6, lsr #1
+ adc r4, r5, r6, lsr #31
+ adc r4, r5, r6, lsr #32
+ adc r4, r5, r6, asr #1
+ adc r4, r5, r6, asr #31
+ adc r4, r5, r6, asr #32
+ adc r4, r5, r6, ror #1
+ adc r4, r5, r6, ror #31
+
+ @ Register shifts
+ adc r6, r7, r8, lsl r9
+ adc r6, r7, r8, lsr r9
+ adc r6, r7, r8, asr r9
+ adc r6, r7, r8, ror r9
+ adc r4, r5, r6, rrx
+
+ @ Destination register is optional
+ adc r5, r6
+ adc r4, r5, lsl #1
+ adc r4, r5, lsl #31
+ adc r4, r5, lsr #1
+ adc r4, r5, lsr #31
+ adc r4, r5, lsr #32
+ adc r4, r5, asr #1
+ adc r4, r5, asr #31
+ adc r4, r5, asr #32
+ adc r4, r5, ror #1
+ adc r4, r5, ror #31
+ adc r4, r5, rrx
+ adc r6, r7, lsl r9
+ adc r6, r7, lsr r9
+ adc r6, r7, asr r9
+ adc r6, r7, ror r9
+ adc r4, r5, rrx
+
+@ CHECK: adc r4, r5, r6 @ encoding: [0x06,0x40,0xa5,0xe0]
+
+@ CHECK: adc r4, r5, r6, lsl #1 @ encoding: [0x86,0x40,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, lsl #31 @ encoding: [0x86,0x4f,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, lsr #1 @ encoding: [0xa6,0x40,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, lsr #31 @ encoding: [0xa6,0x4f,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, lsr #32 @ encoding: [0x26,0x40,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, asr #1 @ encoding: [0xc6,0x40,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, asr #31 @ encoding: [0xc6,0x4f,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, asr #32 @ encoding: [0x46,0x40,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, ror #1 @ encoding: [0xe6,0x40,0xa5,0xe0]
+@ CHECK: adc r4, r5, r6, ror #31 @ encoding: [0xe6,0x4f,0xa5,0xe0]
+
+@ CHECK: adc r6, r7, r8, lsl r9 @ encoding: [0x18,0x69,0xa7,0xe0]
+@ CHECK: adc r6, r7, r8, lsr r9 @ encoding: [0x38,0x69,0xa7,0xe0]
+@ CHECK: adc r6, r7, r8, asr r9 @ encoding: [0x58,0x69,0xa7,0xe0]
+@ CHECK: adc r6, r7, r8, ror r9 @ encoding: [0x78,0x69,0xa7,0xe0]
+@ CHECK: adc r4, r5, r6, rrx @ encoding: [0x66,0x40,0xa5,0xe0]
+
+@ CHECK: adc r5, r5, r6 @ encoding: [0x06,0x50,0xa5,0xe0]
+@ CHECK: adc r4, r4, r5, lsl #1 @ encoding: [0x85,0x40,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, lsl #31 @ encoding: [0x85,0x4f,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, lsr #1 @ encoding: [0xa5,0x40,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, lsr #31 @ encoding: [0xa5,0x4f,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, lsr #32 @ encoding: [0x25,0x40,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, asr #1 @ encoding: [0xc5,0x40,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, asr #31 @ encoding: [0xc5,0x4f,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, asr #32 @ encoding: [0x45,0x40,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, ror #1 @ encoding: [0xe5,0x40,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, ror #31 @ encoding: [0xe5,0x4f,0xa4,0xe0]
+@ CHECK: adc r4, r4, r5, rrx @ encoding: [0x65,0x40,0xa4,0xe0]
+@ CHECK: adc r6, r6, r7, lsl r9 @ encoding: [0x17,0x69,0xa6,0xe0]
+@ CHECK: adc r6, r6, r7, lsr r9 @ encoding: [0x37,0x69,0xa6,0xe0]
+@ CHECK: adc r6, r6, r7, asr r9 @ encoding: [0x57,0x69,0xa6,0xe0]
+@ CHECK: adc r6, r6, r7, ror r9 @ encoding: [0x77,0x69,0xa6,0xe0]
+@ CHECK: adc r4, r4, r5, rrx @ encoding: [0x65,0x40,0xa4,0xe0]
+
+
+@------------------------------------------------------------------------------
+@ FIXME: ADR
+@------------------------------------------------------------------------------
+
+@------------------------------------------------------------------------------
+@ ADD
+@------------------------------------------------------------------------------
+ add r4, r5, #0xf000
+ add r4, r5, r6
+ add r4, r5, r6, lsl #5
+ add r4, r5, r6, lsr #5
+ add r4, r5, r6, lsr #5
+ add r4, r5, r6, asr #5
+ add r4, r5, r6, ror #5
+ add r6, r7, r8, lsl r9
+ add r6, r7, r8, lsr r9
+ add r6, r7, r8, asr r9
+ add r6, r7, r8, ror r9
+ add r4, r5, r6, rrx
+
+ @ destination register is optional
+ add r5, #0xf000
+ add r4, r5
+ add r4, r5, lsl #5
+ add r4, r5, lsr #5
+ add r4, r5, lsr #5
+ add r4, r5, asr #5
+ add r4, r5, ror #5
+ add r6, r7, lsl r9
+ add r6, r7, lsr r9
+ add r6, r7, asr r9
+ add r6, r7, ror r9
+ add r4, r5, rrx
+
+@ CHECK: add r4, r5, #61440 @ encoding: [0x0f,0x4a,0x85,0xe2]
+@ CHECK: add r4, r5, r6 @ encoding: [0x06,0x40,0x85,0xe0]
+@ CHECK: add r4, r5, r6, lsl #5 @ encoding: [0x86,0x42,0x85,0xe0]
+@ CHECK: add r4, r5, r6, lsr #5 @ encoding: [0xa6,0x42,0x85,0xe0]
+@ CHECK: add r4, r5, r6, lsr #5 @ encoding: [0xa6,0x42,0x85,0xe0]
+@ CHECK: add r4, r5, r6, asr #5 @ encoding: [0xc6,0x42,0x85,0xe0]
+@ CHECK: add r4, r5, r6, ror #5 @ encoding: [0xe6,0x42,0x85,0xe0]
+@ CHECK: add r6, r7, r8, lsl r9 @ encoding: [0x18,0x69,0x87,0xe0]
+@ CHECK: add r6, r7, r8, lsr r9 @ encoding: [0x38,0x69,0x87,0xe0]
+@ CHECK: add r6, r7, r8, asr r9 @ encoding: [0x58,0x69,0x87,0xe0]
+@ CHECK: add r6, r7, r8, ror r9 @ encoding: [0x78,0x69,0x87,0xe0]
+@ CHECK: add r4, r5, r6, rrx @ encoding: [0x66,0x40,0x85,0xe0]
+
+
+@ CHECK: add r5, r5, #61440 @ encoding: [0x0f,0x5a,0x85,0xe2]
+@ CHECK: add r4, r4, r5 @ encoding: [0x05,0x40,0x84,0xe0]
+@ CHECK: add r4, r4, r5, lsl #5 @ encoding: [0x85,0x42,0x84,0xe0]
+@ CHECK: add r4, r4, r5, lsr #5 @ encoding: [0xa5,0x42,0x84,0xe0]
+@ CHECK: add r4, r4, r5, lsr #5 @ encoding: [0xa5,0x42,0x84,0xe0]
+@ CHECK: add r4, r4, r5, asr #5 @ encoding: [0xc5,0x42,0x84,0xe0]
+@ CHECK: add r4, r4, r5, ror #5 @ encoding: [0xe5,0x42,0x84,0xe0]
+@ CHECK: add r6, r6, r7, lsl r9 @ encoding: [0x17,0x69,0x86,0xe0]
+@ CHECK: add r6, r6, r7, lsr r9 @ encoding: [0x37,0x69,0x86,0xe0]
+@ CHECK: add r6, r6, r7, asr r9 @ encoding: [0x57,0x69,0x86,0xe0]
+@ CHECK: add r6, r6, r7, ror r9 @ encoding: [0x77,0x69,0x86,0xe0]
+@ CHECK: add r4, r4, r5, rrx @ encoding: [0x65,0x40,0x84,0xe0]
+
+
+@------------------------------------------------------------------------------
+@ AND
+@------------------------------------------------------------------------------
+ and r10, r1, #0xf
+ and r10, r1, r6
+ and r10, r1, r6, lsl #10
+ and r10, r1, r6, lsr #10
+ and r10, r1, r6, lsr #10
+ and r10, r1, r6, asr #10
+ and r10, r1, r6, ror #10
+ and r6, r7, r8, lsl r2
+ and r6, r7, r8, lsr r2
+ and r6, r7, r8, asr r2
+ and r6, r7, r8, ror r2
+ and r10, r1, r6, rrx
+
+ @ destination register is optional
+ and r1, #0xf
+ and r10, r1
+ and r10, r1, lsl #10
+ and r10, r1, lsr #10
+ and r10, r1, lsr #10
+ and r10, r1, asr #10
+ and r10, r1, ror #10
+ and r6, r7, lsl r2
+ and r6, r7, lsr r2
+ and r6, r7, asr r2
+ and r6, r7, ror r2
+ and r10, r1, rrx
+
+@ CHECK: and r10, r1, #15 @ encoding: [0x0f,0xa0,0x01,0xe2]
+@ CHECK: and r10, r1, r6 @ encoding: [0x06,0xa0,0x01,0xe0]
+@ CHECK: and r10, r1, r6, lsl #10 @ encoding: [0x06,0xa5,0x01,0xe0]
+@ CHECK: and r10, r1, r6, lsr #10 @ encoding: [0x26,0xa5,0x01,0xe0]
+@ CHECK: and r10, r1, r6, lsr #10 @ encoding: [0x26,0xa5,0x01,0xe0]
+@ CHECK: and r10, r1, r6, asr #10 @ encoding: [0x46,0xa5,0x01,0xe0]
+@ CHECK: and r10, r1, r6, ror #10 @ encoding: [0x66,0xa5,0x01,0xe0]
+@ CHECK: and r6, r7, r8, lsl r2 @ encoding: [0x18,0x62,0x07,0xe0]
+@ CHECK: and r6, r7, r8, lsr r2 @ encoding: [0x38,0x62,0x07,0xe0]
+@ CHECK: and r6, r7, r8, asr r2 @ encoding: [0x58,0x62,0x07,0xe0]
+@ CHECK: and r6, r7, r8, ror r2 @ encoding: [0x78,0x62,0x07,0xe0]
+@ CHECK: and r10, r1, r6, rrx @ encoding: [0x66,0xa0,0x01,0xe0]
+
+@ CHECK: and r1, r1, #15 @ encoding: [0x0f,0x10,0x01,0xe2]
+@ CHECK: and r10, r10, r1 @ encoding: [0x01,0xa0,0x0a,0xe0]
+@ CHECK: and r10, r10, r1, lsl #10 @ encoding: [0x01,0xa5,0x0a,0xe0]
+@ CHECK: and r10, r10, r1, lsr #10 @ encoding: [0x21,0xa5,0x0a,0xe0]
+@ CHECK: and r10, r10, r1, lsr #10 @ encoding: [0x21,0xa5,0x0a,0xe0]
+@ CHECK: and r10, r10, r1, asr #10 @ encoding: [0x41,0xa5,0x0a,0xe0]
+@ CHECK: and r10, r10, r1, ror #10 @ encoding: [0x61,0xa5,0x0a,0xe0]
+@ CHECK: and r6, r6, r7, lsl r2 @ encoding: [0x17,0x62,0x06,0xe0]
+@ CHECK: and r6, r6, r7, lsr r2 @ encoding: [0x37,0x62,0x06,0xe0]
+@ CHECK: and r6, r6, r7, asr r2 @ encoding: [0x57,0x62,0x06,0xe0]
+@ CHECK: and r6, r6, r7, ror r2 @ encoding: [0x77,0x62,0x06,0xe0]
+@ CHECK: and r10, r10, r1, rrx @ encoding: [0x61,0xa0,0x0a,0xe0]
+
+@------------------------------------------------------------------------------
+@ FIXME: ASR
+@------------------------------------------------------------------------------
+@------------------------------------------------------------------------------
+@ FIXME: B
+@------------------------------------------------------------------------------
+@------------------------------------------------------------------------------
+@ FIXME: BFC
+@------------------------------------------------------------------------------
+@------------------------------------------------------------------------------
+@ FIXME: BFI
+@------------------------------------------------------------------------------
+
+@------------------------------------------------------------------------------
+@ BIC
+@------------------------------------------------------------------------------
+ bic r10, r1, #0xf
+ bic r10, r1, r6
+ bic r10, r1, r6, lsl #10
+ bic r10, r1, r6, lsr #10
+ bic r10, r1, r6, lsr #10
+ bic r10, r1, r6, asr #10
+ bic r10, r1, r6, ror #10
+ bic r6, r7, r8, lsl r2
+ bic r6, r7, r8, lsr r2
+ bic r6, r7, r8, asr r2
+ bic r6, r7, r8, ror r2
+ bic r10, r1, r6, rrx
+
+ @ destination register is optional
+ bic r1, #0xf
+ bic r10, r1
+ bic r10, r1, lsl #10
+ bic r10, r1, lsr #10
+ bic r10, r1, lsr #10
+ bic r10, r1, asr #10
+ bic r10, r1, ror #10
+ bic r6, r7, lsl r2
+ bic r6, r7, lsr r2
+ bic r6, r7, asr r2
+ bic r6, r7, ror r2
+ bic r10, r1, rrx
+
+@ CHECK: bic r10, r1, #15 @ encoding: [0x0f,0xa0,0xc1,0xe3]
+@ CHECK: bic r10, r1, r6 @ encoding: [0x06,0xa0,0xc1,0xe1]
+@ CHECK: bic r10, r1, r6, lsl #10 @ encoding: [0x06,0xa5,0xc1,0xe1]
+@ CHECK: bic r10, r1, r6, lsr #10 @ encoding: [0x26,0xa5,0xc1,0xe1]
+@ CHECK: bic r10, r1, r6, lsr #10 @ encoding: [0x26,0xa5,0xc1,0xe1]
+@ CHECK: bic r10, r1, r6, asr #10 @ encoding: [0x46,0xa5,0xc1,0xe1]
+@ CHECK: bic r10, r1, r6, ror #10 @ encoding: [0x66,0xa5,0xc1,0xe1]
+@ CHECK: bic r6, r7, r8, lsl r2 @ encoding: [0x18,0x62,0xc7,0xe1]
+@ CHECK: bic r6, r7, r8, lsr r2 @ encoding: [0x38,0x62,0xc7,0xe1]
+@ CHECK: bic r6, r7, r8, asr r2 @ encoding: [0x58,0x62,0xc7,0xe1]
+@ CHECK: bic r6, r7, r8, ror r2 @ encoding: [0x78,0x62,0xc7,0xe1]
+@ CHECK: bic r10, r1, r6, rrx @ encoding: [0x66,0xa0,0xc1,0xe1]
+
+
+@ CHECK: bic r1, r1, #15 @ encoding: [0x0f,0x10,0xc1,0xe3]
+@ CHECK: bic r10, r10, r1 @ encoding: [0x01,0xa0,0xca,0xe1]
+@ CHECK: bic r10, r10, r1, lsl #10 @ encoding: [0x01,0xa5,0xca,0xe1]
+@ CHECK: bic r10, r10, r1, lsr #10 @ encoding: [0x21,0xa5,0xca,0xe1]
+@ CHECK: bic r10, r10, r1, lsr #10 @ encoding: [0x21,0xa5,0xca,0xe1]
+@ CHECK: bic r10, r10, r1, asr #10 @ encoding: [0x41,0xa5,0xca,0xe1]
+@ CHECK: bic r10, r10, r1, ror #10 @ encoding: [0x61,0xa5,0xca,0xe1]
+@ CHECK: bic r6, r6, r7, lsl r2 @ encoding: [0x17,0x62,0xc6,0xe1]
+@ CHECK: bic r6, r6, r7, lsr r2 @ encoding: [0x37,0x62,0xc6,0xe1]
+@ CHECK: bic r6, r6, r7, asr r2 @ encoding: [0x57,0x62,0xc6,0xe1]
+@ CHECK: bic r6, r6, r7, ror r2 @ encoding: [0x77,0x62,0xc6,0xe1]
+@ CHECK: bic r10, r10, r1, rrx @ encoding: [0x61,0xa0,0xca,0xe1]
+
+@------------------------------------------------------------------------------
+@ BKPT
+@------------------------------------------------------------------------------
+ bkpt #10
+ bkpt #65535
+
+@ CHECK: bkpt #10 @ encoding: [0x7a,0x00,0x20,0xe1]
+@ CHECK: bkpt #65535 @ encoding: [0x7f,0xff,0x2f,0xe1]
+
+@------------------------------------------------------------------------------
+@ BL/BLX (immediate)
+@------------------------------------------------------------------------------
+
+ bl _bar
+ @ FIXME: blx _bar
+
+@ CHECK: bl _bar @ encoding: [A,A,A,0xeb]
+@ CHECK: @ fixup A - offset: 0, value: _bar, kind: fixup_arm_uncondbranch
+
+@------------------------------------------------------------------------------
+@ BLX (register)
+@------------------------------------------------------------------------------
+ blx r2
+ blxne r2
+
+@ CHECK: blx r2 @ encoding: [0x32,0xff,0x2f,0xe1]
+@ CHECK: blxne r2 @ encoding: [0x32,0xff,0x2f,0x11]
+
+@------------------------------------------------------------------------------
+@ BX
+@------------------------------------------------------------------------------
+
+ bx r2
+ bxne r2
+
+@ CHECK: bx r2 @ encoding: [0x12,0xff,0x2f,0xe1]
+@ CHECK: bxne r2 @ encoding: [0x12,0xff,0x2f,0x11]
+
+@------------------------------------------------------------------------------
+@ BXJ
+@------------------------------------------------------------------------------
+
+ bxj r2
+ bxjne r2
+
+@ CHECK: bxj r2 @ encoding: [0x22,0xff,0x2f,0xe1]
+@ CHECK: bxjne r2 @ encoding: [0x22,0xff,0x2f,0x11]
+
+@------------------------------------------------------------------------------
+@ FIXME: CBNZ/CBZ
+@------------------------------------------------------------------------------
+
+
+@------------------------------------------------------------------------------
+@ CDP/CDP2
+@------------------------------------------------------------------------------
+ cdp p7, #1, c1, c1, c1, #4
+ cdp2 p7, #1, c1, c1, c1, #4
+
+@ CHECK: cdp p7, #1, c1, c1, c1, #4 @ encoding: [0x81,0x17,0x11,0xee]
+@ CHECK: cdp2 p7, #1, c1, c1, c1, #4 @ encoding: [0x81,0x17,0x11,0xfe]
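+
+@ CDP2 is the unconditional twin of CDP: the encodings differ only in the
+@ top byte (0xee vs 0xfe), i.e. the condition field is forced to 0b1111.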
+
+
+@------------------------------------------------------------------------------
+@ CLREX
+@------------------------------------------------------------------------------
+ clrex
+
+@ CHECK: clrex @ encoding: [0x1f,0xf0,0x7f,0xf5]
+
+
+@------------------------------------------------------------------------------
+@ CLZ
+@------------------------------------------------------------------------------
+ clz r1, r2
+ clzeq r1, r2
+
+@ CHECK: clz r1, r2 @ encoding: [0x12,0x1f,0x6f,0xe1]
+@ CHECK: clzeq r1, r2 @ encoding: [0x12,0x1f,0x6f,0x01]
+
+@------------------------------------------------------------------------------
+@ CMN
+@------------------------------------------------------------------------------
+ cmn r1, #0xf
+ cmn r1, r6
+ cmn r1, r6, lsl #10
+ cmn r1, r6, lsr #10
+ cmn sp, r6, lsr #10
+ cmn r1, r6, asr #10
+ cmn r1, r6, ror #10
+ cmn r7, r8, lsl r2
+ cmn sp, r8, lsr r2
+ cmn r7, r8, asr r2
+ cmn r7, r8, ror r2
+ cmn r1, r6, rrx
+
+@ CHECK: cmn r1, #15 @ encoding: [0x0f,0x00,0x71,0xe3]
+@ CHECK: cmn r1, r6 @ encoding: [0x06,0x00,0x71,0xe1]
+@ CHECK: cmn r1, r6, lsl #10 @ encoding: [0x06,0x05,0x71,0xe1]
+@ CHECK: cmn r1, r6, lsr #10 @ encoding: [0x26,0x05,0x71,0xe1]
+@ CHECK: cmn sp, r6, lsr #10 @ encoding: [0x26,0x05,0x7d,0xe1]
+@ CHECK: cmn r1, r6, asr #10 @ encoding: [0x46,0x05,0x71,0xe1]
+@ CHECK: cmn r1, r6, ror #10 @ encoding: [0x66,0x05,0x71,0xe1]
+@ CHECK: cmn r7, r8, lsl r2 @ encoding: [0x18,0x02,0x77,0xe1]
+@ CHECK: cmn sp, r8, lsr r2 @ encoding: [0x38,0x02,0x7d,0xe1]
+@ CHECK: cmn r7, r8, asr r2 @ encoding: [0x58,0x02,0x77,0xe1]
+@ CHECK: cmn r7, r8, ror r2 @ encoding: [0x78,0x02,0x77,0xe1]
+@ CHECK: cmn r1, r6, rrx @ encoding: [0x66,0x00,0x71,0xe1]
+
+@------------------------------------------------------------------------------
+@ CMP
+@------------------------------------------------------------------------------
+ cmp r1, #0xf
+ cmp r1, r6
+ cmp r1, r6, lsl #10
+ cmp r1, r6, lsr #10
+ cmp sp, r6, lsr #10
+ cmp r1, r6, asr #10
+ cmp r1, r6, ror #10
+ cmp r7, r8, lsl r2
+ cmp sp, r8, lsr r2
+ cmp r7, r8, asr r2
+ cmp r7, r8, ror r2
+ cmp r1, r6, rrx
+
+@ CHECK: cmp r1, #15 @ encoding: [0x0f,0x00,0x51,0xe3]
+@ CHECK: cmp r1, r6 @ encoding: [0x06,0x00,0x51,0xe1]
+@ CHECK: cmp r1, r6, lsl #10 @ encoding: [0x06,0x05,0x51,0xe1]
+@ CHECK: cmp r1, r6, lsr #10 @ encoding: [0x26,0x05,0x51,0xe1]
+@ CHECK: cmp sp, r6, lsr #10 @ encoding: [0x26,0x05,0x5d,0xe1]
+@ CHECK: cmp r1, r6, asr #10 @ encoding: [0x46,0x05,0x51,0xe1]
+@ CHECK: cmp r1, r6, ror #10 @ encoding: [0x66,0x05,0x51,0xe1]
+@ CHECK: cmp r7, r8, lsl r2 @ encoding: [0x18,0x02,0x57,0xe1]
+@ CHECK: cmp sp, r8, lsr r2 @ encoding: [0x38,0x02,0x5d,0xe1]
+@ CHECK: cmp r7, r8, asr r2 @ encoding: [0x58,0x02,0x57,0xe1]
+@ CHECK: cmp r7, r8, ror r2 @ encoding: [0x78,0x02,0x57,0xe1]
+@ CHECK: cmp r1, r6, rrx @ encoding: [0x66,0x00,0x51,0xe1]
+
+@------------------------------------------------------------------------------
+@ DBG
+@------------------------------------------------------------------------------
+ dbg #0
+ dbg #5
+ dbg #15
+
+@ CHECK: dbg #0 @ encoding: [0xf0,0xf0,0x20,0xe3]
+@ CHECK: dbg #5 @ encoding: [0xf5,0xf0,0x20,0xe3]
+@ CHECK: dbg #15 @ encoding: [0xff,0xf0,0x20,0xe3]
+
+
+@------------------------------------------------------------------------------
+@ DMB
+@------------------------------------------------------------------------------
+ dmb sy
+ dmb st
+ dmb sh
+ dmb ish
+ dmb shst
+ dmb ishst
+ dmb un
+ dmb nsh
+ dmb unst
+ dmb nshst
+ dmb osh
+ dmb oshst
+ dmb
+
+@ CHECK: dmb sy @ encoding: [0x5f,0xf0,0x7f,0xf5]
+@ CHECK: dmb st @ encoding: [0x5e,0xf0,0x7f,0xf5]
+@ CHECK: dmb ish @ encoding: [0x5b,0xf0,0x7f,0xf5]
+@ CHECK: dmb ish @ encoding: [0x5b,0xf0,0x7f,0xf5]
+@ CHECK: dmb ishst @ encoding: [0x5a,0xf0,0x7f,0xf5]
+@ CHECK: dmb ishst @ encoding: [0x5a,0xf0,0x7f,0xf5]
+@ CHECK: dmb nsh @ encoding: [0x57,0xf0,0x7f,0xf5]
+@ CHECK: dmb nsh @ encoding: [0x57,0xf0,0x7f,0xf5]
+@ CHECK: dmb nshst @ encoding: [0x56,0xf0,0x7f,0xf5]
+@ CHECK: dmb nshst @ encoding: [0x56,0xf0,0x7f,0xf5]
+@ CHECK: dmb osh @ encoding: [0x53,0xf0,0x7f,0xf5]
+@ CHECK: dmb oshst @ encoding: [0x52,0xf0,0x7f,0xf5]
+@ CHECK: dmb sy @ encoding: [0x5f,0xf0,0x7f,0xf5]
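+
+@ The barrier option is encoded in the bottom four bits (sy = 0b1111,
+@ st = 0b1110, ish = 0b1011, ..., oshst = 0b0010). The deprecated aliases
+@ above (sh/shst, un/unst) therefore print as their modern equivalents,
+@ and a bare 'dmb' defaults to 'dmb sy'. DSB below takes the same options.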
+
+@------------------------------------------------------------------------------
+@ DSB
+@------------------------------------------------------------------------------
+ dsb sy
+ dsb st
+ dsb sh
+ dsb ish
+ dsb shst
+ dsb ishst
+ dsb un
+ dsb nsh
+ dsb unst
+ dsb nshst
+ dsb osh
+ dsb oshst
+ dsb
+
+@ CHECK: dsb sy @ encoding: [0x4f,0xf0,0x7f,0xf5]
+@ CHECK: dsb st @ encoding: [0x4e,0xf0,0x7f,0xf5]
+@ CHECK: dsb ish @ encoding: [0x4b,0xf0,0x7f,0xf5]
+@ CHECK: dsb ish @ encoding: [0x4b,0xf0,0x7f,0xf5]
+@ CHECK: dsb ishst @ encoding: [0x4a,0xf0,0x7f,0xf5]
+@ CHECK: dsb ishst @ encoding: [0x4a,0xf0,0x7f,0xf5]
+@ CHECK: dsb nsh @ encoding: [0x47,0xf0,0x7f,0xf5]
+@ CHECK: dsb nsh @ encoding: [0x47,0xf0,0x7f,0xf5]
+@ CHECK: dsb nshst @ encoding: [0x46,0xf0,0x7f,0xf5]
+@ CHECK: dsb nshst @ encoding: [0x46,0xf0,0x7f,0xf5]
+@ CHECK: dsb osh @ encoding: [0x43,0xf0,0x7f,0xf5]
+@ CHECK: dsb oshst @ encoding: [0x42,0xf0,0x7f,0xf5]
+@ CHECK: dsb sy @ encoding: [0x4f,0xf0,0x7f,0xf5]
+
+@------------------------------------------------------------------------------
+@ EOR
+@------------------------------------------------------------------------------
+ eor r4, r5, #0xf000
+ eor r4, r5, r6
+ eor r4, r5, r6, lsl #5
+ eor r4, r5, r6, lsr #5
+ eor r4, r5, r6, lsr #5
+ eor r4, r5, r6, asr #5
+ eor r4, r5, r6, ror #5
+ eor r6, r7, r8, lsl r9
+ eor r6, r7, r8, lsr r9
+ eor r6, r7, r8, asr r9
+ eor r6, r7, r8, ror r9
+ eor r4, r5, r6, rrx
+
+ @ destination register is optional
+ eor r5, #0xf000
+ eor r4, r5
+ eor r4, r5, lsl #5
+ eor r4, r5, lsr #5
+ eor r4, r5, lsr #5
+ eor r4, r5, asr #5
+ eor r4, r5, ror #5
+ eor r6, r7, lsl r9
+ eor r6, r7, lsr r9
+ eor r6, r7, asr r9
+ eor r6, r7, ror r9
+ eor r4, r5, rrx
+
+@ CHECK: eor r4, r5, #61440 @ encoding: [0x0f,0x4a,0x25,0xe2]
+@ CHECK: eor r4, r5, r6 @ encoding: [0x06,0x40,0x25,0xe0]
+@ CHECK: eor r4, r5, r6, lsl #5 @ encoding: [0x86,0x42,0x25,0xe0]
+@ CHECK: eor r4, r5, r6, lsr #5 @ encoding: [0xa6,0x42,0x25,0xe0]
+@ CHECK: eor r4, r5, r6, lsr #5 @ encoding: [0xa6,0x42,0x25,0xe0]
+@ CHECK: eor r4, r5, r6, asr #5 @ encoding: [0xc6,0x42,0x25,0xe0]
+@ CHECK: eor r4, r5, r6, ror #5 @ encoding: [0xe6,0x42,0x25,0xe0]
+@ CHECK: eor r6, r7, r8, lsl r9 @ encoding: [0x18,0x69,0x27,0xe0]
+@ CHECK: eor r6, r7, r8, lsr r9 @ encoding: [0x38,0x69,0x27,0xe0]
+@ CHECK: eor r6, r7, r8, asr r9 @ encoding: [0x58,0x69,0x27,0xe0]
+@ CHECK: eor r6, r7, r8, ror r9 @ encoding: [0x78,0x69,0x27,0xe0]
+@ CHECK: eor r4, r5, r6, rrx @ encoding: [0x66,0x40,0x25,0xe0]
+
+
+@ CHECK: eor r5, r5, #61440 @ encoding: [0x0f,0x5a,0x25,0xe2]
+@ CHECK: eor r4, r4, r5 @ encoding: [0x05,0x40,0x24,0xe0]
+@ CHECK: eor r4, r4, r5, lsl #5 @ encoding: [0x85,0x42,0x24,0xe0]
+@ CHECK: eor r4, r4, r5, lsr #5 @ encoding: [0xa5,0x42,0x24,0xe0]
+@ CHECK: eor r4, r4, r5, lsr #5 @ encoding: [0xa5,0x42,0x24,0xe0]
+@ CHECK: eor r4, r4, r5, asr #5 @ encoding: [0xc5,0x42,0x24,0xe0]
+@ CHECK: eor r4, r4, r5, ror #5 @ encoding: [0xe5,0x42,0x24,0xe0]
+@ CHECK: eor r6, r6, r7, lsl r9 @ encoding: [0x17,0x69,0x26,0xe0]
+@ CHECK: eor r6, r6, r7, lsr r9 @ encoding: [0x37,0x69,0x26,0xe0]
+@ CHECK: eor r6, r6, r7, asr r9 @ encoding: [0x57,0x69,0x26,0xe0]
+@ CHECK: eor r6, r6, r7, ror r9 @ encoding: [0x77,0x69,0x26,0xe0]
+@ CHECK: eor r4, r4, r5, rrx @ encoding: [0x65,0x40,0x24,0xe0]
+
+
+@------------------------------------------------------------------------------
+@ ISB
+@------------------------------------------------------------------------------
+ isb sy
+ isb
+
+@ CHECK: isb sy @ encoding: [0x6f,0xf0,0x7f,0xf5]
+@ CHECK: isb sy @ encoding: [0x6f,0xf0,0x7f,0xf5]
+
+
+
+@------------------------------------------------------------------------------
+@ LDM*
+@------------------------------------------------------------------------------
+ ldm r2, {r1,r3-r6,sp}
+ ldmia r2, {r1,r3-r6,sp}
+ ldmib r2, {r1,r3-r6,sp}
+ ldmda r2, {r1,r3-r6,sp}
+ ldmdb r2, {r1,r3-r6,sp}
+ ldmfd r2, {r1,r3-r6,sp}
+
+ @ with update
+ ldm r2!, {r1,r3-r6,sp}
+ ldmib r2!, {r1,r3-r6,sp}
+ ldmda r2!, {r1,r3-r6,sp}
+ ldmdb r2!, {r1,r3-r6,sp}
+
+@ CHECK: ldm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
+@ CHECK: ldm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
+@ CHECK: ldmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe9]
+@ CHECK: ldmda r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x12,0xe8]
+@ CHECK: ldmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x12,0xe9]
+@ CHECK: ldm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x92,0xe8]
+
+@ CHECK: ldm r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe8]
+@ CHECK: ldmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xb2,0xe9]
+@ CHECK: ldmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe8]
+@ CHECK: ldmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x32,0xe9]
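+
+@ 'ldmia' is the canonical increment-after form and prints as plain 'ldm';
+@ 'ldmfd' (pop for a full-descending stack) is the same instruction under
+@ its stack-oriented name, as the identical encodings above show.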
+
+@------------------------------------------------------------------------------
+@ FIXME: LDR*
+@------------------------------------------------------------------------------
+@------------------------------------------------------------------------------
+@ FIXME: LSL
+@------------------------------------------------------------------------------
+@------------------------------------------------------------------------------
+@ FIXME: LSR
+@------------------------------------------------------------------------------
+
+@------------------------------------------------------------------------------
+@ MCR/MCR2
+@------------------------------------------------------------------------------
+ mcr p7, #1, r5, c1, c1, #4
+ mcr2 p7, #1, r5, c1, c1, #4
+
+@ CHECK: mcr p7, #1, r5, c1, c1, #4 @ encoding: [0x91,0x57,0x21,0xee]
+@ CHECK: mcr2 p7, #1, r5, c1, c1, #4 @ encoding: [0x91,0x57,0x21,0xfe]
+
+@------------------------------------------------------------------------------
+@ MCRR/MCRR2
+@------------------------------------------------------------------------------
+ mcrr p7, #15, r5, r4, c1
+ mcrr2 p7, #15, r5, r4, c1
+
+@ CHECK: mcrr p7, #15, r5, r4, c1 @ encoding: [0xf1,0x57,0x44,0xec]
+@ CHECK: mcrr2 p7, #15, r5, r4, c1 @ encoding: [0xf1,0x57,0x44,0xfc]
+
+
+@------------------------------------------------------------------------------
+@ MLA
+@------------------------------------------------------------------------------
+ mla r1,r2,r3,r4
+ mlas r1,r2,r3,r4
+ mlane r1,r2,r3,r4
+ mlasne r1,r2,r3,r4
+
+@ CHECK: mla r1, r2, r3, r4 @ encoding: [0x92,0x43,0x21,0xe0]
+@ CHECK: mlas r1, r2, r3, r4 @ encoding: [0x92,0x43,0x31,0xe0]
+@ CHECK: mlane r1, r2, r3, r4 @ encoding: [0x92,0x43,0x21,0x10]
+@ CHECK: mlasne r1, r2, r3, r4 @ encoding: [0x92,0x43,0x31,0x10]
+
+@------------------------------------------------------------------------------
+@ MLS
+@------------------------------------------------------------------------------
+ mls r2,r5,r6,r3
+ mlsne r2,r5,r6,r3
+
+@ CHECK: mls r2, r5, r6, r3 @ encoding: [0x95,0x36,0x62,0xe0]
+@ CHECK: mlsne r2, r5, r6, r3 @ encoding: [0x95,0x36,0x62,0x10]
+
+@------------------------------------------------------------------------------
+@ STM*
+@------------------------------------------------------------------------------
+ stm r2, {r1,r3-r6,sp}
+ stmia r2, {r1,r3-r6,sp}
+ stmib r2, {r1,r3-r6,sp}
+ stmda r2, {r1,r3-r6,sp}
+ stmdb r2, {r1,r3-r6,sp}
+ stmfd r2, {r1,r3-r6,sp}
+
+ @ with update
+ stmia r2!, {r1,r3-r6,sp}
+ stmib r2!, {r1,r3-r6,sp}
+ stmda r2!, {r1,r3-r6,sp}
+ stmdb r2!, {r1,r3-r6,sp}
+@ CHECK: stm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe8]
+@ CHECK: stm r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe8]
+@ CHECK: stmib r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x82,0xe9]
+@ CHECK: stmda r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe8]
+@ CHECK: stmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe9]
+@ CHECK: stmdb r2, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x02,0xe9]
+
+@ CHECK: stm r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xa2,0xe8]
+@ CHECK: stmib r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0xa2,0xe9]
+@ CHECK: stmda r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x22,0xe8]
+@ CHECK: stmdb r2!, {r1, r3, r4, r5, r6, sp} @ encoding: [0x7a,0x20,0x22,0xe9]
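+
+@ As with LDM, 'stmia' prints as plain 'stm', and 'stmfd' (push for a
+@ full-descending stack) shares its encoding with 'stmdb', which is why
+@ the last two CHECK lines of the first block are identical.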
diff --git a/test/MC/ARM/diagnostics.s b/test/MC/ARM/diagnostics.s
new file mode 100644
index 0000000..4537a0f
--- /dev/null
+++ b/test/MC/ARM/diagnostics.s
@@ -0,0 +1,90 @@
+@ RUN: not llvm-mc -triple=armv7-apple-darwin < %s 2> %t
+@ RUN: FileCheck --check-prefix=CHECK-ERRORS < %t %s
+
+@ Check for various assembly diagnostic messages on invalid input.
+
+@ 's' bit on an instruction that can't accept it.
+ mlss r1, r2, r3, r4
+@ CHECK-ERRORS: error: instruction 'mls' can not set flags,
+@ CHECK-ERRORS: but 's' suffix specified
+
+
+ @ Out of range shift immediate values.
+ adc r1, r2, r3, lsl #invalid
+ adc r4, r5, r6, lsl #-1
+ adc r4, r5, r6, lsl #32
+ adc r4, r5, r6, lsr #-1
+ adc r4, r5, r6, lsr #33
+ adc r4, r5, r6, asr #-1
+ adc r4, r5, r6, asr #33
+ adc r4, r5, r6, ror #-1
+ adc r4, r5, r6, ror #32
+
+@ CHECK-ERRORS: error: invalid immediate shift value
+@ CHECK-ERRORS: adc r1, r2, r3, lsl #invalid
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate shift value out of range
+@ CHECK-ERRORS: adc r4, r5, r6, lsl #-1
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate shift value out of range
+@ CHECK-ERRORS: adc r4, r5, r6, lsl #32
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate shift value out of range
+@ CHECK-ERRORS: adc r4, r5, r6, lsr #-1
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate shift value out of range
+@ CHECK-ERRORS: adc r4, r5, r6, lsr #33
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate shift value out of range
+@ CHECK-ERRORS: adc r4, r5, r6, asr #-1
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate shift value out of range
+@ CHECK-ERRORS: adc r4, r5, r6, asr #33
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate shift value out of range
+@ CHECK-ERRORS: adc r4, r5, r6, ror #-1
+@ CHECK-ERRORS: ^
+@ CHECK-ERRORS: error: immediate shift value out of range
+@ CHECK-ERRORS: adc r4, r5, r6, ror #32
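+
+@ For reference, the valid immediate shift ranges in ARM mode are
+@ lsl #0-31, lsr #1-32, asr #1-32 and ror #1-31; 'ror #32' has no
+@ encoding of its own (use 'rrx' instead), which is why it is rejected.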
+
+
+ @ Out of range 16-bit immediate on BKPT
+ bkpt #65536
+
+@ CHECK-ERRORS: error: invalid operand for instruction
+
+	@ Out of range 4-bit and 3-bit immediates on CDP/CDP2
+ cdp p7, #2, c1, c1, c1, #8
+ cdp p7, #1, c1, c1, c1, #8
+ cdp2 p7, #2, c1, c1, c1, #8
+ cdp2 p7, #1, c1, c1, c1, #8
+
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+
+ @ Out of range immediates for DBG
+ dbg #-1
+ dbg #16
+
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ Double-check that we're synced up with the right diagnostics.
+@ CHECK-ERRORS: dbg #16
+
+	@ Out of range immediates for MCR/MCR2/MCRR/MCRR2
+ mcr p7, #8, r5, c1, c1, #4
+ mcr p7, #2, r5, c1, c1, #8
+ mcr2 p7, #8, r5, c1, c1, #4
+ mcr2 p7, #1, r5, c1, c1, #8
+ mcrr p7, #16, r5, r4, c1
+ mcrr2 p7, #16, r5, r4, c1
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
+@ CHECK-ERRORS: error: invalid operand for instruction
diff --git a/test/MC/ARM/mode-switch.s b/test/MC/ARM/mode-switch.s
new file mode 100644
index 0000000..4cc986a
--- /dev/null
+++ b/test/MC/ARM/mode-switch.s
@@ -0,0 +1,17 @@
+@ Test ARM / Thumb mode switching with .code
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple arm-unknown-unknown -show-encoding < %s | FileCheck %s
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumb-unknown-unknown -show-encoding < %s | FileCheck %s
+
+.code 16
+
+@ CHECK: add.w r0, r0, r1 @ encoding: [0x00,0xeb,0x01,0x00]
+ add.w r0, r0, r1
+
+.code 32
+@ CHECK: add r0, r0, r1 @ encoding: [0x01,0x00,0x80,0xe0]
+ add r0, r0, r1
+
+.code 16
+@ CHECK: add r0, r0, r1 @ encoding: [0x40,0x18]
+	add	r0, r0, r1
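+
+@ With no width suffix, the assembler picks the narrow 2-byte Thumb
+@ encoding here, whereas the explicit '.w' suffix after the first
+@ '.code 16' above forces the 4-byte Thumb-2 encoding.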
diff --git a/test/MC/ARM/prefetch.ll b/test/MC/ARM/prefetch.ll
index 674b8f3..e77fdb1 100644
--- a/test/MC/ARM/prefetch.ll
+++ b/test/MC/ARM/prefetch.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -mtriple=armv7-apple-darwin -mattr=+v7a,+mp -show-mc-encoding | FileCheck %s -check-prefix=ARM
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+v7a -show-mc-encoding | FileCheck %s -check-prefix=T2
+; RUN: llc < %s -mtriple=armv7-apple-darwin -mattr=+v7,+mp -show-mc-encoding | FileCheck %s -check-prefix=ARM
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -mattr=+v7 -show-mc-encoding | FileCheck %s -check-prefix=T2
; rdar://8924681
define void @t1(i8* %ptr) nounwind {
diff --git a/test/MC/ARM/simple-encoding.ll b/test/MC/ARM/simple-encoding.ll
index 3322803..14ed945 100644
--- a/test/MC/ARM/simple-encoding.ll
+++ b/test/MC/ARM/simple-encoding.ll
@@ -39,8 +39,7 @@ define i32 @f3(i32 %a, i32 %b) {
define i32 @f4(i32 %a, i32 %b) {
; CHECK: f4
-; CHECK: add r0, r0, #254, #28 @ encoding: [0xfe,0x0e,0x80,0xe2]
-; CHECK: @ 4064
+; CHECK: add r0, r0, #4064 @ encoding: [0xfe,0x0e,0x80,0xe2]
; CHECK: bx lr @ encoding: [0x1e,0xff,0x2f,0xe1]
%add = add nsw i32 %a, 4064
ret i32 %add
@@ -118,7 +117,7 @@ define i32 @f12(i32 %a) {
define i64 @f13() {
; CHECK: f13:
; CHECK: mvn r0, #0 @ encoding: [0x00,0x00,0xe0,0xe3]
-; CHECK: mvn r1, #2, #2 @ encoding: [0x02,0x11,0xe0,0xe3]
+; CHECK: mvn r1, #-2147483648 @ encoding: [0x02,0x11,0xe0,0xe3]
ret i64 9223372036854775807
}
@@ -229,7 +228,7 @@ define i32 @f23(i32 %X, i32 %Y) {
define void @f24(i32 %a) {
; CHECK: f24
-; CHECK: cmp r0, #1, #16 @ encoding: [0x01,0x08,0x50,0xe3]
+; CHECK: cmp r0, #65536 @ encoding: [0x01,0x08,0x50,0xe3]
%b = icmp ugt i32 %a, 65536
br i1 %b, label %r, label %r
r:
diff --git a/test/MC/ARM/thumb.s b/test/MC/ARM/thumb.s
index 55d9789..79ea2e4 100644
--- a/test/MC/ARM/thumb.s
+++ b/test/MC/ARM/thumb.s
@@ -41,21 +41,6 @@
@ CHECK: bkpt #2 @ encoding: [0x02,0xbe]
bkpt #2
-@ CHECK: mcr p7, #1, r5, c1, c1, #4 @ encoding: [0x21,0xee,0x91,0x57]
- mcr p7, #1, r5, c1, c1, #4
-
-@ CHECK: mrc p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xee,0x92,0x1e]
- mrc p14, #0, r1, c1, c2, #4
-
-@ CHECK: mcrr p7, #1, r5, r4, c1 @ encoding: [0x44,0xec,0x11,0x57]
- mcrr p7, #1, r5, r4, c1
-
-@ CHECK: mrrc p7, #1, r5, r4, c1 @ encoding: [0x54,0xec,0x11,0x57]
- mrrc p7, #1, r5, r4, c1
-
-@ CHECK: cdp p7, #1, c1, c1, c1, #4 @ encoding: [0x11,0xee,0x81,0x17]
- cdp p7, #1, c1, c1, c1, #4
-
@ CHECK: nop @ encoding: [0x00,0xbf]
nop
diff --git a/test/MC/ARM/thumb2-movt-fixup.s b/test/MC/ARM/thumb2-movt-fixup.s
new file mode 100644
index 0000000..ddd95b5
--- /dev/null
+++ b/test/MC/ARM/thumb2-movt-fixup.s
@@ -0,0 +1,17 @@
+@ RUN: llvm-mc -mcpu=cortex-a8 -triple thumbv7-apple-darwin10 -filetype=obj -o - < %s | macho-dump | FileCheck %s
+
+_fred:
+ movt r3, :upper16:(_wilma-(LPC0_0+4))
+LPC0_0:
+
+_wilma:
+ .long 0
+
+@ CHECK: ('_relocations', [
+@ CHECK: # Relocation 0
+@ CHECK: (('word-0', 0xb9000000),
+@ CHECK: ('word-1', 0x4)),
+@ CHECK: # Relocation 1
+@ CHECK: (('word-0', 0xb100fffc),
+@ CHECK: ('word-1', 0x4)),
+
diff --git a/test/MC/ARM/thumb2.s b/test/MC/ARM/thumb2.s
index 4e9d4e1..7d632db 100644
--- a/test/MC/ARM/thumb2.s
+++ b/test/MC/ARM/thumb2.s
@@ -49,6 +49,22 @@
@ CHECK: mov.w r0, #66846720 @ encoding: [0x7f,0x70,0x4f,0xf0]
mov.w r0, #66846720
+@ Aliases w/ the vanilla 'mov' mnemonic, and explicit alternative selection.
+ mov r2, #0xbf000000
+ mov r1, #0x100
+ mov r3, #32
+ mov.w r3, #32
+ movw r3, #32
+
+@ CHECK: mov.w r2, #3204448256 @ encoding: [0x4f,0xf0,0x3f,0x42]
+@ CHECK: mov.w r1, #256 @ encoding: [0x4f,0xf4,0x80,0x71]
+@ CHECK: mov r3, #32 @ encoding: [0x20,0x23]
+@ CHECK: mov.w r3, #32 @ encoding: [0x4f,0xf0,0x20,0x03]
+@ CHECK: movw r3, #32 @ encoding: [0x40,0xf2,0x20,0x03]
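+
+@ The plain 'mov' mnemonic picks the smallest encoding that fits: the
+@ 16-bit Thumb form for 'mov r3, #32', and the 32-bit modified-immediate
+@ form (printed as 'mov.w') for constants like 0xbf000000. The '.w' and
+@ 'movw' spellings force a particular alternative explicitly.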
+
@ CHECK: rrx r0, r0 @ encoding: [0x30,0x00,0x4f,0xea]
rrx r0, r0
@@ -173,6 +189,18 @@
@ CHECK: vmsr fpsid, r0 @ encoding: [0xe0,0xee,0x10,0x0a]
vmsr fpsid, r0
+@ CHECK: mcr p7, #1, r5, c1, c1, #4 @ encoding: [0x21,0xee,0x91,0x57]
+ mcr p7, #1, r5, c1, c1, #4
+
+@ CHECK: mrc p14, #0, r1, c1, c2, #4 @ encoding: [0x11,0xee,0x92,0x1e]
+ mrc p14, #0, r1, c1, c2, #4
+
+@ CHECK: mcrr p7, #1, r5, r4, c1 @ encoding: [0x44,0xec,0x11,0x57]
+ mcrr p7, #1, r5, r4, c1
+
+@ CHECK: mrrc p7, #1, r5, r4, c1 @ encoding: [0x54,0xec,0x11,0x57]
+ mrrc p7, #1, r5, r4, c1
+
@ CHECK: mcr2 p7, #1, r5, c1, c1, #4 @ encoding: [0x21,0xfe,0x91,0x57]
mcr2 p7, #1, r5, c1, c1, #4
@@ -185,6 +213,9 @@
@ CHECK: mrrc2 p7, #1, r5, r4, c1 @ encoding: [0x54,0xfc,0x11,0x57]
mrrc2 p7, #1, r5, r4, c1
+@ CHECK: cdp p7, #1, c1, c1, c1, #4 @ encoding: [0x11,0xee,0x81,0x17]
+ cdp p7, #1, c1, c1, c1, #4
+
@ CHECK: cdp2 p7, #1, c1, c1, c1, #4 @ encoding: [0x11,0xfe,0x81,0x17]
cdp2 p7, #1, c1, c1, c1, #4
@@ -302,3 +333,23 @@
ldrexd r0, r1, [r0]
@ CHECK: ssat16 r0, #7, r0 @ encoding: [0x20,0xf3,0x06,0x00]
ssat16 r0, #7, r0
+
+ and r1, #0xff
+ and r1, r1, #0xff
+ orr r1, 0x100
+ orr r1, r1, 0x100
+ eor r1, 0x100
+ eor r1, r1, 0x100
+ bic r1, 0x100
+ bic r1, r1, 0x100
+
+@ CHECK: and r1, r1, #255 @ encoding: [0x01,0xf0,0xff,0x01]
+@ CHECK: and r1, r1, #255 @ encoding: [0x01,0xf0,0xff,0x01]
+@ CHECK: orr r1, r1, #256 @ encoding: [0x41,0xf4,0x80,0x71]
+@ CHECK: orr r1, r1, #256 @ encoding: [0x41,0xf4,0x80,0x71]
+@ CHECK: eor r1, r1, #256 @ encoding: [0x81,0xf4,0x80,0x71]
+@ CHECK: eor r1, r1, #256 @ encoding: [0x81,0xf4,0x80,0x71]
+@ CHECK: bic r1, r1, #256 @ encoding: [0x21,0xf4,0x80,0x71]
+@ CHECK: bic r1, r1, #256 @ encoding: [0x21,0xf4,0x80,0x71]
+
+
diff --git a/test/MC/ARM/vpush-vpop.s b/test/MC/ARM/vpush-vpop.s
new file mode 100644
index 0000000..1212c83
--- /dev/null
+++ b/test/MC/ARM/vpush-vpop.s
@@ -0,0 +1,19 @@
+@ RUN: llvm-mc -triple armv7-unknown-unknown -show-encoding < %s | FileCheck --check-prefix=CHECK-ARM %s
+@ RUN: llvm-mc -triple thumbv7-unknown-unknown -show-encoding < %s | FileCheck --check-prefix=CHECK-THUMB %s
+
+foo:
+@ CHECK: foo
+ vpush {d8, d9, d10, d11, d12}
+ vpush {s8, s9, s10, s11, s12}
+ vpop {d8, d9, d10, d11, d12}
+ vpop {s8, s9, s10, s11, s12}
+
+@ CHECK-THUMB: vpush {d8, d9, d10, d11, d12} @ encoding: [0x2d,0xed,0x0a,0x8b]
+@ CHECK-THUMB: vpush {s8, s9, s10, s11, s12} @ encoding: [0x2d,0xed,0x05,0x4a]
+@ CHECK-THUMB: vpop {d8, d9, d10, d11, d12} @ encoding: [0xbd,0xec,0x0a,0x8b]
+@ CHECK-THUMB: vpop {s8, s9, s10, s11, s12} @ encoding: [0xbd,0xec,0x05,0x4a]
+
+@ CHECK-ARM: vpush {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0x2d,0xed]
+@ CHECK-ARM: vpush {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0x2d,0xed]
+@ CHECK-ARM: vpop {d8, d9, d10, d11, d12} @ encoding: [0x0a,0x8b,0xbd,0xec]
+@ CHECK-ARM: vpop {s8, s9, s10, s11, s12} @ encoding: [0x05,0x4a,0xbd,0xec]
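+
+@ The ARM and Thumb-2 encodings above contain the same fields; the byte
+@ order differs because an ARM instruction is emitted as one little-endian
+@ 32-bit word while a Thumb-2 instruction is two little-endian halfwords.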
diff --git a/test/MC/AsmParser/directive_comm.s b/test/MC/AsmParser/directive_comm.s
index 6cc7937..5e60ab7 100644
--- a/test/MC/AsmParser/directive_comm.s
+++ b/test/MC/AsmParser/directive_comm.s
@@ -3,6 +3,8 @@
# CHECK: TEST0:
# CHECK: .comm a,6,2
# CHECK: .comm b,8
+# CHECK: .comm c,8
TEST0:
.comm a, 4+2, 2
.comm b,8
+ .common c,8
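+# '.common' is accepted as an alias for '.comm'; the new CHECK line above
+# shows it printed back in the canonical '.comm' spelling.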
diff --git a/test/MC/AsmParser/exprs-invalid.s b/test/MC/AsmParser/exprs-invalid.s
index dc27d80..88b2a0a 100644
--- a/test/MC/AsmParser/exprs-invalid.s
+++ b/test/MC/AsmParser/exprs-invalid.s
@@ -6,3 +6,9 @@
// CHECK-ERRORS: error: invalid hexadecimal number
.long 80+0xzz
+
+// CHECK-ERRORS: error: literal value out of range for directive
+.byte 256
+
+// CHECK-ERRORS: error: literal value out of range for directive
+.long 4e71cf69 // parsed as a double floating-point constant due to the missing "0x" prefix
diff --git a/test/MC/Disassembler/ARM/arm-tests.txt b/test/MC/Disassembler/ARM/arm-tests.txt
index ca072c7..0536eeb 100644
--- a/test/MC/Disassembler/ARM/arm-tests.txt
+++ b/test/MC/Disassembler/ARM/arm-tests.txt
@@ -1,6 +1,6 @@
# RUN: llvm-mc --disassemble %s -triple=arm-apple-darwin9 | FileCheck %s
-# CHECK: addpl r4, pc, #19, #8
+# CHECK: addpl r4, pc, #318767104
0x4c 0x45 0x8f 0x52
# CHECK: b #0
@@ -21,7 +21,7 @@
# CHECK: mov pc, lr
0x0e 0xf0 0xa0 0xe1
-# CHECK: mov pc, #255, #2
+# CHECK: mov pc, #3221225535
0xff 0xf1 0xa0 0xe3
# CHECK: movw r7, #4096
@@ -72,7 +72,7 @@
# CHECK: movt r8, #65535
0xff 0x8f 0x4f 0xe3
-# CHECK: mvnspl r7, #245, #2
+# CHECK: mvnspl r7, #1073741885
0xf5 0x71 0xf0 0x53
# CHECK-NOT: orr r7, r8, r7, rrx #0
@@ -152,7 +152,7 @@
# CHECK: msr cpsr_fc, r0
0x00 0xf0 0x29 0xe1
-# CHECK: msrmi cpsr_c, #241, #8
+# CHECK: msrmi cpsr_c, #4043309056
0xf1 0xf4 0x21 0x43
# CHECK: rsbs r6, r7, r8
diff --git a/test/MC/Disassembler/ARM/neon-tests.txt b/test/MC/Disassembler/ARM/neon-tests.txt
index cfb5949..4fa5723 100644
--- a/test/MC/Disassembler/ARM/neon-tests.txt
+++ b/test/MC/Disassembler/ARM/neon-tests.txt
@@ -27,7 +27,7 @@
# CHECK: vld4.16 {d3[], d4[], d5[], d6[]}, [r0, :64]!
0x7d 0x3f 0xa0 0xf4
-# CHECK: vmov d0, d15
+# CHECK: vorr d0, d15, d15
0x1f 0x01 0x2f 0xf2
# CHECK: vmov.i64 q6, #0xFF00FF00FF
diff --git a/test/MC/Disassembler/X86/x86-32.txt b/test/MC/Disassembler/X86/x86-32.txt
new file mode 100644
index 0000000..dd313f1
--- /dev/null
+++ b/test/MC/Disassembler/X86/x86-32.txt
@@ -0,0 +1,26 @@
+# RUN: llvm-mc --disassemble %s -triple=i686-apple-darwin9 | FileCheck %s
+
+# Coverage
+
+# CHECK: pushl
+0xff 0x34 0x24
+
+# CHECK: popl
+0x58
+
+# CHECK: calll
+0xff 0xd0
+
+# CHECK: incl
+0x40
+
+# CHECK: leave
+0xc9
+
+# PR8873: some instructions not recognized in 32-bit mode
+
+# CHECK: fld
+0xdd 0x04 0x24
+
+# CHECK: pshufb
+0x0f 0x38 0x00 0xc0
diff --git a/test/MC/X86/padlock.s b/test/MC/X86/padlock.s
index 874786f..5c523e7 100644
--- a/test/MC/X86/padlock.s
+++ b/test/MC/X86/padlock.s
@@ -4,6 +4,10 @@
// CHECK: xstore
// CHECK: encoding: [0x0f,0xa7,0xc0]
+ xstorerng
+// CHECK: xstore
+// CHECK: encoding: [0x0f,0xa7,0xc0]
+
rep xcryptecb
// CHECK: rep
// CHECK: encoding: [0xf3]
diff --git a/test/MC/X86/x86-32-coverage.s b/test/MC/X86/x86-32-coverage.s
index d2dd78d..bdc54a6 100644
--- a/test/MC/X86/x86-32-coverage.s
+++ b/test/MC/X86/x86-32-coverage.s
@@ -503,7 +503,7 @@
// CHECK: ud2
ud2
-// CHECK: movnti %ecx, 3735928559(%ebx,%ecx,8)
+// CHECK: movntil %ecx, 3735928559(%ebx,%ecx,8)
movnti %ecx,0xdeadbeef(%ebx,%ecx,8)
// CHECK: clflush 3735928559(%ebx,%ecx,8)
@@ -4505,23 +4505,23 @@
// CHECK: encoding: [0xdf,0xea]
fucomip %st(2),%st
-// CHECK: movnti %ecx, 3735928559(%ebx,%ecx,8)
+// CHECK: movntil %ecx, 3735928559(%ebx,%ecx,8)
// CHECK: encoding: [0x0f,0xc3,0x8c,0xcb,0xef,0xbe,0xad,0xde]
movnti %ecx,0xdeadbeef(%ebx,%ecx,8)
-// CHECK: movnti %ecx, 69
+// CHECK: movntil %ecx, 69
// CHECK: encoding: [0x0f,0xc3,0x0d,0x45,0x00,0x00,0x00]
movnti %ecx,0x45
-// CHECK: movnti %ecx, 32493
+// CHECK: movntil %ecx, 32493
// CHECK: encoding: [0x0f,0xc3,0x0d,0xed,0x7e,0x00,0x00]
movnti %ecx,0x7eed
-// CHECK: movnti %ecx, 3133065982
+// CHECK: movntil %ecx, 3133065982
// CHECK: encoding: [0x0f,0xc3,0x0d,0xfe,0xca,0xbe,0xba]
movnti %ecx,0xbabecafe
-// CHECK: movnti %ecx, 305419896
+// CHECK: movntil %ecx, 305419896
// CHECK: encoding: [0x0f,0xc3,0x0d,0x78,0x56,0x34,0x12]
movnti %ecx,0x12345678
@@ -14177,19 +14177,19 @@
// CHECK: fucompi %st(2)
fucomip %st(2),%st
-// CHECK: movnti %ecx, 3735928559(%ebx,%ecx,8)
+// CHECK: movntil %ecx, 3735928559(%ebx,%ecx,8)
movnti %ecx,0xdeadbeef(%ebx,%ecx,8)
-// CHECK: movnti %ecx, 69
- movnti %ecx,0x45
+// CHECK: movntil %ecx, 69
+ movntil %ecx,0x45
-// CHECK: movnti %ecx, 32493
+// CHECK: movntil %ecx, 32493
movnti %ecx,0x7eed
-// CHECK: movnti %ecx, 3133065982
+// CHECK: movntil %ecx, 3133065982
movnti %ecx,0xbabecafe
-// CHECK: movnti %ecx, 305419896
+// CHECK: movntil %ecx, 305419896
movnti %ecx,0x12345678
// CHECK: clflush 3735928559(%ebx,%ecx,8)
diff --git a/test/MC/X86/x86-64.s b/test/MC/X86/x86-64.s
index fe08559..6f828e8 100644
--- a/test/MC/X86/x86-64.s
+++ b/test/MC/X86/x86-64.s
@@ -219,6 +219,12 @@ inb $161, %al
// CHECK: pushq $1
push $1
+// rdar://9716860
+pushq $1
+// CHECK: encoding: [0x6a,0x01]
+pushq $1111111
+// CHECK: encoding: [0x68,0x47,0xf4,0x10,0x00]
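+
+// 'push' with an immediate picks the shortest encoding: the sign-extended
+// 8-bit form (opcode 0x6a) when the value fits in a byte, otherwise the
+// 32-bit form (opcode 0x68), as the two encodings above show.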
+
// rdar://8017530
// CHECK: sldtw 4
sldt 4
@@ -308,10 +314,10 @@ fucomi
fucomi %st(2)
fucomi %st(2), %st
-// CHECK: fnstsw %ax
-// CHECK: fnstsw %ax
-// CHECK: fnstsw %ax
-// CHECK: fnstsw %ax
+// CHECK: fnstsw
+// CHECK: fnstsw
+// CHECK: fnstsw
+// CHECK: fnstsw
fnstsw
fnstsw %ax
@@ -457,7 +463,7 @@ cdq // CHECK: cltd
// rdar://8456378 and PR7557 - fstsw
fstsw %ax
// CHECK: wait
-// CHECK: fnstsw %ax
+// CHECK: fnstsw
fstsw (%rax)
// CHECK: wait
// CHECK: fnstsw (%rax)
@@ -1128,3 +1134,39 @@ xsetbv // CHECK: xsetbv # encoding: [0x0f,0x01,0xd1]
// CHECK: strq
// CHECK: encoding: [0x48,0x0f,0x00,0xc8]
str %rax
+
+// CHECK: movd %rdi, %xmm0
+// CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7]
+ movq %rdi,%xmm0
+
+// CHECK: movd %rdi, %xmm0
+// CHECK: encoding: [0x66,0x48,0x0f,0x6e,0xc7]
+ movd %rdi,%xmm0
+
+// CHECK: movntil %eax, (%rdi)
+// CHECK: encoding: [0x0f,0xc3,0x07]
+// CHECK: movntil
+movntil %eax, (%rdi)
+movnti %eax, (%rdi)
+
+// CHECK: movntiq %rax, (%rdi)
+// CHECK: encoding: [0x48,0x0f,0xc3,0x07]
+// CHECK: movntiq
+movntiq %rax, (%rdi)
+movnti %rax, (%rdi)
+
+// CHECK: pclmulqdq $17, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0x3a,0x44,0xc8,0x11]
+pclmulhqhqdq %xmm0, %xmm1
+
+// CHECK: pclmulqdq $1, %xmm0, %xmm1
+// CHECK: encoding: [0x66,0x0f,0x3a,0x44,0xc8,0x01]
+pclmulqdq $1, %xmm0, %xmm1
+
+// CHECK: pclmulqdq $16, (%rdi), %xmm1
+// CHECK: encoding: [0x66,0x0f,0x3a,0x44,0x0f,0x10]
+pclmullqhqdq (%rdi), %xmm1
+
+// CHECK: pclmulqdq $0, (%rdi), %xmm1
+// CHECK: encoding: [0x66,0x0f,0x3a,0x44,0x0f,0x00]
+pclmulqdq $0, (%rdi), %xmm1
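+
+// The pclmul*qdq alias mnemonics fold the immediate into the name: the
+// letter pairs select the low or high quadword of each source (bit 0 and
+// bit 4 of the imm8), so pclmulhqhqdq is pclmulqdq $17 and pclmullqhqdq
+// is pclmulqdq $16, matching the encodings above.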
diff --git a/test/Makefile b/test/Makefile
index 0d84186..c0bc36c 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -171,6 +171,12 @@ site.exp: FORCE
@test ! -f site.exp || mv site.exp site.bak
@mv site.tmp site.exp
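+
+# Fold the DISABLE_ASSERTIONS command-line knob into the single
+# ENABLE_ASSERTIONS value that is substituted into lit.site.cfg below.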
+ifeq ($(DISABLE_ASSERTIONS),1)
+ENABLE_ASSERTIONS=0
+else
+ENABLE_ASSERTIONS=1
+endif
+
lit.site.cfg: site.exp
@echo "Making LLVM 'lit.site.cfg' file..."
@$(ECHOPATH) s=@LLVM_SOURCE_DIR@=$(LLVM_SRC_ROOT)=g > lit.tmp
@@ -179,6 +185,7 @@ lit.site.cfg: site.exp
@$(ECHOPATH) s=@LLVMGCCDIR@=$(LLVMGCCDIR)=g >> lit.tmp
@$(ECHOPATH) s=@PYTHON_EXECUTABLE@=python=g >> lit.tmp
@$(ECHOPATH) s=@ENABLE_SHARED@=$(ENABLE_SHARED)=g >> lit.tmp
+ @$(ECHOPATH) s=@ENABLE_ASSERTIONS@=$(ENABLE_ASSERTIONS)=g >> lit.tmp
@sed -f lit.tmp $(PROJ_SRC_DIR)/lit.site.cfg.in > $@
@-rm -f lit.tmp
diff --git a/test/Other/constant-fold-gep.ll b/test/Other/constant-fold-gep.ll
index 926bdbc..e4521d5 100644
--- a/test/Other/constant-fold-gep.ll
+++ b/test/Other/constant-fold-gep.ll
@@ -13,20 +13,6 @@
; "SCEV" - ScalarEvolution but no targetdata.
; RUN: opt -analyze -scalar-evolution < %s | FileCheck --check-prefix=SCEV %s
-; ScalarEvolution with targetdata isn't interesting on these testcases
-; because ScalarEvolution doesn't attempt to duplicate all of instcombine's
-; and the constant folders' folding.
-
-; PLAIN: %0 = type { i1, double }
-; PLAIN: %1 = type { double, float, double, double }
-; PLAIN: %2 = type { i1, i1* }
-; PLAIN: %3 = type { i64, i64 }
-; PLAIN: %4 = type { i32, i32 }
-; OPT: %0 = type { i1, double }
-; OPT: %1 = type { double, float, double, double }
-; OPT: %2 = type { i1, i1* }
-; OPT: %3 = type { i64, i64 }
-; OPT: %4 = type { i32, i32 }
; The automatic constant folder in opt does not have targetdata access, so
; it can't fold gep arithmetic, in general. However, the constant folder run
@@ -63,23 +49,23 @@
; target-dependent folder should fold these down to constants.
; PLAIN: @a = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310)
-; PLAIN: @b = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
+; PLAIN: @b = constant i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
; PLAIN: @c = constant i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2)
; PLAIN: @d = constant i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11)
-; PLAIN: @e = constant i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64)
+; PLAIN: @e = constant i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64)
; PLAIN: @f = constant i64 1
-; PLAIN: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
+; PLAIN: @g = constant i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
; PLAIN: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64)
-; PLAIN: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64)
+; PLAIN: @i = constant i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64)
; OPT: @a = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310)
-; OPT: @b = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
+; OPT: @b = constant i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
; OPT: @c = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2)
; OPT: @d = constant i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11)
-; OPT: @e = constant i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64)
+; OPT: @e = constant i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64)
; OPT: @f = constant i64 1
-; OPT: @g = constant i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
+; OPT: @g = constant i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
; OPT: @h = constant i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64)
-; OPT: @i = constant i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64)
+; OPT: @i = constant i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64)
; TO: @a = constant i64 18480
; TO: @b = constant i64 8
; TO: @c = constant i64 16
@@ -103,10 +89,10 @@
; The target-dependent folder should cast GEP indices to integer-sized pointers.
; PLAIN: @M = constant i64* getelementptr (i64* null, i32 1)
-; PLAIN: @N = constant i64* getelementptr (%3* null, i32 0, i32 1)
+; PLAIN: @N = constant i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1)
; PLAIN: @O = constant i64* getelementptr ([2 x i64]* null, i32 0, i32 1)
; OPT: @M = constant i64* getelementptr (i64* null, i32 1)
-; OPT: @N = constant i64* getelementptr (%3* null, i32 0, i32 1)
+; OPT: @N = constant i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1)
; OPT: @O = constant i64* getelementptr ([2 x i64]* null, i32 0, i32 1)
; TO: @M = constant i64* inttoptr (i64 8 to i64*)
; TO: @N = constant i64* inttoptr (i64 8 to i64*)
@@ -119,9 +105,9 @@
; Fold GEP of a GEP. Theoretically some of these cases could be folded
; without using targetdata, however that's not implemented yet.
-; PLAIN: @Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1)
-; OPT: @Z = global i32* getelementptr (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1)
-; TO: @Z = global i32* getelementptr inbounds ([3 x %0]* @ext, i64 0, i64 1, i32 1)
+; PLAIN: @Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; OPT: @Z = global i32* getelementptr (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
+; TO: @Z = global i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 1)
@ext = external global [3 x { i32, i32 }]
@Z = global i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
@@ -236,7 +222,7 @@ define i1* @hoo1() nounwind {
; PLAIN: ret i64 %t
; PLAIN: }
; PLAIN: define i64 @fb() nounwind {
-; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64
+; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
; PLAIN: define i64 @fc() nounwind {
@@ -248,7 +234,7 @@ define i1* @hoo1() nounwind {
; PLAIN: ret i64 %t
; PLAIN: }
; PLAIN: define i64 @fe() nounwind {
-; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64) to i64
+; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
; PLAIN: define i64 @ff() nounwind {
@@ -256,7 +242,7 @@ define i1* @hoo1() nounwind {
; PLAIN: ret i64 %t
; PLAIN: }
; PLAIN: define i64 @fg() nounwind {
-; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64
+; PLAIN: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
; PLAIN: define i64 @fh() nounwind {
@@ -264,14 +250,14 @@ define i1* @hoo1() nounwind {
; PLAIN: ret i64 %t
; PLAIN: }
; PLAIN: define i64 @fi() nounwind {
-; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64
+; PLAIN: %t = bitcast i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64) to i64
; PLAIN: ret i64 %t
; PLAIN: }
; OPT: define i64 @fa() nounwind {
; OPT: ret i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310)
; OPT: }
; OPT: define i64 @fb() nounwind {
-; OPT: ret i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
+; OPT: ret i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
; OPT: }
; OPT: define i64 @fc() nounwind {
; OPT: ret i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2)
@@ -280,19 +266,19 @@ define i1* @hoo1() nounwind {
; OPT: ret i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11)
; OPT: }
; OPT: define i64 @fe() nounwind {
-; OPT: ret i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64)
+; OPT: ret i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64)
; OPT: }
; OPT: define i64 @ff() nounwind {
; OPT: ret i64 1
; OPT: }
; OPT: define i64 @fg() nounwind {
-; OPT: ret i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64)
+; OPT: ret i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64)
; OPT: }
; OPT: define i64 @fh() nounwind {
; OPT: ret i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64)
; OPT: }
; OPT: define i64 @fi() nounwind {
-; OPT: ret i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64)
+; OPT: ret i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64)
; OPT: }
; TO: define i64 @fa() nounwind {
; TO: ret i64 18480
@@ -325,7 +311,7 @@ define i1* @hoo1() nounwind {
; SCEV: %t = bitcast i64 mul (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2310) to i64
; SCEV: --> (2310 * sizeof(double))
; SCEV: Classifying expressions for: @fb
-; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64
+; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
; SCEV: --> alignof(double)
; SCEV: Classifying expressions for: @fc
; SCEV: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 2) to i64
@@ -334,19 +320,19 @@ define i1* @hoo1() nounwind {
; SCEV: %t = bitcast i64 mul nuw (i64 ptrtoint (double* getelementptr (double* null, i32 1) to i64), i64 11) to i64
; SCEV: --> (11 * sizeof(double))
; SCEV: Classifying expressions for: @fe
-; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%1* null, i64 0, i32 2) to i64) to i64
+; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ double, float, double, double }* null, i64 0, i32 2) to i64) to i64
; SCEV: --> offsetof({ double, float, double, double }, 2)
; SCEV: Classifying expressions for: @ff
; SCEV: %t = bitcast i64 1 to i64
; SCEV: --> 1
; SCEV: Classifying expressions for: @fg
-; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr (%0* null, i64 0, i32 1) to i64) to i64
+; SCEV: %t = bitcast i64 ptrtoint (double* getelementptr ({ i1, double }* null, i64 0, i32 1) to i64) to i64
; SCEV: --> alignof(double)
; SCEV: Classifying expressions for: @fh
; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (i1** null, i32 1) to i64) to i64
; SCEV: --> sizeof(i1*)
; SCEV: Classifying expressions for: @fi
-; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr (%2* null, i64 0, i32 1) to i64) to i64
+; SCEV: %t = bitcast i64 ptrtoint (i1** getelementptr ({ i1, i1* }* null, i64 0, i32 1) to i64) to i64
; SCEV: --> alignof(i1*)
define i64 @fa() nounwind {
@@ -391,7 +377,7 @@ define i64 @fi() nounwind {
; PLAIN: ret i64* %t
; PLAIN: }
; PLAIN: define i64* @fN() nounwind {
-; PLAIN: %t = bitcast i64* getelementptr (%3* null, i32 0, i32 1) to i64*
+; PLAIN: %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64*
; PLAIN: ret i64* %t
; PLAIN: }
; PLAIN: define i64* @fO() nounwind {
@@ -402,7 +388,7 @@ define i64 @fi() nounwind {
; OPT: ret i64* getelementptr (i64* null, i32 1)
; OPT: }
; OPT: define i64* @fN() nounwind {
-; OPT: ret i64* getelementptr (%3* null, i32 0, i32 1)
+; OPT: ret i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1)
; OPT: }
; OPT: define i64* @fO() nounwind {
; OPT: ret i64* getelementptr ([2 x i64]* null, i32 0, i32 1)
@@ -420,7 +406,7 @@ define i64 @fi() nounwind {
; SCEV: %t = bitcast i64* getelementptr (i64* null, i32 1) to i64*
; SCEV: --> sizeof(i64)
; SCEV: Classifying expressions for: @fN
-; SCEV: %t = bitcast i64* getelementptr (%3* null, i32 0, i32 1) to i64*
+; SCEV: %t = bitcast i64* getelementptr ({ i64, i64 }* null, i32 0, i32 1) to i64*
; SCEV: --> sizeof(i64)
; SCEV: Classifying expressions for: @fO
; SCEV: %t = bitcast i64* getelementptr ([2 x i64]* null, i32 0, i32 1) to i64*
@@ -440,17 +426,17 @@ define i64* @fO() nounwind {
}
; PLAIN: define i32* @fZ() nounwind {
-; PLAIN: %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1) to i32*
+; PLAIN: %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1) to i32*
; PLAIN: ret i32* %t
; PLAIN: }
; OPT: define i32* @fZ() nounwind {
-; OPT: ret i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1)
+; OPT: ret i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1)
; OPT: }
; TO: define i32* @fZ() nounwind {
-; TO: ret i32* getelementptr inbounds ([3 x %0]* @ext, i64 0, i64 1, i32 1)
+; TO: ret i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 1)
; TO: }
; SCEV: Classifying expressions for: @fZ
-; SCEV: %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x %4]* @ext, i64 0, i64 1, i32 0), i64 1) to i32*
+; SCEV: %t = bitcast i32* getelementptr inbounds (i32* getelementptr inbounds ([3 x { i32, i32 }]* @ext, i64 0, i64 1, i32 0), i64 1) to i32*
; SCEV: --> ((3 * sizeof(i32)) + @ext)
define i32* @fZ() nounwind {
diff --git a/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll b/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
index 43462fa..8910bda 100644
--- a/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
+++ b/test/Transforms/ADCE/2002-01-31-UseStuckAround.ll
@@ -1,7 +1,6 @@
; RUN: opt < %s -adce
-define i32 @"main"(i32 %argc)
-begin
+define i32 @"main"(i32 %argc) {
br label %2
%retval = phi i32 [ %argc, %2 ] ; <i32> [#uses=2]
@@ -9,5 +8,4 @@ begin
ret i32 %two
br label %1
-end
-
+}
diff --git a/test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll b/test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll
deleted file mode 100644
index 0fbd330..0000000
--- a/test/Transforms/CodeExtractor/2004-03-17-OutputMismatch.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -loop-extract -disable-output
-
-%struct.node_t = type { double*, %struct.node_t*, %struct.node_t**, double**, double*, i32, i32 }
-%struct.table_t = type { [1 x %struct.node_t**], [1 x %struct.node_t**] }
-
-define void @make_tables() {
-entry:
- %tmp.0.i = malloc %struct.node_t ; <%struct.node_t*> [#uses=1]
- br i1 false, label %no_exit.i, label %loopexit.i
-
-no_exit.i: ; preds = %no_exit.i, %entry
- %prev_node.0.i.1 = phi %struct.node_t* [ %tmp.16.i, %no_exit.i ], [ %tmp.0.i, %entry ] ; <%struct.node_t*> [#uses=0]
- %tmp.16.i = malloc %struct.node_t ; <%struct.node_t*> [#uses=2]
- br i1 false, label %no_exit.i, label %loopexit.i
-
-loopexit.i: ; preds = %no_exit.i, %entry
- %cur_node.0.i.0 = phi %struct.node_t* [ null, %entry ], [ %tmp.16.i, %no_exit.i ] ; <%struct.node_t*> [#uses=0]
- ret void
-}
-
diff --git a/test/Transforms/ConstProp/extractvalue.ll b/test/Transforms/ConstProp/extractvalue.ll
index 32d5291..f947b22 100644
--- a/test/Transforms/ConstProp/extractvalue.ll
+++ b/test/Transforms/ConstProp/extractvalue.ll
@@ -1,7 +1,6 @@
; RUN: opt < %s -constprop -S | FileCheck %s
%struct = type { i32, [4 x i8] }
-%array = type [3 x %struct]
define i32 @test1() {
%A = extractvalue %struct { i32 2, [4 x i8] c"foo\00" }, 0
@@ -18,7 +17,7 @@ define i8 @test2() {
}
define i32 @test3() {
- %A = extractvalue %array [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], 1, 0
+ %A = extractvalue [3 x %struct] [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], 1, 0
ret i32 %A
; CHECK: @test3
; CHECK: ret i32 1
@@ -39,7 +38,7 @@ define i8 @zeroinitializer-test2() {
}
define i32 @zeroinitializer-test3() {
- %A = extractvalue %array zeroinitializer, 1, 0
+ %A = extractvalue [3 x %struct] zeroinitializer, 1, 0
ret i32 %A
; CHECK: @zeroinitializer-test3
; CHECK: ret i32 0
@@ -60,7 +59,7 @@ define i8 @undef-test2() {
}
define i32 @undef-test3() {
- %A = extractvalue %array undef, 1, 0
+ %A = extractvalue [3 x %struct] undef, 1, 0
ret i32 %A
; CHECK: @undef-test3
; CHECK: ret i32 undef
diff --git a/test/Transforms/ConstProp/insertvalue.ll b/test/Transforms/ConstProp/insertvalue.ll
index f0eb553..a4b7bb1 100644
--- a/test/Transforms/ConstProp/insertvalue.ll
+++ b/test/Transforms/ConstProp/insertvalue.ll
@@ -1,7 +1,6 @@
; RUN: opt < %s -constprop -S | FileCheck %s
%struct = type { i32, [4 x i8] }
-%array = type [3 x %struct]
define %struct @test1() {
%A = insertvalue %struct { i32 2, [4 x i8] c"foo\00" }, i32 1, 0
@@ -17,11 +16,11 @@ define %struct @test2() {
; CHECK: ret %struct { i32 2, [4 x i8] c"fo\01\00" }
}
-define %array @test3() {
- %A = insertvalue %array [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], i32 -1, 1, 0
- ret %array %A
+define [3 x %struct] @test3() {
+ %A = insertvalue [3 x %struct] [ %struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" } ], i32 -1, 1, 0
+ ret [3 x %struct] %A
; CHECK: @test3
-; CHECK:ret %array [%struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 -1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" }]
+; CHECK:ret [3 x %struct] [%struct { i32 0, [4 x i8] c"aaaa" }, %struct { i32 -1, [4 x i8] c"bbbb" }, %struct { i32 2, [4 x i8] c"cccc" }]
}
define %struct @zeroinitializer-test1() {
@@ -38,11 +37,11 @@ define %struct @zeroinitializer-test2() {
; CHECK: ret %struct { i32 0, [4 x i8] c"\00\00\01\00" }
}
-define %array @zeroinitializer-test3() {
- %A = insertvalue %array zeroinitializer, i32 1, 1, 0
- ret %array %A
+define [3 x %struct] @zeroinitializer-test3() {
+ %A = insertvalue [3 x %struct] zeroinitializer, i32 1, 1, 0
+ ret [3 x %struct] %A
; CHECK: @zeroinitializer-test3
-; CHECK: ret %array [%struct zeroinitializer, %struct { i32 1, [4 x i8] zeroinitializer }, %struct zeroinitializer]
+; CHECK: ret [3 x %struct] [%struct zeroinitializer, %struct { i32 1, [4 x i8] zeroinitializer }, %struct zeroinitializer]
}
define %struct @undef-test1() {
@@ -59,10 +58,10 @@ define %struct @undef-test2() {
; CHECK: ret %struct { i32 undef, [4 x i8] [i8 undef, i8 undef, i8 0, i8 undef] }
}
-define %array @undef-test3() {
- %A = insertvalue %array undef, i32 0, 1, 0
- ret %array %A
+define [3 x %struct] @undef-test3() {
+ %A = insertvalue [3 x %struct] undef, i32 0, 1, 0
+ ret [3 x %struct] %A
; CHECK: @undef-test3
-; CHECK: ret %array [%struct undef, %struct { i32 0, [4 x i8] undef }, %struct undef]
+; CHECK: ret [3 x %struct] [%struct undef, %struct { i32 0, [4 x i8] undef }, %struct undef]
}
diff --git a/test/Transforms/ConstProp/overflow-ops.ll b/test/Transforms/ConstProp/overflow-ops.ll
index d1cc2eb..849bf9e 100644
--- a/test/Transforms/ConstProp/overflow-ops.ll
+++ b/test/Transforms/ConstProp/overflow-ops.ll
@@ -1,6 +1,5 @@
; RUN: opt < %s -constprop -S | FileCheck %s
-%i8i1 = type {i8, i1}
declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
@@ -20,7 +19,7 @@ entry:
ret {i8, i1} %t
; CHECK: @uadd_1
-; CHECK: ret %i8i1 { i8 -114, i1 false }
+; CHECK: ret { i8, i1 } { i8 -114, i1 false }
}
define {i8, i1} @uadd_2() nounwind {
@@ -29,7 +28,7 @@ entry:
ret {i8, i1} %t
; CHECK: @uadd_2
-; CHECK: ret %i8i1 { i8 6, i1 true }
+; CHECK: ret { i8, i1 } { i8 6, i1 true }
}
;;-----------------------------
@@ -42,7 +41,7 @@ entry:
ret {i8, i1} %t
; CHECK: @usub_1
-; CHECK: ret %i8i1 { i8 2, i1 false }
+; CHECK: ret { i8, i1 } { i8 2, i1 false }
}
define {i8, i1} @usub_2() nounwind {
@@ -51,7 +50,7 @@ entry:
ret {i8, i1} %t
; CHECK: @usub_2
-; CHECK: ret %i8i1 { i8 -2, i1 true }
+; CHECK: ret { i8, i1 } { i8 -2, i1 true }
}
;;-----------------------------
@@ -64,7 +63,7 @@ entry:
ret {i8, i1} %t
; CHECK: @umul_1
-; CHECK: ret %i8i1 { i8 44, i1 true }
+; CHECK: ret { i8, i1 } { i8 44, i1 true }
}
define {i8, i1} @umul_2() nounwind {
@@ -73,7 +72,7 @@ entry:
ret {i8, i1} %t
; CHECK: @umul_2
-; CHECK: ret %i8i1 { i8 -56, i1 false }
+; CHECK: ret { i8, i1 } { i8 -56, i1 false }
}
;;-----------------------------
@@ -86,7 +85,7 @@ entry:
ret {i8, i1} %t
; CHECK: @sadd_1
-; CHECK: ret %i8i1 { i8 44, i1 false }
+; CHECK: ret { i8, i1 } { i8 44, i1 false }
}
define {i8, i1} @sadd_2() nounwind {
@@ -95,7 +94,7 @@ entry:
ret {i8, i1} %t
; CHECK: @sadd_2
-; CHECK: ret %i8i1 { i8 -126, i1 true }
+; CHECK: ret { i8, i1 } { i8 -126, i1 true }
}
define {i8, i1} @sadd_3() nounwind {
@@ -104,7 +103,7 @@ entry:
ret {i8, i1} %t
; CHECK: @sadd_3
-; CHECK: ret %i8i1 { i8 -110, i1 false }
+; CHECK: ret { i8, i1 } { i8 -110, i1 false }
}
define {i8, i1} @sadd_4() nounwind {
@@ -113,7 +112,7 @@ entry:
ret {i8, i1} %t
; CHECK: @sadd_4
-; CHECK: ret %i8i1 { i8 126, i1 true }
+; CHECK: ret { i8, i1 } { i8 126, i1 true }
}
define {i8, i1} @sadd_5() nounwind {
@@ -122,7 +121,7 @@ entry:
ret {i8, i1} %t
; CHECK: @sadd_5
-; CHECK: ret %i8i1 { i8 -8, i1 false }
+; CHECK: ret { i8, i1 } { i8 -8, i1 false }
}
@@ -136,7 +135,7 @@ entry:
ret {i8, i1} %t
; CHECK: @ssub_1
-; CHECK: ret %i8i1 { i8 2, i1 false }
+; CHECK: ret { i8, i1 } { i8 2, i1 false }
}
define {i8, i1} @ssub_2() nounwind {
@@ -145,7 +144,7 @@ entry:
ret {i8, i1} %t
; CHECK: @ssub_2
-; CHECK: ret %i8i1 { i8 -2, i1 false }
+; CHECK: ret { i8, i1 } { i8 -2, i1 false }
}
define {i8, i1} @ssub_3() nounwind {
@@ -154,7 +153,7 @@ entry:
ret {i8, i1} %t
; CHECK: @ssub_3
-; CHECK: ret %i8i1 { i8 126, i1 true }
+; CHECK: ret { i8, i1 } { i8 126, i1 true }
}
define {i8, i1} @ssub_3b() nounwind {
@@ -163,7 +162,7 @@ entry:
ret {i8, i1} %t
; CHECK: @ssub_3b
-; CHECK: ret %i8i1 { i8 -20, i1 false }
+; CHECK: ret { i8, i1 } { i8 -20, i1 false }
}
define {i8, i1} @ssub_4() nounwind {
@@ -172,7 +171,7 @@ entry:
ret {i8, i1} %t
; CHECK: @ssub_4
-; CHECK: ret %i8i1 { i8 -126, i1 true }
+; CHECK: ret { i8, i1 } { i8 -126, i1 true }
}
define {i8, i1} @ssub_4b() nounwind {
@@ -181,7 +180,7 @@ entry:
ret {i8, i1} %t
; CHECK: @ssub_4b
-; CHECK: ret %i8i1 { i8 30, i1 false }
+; CHECK: ret { i8, i1 } { i8 30, i1 false }
}
define {i8, i1} @ssub_5() nounwind {
@@ -190,7 +189,7 @@ entry:
ret {i8, i1} %t
; CHECK: @ssub_5
-; CHECK: ret %i8i1 { i8 -10, i1 false }
+; CHECK: ret { i8, i1 } { i8 -10, i1 false }
}
;;-----------------------------
@@ -204,5 +203,5 @@ entry:
ret {i8, i1} %t
; CHECK: @smul_1
-; CHECK: ret %i8i1 { i8 -56, i1 true }
+; CHECK: ret { i8, i1 } { i8 -56, i1 true }
}
diff --git a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
index 0e9c4f7..7c6c575 100644
--- a/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
+++ b/test/Transforms/DeadArgElim/2007-12-20-ParamAttrs.ll
@@ -9,12 +9,12 @@
@g = global i8 0
-define internal i8 @foo(i8* inreg %p, i8 signext %y, ... ) zeroext nounwind {
+define internal zeroext i8 @foo(i8* inreg %p, i8 signext %y, ... ) nounwind {
store i8 %y, i8* @g
ret i8 0
}
define i32 @bar() {
- %A = call i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) zeroext nounwind
+ %A = call zeroext i8(i8*, i8, ...)* @foo(i8* inreg null, i8 signext 1, %struct* byval null ) nounwind
ret i32 0
}
diff --git a/test/Transforms/DeadArgElim/keepalive.ll b/test/Transforms/DeadArgElim/keepalive.ll
index b0b9bf3..4d6aae3 100644
--- a/test/Transforms/DeadArgElim/keepalive.ll
+++ b/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -deadargelim -S > %t
; RUN: grep {define internal zeroext i32 @test1() nounwind} %t
-; RUN: grep {define internal %Ty @test2} %t
+; RUN: grep {define internal <{ i32, i32 }> @test2} %t
%Ty = type <{ i32, i32 }>
diff --git a/test/Transforms/DeadStoreElimination/crash.ll b/test/Transforms/DeadStoreElimination/crash.ll
index bb279cd..148695f 100644
--- a/test/Transforms/DeadStoreElimination/crash.ll
+++ b/test/Transforms/DeadStoreElimination/crash.ll
@@ -36,11 +36,11 @@ bb14: ; preds = %bb4
%6 = getelementptr inbounds i16* %2, i64 undef ; <i16*> [#uses=1]
store i16 undef, i16* %6, align 2
%7 = getelementptr inbounds i8* %5, i64 undef ; <i8*> [#uses=1]
- call void @llvm.memcpy.i64(i8* %7, i8* undef, i64 undef, i32 1) nounwind
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* undef, i64 undef, i32 1, i1 false)
unreachable
}
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
; rdar://7635088
diff --git a/test/Transforms/DeadStoreElimination/free.ll b/test/Transforms/DeadStoreElimination/free.ll
index 3c980cc..aa3f0ab 100644
--- a/test/Transforms/DeadStoreElimination/free.ll
+++ b/test/Transforms/DeadStoreElimination/free.ll
@@ -9,7 +9,8 @@ target datalayout = "e-p:64:64:64"
define void @test(i32* %Q, i32* %P) {
%DEAD = load i32* %Q ; <i32> [#uses=1]
store i32 %DEAD, i32* %P
- free i32* %P
+ %1 = bitcast i32* %P to i8*
+ tail call void @free(i8* %1)
ret void
}
@@ -20,7 +21,8 @@ define void @test(i32* %Q, i32* %P) {
define void @test2({i32, i32}* %P) {
%Q = getelementptr {i32, i32} *%P, i32 0, i32 1
store i32 4, i32* %Q
- free {i32,i32}* %P
+ %1 = bitcast {i32, i32}* %P to i8*
+ tail call void @free(i8* %1)
ret void
}
diff --git a/test/Transforms/DeadStoreElimination/lifetime.ll b/test/Transforms/DeadStoreElimination/lifetime.ll
index 2b5cc5a..6785653 100644
--- a/test/Transforms/DeadStoreElimination/lifetime.ll
+++ b/test/Transforms/DeadStoreElimination/lifetime.ll
@@ -4,7 +4,7 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
define void @test1() {
; CHECK: @test1
@@ -14,7 +14,7 @@ define void @test1() {
call void @llvm.lifetime.end(i64 1, i8* %A)
; CHECK: lifetime.end
- call void @llvm.memset.i8(i8* %A, i8 0, i8 -1, i32 0)
+ call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
; CHECK-NOT: memset
ret void
diff --git a/test/Transforms/DeadStoreElimination/memintrinsics.ll b/test/Transforms/DeadStoreElimination/memintrinsics.ll
index e31e9fa..d5c5365 100644
--- a/test/Transforms/DeadStoreElimination/memintrinsics.ll
+++ b/test/Transforms/DeadStoreElimination/memintrinsics.ll
@@ -1,8 +1,8 @@
; RUN: opt -S -dse < %s | FileCheck %s
-declare void @llvm.memcpy.i8(i8*, i8*, i8, i32)
-declare void @llvm.memmove.i8(i8*, i8*, i8, i32)
-declare void @llvm.memset.i8(i8*, i8, i8, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i8(i8* nocapture, i8* nocapture, i8, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i8(i8* nocapture, i8, i8, i32, i1) nounwind
define void @test1() {
; CHECK: @test1
@@ -12,7 +12,7 @@ define void @test1() {
store i8 0, i8* %A ;; Written to by memcpy
; CHECK-NOT: store
- call void @llvm.memcpy.i8(i8* %A, i8* %B, i8 -1, i32 0)
+ call void @llvm.memcpy.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
ret void
; CHECK: ret void
@@ -26,7 +26,7 @@ define void @test2() {
store i8 0, i8* %A ;; Written to by memmove
; CHECK-NOT: store
- call void @llvm.memmove.i8(i8* %A, i8* %B, i8 -1, i32 0)
+ call void @llvm.memmove.p0i8.p0i8.i8(i8* %A, i8* %B, i8 -1, i32 0, i1 false)
ret void
; CHECK: ret void
@@ -40,7 +40,7 @@ define void @test3() {
store i8 0, i8* %A ;; Written to by memset
; CHECK-NOT: store
- call void @llvm.memset.i8(i8* %A, i8 0, i8 -1, i32 0)
+ call void @llvm.memset.p0i8.i8(i8* %A, i8 0, i8 -1, i32 0, i1 false)
ret void
; CHECK: ret void
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 23576dad..5f143fc 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -3,8 +3,6 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.i64(i8*, i8, i64, i32)
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32)
declare i8* @llvm.init.trampoline(i8*, i8*, i8*)
define void @test1(i32* %Q, i32* %P) {
@@ -65,7 +63,7 @@ define void @test5(i32* %Q) {
; alias).
define void @test6(i32 *%p, i8 *%q) {
store i32 10, i32* %p, align 4 ;; dead.
- call void @llvm.memset.i64(i8* %q, i8 42, i64 900, i32 1)
+ call void @llvm.memset.p0i8.i64(i8* %q, i8 42, i64 900, i32 1, i1 false)
store i32 30, i32* %p, align 4
ret void
; CHECK: @test6
@@ -76,7 +74,7 @@ define void @test6(i32 *%p, i8 *%q) {
; alias).
define void @test7(i32 *%p, i8 *%q, i8* noalias %r) {
store i32 10, i32* %p, align 4 ;; dead.
- call void @llvm.memcpy.i64(i8* %q, i8* %r, i64 900, i32 1)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %q, i8* %r, i64 900, i32 1, i1 false)
store i32 30, i32* %p, align 4
ret void
; CHECK: @test7
@@ -184,8 +182,8 @@ define void @test14(i32* %Q) {
;; Fully dead overwrite of memcpy.
define void @test15(i8* %P, i8* %Q) nounwind ssp {
- tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
- tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
ret void
; CHECK: @test15
; CHECK-NEXT: call void @llvm.memcpy
@@ -194,8 +192,8 @@ define void @test15(i8* %P, i8* %Q) nounwind ssp {
;; Full overwrite of smaller memcpy.
define void @test16(i8* %P, i8* %Q) nounwind ssp {
- tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 8, i32 1)
- tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 8, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
ret void
; CHECK: @test16
; CHECK-NEXT: call void @llvm.memcpy
@@ -204,8 +202,8 @@ define void @test16(i8* %P, i8* %Q) nounwind ssp {
;; Overwrite of memset by memcpy.
define void @test17(i8* %P, i8* noalias %Q) nounwind ssp {
- tail call void @llvm.memset.i64(i8* %P, i8 42, i64 8, i32 1)
- tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
+ tail call void @llvm.memset.p0i8.i64(i8* %P, i8 42, i64 8, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
ret void
; CHECK: @test17
; CHECK-NEXT: call void @llvm.memcpy
@@ -228,8 +226,8 @@ define void @test17v(i8* %P, i8* %Q) nounwind ssp {
; A = B
; A = A
define void @test18(i8* %P, i8* %Q, i8* %R) nounwind ssp {
- tail call void @llvm.memcpy.i64(i8* %P, i8* %Q, i64 12, i32 1)
- tail call void @llvm.memcpy.i64(i8* %P, i8* %R, i64 12, i32 1)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %Q, i64 12, i32 1, i1 false)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %P, i8* %R, i64 12, i32 1, i1 false)
ret void
; CHECK: @test18
; CHECK-NEXT: call void @llvm.memcpy
diff --git a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll b/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
deleted file mode 100644
index faac118..0000000
--- a/test/Transforms/FunctionAttrs/2008-10-04-LocalMemory.ll
+++ /dev/null
@@ -1,64 +0,0 @@
-; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
-
-%struct.X = type { i32*, i32* }
-
-declare i32 @g(i32*) readnone
-
-define i32 @f() {
-; CHECK: @f() readnone
- %x = alloca i32 ; <i32*> [#uses=2]
- store i32 0, i32* %x
- %y = call i32 @g(i32* %x) ; <i32> [#uses=1]
- ret i32 %y
-}
-
-define i32 @foo() nounwind {
-; CHECK: @foo() nounwind readonly
-entry:
- %y = alloca %struct.X ; <%struct.X*> [#uses=2]
- %x = alloca %struct.X ; <%struct.X*> [#uses=2]
- %j = alloca i32 ; <i32*> [#uses=2]
- %i = alloca i32 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 0, i32* %i, align 4
- store i32 1, i32* %j, align 4
- %0 = getelementptr inbounds %struct.X* %y, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %i, i32** %0, align 8
- %1 = getelementptr inbounds %struct.X* %x, i32 0, i32 1 ; <i32**> [#uses=1]
- store i32* %j, i32** %1, align 8
- %x1 = bitcast %struct.X* %x to i8* ; <i8*> [#uses=2]
- %y2 = bitcast %struct.X* %y to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i64(i8* %x1, i8* %y2, i64 8, i32 1)
- %2 = bitcast i8* %x1 to i32** ; <i32**> [#uses=1]
- %3 = load i32** %2, align 8 ; <i32*> [#uses=1]
- %4 = load i32* %3, align 4 ; <i32> [#uses=1]
- br label %return
-
-return: ; preds = %entry
- ret i32 %4
-}
-
-define i32 @t(i32 %a, i32 %b, i32 %c) nounwind {
-; CHECK: @t(i32 %a, i32 %b, i32 %c) nounwind readnone
-entry:
- %a.addr = alloca i32 ; <i32*> [#uses=3]
- %c.addr = alloca i32 ; <i32*> [#uses=2]
- store i32 %a, i32* %a.addr
- store i32 %c, i32* %c.addr
- %tmp = load i32* %a.addr ; <i32> [#uses=1]
- %tobool = icmp ne i32 %tmp, 0 ; <i1> [#uses=1]
- br i1 %tobool, label %if.then, label %if.else
-
-if.then: ; preds = %entry
- br label %if.end
-
-if.else: ; preds = %entry
- br label %if.end
-
-if.end: ; preds = %if.else, %if.then
- %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ] ; <i32*> [#uses=1]
- %tmp2 = load i32* %p.0 ; <i32> [#uses=1]
- ret i32 %tmp2
-}
-
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
diff --git a/test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll b/test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll
deleted file mode 100644
index 488e6a9..0000000
--- a/test/Transforms/FunctionAttrs/2009-05-06-Malloc.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: opt < %s -functionattrs -S | not grep read
-; PR3754
-
-define i8* @m(i32 %size) {
- %tmp = malloc i8, i32 %size ; <i8*> [#uses=1]
- ret i8* %tmp
-}
diff --git a/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll b/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
deleted file mode 100644
index 9a75e1a..0000000
--- a/test/Transforms/GVN/2008-02-24-NonDominatedMemcpy.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt < %s -gvn -dse -S | grep {call.*memcpy} | count 1
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin8"
- %struct.ggFrame3 = type { %struct.ggPoint3, %struct.ggONB3 }
- %struct.ggHMatrix3 = type { [4 x [4 x double]] }
- %struct.ggONB3 = type { %struct.ggPoint3, %struct.ggPoint3, %struct.ggPoint3 }
- %struct.ggPoint3 = type { [3 x double] }
- %struct.ggQuaternion = type { [4 x double], i32, %struct.ggHMatrix3 }
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind
-
-define void @_Z10ggCRSplineRK8ggFrame3S1_S1_S1_d(%struct.ggFrame3* noalias sret %agg.result, %struct.ggFrame3* %f0, %struct.ggFrame3* %f1, %struct.ggFrame3* %f2, %struct.ggFrame3* %f3, double %t) nounwind {
-entry:
- %qresult = alloca %struct.ggQuaternion ; <%struct.ggQuaternion*> [#uses=1]
- %tmp = alloca %struct.ggONB3 ; <%struct.ggONB3*> [#uses=2]
- call void @_ZN12ggQuaternion7getONB3Ev( %struct.ggONB3* noalias sret %tmp, %struct.ggQuaternion* %qresult ) nounwind
- %tmp1.i = getelementptr %struct.ggFrame3* %agg.result, i32 0, i32 1 ; <%struct.ggONB3*> [#uses=1]
- %tmp13.i = bitcast %struct.ggONB3* %tmp1.i to i8* ; <i8*> [#uses=1]
- %tmp24.i = bitcast %struct.ggONB3* %tmp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i64( i8* %tmp13.i, i8* %tmp24.i, i64 72, i32 8 ) nounwind
- ret void
-}
-
-declare void @_ZN12ggQuaternion7getONB3Ev(%struct.ggONB3* noalias sret , %struct.ggQuaternion*) nounwind
diff --git a/test/Transforms/GVN/2008-02-26-MemCpySize.ll b/test/Transforms/GVN/2008-02-26-MemCpySize.ll
deleted file mode 100644
index 6ed8a76..0000000
--- a/test/Transforms/GVN/2008-02-26-MemCpySize.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; RUN: opt < %s -gvn -dse -S | grep {call.*memcpy.*cell} | count 2
-; PR2099
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin9"
- %struct.s = type { [11 x i8], i32 }
-@.str = internal constant [11 x i8] c"0123456789\00" ; <[11 x i8]*> [#uses=1]
-@cell = weak global %struct.s zeroinitializer ; <%struct.s*> [#uses=2]
-
-declare i32 @check(%struct.s* byval %p) nounwind
-
-declare i32 @strcmp(i8*, i8*) nounwind readonly
-
-define i32 @main() noreturn nounwind {
-entry:
- %p = alloca %struct.s, align 8 ; <%struct.s*> [#uses=2]
- store i32 99, i32* getelementptr (%struct.s* @cell, i32 0, i32 1), align 4
- call void @llvm.memcpy.i32( i8* getelementptr (%struct.s* @cell, i32 0, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0), i32 11, i32 1 )
- %tmp = getelementptr %struct.s* %p, i32 0, i32 0, i32 0 ; <i8*> [#uses=2]
- call void @llvm.memcpy.i64( i8* %tmp, i8* getelementptr (%struct.s* @cell, i32 0, i32 0, i32 0), i64 16, i32 8 )
- %tmp1.i = getelementptr %struct.s* %p, i32 0, i32 1 ; <i32*> [#uses=1]
- %tmp2.i = load i32* %tmp1.i, align 4 ; <i32> [#uses=1]
- %tmp3.i = icmp eq i32 %tmp2.i, 99 ; <i1> [#uses=1]
- br i1 %tmp3.i, label %bb5.i, label %bb
-
-bb5.i: ; preds = %entry
- %tmp91.i = call i32 @strcmp( i8* %tmp, i8* getelementptr ([11 x i8]* @.str, i32 0, i32 0) ) nounwind readonly ; <i32> [#uses=1]
- %tmp53 = icmp eq i32 %tmp91.i, 0 ; <i1> [#uses=1]
- br i1 %tmp53, label %bb7, label %bb
-
-bb: ; preds = %bb5.i, %entry
- call void @abort( ) noreturn nounwind
- unreachable
-
-bb7: ; preds = %bb5.i
- call void @exit( i32 0 ) noreturn nounwind
- unreachable
-}
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
-declare void @abort() noreturn nounwind
-
-declare void @exit(i32) noreturn nounwind
-
-declare void @llvm.memcpy.i64(i8*, i8*, i64, i32) nounwind
diff --git a/test/Transforms/GVN/2011-04-27-phioperands.ll b/test/Transforms/GVN/2011-04-27-phioperands.ll
new file mode 100644
index 0000000..6e5075d
--- /dev/null
+++ b/test/Transforms/GVN/2011-04-27-phioperands.ll
@@ -0,0 +1,106 @@
+; RUN: opt %s -gvn -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
+
+@nuls = external global [10 x i8]
+
+define fastcc void @p_ere() nounwind {
+entry:
+ br label %"<bb 5>"
+
+"<L18>.i":
+ br i1 undef, label %"<bb 3>.i30.i", label %doemit.exit51.i
+
+"<bb 3>.i30.i":
+ unreachable
+
+doemit.exit51.i:
+ br label %"<bb 53>.i"
+
+"<L19>.i":
+ br i1 undef, label %"<bb 3>.i55.i", label %doemit.exit76.i
+
+"<bb 3>.i55.i":
+ unreachable
+
+doemit.exit76.i:
+ br label %"<bb 53>.i"
+
+"<L98>.i":
+ store i8* getelementptr inbounds ([10 x i8]* @nuls, i64 0, i64 0), i8** undef, align 8
+ br label %"<bb 53>.i"
+
+"<L99>.i":
+ br label %"<bb 53>.i"
+
+"<L24>.i":
+ br i1 undef, label %"<bb 53>.i", label %"<bb 35>.i"
+
+"<bb 35>.i":
+ br label %"<bb 53>.i"
+
+"<L28>.i":
+ br label %"<bb 53>.i"
+
+"<L29>.i":
+ br label %"<bb 53>.i"
+
+"<L39>.i":
+ br label %"<bb 53>.i"
+
+"<bb 53>.i":
+ %wascaret_2.i = phi i32 [ 0, %"<L39>.i" ], [ 0, %"<L29>.i" ], [ 0, %"<L28>.i" ], [ 0, %"<bb 35>.i" ], [ 0, %"<L99>.i" ], [ 0, %"<L98>.i" ], [ 0, %doemit.exit76.i ], [ 1, %doemit.exit51.i ], [ 0, %"<L24>.i" ]
+ %D.5496_84.i = load i8** undef, align 8
+ br i1 undef, label %"<bb 54>.i", label %"<bb 5>"
+
+"<bb 54>.i":
+ br i1 undef, label %"<bb 5>", label %"<bb 58>.i"
+
+"<bb 58>.i":
+ br i1 undef, label %"<bb 64>.i", label %"<bb 59>.i"
+
+"<bb 59>.i":
+ br label %"<bb 64>.i"
+
+"<bb 64>.i":
+ switch i32 undef, label %"<bb 5>" [
+ i32 42, label %"<L54>.i"
+ i32 43, label %"<L55>.i"
+ i32 63, label %"<L56>.i"
+ i32 123, label %"<bb 5>.i258.i"
+ ]
+
+"<L54>.i":
+ br i1 undef, label %"<bb 3>.i105.i", label %doemit.exit127.i
+
+"<bb 3>.i105.i":
+ unreachable
+
+doemit.exit127.i:
+ unreachable
+
+"<L55>.i":
+ br i1 undef, label %"<bb 3>.i157.i", label %"<bb 5>"
+
+"<bb 3>.i157.i":
+ unreachable
+
+"<L56>.i":
+ br label %"<bb 5>"
+
+"<bb 5>.i258.i":
+ unreachable
+
+"<bb 5>":
+ switch i32 undef, label %"<L39>.i" [
+ i32 36, label %"<L19>.i"
+ i32 94, label %"<L18>.i"
+ i32 124, label %"<L98>.i"
+ i32 42, label %"<L99>.i"
+ i32 43, label %"<L99>.i"
+ i32 46, label %"<L24>.i"
+ i32 63, label %"<L99>.i"
+ i32 91, label %"<L28>.i"
+ i32 92, label %"<L29>.i"
+ ]
+}
diff --git a/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll b/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll
new file mode 100644
index 0000000..18178e4
--- /dev/null
+++ b/test/Transforms/GVN/2011-07-07-MatchIntrinsicExtract.ll
@@ -0,0 +1,85 @@
+; RUN: opt < %s -gvn -S | FileCheck %s
+;
+
+%0 = type { i64, i1 }
+
+define i64 @test1(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %uadd = tail call %0 @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
+ %uadd.0 = extractvalue %0 %uadd, 0
+ %add1 = add i64 %a, %b
+ ret i64 %add1
+}
+
+; CHECK: @test1
+; CHECK-NOT: add1
+; CHECK: ret
+
+define i64 @test2(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %usub = tail call %0 @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
+ %usub.0 = extractvalue %0 %usub, 0
+ %sub1 = sub i64 %a, %b
+ ret i64 %sub1
+}
+
+; CHECK: @test2
+; CHECK-NOT: sub1
+; CHECK: ret
+
+define i64 @test3(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %umul = tail call %0 @llvm.umul.with.overflow.i64(i64 %a, i64 %b)
+ %umul.0 = extractvalue %0 %umul, 0
+ %mul1 = mul i64 %a, %b
+ ret i64 %mul1
+}
+
+; CHECK: @test3
+; CHECK-NOT: mul1
+; CHECK: ret
+
+define i64 @test4(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %sadd = tail call %0 @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
+ %sadd.0 = extractvalue %0 %sadd, 0
+ %add1 = add i64 %a, %b
+ ret i64 %add1
+}
+
+; CHECK: @test4
+; CHECK-NOT: add1
+; CHECK: ret
+
+define i64 @test5(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %ssub = tail call %0 @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+ %ssub.0 = extractvalue %0 %ssub, 0
+ %sub1 = sub i64 %a, %b
+ ret i64 %sub1
+}
+
+; CHECK: @test5
+; CHECK-NOT: sub1
+; CHECK: ret
+
+define i64 @test6(i64 %a, i64 %b) nounwind ssp {
+entry:
+ %smul = tail call %0 @llvm.smul.with.overflow.i64(i64 %a, i64 %b)
+ %smul.0 = extractvalue %0 %smul, 0
+ %mul1 = mul i64 %a, %b
+ ret i64 %mul1
+}
+
+; CHECK: @test6
+; CHECK-NOT: mul1
+; CHECK: ret
+
+declare void @exit(i32) noreturn
+declare %0 @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.umul.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
+declare %0 @llvm.smul.with.overflow.i64(i64, i64) nounwind readnone
+
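
The point of this new test: GVN should recognize that field 0 of an *.with.overflow intrinsic result is the same value as the plain add/sub/mul of the operands, and delete the redundant arithmetic. Sketched on @test1, the expected post-GVN body would be roughly:

    %uadd = tail call %0 @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
    %uadd.0 = extractvalue %0 %uadd, 0
    ret i64 %uadd.0   ; the separate %add1 = add i64 %a, %b is folded away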
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll
index 1d50205..dba9d81 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/nonescaping-malloc.ll
@@ -20,13 +20,13 @@ target triple = "x86_64-apple-darwin10.0"
define %"struct.llvm::StringMapEntry<void*>"* @_Z3fooRN4llvm9StringMapIPvNS_15MallocAllocatorEEEPKc(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, i8* %P) ssp {
entry:
- %tmp = alloca %"struct.llvm::StringRef", align 8 ; <%"struct.llvm::StringRef"*> [#uses=3]
- %tmp.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 0 ; <i8**> [#uses=1]
+ %tmp = alloca %"struct.llvm::StringRef", align 8
+ %tmp.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 0
store i8* %P, i8** %tmp.i, align 8
- %tmp1.i = call i64 @strlen(i8* %P) nounwind readonly ; <i64> [#uses=1]
- %tmp2.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 1 ; <i64*> [#uses=1]
+ %tmp1.i = call i64 @strlen(i8* %P) nounwind readonly
+ %tmp2.i = getelementptr inbounds %"struct.llvm::StringRef"* %tmp, i64 0, i32 1
store i64 %tmp1.i, i64* %tmp2.i, align 8
- %tmp1 = call %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, %"struct.llvm::StringRef"* %tmp) ssp ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=1]
+ %tmp1 = call %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %X, %"struct.llvm::StringRef"* %tmp) ssp
ret %"struct.llvm::StringMapEntry<void*>"* %tmp1
}
@@ -34,75 +34,75 @@ declare i64 @strlen(i8* nocapture) nounwind readonly
declare noalias i8* @malloc(i64) nounwind
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
declare i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"*, i64, i64)
define linkonce_odr %"struct.llvm::StringMapEntry<void*>"* @_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueERKNS_9StringRefE(%"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, %"struct.llvm::StringRef"* nocapture %Key) ssp align 2 {
entry:
- %elt = bitcast %"struct.llvm::StringRef"* %Key to i64* ; <i64*> [#uses=1]
- %val = load i64* %elt ; <i64> [#uses=3]
- %tmp = getelementptr inbounds %"struct.llvm::StringRef"* %Key, i64 0, i32 1 ; <i64*> [#uses=1]
- %val2 = load i64* %tmp ; <i64> [#uses=2]
- %tmp2.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0 ; <%"struct.llvm::StringMapImpl"*> [#uses=1]
- %tmp3.i = tail call i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"* %tmp2.i, i64 %val, i64 %val2) ; <i32> [#uses=1]
- %tmp4.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 0 ; <%"struct.llvm::StringMapImpl::ItemBucket"**> [#uses=1]
- %tmp5.i = load %"struct.llvm::StringMapImpl::ItemBucket"** %tmp4.i, align 8 ; <%"struct.llvm::StringMapImpl::ItemBucket"*> [#uses=1]
- %tmp6.i = zext i32 %tmp3.i to i64 ; <i64> [#uses=1]
- %tmp7.i = getelementptr inbounds %"struct.llvm::StringMapImpl::ItemBucket"* %tmp5.i, i64 %tmp6.i, i32 1 ; <%"struct.llvm::StringMapEntryBase"**> [#uses=2]
- %tmp8.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8 ; <%"struct.llvm::StringMapEntryBase"*> [#uses=3]
- %tmp9.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, null ; <i1> [#uses=1]
- %tmp13.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*) ; <i1> [#uses=1]
- %or.cond.i = or i1 %tmp9.i, %tmp13.i ; <i1> [#uses=1]
+ %elt = bitcast %"struct.llvm::StringRef"* %Key to i64*
+ %val = load i64* %elt
+ %tmp = getelementptr inbounds %"struct.llvm::StringRef"* %Key, i64 0, i32 1
+ %val2 = load i64* %tmp
+ %tmp2.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0
+ %tmp3.i = tail call i32 @_ZN4llvm13StringMapImpl15LookupBucketForENS_9StringRefE(%"struct.llvm::StringMapImpl"* %tmp2.i, i64 %val, i64 %val2)
+ %tmp4.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 0
+ %tmp5.i = load %"struct.llvm::StringMapImpl::ItemBucket"** %tmp4.i, align 8
+ %tmp6.i = zext i32 %tmp3.i to i64
+ %tmp7.i = getelementptr inbounds %"struct.llvm::StringMapImpl::ItemBucket"* %tmp5.i, i64 %tmp6.i, i32 1
+ %tmp8.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
+ %tmp9.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, null
+ %tmp13.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp8.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*)
+ %or.cond.i = or i1 %tmp9.i, %tmp13.i
br i1 %or.cond.i, label %bb4.i, label %bb6.i
bb4.i: ; preds = %entry
- %tmp41.i = inttoptr i64 %val to i8* ; <i8*> [#uses=2]
- %tmp4.i35.i = getelementptr inbounds i8* %tmp41.i, i64 %val2 ; <i8*> [#uses=1]
- %tmp.i.i = ptrtoint i8* %tmp4.i35.i to i64 ; <i64> [#uses=1]
- %tmp1.i.i = trunc i64 %tmp.i.i to i32 ; <i32> [#uses=1]
- %tmp3.i.i = trunc i64 %val to i32 ; <i32> [#uses=1]
- %tmp4.i.i = sub i32 %tmp1.i.i, %tmp3.i.i ; <i32> [#uses=3]
- %tmp5.i.i = add i32 %tmp4.i.i, 17 ; <i32> [#uses=1]
- %tmp8.i.i = zext i32 %tmp5.i.i to i64 ; <i64> [#uses=1]
- %tmp.i20.i.i = tail call noalias i8* @malloc(i64 %tmp8.i.i) nounwind ; <i8*> [#uses=7]
- %tmp10.i.i = bitcast i8* %tmp.i20.i.i to %"struct.llvm::StringMapEntry<void*>"* ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=2]
- %tmp12.i.i = icmp eq i8* %tmp.i20.i.i, null ; <i1> [#uses=1]
+ %tmp41.i = inttoptr i64 %val to i8*
+ %tmp4.i35.i = getelementptr inbounds i8* %tmp41.i, i64 %val2
+ %tmp.i.i = ptrtoint i8* %tmp4.i35.i to i64
+ %tmp1.i.i = trunc i64 %tmp.i.i to i32
+ %tmp3.i.i = trunc i64 %val to i32
+ %tmp4.i.i = sub i32 %tmp1.i.i, %tmp3.i.i
+ %tmp5.i.i = add i32 %tmp4.i.i, 17
+ %tmp8.i.i = zext i32 %tmp5.i.i to i64
+ %tmp.i20.i.i = tail call noalias i8* @malloc(i64 %tmp8.i.i) nounwind
+ %tmp10.i.i = bitcast i8* %tmp.i20.i.i to %"struct.llvm::StringMapEntry<void*>"*
+ %tmp12.i.i = icmp eq i8* %tmp.i20.i.i, null
br i1 %tmp12.i.i, label %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i, label %bb.i.i
bb.i.i: ; preds = %bb4.i
- %tmp.i.i.i.i = bitcast i8* %tmp.i20.i.i to i32* ; <i32*> [#uses=1]
+ %tmp.i.i.i.i = bitcast i8* %tmp.i20.i.i to i32*
store i32 %tmp4.i.i, i32* %tmp.i.i.i.i, align 4
- %tmp1.i19.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8 ; <i8*> [#uses=1]
- %0 = bitcast i8* %tmp1.i19.i.i to i8** ; <i8**> [#uses=1]
+ %tmp1.i19.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8
+ %0 = bitcast i8* %tmp1.i19.i.i to i8**
store i8* null, i8** %0, align 8
br label %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
-_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i: ; preds = %bb4.i, %bb.i.i
- %tmp.i18.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 16 ; <i8*> [#uses=1]
- %tmp15.i.i = zext i32 %tmp4.i.i to i64 ; <i64> [#uses=2]
- tail call void @llvm.memcpy.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i32 1) nounwind
- %tmp.i18.sum.i.i = add i64 %tmp15.i.i, 16 ; <i64> [#uses=1]
- %tmp17.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 %tmp.i18.sum.i.i ; <i8*> [#uses=1]
+_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i: ; preds = %bb.i.i, %bb4.i
+ %tmp.i18.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 16
+ %tmp15.i.i = zext i32 %tmp4.i.i to i64
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i18.i.i, i8* %tmp41.i, i64 %tmp15.i.i, i32 1, i1 false)
+ %tmp.i18.sum.i.i = add i64 %tmp15.i.i, 16
+ %tmp17.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 %tmp.i18.sum.i.i
store i8 0, i8* %tmp17.i.i, align 1
- %tmp.i.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8 ; <i8*> [#uses=1]
- %1 = bitcast i8* %tmp.i.i.i to i8** ; <i8**> [#uses=1]
+ %tmp.i.i.i = getelementptr inbounds i8* %tmp.i20.i.i, i64 8
+ %1 = bitcast i8* %tmp.i.i.i to i8**
store i8* null, i8** %1, align 8
- %tmp22.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8 ; <%"struct.llvm::StringMapEntryBase"*> [#uses=1]
- %tmp24.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp22.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*) ; <i1> [#uses=1]
+ %tmp22.i = load %"struct.llvm::StringMapEntryBase"** %tmp7.i, align 8
+ %tmp24.i = icmp eq %"struct.llvm::StringMapEntryBase"* %tmp22.i, inttoptr (i64 -1 to %"struct.llvm::StringMapEntryBase"*)
br i1 %tmp24.i, label %bb9.i, label %_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueIS1_EERNS_14StringMapEntryIS1_EENS_9StringRefET_.exit
bb6.i: ; preds = %entry
- %tmp16.i = bitcast %"struct.llvm::StringMapEntryBase"* %tmp8.i to %"struct.llvm::StringMapEntry<void*>"* ; <%"struct.llvm::StringMapEntry<void*>"*> [#uses=1]
+ %tmp16.i = bitcast %"struct.llvm::StringMapEntryBase"* %tmp8.i to %"struct.llvm::StringMapEntry<void*>"*
ret %"struct.llvm::StringMapEntry<void*>"* %tmp16.i
bb9.i: ; preds = %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
- %tmp25.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 3 ; <i32*> [#uses=2]
- %tmp26.i = load i32* %tmp25.i, align 8 ; <i32> [#uses=1]
- %tmp27.i = add i32 %tmp26.i, -1 ; <i32> [#uses=1]
+ %tmp25.i = getelementptr inbounds %"struct.llvm::StringMap<void*,llvm::MallocAllocator>"* %this, i64 0, i32 0, i32 3
+ %tmp26.i = load i32* %tmp25.i, align 8
+ %tmp27.i = add i32 %tmp26.i, -1
store i32 %tmp27.i, i32* %tmp25.i, align 8
ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
_ZN4llvm9StringMapIPvNS_15MallocAllocatorEE16GetOrCreateValueIS1_EERNS_14StringMapEntryIS1_EENS_9StringRefET_.exit: ; preds = %_ZN4llvm14StringMapEntryIPvE6CreateINS_15MallocAllocatorES1_EEPS2_PKcS7_RT_T0_.exit.i
ret %"struct.llvm::StringMapEntry<void*>"* %tmp10.i.i
}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index 28b1fc7..2f0d2eb 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -135,7 +135,7 @@ define i8* @coerce_mustalias7(i64 %V, i64* %P) {
define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
entry:
%conv = bitcast i16* %A to i8*
- tail call void @llvm.memset.i64(i8* %conv, i8 1, i64 200, i32 1)
+ tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
%arrayidx = getelementptr inbounds i16* %A, i64 42
%tmp2 = load i16* %arrayidx
ret i16 %tmp2
@@ -148,7 +148,7 @@ entry:
define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
entry:
%conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
- tail call void @llvm.memset.i64(i8* %conv, i8 %Val, i64 400, i32 1)
+ tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 %Val, i64 400, i32 1, i1 false)
%arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
%tmp2 = load float* %arrayidx ; <float> [#uses=1]
ret float %tmp2
@@ -168,11 +168,11 @@ define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
%P3 = bitcast i16* %P to i8*
br i1 %cond, label %T, label %F
T:
- tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
+ tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 1, i64 400, i32 1, i1 false)
br label %Cont
F:
- tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
+ tail call void @llvm.memset.p0i8.i64(i8* %P3, i8 2, i64 400, i32 1, i1 false)
br label %Cont
Cont:
@@ -193,7 +193,7 @@ Cont:
define float @memcpy_to_float_local(float* %A) nounwind ssp {
entry:
%conv = bitcast float* %A to i8* ; <i8*> [#uses=1]
- tail call void @llvm.memcpy.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1)
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1, i1 false)
%arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
%tmp2 = load float* %arrayidx ; <float> [#uses=1]
ret float %tmp2
@@ -203,11 +203,6 @@ entry:
}
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
-
-
-
;; non-local i32/float -> i8 load forwarding.
define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
@@ -539,7 +534,7 @@ define i32 @memset_to_load() nounwind readnone {
entry:
%x = alloca [256 x i32], align 4 ; <[256 x i32]*> [#uses=2]
%tmp = bitcast [256 x i32]* %x to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.i64(i8* %tmp, i8 0, i64 1024, i32 4)
+ call void @llvm.memset.p0i8.i64(i8* %tmp, i8 0, i64 1024, i32 4, i1 false)
%arraydecay = getelementptr inbounds [256 x i32]* %x, i32 0, i32 0 ; <i32*>
%tmp1 = load i32* %arraydecay ; <i32> [#uses=1]
ret i32 %tmp1
@@ -643,3 +638,7 @@ entry:
; CHECK-ret i32
}
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
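
Beyond the intrinsic renaming, rle.ll exercises GVN's load forwarding out of memset and memcpy. As a worked example of @memset_to_i16_local: a fill byte of 1 means any i16 loaded from inside the 200-byte region is 0x0101, i.e. 257, so the load should collapse to a constant. A sketch under that assumption:

    tail call void @llvm.memset.p0i8.i64(i8* %conv, i8 1, i64 200, i32 1, i1 false)
    ; ...the load of %A[42] forwards the repeated fill byte:
    ret i16 257   ; 0x0101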
diff --git a/test/Transforms/GlobalOpt/2005-09-27-Crash.ll b/test/Transforms/GlobalOpt/2005-09-27-Crash.ll
index ab2077a..43597bf 100644
--- a/test/Transforms/GlobalOpt/2005-09-27-Crash.ll
+++ b/test/Transforms/GlobalOpt/2005-09-27-Crash.ll
@@ -2,7 +2,7 @@
%RPyString = type { i32, %arraytype.Char }
%arraytype.Char = type { i32, [0 x i8] }
%arraytype.Signed = type { i32, [0 x i32] }
- %functiontype.1 = type %RPyString* (i32)
+ %functiontype.1 = type { %RPyString* (i32) *}
%structtype.test = type { i32, %arraytype.Signed }
@structinstance.test = internal global { i32, { i32, [2 x i32] } } { i32 41, { i32, [2 x i32] } { i32 2, [2 x i32] [ i32 100, i32 101 ] } } ; <{ i32, { i32, [2 x i32] } }*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/2007-04-05-Crash.ll b/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
index d306d14..c7aca62 100644
--- a/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
+++ b/test/Transforms/GlobalOpt/2007-04-05-Crash.ll
@@ -6,7 +6,7 @@ target triple = "thumb-apple-darwin8"
@"L_OBJC_IMAGE_INFO" = internal global [2 x i32] zeroinitializer ; <[2 x i32]*> [#uses=1]
@llvm.used = appending global [1 x i8*] [ i8* bitcast ([2 x i32]* @"L_OBJC_IMAGE_INFO" to i8*) ] ; <[1 x i8*]*> [#uses=0]
-define i16 @__NSCharToUnicharCFWrapper(i8 zeroext %ch) zeroext {
+define zeroext i16 @__NSCharToUnicharCFWrapper(i8 zeroext %ch) {
entry:
%iftmp.0.0.in.in = select i1 false, i16* @replacementUnichar, i16* null ; <i16*> [#uses=1]
%iftmp.0.0.in = load i16* %iftmp.0.0.in.in ; <i16> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll b/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
index 3242e1e..b74e4fc 100644
--- a/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
+++ b/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash-2.ll
@@ -6,12 +6,16 @@ target triple = "i386-apple-darwin7"
define void @bar(i32 %Size) nounwind noinline {
entry:
- %tmp = malloc [1000000 x %struct.foo] ; <[1000000 x %struct.foo]*> [#uses=1]
+ %malloccall = tail call i8* @malloc(i32 trunc (i64 mul (i64 ptrtoint (i32* getelementptr (i32* null, i32 1) to i64), i64 2000000) to i32))
+ %tmp = bitcast i8* %malloccall to [1000000 x %struct.foo]*
%.sub = getelementptr [1000000 x %struct.foo]* %tmp, i32 0, i32 0 ; <%struct.foo*> [#uses=1]
store %struct.foo* %.sub, %struct.foo** @X, align 4
ret void
}
+declare noalias i8* @malloc(i32)
+
+
define i32 @baz() nounwind readonly noinline {
bb1.thread:
%tmpLD1 = load %struct.foo** @X, align 4 ; <%struct.foo*> [#uses=2]
diff --git a/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll b/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
index 51dcac1..613cb7b 100644
--- a/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
+++ b/test/Transforms/GlobalOpt/2008-12-16-HeapSRACrash.ll
@@ -6,12 +6,15 @@ target triple = "i386-apple-darwin7"
define void @bar(i32 %Size) nounwind noinline {
entry:
- %tmp = malloc [1000000 x %struct.foo] ; <[1000000 x %struct.foo]*> [#uses=1]
+ %malloccall = tail call i8* @malloc(i32 trunc (i64 mul (i64 ptrtoint (i32* getelementptr (i32* null, i32 1) to i64), i64 2000000) to i32))
+ %tmp = bitcast i8* %malloccall to [1000000 x %struct.foo]*
%.sub = getelementptr [1000000 x %struct.foo]* %tmp, i32 0, i32 0 ; <%struct.foo*> [#uses=1]
store %struct.foo* %.sub, %struct.foo** @X, align 4
ret void
}
+declare noalias i8* @malloc(i32)
+
define i32 @baz() nounwind readonly noinline {
bb1.thread:
%tmpLD1 = load %struct.foo** @X, align 4 ; <%struct.foo*> [#uses=3]
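
The malloc instruction was likewise removed, so these tests call a declared @malloc with an explicit byte count. The constant expression above is the standard sizeof idiom: ptrtoint of a getelementptr off a null pointer yields the element size, which is then scaled by the element count. The same idiom for a hypothetical [10 x i32] allocation:

    %raw = tail call i8* @malloc(i32 trunc (i64 mul (i64 ptrtoint (i32* getelementptr (i32* null, i32 1) to i64), i64 10) to i32))
    %arr = bitcast i8* %raw to [10 x i32]*

    declare noalias i8* @malloc(i32)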
diff --git a/test/Transforms/GlobalOpt/memcpy.ll b/test/Transforms/GlobalOpt/memcpy.ll
index 8f063a2..94e07a0 100644
--- a/test/Transforms/GlobalOpt/memcpy.ll
+++ b/test/Transforms/GlobalOpt/memcpy.ll
@@ -3,13 +3,11 @@
@G1 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00" ; <[58 x i8]*> [#uses=1]
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
define void @foo() {
- %Blah = alloca [58 x i8] ; <[58 x i8]*> [#uses=1]
- %tmp.0 = getelementptr [58 x i8]* %Blah, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp.0, i8* getelementptr ([58 x i8]* @G1, i32 0, i32 0), i32 58, i32 1 )
- ret void
+ %Blah = alloca [58 x i8]
+ %tmp.0 = getelementptr [58 x i8]* %Blah, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp.0, i8* getelementptr inbounds ([58 x i8]* @G1, i32 0, i32 0), i32 58, i32 1, i1 false)
+ ret void
}
-
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/GlobalOpt/memset.ll b/test/Transforms/GlobalOpt/memset.ll
index a9b9d5e..3bb5ce9 100644
--- a/test/Transforms/GlobalOpt/memset.ll
+++ b/test/Transforms/GlobalOpt/memset.ll
@@ -1,21 +1,18 @@
; both globals are write only, delete them.
-; RUN: opt < %s -globalopt -S | \
-; RUN: not grep internal
+; RUN: opt < %s -globalopt -S | not grep internal
@G0 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00" ; <[58 x i8]*> [#uses=1]
@G1 = internal global [4 x i32] [ i32 1, i32 2, i32 3, i32 4 ] ; <[4 x i32]*> [#uses=1]
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
define void @foo() {
- %Blah = alloca [58 x i8] ; <[58 x i8]*> [#uses=1]
- %tmp3 = bitcast [58 x i8]* %Blah to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i32 1 )
- call void @llvm.memset.i32( i8* getelementptr ([58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i32 1 )
- ret void
+ %Blah = alloca [58 x i8]
+ %tmp3 = bitcast [58 x i8]* %Blah to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* bitcast ([4 x i32]* @G1 to i8*), i8* %tmp3, i32 16, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* getelementptr inbounds ([58 x i8]* @G0, i32 0, i32 0), i8 17, i32 58, i32 1, i1 false)
+ ret void
}
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/IPConstantProp/return-constants.ll b/test/Transforms/IPConstantProp/return-constants.ll
index 79220dd..2cd99fe 100644
--- a/test/Transforms/IPConstantProp/return-constants.ll
+++ b/test/Transforms/IPConstantProp/return-constants.ll
@@ -4,38 +4,43 @@
;; Check that the second return values didn't get propagated
; RUN: cat %t | grep {%N = add i32 %B, %D}
-define internal {i32, i32} @foo(i1 %Q) {
- br i1 %Q, label %T, label %F
+%0 = type { i32, i32 }
-T: ; preds = %0
- ret i32 21, i32 22
+define internal %0 @foo(i1 %Q) {
+ br i1 %Q, label %T, label %F
-F: ; preds = %0
- ret i32 21, i32 23
+T: ; preds = %0
+ %mrv = insertvalue %0 undef, i32 21, 0
+ %mrv1 = insertvalue %0 %mrv, i32 22, 1
+ ret %0 %mrv1
+
+F: ; preds = %0
+ %mrv2 = insertvalue %0 undef, i32 21, 0
+ %mrv3 = insertvalue %0 %mrv2, i32 23, 1
+ ret %0 %mrv3
}
-define internal {i32, i32} @bar(i1 %Q) {
- %A = insertvalue { i32, i32 } undef, i32 21, 0
- br i1 %Q, label %T, label %F
+define internal %0 @bar(i1 %Q) {
+ %A = insertvalue %0 undef, i32 21, 0
+ br i1 %Q, label %T, label %F
-T: ; preds = %0
- %B = insertvalue { i32, i32 } %A, i32 22, 1
- ret { i32, i32 } %B
+T: ; preds = %0
+ %B = insertvalue %0 %A, i32 22, 1
+ ret %0 %B
-F: ; preds = %0
- %C = insertvalue { i32, i32 } %A, i32 23, 1
- ret { i32, i32 } %C
+F: ; preds = %0
+ %C = insertvalue %0 %A, i32 23, 1
+ ret %0 %C
}
-define { i32, i32 } @caller(i1 %Q) {
- %X = call {i32, i32} @foo( i1 %Q )
- %A = getresult {i32, i32} %X, 0
- %B = getresult {i32, i32} %X, 1
- %Y = call {i32, i32} @bar( i1 %Q )
- %C = extractvalue {i32, i32} %Y, 0
- %D = extractvalue {i32, i32} %Y, 1
- %M = add i32 %A, %C
- %N = add i32 %B, %D
- ret { i32, i32 } %X
+define %0 @caller(i1 %Q) {
+ %X = call %0 @foo(i1 %Q)
+ %A = extractvalue %0 %X, 0
+ %B = extractvalue %0 %X, 1
+ %Y = call %0 @bar(i1 %Q)
+ %C = extractvalue %0 %Y, 0
+ %D = extractvalue %0 %Y, 1
+ %M = add i32 %A, %C
+ %N = add i32 %B, %D
+ ret %0 %X
}
-
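
return-constants.ll is rewritten because the old multiple-return-value syntax (a ret with several operands, read back with getresult) gave way to first-class aggregates: build the struct with insertvalue, return it whole, and unpack it with extractvalue. The minimal shape of the new style, on a hypothetical @pair:

    define { i32, i32 } @pair() {
      %a = insertvalue { i32, i32 } undef, i32 1, 0
      %b = insertvalue { i32, i32 } %a, i32 2, 1
      ret { i32, i32 } %b
    }

    ; at a call site:
    %p = call { i32, i32 } @pair()
    %x = extractvalue { i32, i32 } %p, 0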
diff --git a/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll b/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
index 288431a..a004831 100644
--- a/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
+++ b/test/Transforms/IndVarSimplify/2008-09-02-IVType.ll
@@ -16,7 +16,7 @@
%struct.YUVGeneralParams = type { i16*, i8*, i8*, i8*, i8*, i8*, void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16, i16, [6 x i8], void (i8*, i16**, i32, %struct.YUVGeneralParams*)*, i16, i16 }
@llvm.used = appending global [1 x i8*] [ i8* bitcast (i16 (%struct.JPEGGlobals*)* @ExtractBufferedBlocksIgnored to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
-define i16 @ExtractBufferedBlocksIgnored(%struct.JPEGGlobals* %globp) signext nounwind {
+define signext i16 @ExtractBufferedBlocksIgnored(%struct.JPEGGlobals* %globp) nounwind {
entry:
%tmp4311 = getelementptr %struct.JPEGGlobals* %globp, i32 0, i32 70 ; <i32*> [#uses=1]
%tmp4412 = load i32* %tmp4311, align 16 ; <i32> [#uses=2]
diff --git a/test/Transforms/IndVarSimplify/ada-loops.ll b/test/Transforms/IndVarSimplify/ada-loops.ll
index 4a07d99..9e635fd 100644
--- a/test/Transforms/IndVarSimplify/ada-loops.ll
+++ b/test/Transforms/IndVarSimplify/ada-loops.ll
@@ -1,14 +1,18 @@
-; RUN: opt < %s -indvars -S > %t
-; RUN: grep phi %t | count 4
-; RUN: grep {= phi i32} %t | count 4
-; RUN: not grep {sext i} %t
-; RUN: not grep {zext i} %t
-; RUN: not grep {trunc i} %t
-; RUN: not grep {add i8} %t
+; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
+;
; PR1301
; Do a bunch of analysis and prove that the loops can use an i32 trip
; count without casting.
+;
+; Note that all four functions should actually be converted to
+; memset. However, this test case validates indvars behavior. We
+; don't check that phis are "folded together" because that is a job
+; for loop strength reduction. But indvars must remove sext, zext,
+; trunc, and add i8.
+;
+; CHECK-NOT: {{sext|zext|trunc|add i8}}
; ModuleID = 'ada.bc'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-n:8:16:32"
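
The RUN-line rewrite swaps a pile of grep/count invocations for FileCheck, and the added -disable-iv-rewrite run demands the same output properties from the non-rewriting indvars mode. Note the {{...}} delimiters: FileCheck treats their contents as a regular expression, so one CHECK-NOT covers all four forbidden patterns. A hypothetical directive mixing literal text with a regex fragment:

    ; CHECK: store i32 0, i32* {{%[a-zA-Z0-9.]+}}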
diff --git a/test/Transforms/IndVarSimplify/iv-zext.ll b/test/Transforms/IndVarSimplify/iv-zext.ll
index 00018ec..3a05c89 100644
--- a/test/Transforms/IndVarSimplify/iv-zext.ll
+++ b/test/Transforms/IndVarSimplify/iv-zext.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
; CHECK-NOT: and
; CHECK-NOT: zext
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index c35feef..9605670 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -23,6 +23,7 @@ ph:
; sext should be eliminated while preserving gep inboundsness.
; CHECK-NOT: sext
; CHECK: getelementptr inbounds
+; CHECK: exit:
loop:
%i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ]
%s.01 = phi i32 [ 0, %ph ], [ %sinc, %loop ]
@@ -63,6 +64,7 @@ ph:
; CHECK: getelementptr inbounds
; %vall sext should obviously not be eliminated
; CHECK: sext
+; CHECK: exit:
loop:
%i.02 = phi i32 [ 0, %ph ], [ %iinc, %loop ]
%s.01 = phi i64 [ 0, %ph ], [ %sinc, %loop ]
@@ -106,6 +108,7 @@ ph:
; Preserve gep inboundsness, and don't factor it.
; CHECK: getelementptr inbounds i32* %ptriv, i32 1
; CHECK-NOT: add
+; CHECK: exit:
loop:
%ptriv = phi i32* [ %first, %ph ], [ %ptrpost, %loop ]
%ofs = sext i32 %idx to i64
@@ -121,3 +124,199 @@ exit:
return:
ret void
}
+
+%struct = type { i32 }
+
+define void @bitcastiv(i32 %start, i32 %limit, i32 %step, %struct* %base)
+nounwind
+{
+entry:
+ br label %loop
+
+; CHECK: loop:
+;
+; Preserve casts
+; CHECK: phi i32
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: exit:
+loop:
+ %iv = phi i32 [%start, %entry], [%next, %loop]
+ %p = phi %struct* [%base, %entry], [%pinc, %loop]
+ %adr = getelementptr %struct* %p, i32 0, i32 0
+ store i32 3, i32* %adr
+ %pp = bitcast %struct* %p to i32*
+ store i32 4, i32* %pp
+ %pinc = getelementptr %struct* %p, i32 1
+ %next = add i32 %iv, 1
+ %cond = icmp ne i32 %next, %limit
+ br i1 %cond, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @maxvisitor(i32 %limit, i32* %base) nounwind {
+entry:
+ br label %loop
+
+; Test inserting a truncate at a phi use.
+;
+; CHECK: loop:
+; CHECK: phi i64
+; CHECK: trunc
+; CHECK: exit:
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.inc ]
+ %max = phi i32 [ 0, %entry ], [ %max.next, %loop.inc ]
+ %idxprom = sext i32 %idx to i64
+ %adr = getelementptr inbounds i32* %base, i64 %idxprom
+ %val = load i32* %adr
+ %cmp19 = icmp sgt i32 %val, %max
+ br i1 %cmp19, label %if.then, label %if.else
+
+if.then:
+ br label %loop.inc
+
+if.else:
+ br label %loop.inc
+
+loop.inc:
+ %max.next = phi i32 [ %idx, %if.then ], [ %max, %if.else ]
+ %idx.next = add nsw i32 %idx, 1
+ %cmp = icmp slt i32 %idx.next, %limit
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define void @identityphi(i32 %limit) nounwind {
+entry:
+ br label %loop
+
+; Test an edge case of removing an identity phi that directly feeds
+; back to the loop iv.
+;
+; CHECK: loop:
+; CHECK: phi i32
+; CHECK-NOT: phi
+; CHECK: exit:
+loop:
+ %iv = phi i32 [ 0, %entry], [ %iv.next, %control ]
+ br i1 undef, label %if.then, label %control
+
+if.then:
+ br label %control
+
+control:
+ %iv.next = phi i32 [ %iv, %loop ], [ undef, %if.then ]
+ %cmp = icmp slt i32 %iv.next, %limit
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+define i64 @cloneOr(i32 %limit, i64* %base) nounwind {
+entry:
+ ; ensure that the loop can't overflow
+ %halfLim = ashr i32 %limit, 2
+ br label %loop
+
+; Test cloning an or, which is not an OverflowBinaryOperator.
+;
+; CHECK: loop:
+; CHECK: phi i64
+; CHECK-NOT: sext
+; CHECK: or i64
+; CHECK: exit:
+loop:
+ %iv = phi i32 [ 0, %entry], [ %iv.next, %loop ]
+ %t1 = sext i32 %iv to i64
+ %adr = getelementptr i64* %base, i64 %t1
+ %val = load i64* %adr
+ %t2 = or i32 %iv, 1
+ %t3 = sext i32 %t2 to i64
+ %iv.next = add i32 %iv, 2
+ %cmp = icmp slt i32 %iv.next, %halfLim
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ %result = and i64 %val, %t3
+ ret i64 %result
+}
+
+; The i induction variable looks like a wrap-around, but it really is just
+; a simple affine IV. Make sure that indvars simplifies through.
+define i32 @indirectRecurrence() nounwind {
+entry:
+ br label %loop
+
+; ReplaceLoopExitValue should fold the return value to constant 9.
+; CHECK: loop:
+; CHECK: phi i32
+; CHECK: ret i32 9
+loop:
+ %j.0 = phi i32 [ 1, %entry ], [ %j.next, %cond_true ]
+ %i.0 = phi i32 [ 0, %entry ], [ %j.0, %cond_true ]
+ %tmp = icmp ne i32 %j.0, 10
+ br i1 %tmp, label %cond_true, label %return
+
+cond_true:
+ %j.next = add i32 %j.0, 1
+ br label %loop
+
+return:
+ ret i32 %i.0
+}
+
+; Eliminate the congruent phis j, k, and l.
+; Eliminate the redundant IV increments k.next and l.next.
+; Two phis should remain, one starting at %init, and one at %init1.
+; Two increments should remain, one by %step and one by %step1.
+; CHECK: loop:
+; CHECK: phi i32
+; CHECK: phi i32
+; CHECK-NOT: phi
+; CHECK: add i32
+; CHECK: add i32
+; CHECK-NOT: add
+; CHECK: return:
+;
+; Five live-outs should remain.
+; CHECK: lcssa = phi
+; CHECK: lcssa = phi
+; CHECK: lcssa = phi
+; CHECK: lcssa = phi
+; CHECK: lcssa = phi
+; CHECK-NOT: phi
+; CHECK: ret
+define i32 @isomorphic(i32 %init, i32 %step, i32 %lim) nounwind {
+entry:
+ %step1 = add i32 %step, 1
+ %init1 = add i32 %init, %step1
+ %l.0 = sub i32 %init1, %step1
+ br label %loop
+
+loop:
+ %ii = phi i32 [ %init1, %entry ], [ %ii.next, %loop ]
+ %i = phi i32 [ %init, %entry ], [ %ii, %loop ]
+ %j = phi i32 [ %init, %entry ], [ %j.next, %loop ]
+ %k = phi i32 [ %init1, %entry ], [ %k.next, %loop ]
+ %l = phi i32 [ %l.0, %entry ], [ %l.next, %loop ]
+ %ii.next = add i32 %ii, %step1
+ %j.next = add i32 %j, %step1
+ %k.next = add i32 %k, %step1
+ %l.step = add i32 %l, %step
+ %l.next = add i32 %l.step, 1
+ %cmp = icmp ne i32 %ii.next, %lim
+ br i1 %cmp, label %loop, label %return
+
+return:
+ %sum1 = add i32 %i, %j.next
+ %sum2 = add i32 %sum1, %k.next
+ %sum3 = add i32 %sum1, %l.step
+ %sum4 = add i32 %sum1, %l.next
+ ret i32 %sum4
+}
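
The @isomorphic test drives the congruent-IV cleanup described in its comment block: %ii, %j, and %k step by the same amount from related starts, and %l reaches the same sequence through two adds, so only two phis and two increments should survive. The core idea on a two-IV micro-example (hypothetical names):

    %a = phi i32 [ 0, %entry ], [ %a.next, %loop ]
    %b = phi i32 [ 0, %entry ], [ %b.next, %loop ]
    %a.next = add i32 %a, %s
    %b.next = add i32 %b, %s
    ; same start, same step: uses of %b/%b.next are rewritten to
    ; %a/%a.next and the redundant recurrence is deleted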
diff --git a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
index 9e46a78..5063b17 100644
--- a/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
+++ b/test/Transforms/IndVarSimplify/preserve-signed-wrap.ll
@@ -1,7 +1,5 @@
-; RUN: opt < %s -indvars -S > %t
-; RUN: grep sext %t | count 1
-; RUN: grep phi %t | count 1
-; RUN: grep {phi i64} %t
+; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt < %s -indvars -disable-iv-rewrite -S | FileCheck %s
; Indvars should insert a 64-bit induction variable to eliminate the
; sext for the addressing, however it shouldn't eliminate the sext
@@ -15,6 +13,10 @@ entry:
bb.nph: ; preds = %entry
br label %bb
+; CHECK: bb:
+; CHECK: phi i64
+; CHECK: sext i8
+; CHECK-NOT: sext
bb: ; preds = %bb1, %bb.nph
%i.02 = phi i32 [ %5, %bb1 ], [ 0, %bb.nph ] ; <i32> [#uses=2]
%p.01 = phi i8 [ %4, %bb1 ], [ -1, %bb.nph ] ; <i8> [#uses=2]
diff --git a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
index 0c8857f..ace74ff 100644
--- a/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
+++ b/test/Transforms/IndVarSimplify/variable-stride-ivs-0.ll
@@ -1,9 +1,9 @@
-; RUN: opt < %s -indvars -instcombine -S | \
-; RUN: grep {store i32 0}
+; RUN: opt < %s -indvars -instcombine -S | FileCheck %s
+; RUN: opt < %s -indvars -disable-iv-rewrite -instcombine -S | FileCheck %s
+;
; Test that -indvars can reduce variable stride IVs. If it can reduce variable
-; stride iv's, it will make %iv. and %m.0.0 isomorphic to each other without
+; stride iv's, it will make %iv. and %m.0.0 isomorphic to each other without
; cycles, allowing the tmp.21 subtraction to be eliminated.
-; END.
define void @vnum_test8(i32* %data) {
entry:
@@ -20,6 +20,7 @@ no_exit.preheader: ; preds = %entry
%tmp.16 = getelementptr i32* %data, i32 %tmp.9 ; <i32*> [#uses=1]
br label %no_exit
+; CHECK: store i32 0
no_exit: ; preds = %no_exit, %no_exit.preheader
%iv.ui = phi i32 [ 0, %no_exit.preheader ], [ %iv..inc.ui, %no_exit ] ; <i32> [#uses=1]
%iv. = phi i32 [ %tmp.5, %no_exit.preheader ], [ %iv..inc, %no_exit ] ; <i32> [#uses=2]
diff --git a/test/Transforms/Inline/2008-03-04-StructRet.ll b/test/Transforms/Inline/2008-03-04-StructRet.ll
deleted file mode 100644
index 3311d56..0000000
--- a/test/Transforms/Inline/2008-03-04-StructRet.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -inline -disable-output
- %struct.Benchmark = type { i32 (...)** }
- %struct.Complex = type { double, double }
- %struct.ComplexBenchmark = type { %struct.Benchmark }
-
-define %struct.Complex @_Zml7ComplexS_1(double %a.0, double %a.1, double %b.0, double %b.1) nounwind {
-entry:
- %mrv = alloca %struct.Complex ; <%struct.Complex*> [#uses=2]
- %mrv.gep = getelementptr %struct.Complex* %mrv, i32 0, i32 0 ; <double*> [#uses=1]
- %mrv.ld = load double* %mrv.gep ; <double> [#uses=1]
- %mrv.gep1 = getelementptr %struct.Complex* %mrv, i32 0, i32 1 ; <double*> [#uses=1]
- %mrv.ld2 = load double* %mrv.gep1 ; <double> [#uses=1]
- ret double %mrv.ld, double %mrv.ld2
-}
-
-define void @_ZNK16ComplexBenchmark9oop_styleEv(%struct.ComplexBenchmark* %this) nounwind {
-entry:
- %tmp = alloca %struct.Complex ; <%struct.Complex*> [#uses=0]
- br label %bb31
-bb: ; preds = %bb31
- call %struct.Complex @_Zml7ComplexS_1( double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00 ) nounwind ; <%struct.Complex>:0 [#uses=1]
- %gr = getresult %struct.Complex %0, 1 ; <double> [#uses=0]
- br label %bb31
-bb31: ; preds = %bb, %entry
- br i1 false, label %bb, label %return
-return: ; preds = %bb31
- ret void
-}
diff --git a/test/Transforms/Inline/2008-03-07-Inline-2.ll b/test/Transforms/Inline/2008-03-07-Inline-2.ll
deleted file mode 100644
index 0c968e6..0000000
--- a/test/Transforms/Inline/2008-03-07-Inline-2.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: opt < %s -inline -disable-output
- %struct.Demand = type { double, double }
- %struct.branch = type { %struct.Demand, double, double, double, double, %struct.branch*, [12 x %struct.leaf*] }
- %struct.leaf = type { %struct.Demand, double, double }
-@P = external global double ; <double*> [#uses=1]
-
-define %struct.leaf* @build_leaf() nounwind {
-entry:
- unreachable
-}
-
-define %struct.Demand @Compute_Branch2(%struct.branch* %br, double %theta_R, double %theta_I, double %pi_R, double %pi_I) nounwind {
-entry:
- %mrv = alloca %struct.Demand ; <%struct.Demand*> [#uses=4]
- %a2 = alloca %struct.Demand ; <%struct.Demand*> [#uses=0]
- br i1 false, label %bb46, label %bb
-bb: ; preds = %entry
- %mrv.gep = getelementptr %struct.Demand* %mrv, i32 0, i32 0 ; <double*> [#uses=1]
- %mrv.ld = load double* %mrv.gep ; <double> [#uses=1]
- %mrv.gep1 = getelementptr %struct.Demand* %mrv, i32 0, i32 1 ; <double*> [#uses=1]
- %mrv.ld2 = load double* %mrv.gep1 ; <double> [#uses=1]
- ret double %mrv.ld, double %mrv.ld2
-bb46: ; preds = %entry
- br label %bb72
-bb49: ; preds = %bb72
- call %struct.Demand @Compute_Leaf1( %struct.leaf* null, double 0.000000e+00, double 0.000000e+00 ) nounwind ; <%struct.Demand>:0 [#uses=1]
- %gr = getresult %struct.Demand %0, 1 ; <double> [#uses=0]
- br label %bb72
-bb72: ; preds = %bb49, %bb46
- br i1 false, label %bb49, label %bb77
-bb77: ; preds = %bb72
- %mrv.gep3 = getelementptr %struct.Demand* %mrv, i32 0, i32 0 ; <double*> [#uses=1]
- %mrv.ld4 = load double* %mrv.gep3 ; <double> [#uses=1]
- %mrv.gep5 = getelementptr %struct.Demand* %mrv, i32 0, i32 1 ; <double*> [#uses=1]
- %mrv.ld6 = load double* %mrv.gep5 ; <double> [#uses=1]
- ret double %mrv.ld4, double %mrv.ld6
-}
-
-define %struct.Demand @Compute_Leaf1(%struct.leaf* %l, double %pi_R, double %pi_I) nounwind {
-entry:
- %mrv = alloca %struct.Demand ; <%struct.Demand*> [#uses=2]
- %tmp10 = load double* @P, align 8 ; <double> [#uses=1]
- %tmp11 = fcmp olt double %tmp10, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %tmp11, label %bb, label %bb13
-bb: ; preds = %entry
- br label %bb13
-bb13: ; preds = %bb, %entry
- %mrv.gep = getelementptr %struct.Demand* %mrv, i32 0, i32 0 ; <double*> [#uses=1]
- %mrv.ld = load double* %mrv.gep ; <double> [#uses=1]
- %mrv.gep1 = getelementptr %struct.Demand* %mrv, i32 0, i32 1 ; <double*> [#uses=1]
- %mrv.ld2 = load double* %mrv.gep1 ; <double> [#uses=1]
- ret double %mrv.ld, double %mrv.ld2
-}
diff --git a/test/Transforms/Inline/2008-03-07-Inline.ll b/test/Transforms/Inline/2008-03-07-Inline.ll
deleted file mode 100644
index 86afb2d..0000000
--- a/test/Transforms/Inline/2008-03-07-Inline.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt < %s -inline -disable-output
- %struct.Demand = type { double, double }
- %struct.branch = type { %struct.Demand, double, double, double, double, %struct.branch*, [12 x %struct.leaf*] }
- %struct.leaf = type { %struct.Demand, double, double }
-@P = external global double ; <double*> [#uses=1]
-
-define %struct.leaf* @build_leaf() nounwind {
-entry:
- unreachable
-}
-
-define %struct.Demand @Compute_Branch2(%struct.branch* %br, double %theta_R, double %theta_I, double %pi_R, double %pi_I) nounwind {
-entry:
- %mrv = alloca %struct.Demand ; <%struct.Demand*> [#uses=4]
- %a2 = alloca %struct.Demand ; <%struct.Demand*> [#uses=0]
- br i1 false, label %bb46, label %bb
-bb: ; preds = %entry
- %mrv.gep = getelementptr %struct.Demand* %mrv, i32 0, i32 0 ; <double*> [#uses=1]
- %mrv.ld = load double* %mrv.gep ; <double> [#uses=1]
- %mrv.gep1 = getelementptr %struct.Demand* %mrv, i32 0, i32 1 ; <double*> [#uses=1]
- %mrv.ld2 = load double* %mrv.gep1 ; <double> [#uses=1]
- ret double %mrv.ld, double %mrv.ld2
-bb46: ; preds = %entry
- br label %bb72
-bb49: ; preds = %bb72
- call %struct.Demand @Compute_Leaf1( %struct.leaf* null, double 0.000000e+00, double 0.000000e+00 ) nounwind ; <%struct.Demand>:0 [#uses=1]
- %gr = getresult %struct.Demand %0, 1 ; <double> [#uses=0]
- br label %bb72
-bb72: ; preds = %bb49, %bb46
- br i1 false, label %bb49, label %bb77
-bb77: ; preds = %bb72
- %mrv.gep3 = getelementptr %struct.Demand* %mrv, i32 0, i32 0 ; <double*> [#uses=1]
- %mrv.ld4 = load double* %mrv.gep3 ; <double> [#uses=1]
- %mrv.gep5 = getelementptr %struct.Demand* %mrv, i32 0, i32 1 ; <double*> [#uses=1]
- %mrv.ld6 = load double* %mrv.gep5 ; <double> [#uses=1]
- ret double %mrv.ld4, double %mrv.ld6
-}
-
-define %struct.Demand @Compute_Leaf1(%struct.leaf* %l, double %pi_R, double %pi_I) nounwind {
-entry:
- %mrv = alloca %struct.Demand ; <%struct.Demand*> [#uses=4]
- %tmp10 = load double* @P, align 8 ; <double> [#uses=1]
- %tmp11 = fcmp olt double %tmp10, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %tmp11, label %bb, label %bb13
-bb: ; preds = %entry
- %mrv.gep = getelementptr %struct.Demand* %mrv, i32 0, i32 0 ; <double*> [#uses=1]
- %mrv.ld = load double* %mrv.gep ; <double> [#uses=1]
- %mrv.gep1 = getelementptr %struct.Demand* %mrv, i32 0, i32 1 ; <double*> [#uses=1]
- %mrv.ld2 = load double* %mrv.gep1 ; <double> [#uses=1]
- ret double %mrv.ld, double %mrv.ld2
-bb13: ; preds = %entry
- %mrv.gep3 = getelementptr %struct.Demand* %mrv, i32 0, i32 0 ; <double*> [#uses=1]
- %mrv.ld4 = load double* %mrv.gep3 ; <double> [#uses=1]
- %mrv.gep5 = getelementptr %struct.Demand* %mrv, i32 0, i32 1 ; <double*> [#uses=1]
- %mrv.ld6 = load double* %mrv.gep5 ; <double> [#uses=1]
- ret double %mrv.ld4, double %mrv.ld6
-}
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index 961f678..462c29a 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -4,7 +4,7 @@
define internal void @foo(i32* %p, i32* %q) {
%pp = bitcast i32* %p to i8*
%qq = bitcast i32* %q to i8*
- tail call void @llvm.memcpy.i32(i8* %pp, i8* %qq, i32 4, i32 1)
+ tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %pp, i8* %qq, i32 4, i32 1, i1 false)
ret void
}
@@ -24,12 +24,14 @@ invcont:
lpad:
%eh_ptr = call i8* @llvm.eh.exception()
- %eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
+ %eh_select = call i32 (i8*, i8*, ...)* @llvm.eh.selector(i8* %eh_ptr, i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*), i8* null)
unreachable
}
-declare i8* @llvm.eh.exception() nounwind
+declare i8* @llvm.eh.exception() nounwind readonly
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
+declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind
declare i32 @__gxx_personality_v0(...)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
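
Two incidental updates ride along in this test: the per-return-type selector intrinsic llvm.eh.selector.i32 is replaced by the single llvm.eh.selector, which always returns i32, and llvm.eh.exception picks up readonly. The pre-landingpad declarations pair up as:

    declare i8* @llvm.eh.exception() nounwind readonly
    declare i32 @llvm.eh.selector(i8*, i8*, ...) nounwind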
diff --git a/test/Transforms/InstCombine/2006-11-03-Memmove64.ll b/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
deleted file mode 100644
index 35bb45e..0000000
--- a/test/Transforms/InstCombine/2006-11-03-Memmove64.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: not grep memmove.i32
-; Instcombine was trying to turn this into a memmove.i32
-
-target datalayout = "e-p:64:64"
-target triple = "alphaev67-unknown-linux-gnu"
-@str10 = internal constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-
-define void @do_join(i8* %b) {
-entry:
- call void @llvm.memmove.i64( i8* %b, i8* getelementptr ([1 x i8]* @str10, i32 0, i64 0), i64 1, i32 1 )
- ret void
-}
-
-declare void @llvm.memmove.i64(i8*, i8*, i64, i32)
-
diff --git a/test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll b/test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll
deleted file mode 100644
index b59d3c8..0000000
--- a/test/Transforms/InstCombine/2007-04-04-BadFoldBitcastIntoMalloc.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; In the presence of a negative offset (the -8 below), a fold of a bitcast into
-; a malloc messes up the element count, causing an extra 4GB to be allocated on
-; 64-bit targets.
-;
-; RUN: opt < %s -instcombine -S | not grep {= add }
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "x86_64-unknown-freebsd6.2"
-
-define i1 @test(i32 %tmp141, double** %tmp145)
-{
- %tmp133 = add i32 %tmp141, 1
- %tmp134 = shl i32 %tmp133, 3
- %tmp135 = add i32 %tmp134, -8
- %tmp136 = malloc i8, i32 %tmp135
- %tmp137 = bitcast i8* %tmp136 to double*
- store double* %tmp137, double** %tmp145
- ret i1 false
-}
diff --git a/test/Transforms/InstCombine/2007-05-04-Crash.ll b/test/Transforms/InstCombine/2007-05-04-Crash.ll
deleted file mode 100644
index 9f50d8a..0000000
--- a/test/Transforms/InstCombine/2007-05-04-Crash.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -instcombine -disable-output
-; PR1384
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-apple-darwin8"
- %struct.CFRuntimeBase = type { i32, [4 x i8] }
- %struct.CGColor = type opaque
- %struct.CGColorSpace = type { %struct.CFRuntimeBase, i8, i8, i8, i32, i32, i32, %struct.CGColor*, float*, %struct.CGMD5Signature, %struct.CGMD5Signature*, [0 x %struct.CGColorSpaceDescriptor] }
- %struct.CGColorSpaceCalibratedRGBData = type { [3 x float], [3 x float], [3 x float], [9 x float] }
- %struct.CGColorSpaceDescriptor = type { %struct.CGColorSpaceCalibratedRGBData }
- %struct.CGColorSpaceLabData = type { [3 x float], [3 x float], [4 x float] }
- %struct.CGMD5Signature = type { [16 x i8], i8 }
-
-declare fastcc %struct.CGColorSpace* @CGColorSpaceCreate(i32, i32)
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define %struct.CGColorSpace* @CGColorSpaceCreateLab(float* %whitePoint, float* %blackPoint, float* %range) {
-entry:
- %tmp17 = call fastcc %struct.CGColorSpace* @CGColorSpaceCreate( i32 5, i32 3 ) ; <%struct.CGColorSpace*> [#uses=2]
- %tmp28 = getelementptr %struct.CGColorSpace* %tmp17, i32 0, i32 11 ; <[0 x %struct.CGColorSpaceDescriptor]*> [#uses=1]
- %tmp29 = getelementptr [0 x %struct.CGColorSpaceDescriptor]* %tmp28, i32 0, i32 0 ; <%struct.CGColorSpaceDescriptor*> [#uses=1]
- %tmp30 = getelementptr %struct.CGColorSpaceDescriptor* %tmp29, i32 0, i32 0 ; <%struct.CGColorSpaceCalibratedRGBData*> [#uses=1]
- %tmp3031 = bitcast %struct.CGColorSpaceCalibratedRGBData* %tmp30 to %struct.CGColorSpaceLabData* ; <%struct.CGColorSpaceLabData*> [#uses=1]
- %tmp45 = getelementptr %struct.CGColorSpaceLabData* %tmp3031, i32 0, i32 2 ; <[4 x float]*> [#uses=1]
- %tmp46 = getelementptr [4 x float]* %tmp45, i32 0, i32 0 ; <float*> [#uses=1]
- %tmp4648 = bitcast float* %tmp46 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp4648, i8* null, i32 16, i32 4 )
- ret %struct.CGColorSpace* %tmp17
-}
diff --git a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
index 40818d4..15988b6 100644
--- a/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
+++ b/test/Transforms/InstCombine/2007-05-18-CastFoldBug.ll
@@ -3,7 +3,7 @@
define void @blah(i16* %tmp10) {
entry:
- call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend_stret to void (i16* sret )*)( i16* %tmp10 sret )
+ call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend_stret to void (i16*)*)( i16* sret %tmp10 )
ret void
}
diff --git a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
index d8f3d97..6190aa9 100644
--- a/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
+++ b/test/Transforms/InstCombine/2007-09-11-Trampoline.ll
@@ -15,7 +15,7 @@ entry:
%tmp3 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 %n, i32* %tmp3, align 8
%FRAME.06 = bitcast %struct.FRAME.nest* %FRAME.0 to i8* ; <i8*> [#uses=1]
- %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest* nest , i32)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
+ %tramp = call i8* @llvm.init.trampoline( i8* %TRAMP.216.sub, i8* bitcast (i32 (%struct.FRAME.nest* , i32)* @f to i8*), i8* %FRAME.06 ) ; <i8*> [#uses=1]
%tmp7 = getelementptr %struct.FRAME.nest* %FRAME.0, i32 0, i32 1 ; <i32 (i32)**> [#uses=1]
%tmp89 = bitcast i8* %tramp to i32 (i32)* ; <i32 (i32)*> [#uses=2]
store i32 (i32)* %tmp89, i32 (i32)** %tmp7, align 8
diff --git a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
index 710aff2..fe935f9 100644
--- a/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
+++ b/test/Transforms/InstCombine/2007-10-10-EliminateMemCpy.ll
@@ -6,16 +6,15 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
define void @foo(i8* %P) {
entry:
- %P_addr = alloca i8* ; <i8**> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i8* %P, i8** %P_addr
- %tmp = load i8** %P_addr, align 4 ; <i8*> [#uses=1]
- %tmp1 = getelementptr [4 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp, i8* %tmp1, i32 4, i32 1 )
- br label %return
+ %P_addr = alloca i8*
+ store i8* %P, i8** %P_addr
+ %tmp = load i8** %P_addr, align 4
+ %tmp1 = getelementptr [4 x i8]* @.str, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* %tmp1, i32 4, i32 1, i1 false)
+ br label %return
-return: ; preds = %entry
- ret void
+return: ; preds = %entry
+ ret void
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll b/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
index e1549a0..e6c9bcd 100644
--- a/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
+++ b/test/Transforms/InstCombine/2007-11-07-OpaqueAlignCrash.ll
@@ -5,8 +5,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
target triple = "i686-pc-linux-gnu"
%opaque_t = type opaque
-
-%op_ts = type {opaque, i32}
+%opaque2 = type opaque
+%op_ts = type {%opaque2, i32}
@g = external global %opaque_t
@h = external global %op_ts
diff --git a/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll b/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
index 24394c6..e3192a9 100644
--- a/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
+++ b/test/Transforms/InstCombine/2007-11-25-CompatibleAttributes.ll
@@ -5,7 +5,7 @@
define i32 @main(i32 %argc, i8** %argv) {
entry:
- %tmp32 = tail call i32 (i8* noalias , ...) nounwind * bitcast (i32 (i8*, ...) nounwind * @printf to i32 (i8* noalias , ...) nounwind *)( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0) noalias , i32 0 ) nounwind ; <i32> [#uses=0]
+ %tmp32 = tail call i32 (i8* , ...) * bitcast (i32 (i8*, ...) * @printf to i32 (i8* , ...) *)( i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0) , i32 0 ) nounwind ; <i32> [#uses=0]
ret i32 undef
}
diff --git a/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll b/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
index 5f4fa47..23b6067 100644
--- a/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
+++ b/test/Transforms/InstCombine/2008-01-06-BitCastAttributes.ll
@@ -5,7 +5,7 @@ define void @a() {
ret void
}
-define i32 @b(i32* inreg %x) signext {
+define signext i32 @b(i32* inreg %x) {
ret i32 0
}
diff --git a/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll b/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
index 7b3281f..510a68c 100644
--- a/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
+++ b/test/Transforms/InstCombine/2008-01-13-NoBitCastAttributes.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -instcombine -S | grep bitcast | count 2
-define i32 @b(i32* inreg %x) signext {
+define signext i32 @b(i32* inreg %x) {
ret i32 0
}
diff --git a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index b29d8d2..a51c47d 100644
--- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -4,275 +4,276 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
- %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" = type { i32* }
- %"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
- %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
- %"struct.std::bidirectional_iterator_tag" = type <{ i8 }>
- %"struct.std::forward_iterator_tag" = type <{ i8 }>
- %"struct.std::input_iterator_tag" = type <{ i8 }>
- %"struct.std::random_access_iterator_tag" = type <{ i8 }>
- %"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
+
+%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" = type { i32* }
+%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
+%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+%"struct.std::bidirectional_iterator_tag" = type <{ i8 }>
+%"struct.std::forward_iterator_tag" = type <{ i8 }>
+%"struct.std::input_iterator_tag" = type <{ i8 }>
+%"struct.std::random_access_iterator_tag" = type <{ i8 }>
+%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
define i32* @_Z3fooRSt6vectorIiSaIiEE(%"struct.std::vector<int,std::allocator<int> >"* %X) {
entry:
- %0 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" ; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
- %__first_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" ; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=31]
- %__last_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" ; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=4]
- %unnamed_arg.i = alloca %"struct.std::bidirectional_iterator_tag", align 8 ; <%"struct.std::bidirectional_iterator_tag"*> [#uses=1]
- %1 = alloca %"struct.std::bidirectional_iterator_tag" ; <%"struct.std::bidirectional_iterator_tag"*> [#uses=1]
- %__first_addr.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" ; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
- %2 = alloca %"struct.std::bidirectional_iterator_tag" ; <%"struct.std::bidirectional_iterator_tag"*> [#uses=2]
- %3 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >" ; <%"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"*> [#uses=2]
- %4 = alloca i32 ; <i32*> [#uses=8]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i32 42, i32* %4, align 4
- %5 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0 ; <%"struct.std::_Vector_base<int,std::allocator<int> >"*> [#uses=1]
- %6 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %5, i32 0, i32 0 ; <%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"*> [#uses=1]
- %7 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %6, i32 0, i32 1 ; <i32**> [#uses=1]
- %8 = load i32** %7, align 4 ; <i32*> [#uses=1]
- %9 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %8, i32** %9, align 4
- %10 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0 ; <i32**> [#uses=1]
- %11 = load i32** %10, align 4 ; <i32*> [#uses=1]
- %tmp2.i = ptrtoint i32* %11 to i32 ; <i32> [#uses=1]
- %tmp1.i = inttoptr i32 %tmp2.i to i32* ; <i32*> [#uses=1]
- %tmp3 = ptrtoint i32* %tmp1.i to i32 ; <i32> [#uses=1]
- %tmp2 = inttoptr i32 %tmp3 to i32* ; <i32*> [#uses=1]
- %12 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0 ; <%"struct.std::_Vector_base<int,std::allocator<int> >"*> [#uses=1]
- %13 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %12, i32 0, i32 0 ; <%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"*> [#uses=1]
- %14 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %13, i32 0, i32 0 ; <i32**> [#uses=1]
- %15 = load i32** %14, align 4 ; <i32*> [#uses=1]
- %16 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %15, i32** %16, align 4
- %17 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0 ; <i32**> [#uses=1]
- %18 = load i32** %17, align 4 ; <i32*> [#uses=1]
- %tmp2.i17 = ptrtoint i32* %18 to i32 ; <i32> [#uses=1]
- %tmp1.i18 = inttoptr i32 %tmp2.i17 to i32* ; <i32*> [#uses=1]
- %tmp8 = ptrtoint i32* %tmp1.i18 to i32 ; <i32> [#uses=1]
- %tmp6 = inttoptr i32 %tmp8 to i32* ; <i32*> [#uses=1]
- %19 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %tmp6, i32** %19
- %20 = getelementptr %"struct.std::bidirectional_iterator_tag"* %1, i32 0, i32 0 ; <i8*> [#uses=1]
- %21 = load i8* %20, align 1 ; <i8> [#uses=1]
- %22 = or i8 %21, 0 ; <i8> [#uses=1]
- %23 = or i8 %22, 0 ; <i8> [#uses=1]
- %24 = or i8 %23, 0 ; <i8> [#uses=0]
- %25 = getelementptr %"struct.std::bidirectional_iterator_tag"* %2, i32 0, i32 0 ; <i8*> [#uses=1]
- store i8 0, i8* %25, align 1
- %elt.i = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %val.i = load i32** %elt.i ; <i32*> [#uses=1]
- %tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8* ; <i8*> [#uses=1]
- %tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1)
- %26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %val.i, i32** %26
- %27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %tmp2, i32** %27
- %28 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %29 = load i32** %28, align 4 ; <i32*> [#uses=1]
- %30 = ptrtoint i32* %29 to i32 ; <i32> [#uses=1]
- %31 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %32 = load i32** %31, align 4 ; <i32*> [#uses=1]
- %33 = ptrtoint i32* %32 to i32 ; <i32> [#uses=1]
- %34 = sub i32 %30, %33 ; <i32> [#uses=1]
- %35 = ashr i32 %34, 2 ; <i32> [#uses=1]
- %36 = ashr i32 %35, 2 ; <i32> [#uses=1]
- br label %bb12.i.i
+ %0 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %__first_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %__last_addr.i.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %unnamed_arg.i = alloca %"struct.std::bidirectional_iterator_tag", align 8
+ %1 = alloca %"struct.std::bidirectional_iterator_tag"
+ %__first_addr.i = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %2 = alloca %"struct.std::bidirectional_iterator_tag"
+ %3 = alloca %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"
+ %4 = alloca i32
+ %"alloca point" = bitcast i32 0 to i32
+ store i32 42, i32* %4, align 4
+ %5 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
+ %6 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %5, i32 0, i32 0
+ %7 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %6, i32 0, i32 1
+ %8 = load i32** %7, align 4
+ %9 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
+ store i32* %8, i32** %9, align 4
+ %10 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %3, i32 0, i32 0
+ %11 = load i32** %10, align 4
+ %tmp2.i = ptrtoint i32* %11 to i32
+ %tmp1.i = inttoptr i32 %tmp2.i to i32*
+ %tmp3 = ptrtoint i32* %tmp1.i to i32
+ %tmp2 = inttoptr i32 %tmp3 to i32*
+ %12 = getelementptr %"struct.std::vector<int,std::allocator<int> >"* %X, i32 0, i32 0
+ %13 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >"* %12, i32 0, i32 0
+ %14 = getelementptr %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl"* %13, i32 0, i32 0
+ %15 = load i32** %14, align 4
+ %16 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
+ store i32* %15, i32** %16, align 4
+ %17 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %0, i32 0, i32 0
+ %18 = load i32** %17, align 4
+ %tmp2.i17 = ptrtoint i32* %18 to i32
+ %tmp1.i18 = inttoptr i32 %tmp2.i17 to i32*
+ %tmp8 = ptrtoint i32* %tmp1.i18 to i32
+ %tmp6 = inttoptr i32 %tmp8 to i32*
+ %19 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
+ store i32* %tmp6, i32** %19
+ %20 = getelementptr %"struct.std::bidirectional_iterator_tag"* %1, i32 0, i32 0
+ %21 = load i8* %20, align 1
+ %22 = or i8 %21, 0
+ %23 = or i8 %22, 0
+ %24 = or i8 %23, 0
+ %25 = getelementptr %"struct.std::bidirectional_iterator_tag"* %2, i32 0, i32 0
+ store i8 0, i8* %25, align 1
+ %elt.i = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i, i32 0, i32 0
+ %val.i = load i32** %elt.i
+ %tmp.i = bitcast %"struct.std::bidirectional_iterator_tag"* %unnamed_arg.i to i8*
+ %tmp9.i = bitcast %"struct.std::bidirectional_iterator_tag"* %2 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp.i, i8* %tmp9.i, i64 1, i32 1, i1 false)
+ %26 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %val.i, i32** %26
+ %27 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+ store i32* %tmp2, i32** %27
+ %28 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+ %29 = load i32** %28, align 4
+ %30 = ptrtoint i32* %29 to i32
+ %31 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %32 = load i32** %31, align 4
+ %33 = ptrtoint i32* %32 to i32
+ %34 = sub i32 %30, %33
+ %35 = ashr i32 %34, 2
+ %36 = ashr i32 %35, 2
+ br label %bb12.i.i
-bb.i.i: ; preds = %bb12.i.i
- %37 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %38 = load i32** %37, align 4 ; <i32*> [#uses=1]
- %39 = load i32* %38, align 4 ; <i32> [#uses=1]
- %40 = load i32* %4, align 4 ; <i32> [#uses=1]
- %41 = icmp eq i32 %39, %40 ; <i1> [#uses=1]
- %42 = zext i1 %41 to i8 ; <i8> [#uses=1]
- %toBool.i.i = icmp ne i8 %42, 0 ; <i1> [#uses=1]
- br i1 %toBool.i.i, label %bb1.i.i, label %bb2.i.i
+bb.i.i: ; preds = %bb12.i.i
+ %37 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %38 = load i32** %37, align 4
+ %39 = load i32* %38, align 4
+ %40 = load i32* %4, align 4
+ %41 = icmp eq i32 %39, %40
+ %42 = zext i1 %41 to i8
+ %toBool.i.i = icmp ne i8 %42, 0
+ br i1 %toBool.i.i, label %bb1.i.i, label %bb2.i.i
-bb1.i.i: ; preds = %bb.i.i
- %43 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %44 = load i32** %43, align 4 ; <i32*> [#uses=1]
- br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb1.i.i: ; preds = %bb.i.i
+ %43 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %44 = load i32** %43, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-bb2.i.i: ; preds = %bb.i.i
- %45 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %46 = load i32** %45, align 4 ; <i32*> [#uses=1]
- %47 = getelementptr i32* %46, i64 1 ; <i32*> [#uses=1]
- %48 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %47, i32** %48, align 4
- %49 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %50 = load i32** %49, align 4 ; <i32*> [#uses=1]
- %51 = load i32* %50, align 4 ; <i32> [#uses=1]
- %52 = load i32* %4, align 4 ; <i32> [#uses=1]
- %53 = icmp eq i32 %51, %52 ; <i1> [#uses=1]
- %54 = zext i1 %53 to i8 ; <i8> [#uses=1]
- %toBool3.i.i = icmp ne i8 %54, 0 ; <i1> [#uses=1]
- br i1 %toBool3.i.i, label %bb4.i.i, label %bb5.i.i
+bb2.i.i: ; preds = %bb.i.i
+ %45 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %46 = load i32** %45, align 4
+ %47 = getelementptr i32* %46, i64 1
+ %48 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %47, i32** %48, align 4
+ %49 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %50 = load i32** %49, align 4
+ %51 = load i32* %50, align 4
+ %52 = load i32* %4, align 4
+ %53 = icmp eq i32 %51, %52
+ %54 = zext i1 %53 to i8
+ %toBool3.i.i = icmp ne i8 %54, 0
+ br i1 %toBool3.i.i, label %bb4.i.i, label %bb5.i.i
-bb4.i.i: ; preds = %bb2.i.i
- %55 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %56 = load i32** %55, align 4 ; <i32*> [#uses=1]
- br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb4.i.i: ; preds = %bb2.i.i
+ %55 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %56 = load i32** %55, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-bb5.i.i: ; preds = %bb2.i.i
- %57 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %58 = load i32** %57, align 4 ; <i32*> [#uses=1]
- %59 = getelementptr i32* %58, i64 1 ; <i32*> [#uses=1]
- %60 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %59, i32** %60, align 4
- %61 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %62 = load i32** %61, align 4 ; <i32*> [#uses=1]
- %63 = load i32* %62, align 4 ; <i32> [#uses=1]
- %64 = load i32* %4, align 4 ; <i32> [#uses=1]
- %65 = icmp eq i32 %63, %64 ; <i1> [#uses=1]
- %66 = zext i1 %65 to i8 ; <i8> [#uses=1]
- %toBool6.i.i = icmp ne i8 %66, 0 ; <i1> [#uses=1]
- br i1 %toBool6.i.i, label %bb7.i.i, label %bb8.i.i
+bb5.i.i: ; preds = %bb2.i.i
+ %57 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %58 = load i32** %57, align 4
+ %59 = getelementptr i32* %58, i64 1
+ %60 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %59, i32** %60, align 4
+ %61 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %62 = load i32** %61, align 4
+ %63 = load i32* %62, align 4
+ %64 = load i32* %4, align 4
+ %65 = icmp eq i32 %63, %64
+ %66 = zext i1 %65 to i8
+ %toBool6.i.i = icmp ne i8 %66, 0
+ br i1 %toBool6.i.i, label %bb7.i.i, label %bb8.i.i
-bb7.i.i: ; preds = %bb5.i.i
- %67 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %68 = load i32** %67, align 4 ; <i32*> [#uses=1]
- br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb7.i.i: ; preds = %bb5.i.i
+ %67 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %68 = load i32** %67, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-bb8.i.i: ; preds = %bb5.i.i
- %69 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %70 = load i32** %69, align 4 ; <i32*> [#uses=1]
- %71 = getelementptr i32* %70, i64 1 ; <i32*> [#uses=1]
- %72 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %71, i32** %72, align 4
- %73 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %74 = load i32** %73, align 4 ; <i32*> [#uses=1]
- %75 = load i32* %74, align 4 ; <i32> [#uses=1]
- %76 = load i32* %4, align 4 ; <i32> [#uses=1]
- %77 = icmp eq i32 %75, %76 ; <i1> [#uses=1]
- %78 = zext i1 %77 to i8 ; <i8> [#uses=1]
- %toBool9.i.i = icmp ne i8 %78, 0 ; <i1> [#uses=1]
- br i1 %toBool9.i.i, label %bb10.i.i, label %bb11.i.i
+bb8.i.i: ; preds = %bb5.i.i
+ %69 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %70 = load i32** %69, align 4
+ %71 = getelementptr i32* %70, i64 1
+ %72 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %71, i32** %72, align 4
+ %73 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %74 = load i32** %73, align 4
+ %75 = load i32* %74, align 4
+ %76 = load i32* %4, align 4
+ %77 = icmp eq i32 %75, %76
+ %78 = zext i1 %77 to i8
+ %toBool9.i.i = icmp ne i8 %78, 0
+ br i1 %toBool9.i.i, label %bb10.i.i, label %bb11.i.i
-bb10.i.i: ; preds = %bb8.i.i
- %79 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %80 = load i32** %79, align 4 ; <i32*> [#uses=1]
- br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb10.i.i: ; preds = %bb8.i.i
+ %79 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %80 = load i32** %79, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-bb11.i.i: ; preds = %bb8.i.i
- %81 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %82 = load i32** %81, align 4 ; <i32*> [#uses=1]
- %83 = getelementptr i32* %82, i64 1 ; <i32*> [#uses=1]
- %84 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %83, i32** %84, align 4
- %85 = sub i32 %__trip_count.0.i.i, 1 ; <i32> [#uses=1]
- br label %bb12.i.i
+bb11.i.i: ; preds = %bb8.i.i
+ %81 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %82 = load i32** %81, align 4
+ %83 = getelementptr i32* %82, i64 1
+ %84 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %83, i32** %84, align 4
+ %85 = sub i32 %__trip_count.0.i.i, 1
+ br label %bb12.i.i
-bb12.i.i: ; preds = %bb11.i.i, %entry
- %__trip_count.0.i.i = phi i32 [ %36, %entry ], [ %85, %bb11.i.i ] ; <i32> [#uses=2]
- %86 = icmp sgt i32 %__trip_count.0.i.i, 0 ; <i1> [#uses=1]
- br i1 %86, label %bb.i.i, label %bb13.i.i
+bb12.i.i: ; preds = %bb11.i.i, %entry
+ %__trip_count.0.i.i = phi i32 [ %36, %entry ], [ %85, %bb11.i.i ]
+ %86 = icmp sgt i32 %__trip_count.0.i.i, 0
+ br i1 %86, label %bb.i.i, label %bb13.i.i
-bb13.i.i: ; preds = %bb12.i.i
- %87 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %88 = load i32** %87, align 4 ; <i32*> [#uses=1]
- %89 = ptrtoint i32* %88 to i32 ; <i32> [#uses=1]
- %90 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %91 = load i32** %90, align 4 ; <i32*> [#uses=1]
- %92 = ptrtoint i32* %91 to i32 ; <i32> [#uses=1]
- %93 = sub i32 %89, %92 ; <i32> [#uses=1]
- %94 = ashr i32 %93, 2 ; <i32> [#uses=1]
- switch i32 %94, label %bb26.i.i [
- i32 1, label %bb22.i.i
- i32 2, label %bb18.i.i
- i32 3, label %bb14.i.i
- ]
+bb13.i.i: ; preds = %bb12.i.i
+ %87 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+ %88 = load i32** %87, align 4
+ %89 = ptrtoint i32* %88 to i32
+ %90 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %91 = load i32** %90, align 4
+ %92 = ptrtoint i32* %91 to i32
+ %93 = sub i32 %89, %92
+ %94 = ashr i32 %93, 2
+ switch i32 %94, label %bb26.i.i [
+ i32 1, label %bb22.i.i
+ i32 2, label %bb18.i.i
+ i32 3, label %bb14.i.i
+ ]
-bb14.i.i: ; preds = %bb13.i.i
- %95 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %96 = load i32** %95, align 4 ; <i32*> [#uses=1]
- %97 = load i32* %96, align 4 ; <i32> [#uses=1]
- %98 = load i32* %4, align 4 ; <i32> [#uses=1]
- %99 = icmp eq i32 %97, %98 ; <i1> [#uses=1]
- %100 = zext i1 %99 to i8 ; <i8> [#uses=1]
- %toBool15.i.i = icmp ne i8 %100, 0 ; <i1> [#uses=1]
- br i1 %toBool15.i.i, label %bb16.i.i, label %bb17.i.i
+bb14.i.i: ; preds = %bb13.i.i
+ %95 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %96 = load i32** %95, align 4
+ %97 = load i32* %96, align 4
+ %98 = load i32* %4, align 4
+ %99 = icmp eq i32 %97, %98
+ %100 = zext i1 %99 to i8
+ %toBool15.i.i = icmp ne i8 %100, 0
+ br i1 %toBool15.i.i, label %bb16.i.i, label %bb17.i.i
-bb16.i.i: ; preds = %bb14.i.i
- %101 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %102 = load i32** %101, align 4 ; <i32*> [#uses=1]
- br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb16.i.i: ; preds = %bb14.i.i
+ %101 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %102 = load i32** %101, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-bb17.i.i: ; preds = %bb14.i.i
- %103 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %104 = load i32** %103, align 4 ; <i32*> [#uses=1]
- %105 = getelementptr i32* %104, i64 1 ; <i32*> [#uses=1]
- %106 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %105, i32** %106, align 4
- br label %bb18.i.i
+bb17.i.i: ; preds = %bb14.i.i
+ %103 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %104 = load i32** %103, align 4
+ %105 = getelementptr i32* %104, i64 1
+ %106 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %105, i32** %106, align 4
+ br label %bb18.i.i
-bb18.i.i: ; preds = %bb17.i.i, %bb13.i.i
- %107 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %108 = load i32** %107, align 4 ; <i32*> [#uses=1]
- %109 = load i32* %108, align 4 ; <i32> [#uses=1]
- %110 = load i32* %4, align 4 ; <i32> [#uses=1]
- %111 = icmp eq i32 %109, %110 ; <i1> [#uses=1]
- %112 = zext i1 %111 to i8 ; <i8> [#uses=1]
- %toBool19.i.i = icmp ne i8 %112, 0 ; <i1> [#uses=1]
- br i1 %toBool19.i.i, label %bb20.i.i, label %bb21.i.i
+bb18.i.i: ; preds = %bb17.i.i, %bb13.i.i
+ %107 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %108 = load i32** %107, align 4
+ %109 = load i32* %108, align 4
+ %110 = load i32* %4, align 4
+ %111 = icmp eq i32 %109, %110
+ %112 = zext i1 %111 to i8
+ %toBool19.i.i = icmp ne i8 %112, 0
+ br i1 %toBool19.i.i, label %bb20.i.i, label %bb21.i.i
-bb20.i.i: ; preds = %bb18.i.i
- %113 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %114 = load i32** %113, align 4 ; <i32*> [#uses=1]
- br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb20.i.i: ; preds = %bb18.i.i
+ %113 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %114 = load i32** %113, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-bb21.i.i: ; preds = %bb18.i.i
- %115 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %116 = load i32** %115, align 4 ; <i32*> [#uses=1]
- %117 = getelementptr i32* %116, i64 1 ; <i32*> [#uses=1]
- %118 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %117, i32** %118, align 4
- br label %bb22.i.i
+bb21.i.i: ; preds = %bb18.i.i
+ %115 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %116 = load i32** %115, align 4
+ %117 = getelementptr i32* %116, i64 1
+ %118 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %117, i32** %118, align 4
+ br label %bb22.i.i
-bb22.i.i: ; preds = %bb21.i.i, %bb13.i.i
- %119 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %120 = load i32** %119, align 4 ; <i32*> [#uses=1]
- %121 = load i32* %120, align 4 ; <i32> [#uses=1]
- %122 = load i32* %4, align 4 ; <i32> [#uses=1]
- %123 = icmp eq i32 %121, %122 ; <i1> [#uses=1]
- %124 = zext i1 %123 to i8 ; <i8> [#uses=1]
- %toBool23.i.i = icmp ne i8 %124, 0 ; <i1> [#uses=1]
- br i1 %toBool23.i.i, label %bb24.i.i, label %bb25.i.i
+bb22.i.i: ; preds = %bb21.i.i, %bb13.i.i
+ %119 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %120 = load i32** %119, align 4
+ %121 = load i32* %120, align 4
+ %122 = load i32* %4, align 4
+ %123 = icmp eq i32 %121, %122
+ %124 = zext i1 %123 to i8
+ %toBool23.i.i = icmp ne i8 %124, 0
+ br i1 %toBool23.i.i, label %bb24.i.i, label %bb25.i.i
-bb24.i.i: ; preds = %bb22.i.i
- %125 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %126 = load i32** %125, align 4 ; <i32*> [#uses=1]
- br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb24.i.i: ; preds = %bb22.i.i
+ %125 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %126 = load i32** %125, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-bb25.i.i: ; preds = %bb22.i.i
- %127 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %128 = load i32** %127, align 4 ; <i32*> [#uses=1]
- %129 = getelementptr i32* %128, i64 1 ; <i32*> [#uses=1]
- %130 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- store i32* %129, i32** %130, align 4
- br label %bb26.i.i
+bb25.i.i: ; preds = %bb22.i.i
+ %127 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ %128 = load i32** %127, align 4
+ %129 = getelementptr i32* %128, i64 1
+ %130 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__first_addr.i.i, i32 0, i32 0
+ store i32* %129, i32** %130, align 4
+ br label %bb26.i.i
-bb26.i.i: ; preds = %bb25.i.i, %bb13.i.i
- %131 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0 ; <i32**> [#uses=1]
- %132 = load i32** %131, align 4 ; <i32*> [#uses=1]
- br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+bb26.i.i: ; preds = %bb25.i.i, %bb13.i.i
+ %131 = getelementptr %"struct.__gnu_cxx::__normal_iterator<int*,std::vector<int, std::allocator<int> > >"* %__last_addr.i.i, i32 0, i32 0
+ %132 = load i32** %131, align 4
+ br label %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
-_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit: ; preds = %bb26.i.i, %bb24.i.i, %bb20.i.i, %bb16.i.i, %bb10.i.i, %bb7.i.i, %bb4.i.i, %bb1.i.i
- %.0.0.i.i = phi i32* [ %132, %bb26.i.i ], [ %126, %bb24.i.i ], [ %114, %bb20.i.i ], [ %102, %bb16.i.i ], [ %80, %bb10.i.i ], [ %68, %bb7.i.i ], [ %56, %bb4.i.i ], [ %44, %bb1.i.i ] ; <i32*> [#uses=1]
- %tmp2.i.i = ptrtoint i32* %.0.0.i.i to i32 ; <i32> [#uses=1]
- %tmp1.i.i = inttoptr i32 %tmp2.i.i to i32* ; <i32*> [#uses=1]
- %tmp4.i = ptrtoint i32* %tmp1.i.i to i32 ; <i32> [#uses=1]
- %tmp3.i = inttoptr i32 %tmp4.i to i32* ; <i32*> [#uses=1]
- %tmp8.i = ptrtoint i32* %tmp3.i to i32 ; <i32> [#uses=1]
- %tmp6.i = inttoptr i32 %tmp8.i to i32* ; <i32*> [#uses=1]
- %tmp12 = ptrtoint i32* %tmp6.i to i32 ; <i32> [#uses=1]
- %tmp10 = inttoptr i32 %tmp12 to i32* ; <i32*> [#uses=1]
- %tmp16 = ptrtoint i32* %tmp10 to i32 ; <i32> [#uses=1]
- br label %return
+_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit: ; preds = %bb26.i.i, %bb24.i.i, %bb20.i.i, %bb16.i.i, %bb10.i.i, %bb7.i.i, %bb4.i.i, %bb1.i.i
+ %.0.0.i.i = phi i32* [ %132, %bb26.i.i ], [ %126, %bb24.i.i ], [ %114, %bb20.i.i ], [ %102, %bb16.i.i ], [ %80, %bb10.i.i ], [ %68, %bb7.i.i ], [ %56, %bb4.i.i ], [ %44, %bb1.i.i ]
+ %tmp2.i.i = ptrtoint i32* %.0.0.i.i to i32
+ %tmp1.i.i = inttoptr i32 %tmp2.i.i to i32*
+ %tmp4.i = ptrtoint i32* %tmp1.i.i to i32
+ %tmp3.i = inttoptr i32 %tmp4.i to i32*
+ %tmp8.i = ptrtoint i32* %tmp3.i to i32
+ %tmp6.i = inttoptr i32 %tmp8.i to i32*
+ %tmp12 = ptrtoint i32* %tmp6.i to i32
+ %tmp10 = inttoptr i32 %tmp12 to i32*
+ %tmp16 = ptrtoint i32* %tmp10 to i32
+ br label %return
-return: ; preds = %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
- %tmp14 = inttoptr i32 %tmp16 to i32* ; <i32*> [#uses=1]
- ret i32* %tmp14
+return: ; preds = %_ZSt4findIN9__gnu_cxx17__normal_iteratorIPiSt6vectorIiSaIiEEEEiET_S7_S7_RKT0_.exit
+ %tmp14 = inttoptr i32 %tmp16 to i32*
+ ret i32* %tmp14
}
-declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll b/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll
deleted file mode 100644
index 2f6034e..0000000
--- a/test/Transforms/InstCombine/2011-02-16-InsertelementHang.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; PR9218
-
-%vec2x2 = type { <2 x double>, <2 x double> }
-
-define %vec2x2 @split(double) nounwind alwaysinline {
-; CHECK: @split
-; CHECK: ret %vec2x2 undef
- %vba = insertelement <2 x double> undef, double %0, i32 2
- ret <2 x double> %vba, <2 x double> %vba
-}
diff --git a/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
new file mode 100644
index 0000000..2f72b73
--- /dev/null
+++ b/test/Transforms/InstCombine/2011-06-13-nsw-alloca.ll
@@ -0,0 +1,60 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+target triple = "i386-apple-darwin10.0.0"
+
+define void @fu1(i32 %parm) nounwind ssp {
+ %1 = alloca i32, align 4
+ %ptr = alloca double*, align 4
+ store i32 %parm, i32* %1, align 4
+ store double* null, double** %ptr, align 4
+ %2 = load i32* %1, align 4
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %4, label %10
+
+; <label>:4 ; preds = %0
+ %5 = load i32* %1, align 4
+ %6 = mul nsw i32 %5, 8
+; With "nsw", the alloca and its bitcast can be fused:
+ %7 = add nsw i32 %6, 2048
+; CHECK: alloca double*
+ %8 = alloca i8, i32 %7
+ %9 = bitcast i8* %8 to double*
+ store double* %9, double** %ptr, align 4
+ br label %10
+
+; <label>:10 ; preds = %4, %0
+ %11 = load double** %ptr, align 4
+ call void @bar(double* %11)
+; CHECK: ret
+ ret void
+}
+
+declare void @bar(double*)
+
+define void @fu2(i32 %parm) nounwind ssp {
+ %1 = alloca i32, align 4
+ %ptr = alloca double*, align 4
+ store i32 %parm, i32* %1, align 4
+ store double* null, double** %ptr, align 4
+ %2 = load i32* %1, align 4
+ %3 = icmp ne i32 %2, 0
+ br i1 %3, label %4, label %10
+
+; <label>:4 ; preds = %0
+ %5 = load i32* %1, align 4
+ %6 = mul nsw i32 %5, 8
+; Without "nsw", the alloca and its bitcast cannot be fused:
+ %7 = add i32 %6, 2048
+; CHECK: alloca i8
+ %8 = alloca i8, i32 %7
+; CHECK-NEXT: bitcast i8*
+ %9 = bitcast i8* %8 to double*
+ store double* %9, double** %ptr, align 4
+ br label %10
+
+; <label>:10 ; preds = %4, %0
+ %11 = load double** %ptr, align 4
+ call void @bar(double* %11)
+ ret void
+}
+
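
The new test rests on a size reassociation that is only valid without signed wrap. The allocation size is n*8 + 2048 = 8*(n + 256) bytes, so with nsw on both the mul and the add the raw i8 allocation can be retyped; a sketch of the reasoning, assuming this is the fold instcombine performs (names illustrative):

    %bytes = mul nsw i32 %n, 8
    %total = add nsw i32 %bytes, 2048
    %buf   = alloca i8, i32 %total        ; 8*n + 2048 bytes
    ; nsw guarantees neither step wrapped, so 8*n + 2048 == 8*(n + 256)
    ; exactly, and the buffer is equivalently n + 256 doubles:
    %count = add nsw i32 %n, 256
    %buf.d = alloca double, i32 %count

Without nsw (the second function), the intermediate values may have wrapped, so the factored form is not provably equal and the alloca must stay as raw i8 plus a bitcast.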
diff --git a/test/Transforms/InstCombine/bswap-fold.ll b/test/Transforms/InstCombine/bswap-fold.ll
index a6b30c0..442ce58 100644
--- a/test/Transforms/InstCombine/bswap-fold.ll
+++ b/test/Transforms/InstCombine/bswap-fold.ll
@@ -50,10 +50,6 @@ entry:
}
; PR5284
-declare i64 @llvm.bswap.i64(i64)
-declare i32 @llvm.bswap.i32(i32)
-declare i16 @llvm.bswap.i16(i16)
-
define i16 @test7(i32 %A) {
%B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
%C = trunc i32 %B to i16
diff --git a/test/Transforms/InstCombine/call2.ll b/test/Transforms/InstCombine/call2.ll
index 3a6bd67..1f45c7a 100644
--- a/test/Transforms/InstCombine/call2.ll
+++ b/test/Transforms/InstCombine/call2.ll
@@ -4,7 +4,6 @@
define i32 @bar() {
entry:
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
%tmp = call i32 (...)* bitcast (i32 (i8*)* @f to i32 (...)*)( double 3.000000e+00 ) ; <i32> [#uses=0]
br label %return
@@ -17,7 +16,6 @@ define i32 @f(i8* %p) {
entry:
%p_addr = alloca i8* ; <i8**> [#uses=1]
%retval = alloca i32, align 4 ; <i32*> [#uses=1]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i8* %p, i8** %p_addr
br label %return
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index bc5e365..f85636f 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -99,14 +99,6 @@ define void @test11(i32* %P) {
; CHECK: ret void
}
-define i32* @test12() {
- %p = malloc [4 x i8] ; <[4 x i8]*> [#uses=1]
- %c = bitcast [4 x i8]* %p to i32* ; <i32*> [#uses=1]
- ret i32* %c
-; CHECK: %malloccall = tail call i8* @malloc(i32 4)
-; CHECK: ret i32* %c
-}
-
define i8* @test13(i64 %A) {
%c = getelementptr [0 x i8]* bitcast ([32832 x i8]* @inbuf to [0 x i8]*), i64 0, i64 %A ; <i8*> [#uses=1]
ret i8* %c
@@ -265,22 +257,11 @@ define i1 @test31(i64 %A) {
%C = and i32 %B, 42 ; <i32> [#uses=1]
%D = icmp eq i32 %C, 10 ; <i1> [#uses=1]
ret i1 %D
-; CHECK: %C1 = and i64 %A, 42
-; CHECK: %D = icmp eq i64 %C1, 10
+; CHECK: %C = and i64 %A, 42
+; CHECK: %D = icmp eq i64 %C, 10
; CHECK: ret i1 %D
}
-define void @test32(double** %tmp) {
- %tmp8 = malloc [16 x i8] ; <[16 x i8]*> [#uses=1]
- %tmp8.upgrd.1 = bitcast [16 x i8]* %tmp8 to double* ; <double*> [#uses=1]
- store double* %tmp8.upgrd.1, double** %tmp
- ret void
-; CHECK: %malloccall = tail call i8* @malloc(i32 16)
-; CHECK: %tmp8.upgrd.1 = bitcast i8* %malloccall to double*
-; CHECK: store double* %tmp8.upgrd.1, double** %tmp
-; CHECK: ret void
-}
-
define i32 @test33(i32 %c1) {
%x = bitcast i32 %c1 to float ; <float> [#uses=1]
%y = bitcast float %x to i32 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 9e8547b..26c0e47 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -40,7 +40,7 @@ define i32* @test4({ i32 }* %I) {
%B = getelementptr { i32 }* %A, i64 0, i32 0
ret i32* %B
; CHECK: @test4
-; CHECK: getelementptr %intstruct* %I, i64 1, i32 0
+; CHECK: getelementptr { i32 }* %I, i64 1, i32 0
}
define void @test5(i8 %B) {
@@ -52,14 +52,6 @@ define void @test5(i8 %B) {
; CHECK: store i8 %B, i8* getelementptr inbounds ([10 x i8]* @Global, i64 0, i64 4)
}
-define i32* @test6() {
- %M = malloc [4 x i32]
- %A = getelementptr [4 x i32]* %M, i64 0, i64 0
- %B = getelementptr i32* %A, i64 2
- ret i32* %B
-; CHECK: @test6
-; CHECK: getelementptr i8* %malloccall, i64 8
-}
define i32* @test7(i32* %I, i64 %C, i64 %D) {
%A = getelementptr i32* %I, i64 %C
@@ -94,7 +86,7 @@ define i1 @test10({ i32, i32 }* %x, { i32, i32 }* %y) {
%tmp.4 = icmp eq i32* %tmp.1, %tmp.3
ret i1 %tmp.4
; CHECK: @test10
-; CHECK: icmp eq %pair* %x, %y
+; CHECK: icmp eq { i32, i32 }* %x, %y
}
define i1 @test11({ i32, i32 }* %X) {
@@ -102,7 +94,7 @@ define i1 @test11({ i32, i32 }* %X) {
%Q = icmp eq i32* %P, null
ret i1 %Q
; CHECK: @test11
-; CHECK: icmp eq %pair* %X, null
+; CHECK: icmp eq { i32, i32 }* %X, null
}
@@ -236,19 +228,6 @@ define i1 @test23() {
; CHECK: ret i1 false
}
-%"java/lang/Object" = type { %struct.llvm_java_object_base }
-%"java/lang/StringBuffer" = type { %"java/lang/Object", i32, { %"java/lang/Object", i32, [0 x i16] }*, i1 }
-%struct.llvm_java_object_base = type opaque
-
-define void @test24() {
-bc0:
- %tmp53 = getelementptr %"java/lang/StringBuffer"* null, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 0, i32* %tmp53
- ret void
-; CHECK: @test24
-; CHECK: store i32 0, i32* getelementptr (%"java/lang/StringBuffer"* null, i64 0, i32 1)
-}
-
define void @test25() {
entry:
%tmp = getelementptr { i64, i64, i64, i64 }* null, i32 0, i32 3 ; <i64*> [#uses=1]
@@ -477,3 +456,19 @@ define i32* @test38(i32* %I, i32 %n) {
; CHECK: = sext i32 %n to i64
; CHECK: %A = getelementptr i32* %I, i64 %
}
+
+; Test that we don't duplicate work when the second gep is a "bitcast".
+%pr10322_t = type { i8* }
+declare void @pr10322_f2(%pr10322_t*)
+declare void @pr10322_f3(i8**)
+define void @pr10322_f1(%pr10322_t* %foo) {
+entry:
+ %arrayidx8 = getelementptr inbounds %pr10322_t* %foo, i64 2
+ call void @pr10322_f2(%pr10322_t* %arrayidx8) nounwind
+ %tmp2 = getelementptr inbounds %pr10322_t* %arrayidx8, i64 0, i32 0
+ call void @pr10322_f3(i8** %tmp2) nounwind
+ ret void
+
+; CHECK: @pr10322_f1
+; CHECK: %tmp2 = getelementptr inbounds %pr10322_t* %arrayidx8, i64 0, i32 0
+}
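
In the PR10322 test, the second gep steps zero elements forward and then selects field 0, so it lands at the same address as %arrayidx8 and only changes the pointer type, which is exactly what a bitcast does. Merging it into the first gep would make both users recompute the element offset; a sketch of why the address is unchanged (illustrative):

    %p  = getelementptr inbounds %pr10322_t* %foo, i64 2       ; foo + 2*sizeof(%pr10322_t)
    %f0 = getelementptr inbounds %pr10322_t* %p, i64 0, i32 0  ; p + 0, field at offset 0 = p

Hence the CHECK line insists the second gep is left alone.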
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 1237ade..77ca62c 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -521,3 +521,41 @@ define i1 @test53(i32 %a, i32 %b) nounwind {
%z = icmp eq i32 %x, %y
ret i1 %z
}
+
+; CHECK: @test54
+; CHECK-NEXT: %and = and i8 %a, -64
+; CHECK-NEXT: icmp eq i8 %and, -128
+define i1 @test54(i8 %a) nounwind {
+ %ext = zext i8 %a to i32
+ %and = and i32 %ext, 192
+ %ret = icmp eq i32 %and, 128
+ ret i1 %ret
+}
+
+; CHECK: @test55
+; CHECK-NEXT: icmp eq i32 %a, -123
+define i1 @test55(i32 %a) {
+ %sub = sub i32 0, %a
+ %cmp = icmp eq i32 %sub, 123
+ ret i1 %cmp
+}
+
+; CHECK: @test56
+; CHECK-NEXT: icmp eq i32 %a, -113
+define i1 @test56(i32 %a) {
+ %sub = sub i32 10, %a
+ %cmp = icmp eq i32 %sub, 123
+ ret i1 %cmp
+}
+
+; PR10267 Don't make icmps more expensive when no other inst is subsumed.
+declare void @foo(i32)
+; CHECK: @test57
+; CHECK: %and = and i32 %a, -2
+; CHECK: %cmp = icmp ne i32 %and, 0
+define i1 @test57(i32 %a) {
+ %and = and i32 %a, -2
+ %cmp = icmp ne i32 %and, 0
+ call void @foo(i32 %and)
+ ret i1 %cmp
+}
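
The CHECK lines in tests 54 through 56 are plain two's-complement arithmetic. In test54 the mask 192 is 0xC0, which is -64 as a signed i8, and 128 is 0x80, i.e. -128, so the zext can be dropped and the compare narrowed to i8. In test56, 10 - a == 123 rearranges to a == 10 - 123 == -113. A sketch of that last fold (illustrative):

    %sub = sub i32 10, %a
    %cmp = icmp eq i32 %sub, 123      ; 10 - a == 123
    ; instcombine folds this to a compare against the constant 10 - 123:
    %cmp.folded = icmp eq i32 %a, -113

Test57 then checks the opposite direction: when the and has another user, rewriting the icmp would not eliminate any instruction, so it is left as is.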
diff --git a/test/Transforms/InstCombine/intrinsics.ll b/test/Transforms/InstCombine/intrinsics.ll
index 107f313..0d84ae4 100644
--- a/test/Transforms/InstCombine/intrinsics.ll
+++ b/test/Transforms/InstCombine/intrinsics.ll
@@ -213,5 +213,4 @@ define i32 @cttz_simplify1(i32 %x) nounwind readnone ssp {
; CHECK-NEXT: ret i32
}
-declare i32 @llvm.ctlz.i32(i32) nounwind readnone
diff --git a/test/Transforms/InstCombine/malloc-free-delete.ll b/test/Transforms/InstCombine/malloc-free-delete.ll
index 317786f..8455300 100644
--- a/test/Transforms/InstCombine/malloc-free-delete.ll
+++ b/test/Transforms/InstCombine/malloc-free-delete.ll
@@ -1,14 +1,14 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
; PR1201
define i32 @main(i32 %argc, i8** %argv) {
- %c_19 = alloca i8*
- %malloc_206 = malloc i8, i32 10
+ %c_19 = alloca i8*
+ %malloc_206 = tail call i8* @malloc(i32 mul (i32 ptrtoint (i8* getelementptr (i8* null, i32 1) to i32), i32 10))
+ store i8* %malloc_206, i8** %c_19
+ %tmp_207 = load i8** %c_19
+ tail call void @free(i8* %tmp_207)
+ ret i32 0
; CHECK-NOT: malloc
- store i8* %malloc_206, i8** %c_19
- %tmp_207 = load i8** %c_19
- free i8* %tmp_207
; CHECK-NOT: free
- ret i32 0
; CHECK: ret i32 0
}
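
The rewritten allocation above uses the standard constant-expression sizeof idiom: a getelementptr from a null i8* to element 1 advances by sizeof(i8), and ptrtoint of that address yields the element size as an integer, here 1, which multiplied by 10 gives the byte count. A sketch of the lowering shown in the hunk:

    ; old instruction form
    %p.old = malloc i8, i32 10
    ; lowered libcall: the size argument spells 10 * sizeof(i8) as a constexpr
    %p.new = tail call i8* @malloc(i32 mul (i32 ptrtoint (i8* getelementptr (i8* null, i32 1) to i32), i32 10))

With the store/load round trip seen through, the malloc/free pair is dead, which is what the CHECK-NOT lines assert.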
diff --git a/test/Transforms/InstCombine/malloc.ll b/test/Transforms/InstCombine/malloc.ll
deleted file mode 100644
index b6ebbea..0000000
--- a/test/Transforms/InstCombine/malloc.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; test that malloc's with a constant argument are promoted to array allocations
-; RUN: opt < %s -instcombine -S | grep getelementptr
-
-define i32* @test() {
- %X = malloc i32, i32 4
- ret i32* %X
-}
diff --git a/test/Transforms/InstCombine/malloc2.ll b/test/Transforms/InstCombine/malloc2.ll
deleted file mode 100644
index 8462dac..0000000
--- a/test/Transforms/InstCombine/malloc2.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; PR1313
-
-define i32 @test1(i32 %argc, i8* %argv, i8* %envp) {
- %tmp15.i.i.i23 = malloc [2564 x i32] ; <[2564 x i32]*> [#uses=1]
-; CHECK-NOT: call i8* @malloc
- %c = icmp eq [2564 x i32]* %tmp15.i.i.i23, null ; <i1>:0 [#uses=1]
- %retval = zext i1 %c to i32 ; <i32> [#uses=1]
- ret i32 %retval
-; CHECK: ret i32 0
-}
-
-define i32 @test2(i32 %argc, i8* %argv, i8* %envp) {
- %tmp15.i.i.i23 = malloc [2564 x i32] ; <[2564 x i32]*> [#uses=1]
-; CHECK-NOT: call i8* @malloc
- %X = bitcast [2564 x i32]* %tmp15.i.i.i23 to i32*
- %c = icmp ne i32* %X, null
- %retval = zext i1 %c to i32 ; <i32> [#uses=1]
- ret i32 %retval
-; CHECK: ret i32 1
-}
-
diff --git a/test/Transforms/InstCombine/malloc3.ll b/test/Transforms/InstCombine/malloc3.ll
deleted file mode 100644
index f1c0cae..0000000
--- a/test/Transforms/InstCombine/malloc3.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep load
-; PR1728
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
- %struct.foo = type { %struct.foo*, [10 x i32] }
-@.str = internal constant [21 x i8] c"tmp = %p, next = %p\0A\00" ; <[21 x i8]*> [#uses=1]
-
-define i32 @main() {
-entry:
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %tmp1 = malloc i8, i32 44 ; <i8*> [#uses=1]
- %tmp12 = bitcast i8* %tmp1 to %struct.foo* ; <%struct.foo*> [#uses=3]
- %tmp3 = malloc i8, i32 44 ; <i8*> [#uses=1]
- %tmp34 = bitcast i8* %tmp3 to %struct.foo* ; <%struct.foo*> [#uses=1]
- %tmp6 = getelementptr %struct.foo* %tmp12, i32 0, i32 0 ; <%struct.foo**> [#uses=1]
- store %struct.foo* %tmp34, %struct.foo** %tmp6, align 4
- %tmp8 = getelementptr %struct.foo* %tmp12, i32 0, i32 0 ; <%struct.foo**> [#uses=1]
- %tmp9 = load %struct.foo** %tmp8, align 4 ; <%struct.foo*> [#uses=1]
- %tmp10 = getelementptr [21 x i8]* @.str, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp13 = call i32 (i8*, ...)* @printf( i8* %tmp10, %struct.foo* %tmp12, %struct.foo* %tmp9 ) ; <i32> [#uses=0]
- ret i32 undef
-}
-
-declare i32 @printf(i8*, ...)
-
diff --git a/test/Transforms/InstCombine/memcpy-to-load.ll b/test/Transforms/InstCombine/memcpy-to-load.ll
index ebb8711..04aac98 100644
--- a/test/Transforms/InstCombine/memcpy-to-load.ll
+++ b/test/Transforms/InstCombine/memcpy-to-load.ll
@@ -4,11 +4,10 @@ target triple = "i686-apple-darwin8"
define void @foo(double* %X, double* %Y) {
entry:
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %tmp2 = bitcast double* %X to i8* ; <i8*> [#uses=1]
- %tmp13 = bitcast double* %Y to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp2, i8* %tmp13, i32 8, i32 1 )
- ret void
+ %tmp2 = bitcast double* %X to i8*
+ %tmp13 = bitcast double* %Y to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 1, i1 false)
+ ret void
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/memmove.ll b/test/Transforms/InstCombine/memmove.ll
index 1806cfc..4602c12 100644
--- a/test/Transforms/InstCombine/memmove.ll
+++ b/test/Transforms/InstCombine/memmove.ll
@@ -1,24 +1,20 @@
; This test makes sure that memmove instructions are properly eliminated.
;
-; RUN: opt < %s -instcombine -S | \
-; RUN: not grep {call void @llvm.memmove}
+; RUN: opt < %s -instcombine -S | not grep {call void @llvm.memmove}
@S = internal constant [33 x i8] c"panic: restorelist inconsistency\00" ; <[33 x i8]*> [#uses=1]
@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=1]
@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=1]
@hello_u = constant [8 x i8] c"hello_u\00" ; <[8 x i8]*> [#uses=1]
-
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32)
-
define void @test1(i8* %A, i8* %B, i32 %N) {
- call void @llvm.memmove.i32( i8* %A, i8* %B, i32 0, i32 1 )
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* %B, i32 0, i32 1, i1 false)
ret void
}
define void @test2(i8* %A, i32 %N) {
;; dest can't alias source since we can't write to source!
- call void @llvm.memmove.i32( i8* %A, i8* getelementptr ([33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1 )
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %A, i8* getelementptr inbounds ([33 x i8]* @S, i32 0, i32 0), i32 %N, i32 1, i1 false)
ret void
}
@@ -28,15 +24,16 @@ define i32 @test3() {
%hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0 ; <i8*> [#uses=1]
%target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
%target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=3]
- call void @llvm.memmove.i32( i8* %target_p, i8* %h_p, i32 2, i32 2 )
- call void @llvm.memmove.i32( i8* %target_p, i8* %hel_p, i32 4, i32 4 )
- call void @llvm.memmove.i32( i8* %target_p, i8* %hello_u_p, i32 8, i32 8 )
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
ret i32 0
}
; PR2370
define void @test4(i8* %a) {
- tail call void @llvm.memmove.i32( i8* %a, i8* %a, i32 100, i32 1 )
- ret void
+ tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %a, i8* %a, i32 100, i32 1, i1 false)
+ ret void
}
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/memset.ll b/test/Transforms/InstCombine/memset.ll
index 8e85694..7f7bc9f 100644
--- a/test/Transforms/InstCombine/memset.ll
+++ b/test/Transforms/InstCombine/memset.ll
@@ -1,15 +1,14 @@
; RUN: opt < %s -instcombine -S | not grep {call.*llvm.memset}
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
-
define i32 @main() {
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=5]
- call void @llvm.memset.i32( i8* %target_p, i8 1, i32 0, i32 1 )
- call void @llvm.memset.i32( i8* %target_p, i8 1, i32 1, i32 1 )
- call void @llvm.memset.i32( i8* %target_p, i8 1, i32 2, i32 2 )
- call void @llvm.memset.i32( i8* %target_p, i8 1, i32 4, i32 4 )
- call void @llvm.memset.i32( i8* %target_p, i8 1, i32 8, i32 8 )
- ret i32 0
+ %target = alloca [1024 x i8]
+ %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0
+ call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 0, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 1, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 2, i32 2, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 4, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %target_p, i8 1, i32 8, i32 8, i1 false)
+ ret i32 0
}
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index c58ce11..4a8825b 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -43,7 +43,7 @@ define i32 @test5(i32 %A, i32 %B) {
}
; PR2298
-define i8 @test6(i32 %a, i32 %b) zeroext nounwind {
+define zeroext i8 @test6(i32 %a, i32 %b) nounwind {
entry:
%tmp1not = xor i32 %a, -1 ; <i32> [#uses=1]
%tmp2not = xor i32 %b, -1 ; <i32> [#uses=1]
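; The attribute move in @test6 above (repeated in sub.ll and zext-or-icmp.ll
; below) tracks the change in how return-value attributes are spelled:
; zeroext now precedes the return type instead of trailing the parameter
; list. Illustrative sketch of the two spellings:
;
;   define i8 @f(i32 %a) zeroext nounwind { ... }   ; old
;   define zeroext i8 @f(i32 %a) nounwind { ... }   ; new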
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index 043525b..28ceb68 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -149,8 +149,6 @@ declare i8* @__memset_chk(i8*, i32, i32, i32) nounwind
declare noalias i8* @malloc(i32) nounwind
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
define i32 @test7() {
; CHECK: @test7
%alloc = call noalias i8* @malloc(i32 48) nounwind
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index cd865ae..219545c 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -139,7 +139,7 @@ BB2:
; CHECK: @test8
; CHECK-NOT: phi
; CHECK: BB2:
-; CHECK-NEXT: %B = getelementptr %0
+; CHECK-NEXT: %B = getelementptr { i32, i32 }* %A
; CHECK-NEXT: ret i32* %B
}
diff --git a/test/Transforms/InstCombine/select-crash.ll b/test/Transforms/InstCombine/select-crash.ll
index 8ee3369..18af152 100644
--- a/test/Transforms/InstCombine/select-crash.ll
+++ b/test/Transforms/InstCombine/select-crash.ll
@@ -18,3 +18,15 @@ entry:
%add94 = fadd double undef, %mul91
ret double %add94
}
+
+; PR10180: same crash, but with vectors
+define <4 x float> @foo(i1 %b, <4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; CHECK: @foo
+; CHECK: fsub <4 x float>
+; CHECK: select
+; CHECK: fadd <4 x float>
+ %a = fadd <4 x float> %x, %y
+ %sub = fsub <4 x float> %x, %z
+ %sel = select i1 %b, <4 x float> %a, <4 x float> %sub
+ ret <4 x float> %sel
+}
diff --git a/test/Transforms/InstCombine/sqrt.ll b/test/Transforms/InstCombine/sqrt.ll
index 69e511b..cc78417 100644
--- a/test/Transforms/InstCombine/sqrt.ll
+++ b/test/Transforms/InstCombine/sqrt.ll
@@ -14,8 +14,6 @@ entry:
ret float %conv1
}
-declare double @sqrt(double)
-
; PR8096
define float @test2(float %x) nounwind readnone ssp {
entry:
@@ -30,3 +28,27 @@ entry:
; CHECK: ret float
ret float %conv1
}
+
+; rdar://9763193
+; Can't fold (fptrunc (sqrt (fpext x))) -> (sqrtf x) since there is another
+; use of the sqrt result.
+define float @test3(float* %v) nounwind uwtable ssp {
+entry:
+; CHECK: @test3
+; CHECK: sqrt(
+; CHECK-NOT: sqrtf(
+; CHECK: fptrunc
+ %arrayidx13 = getelementptr inbounds float* %v, i64 2
+ %tmp14 = load float* %arrayidx13
+ %mul18 = fmul float %tmp14, %tmp14
+ %add19 = fadd float undef, %mul18
+ %conv = fpext float %add19 to double
+ %call34 = call double @sqrt(double %conv) readnone
+ %call36 = call i32 (double)* @foo(double %call34) nounwind
+ %conv38 = fptrunc double %call34 to float
+ ret float %conv38
+}
+
+declare i32 @foo(double)
+
+declare double @sqrt(double) readnone
diff --git a/test/Transforms/InstCombine/stack-overalign.ll b/test/Transforms/InstCombine/stack-overalign.ll
index 88b4114..2fc8414 100644
--- a/test/Transforms/InstCombine/stack-overalign.ll
+++ b/test/Transforms/InstCombine/stack-overalign.ll
@@ -17,13 +17,13 @@
define void @foo() nounwind {
entry:
- %src = alloca [1024 x i8], align 1
- %src1 = getelementptr [1024 x i8]* %src, i32 0, i32 0
- call void @llvm.memcpy.i32(i8* getelementptr ([1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i32 1)
- call void @frob(i8* %src1) nounwind
- ret void
+ %src = alloca [1024 x i8], align 1
+ %src1 = getelementptr [1024 x i8]* %src, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([1024 x i8]* @dst, i32 0, i32 0), i8* %src1, i32 1024, i32 1, i1 false)
+ call void @frob(i8* %src1) nounwind
+ ret void
}
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
declare void @frob(i8*)
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/sub.ll b/test/Transforms/InstCombine/sub.ll
index 9656a7e..37de328 100644
--- a/test/Transforms/InstCombine/sub.ll
+++ b/test/Transforms/InstCombine/sub.ll
@@ -203,7 +203,7 @@ define i1 @test21(i32 %g, i32 %h) {
}
; PR2298
-define i1 @test22(i32 %a, i32 %b) zeroext nounwind {
+define zeroext i1 @test22(i32 %a, i32 %b) nounwind {
%tmp2 = sub i32 0, %a
%tmp4 = sub i32 0, %b
%tmp5 = icmp eq i32 %tmp2, %tmp4
diff --git a/test/Transforms/InstCombine/vec_narrow.ll b/test/Transforms/InstCombine/vec_narrow.ll
index c05c802..2be43599 100644
--- a/test/Transforms/InstCombine/vec_narrow.ll
+++ b/test/Transforms/InstCombine/vec_narrow.ll
@@ -1,12 +1,10 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep {fadd float}
+; RUN: opt < %s -instcombine -S | grep {fadd float}
- %V = type <4 x float>
-define float @test(%V %A, %V %B, float %f) {
- %C = insertelement %V %A, float %f, i32 0 ; <%V> [#uses=1]
- %D = fadd %V %C, %B ; <%V> [#uses=1]
- %E = extractelement %V %D, i32 0 ; <float> [#uses=1]
+define float @test(<4 x float> %A, <4 x float> %B, float %f) {
+ %C = insertelement <4 x float> %A, float %f, i32 0 ; <%V> [#uses=1]
+ %D = fadd <4 x float> %C, %B ; <%V> [#uses=1]
+ %E = extractelement <4 x float> %D, i32 0 ; <float> [#uses=1]
ret float %E
}
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index bd36e9e..896cb88 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -1,28 +1,25 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-%T = type <4 x float>
-
-
-define %T @test1(%T %v1) {
+define <4 x float> @test1(<4 x float> %v1) {
; CHECK: @test1
-; CHECK: ret %T %v1
- %v2 = shufflevector %T %v1, %T undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- ret %T %v2
+; CHECK: ret <4 x float> %v1
+ %v2 = shufflevector <4 x float> %v1, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ ret <4 x float> %v2
}
-define %T @test2(%T %v1) {
+define <4 x float> @test2(<4 x float> %v1) {
; CHECK: @test2
-; CHECK: ret %T %v1
- %v2 = shufflevector %T %v1, %T %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
- ret %T %v2
+; CHECK: ret <4 x float> %v1
+ %v2 = shufflevector <4 x float> %v1, <4 x float> %v1, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+ ret <4 x float> %v2
}
-define float @test3(%T %A, %T %B, float %f) {
+define float @test3(<4 x float> %A, <4 x float> %B, float %f) {
; CHECK: @test3
; CHECK: ret float %f
- %C = insertelement %T %A, float %f, i32 0
- %D = shufflevector %T %C, %T %B, <4 x i32> <i32 5, i32 0, i32 2, i32 7>
- %E = extractelement %T %D, i32 1
+ %C = insertelement <4 x float> %A, float %f, i32 0
+ %D = shufflevector <4 x float> %C, <4 x float> %B, <4 x i32> <i32 5, i32 0, i32 2, i32 7>
+ %E = extractelement <4 x float> %D, i32 1
ret float %E
}
@@ -57,7 +54,7 @@ define float @test6(<4 x float> %X) {
define <4 x float> @test7(<4 x float> %tmp45.i) {
; CHECK: @test7
-; CHECK-NEXT: ret %T %tmp45.i
+; CHECK-NEXT: ret <4 x float> %tmp45.i
%tmp1642.i = shufflevector <4 x float> %tmp45.i, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 6, i32 7 >
ret <4 x float> %tmp1642.i
}
diff --git a/test/Transforms/InstCombine/zext-or-icmp.ll b/test/Transforms/InstCombine/zext-or-icmp.ll
index 969c301..ddc6083 100644
--- a/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -4,7 +4,7 @@
%struct.Rock = type { i16, i16 }
@some_idx = internal constant [4 x i8] c"\0A\0B\0E\0F" ; <[4 x i8]*> [#uses=1]
-define i8 @t(%struct.FooBar* %up, i8 zeroext %intra_flag, i32 %blk_i) zeroext nounwind {
+define zeroext i8 @t(%struct.FooBar* %up, i8 zeroext %intra_flag, i32 %blk_i) nounwind {
entry:
%tmp2 = lshr i32 %blk_i, 1 ; <i32> [#uses=1]
%tmp3 = and i32 %tmp2, 2 ; <i32> [#uses=1]
diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll
new file mode 100644
index 0000000..8134cc8
--- /dev/null
+++ b/test/Transforms/InstSimplify/undef.ll
@@ -0,0 +1,127 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+; @test0
+; CHECK: ret i64 undef
+define i64 @test0() {
+ %r = mul i64 undef, undef
+ ret i64 %r
+}
+
+; @test1
+; CHECK: ret i64 undef
+define i64 @test1() {
+ %r = mul i64 3, undef
+ ret i64 %r
+}
+
+; @test2
+; CHECK: ret i64 undef
+define i64 @test2() {
+ %r = mul i64 undef, 3
+ ret i64 %r
+}
+
+; @test3
+; CHECK: ret i64 0
+define i64 @test3() {
+ %r = mul i64 undef, 6
+ ret i64 %r
+}
+
+; @test4
+; CHECK: ret i64 0
+define i64 @test4() {
+ %r = mul i64 6, undef
+ ret i64 %r
+}
+
+; @test5
+; CHECK: ret i64 undef
+define i64 @test5() {
+ %r = and i64 undef, undef
+ ret i64 %r
+}
+
+; @test6
+; CHECK: ret i64 undef
+define i64 @test6() {
+ %r = or i64 undef, undef
+ ret i64 %r
+}
+
+; @test7
+; CHECK: ret i64 undef
+define i64 @test7() {
+ %r = udiv i64 undef, 1
+ ret i64 %r
+}
+
+; @test8
+; CHECK: ret i64 undef
+define i64 @test8() {
+ %r = sdiv i64 undef, 1
+ ret i64 %r
+}
+
+; @test9
+; CHECK: ret i64 0
+define i64 @test9() {
+ %r = urem i64 undef, 1
+ ret i64 %r
+}
+
+; @test10
+; CHECK: ret i64 0
+define i64 @test10() {
+ %r = srem i64 undef, 1
+ ret i64 %r
+}
+
+; @test11
+; CHECK: ret i64 undef
+define i64 @test11() {
+ %r = shl i64 undef, undef
+ ret i64 %r
+}
+
+; @test12
+; CHECK: ret i64 undef
+define i64 @test12() {
+ %r = ashr i64 undef, undef
+ ret i64 %r
+}
+
+; @test13
+; CHECK: ret i64 undef
+define i64 @test13() {
+ %r = lshr i64 undef, undef
+ ret i64 %r
+}
+
+; @test14
+; CHECK: ret i1 undef
+define i1 @test14() {
+ %r = icmp slt i64 undef, undef
+ ret i1 %r
+}
+
+; @test15
+; CHECK: ret i1 undef
+define i1 @test15() {
+ %r = icmp ult i64 undef, undef
+ ret i1 %r
+}
+
+; @test16
+; CHECK: ret i64 undef
+define i64 @test16(i64 %a) {
+ %r = select i1 undef, i64 %a, i64 undef
+ ret i64 %r
+}
+
+; @test17
+; CHECK: ret i64 undef
+define i64 @test17(i64 %a) {
+ %r = select i1 undef, i64 undef, i64 %a
+ ret i64 %r
+}
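; Why the mul tests above fold differently: multiplying by the odd constant 3
; is a bijection modulo 2^64, so "mul i64 undef, 3" can still take any value
; and folds to undef; multiplying by 6 always yields an even number, so undef
; would be too strong a result and the simplifier folds to the safe constant
; 0. The same safety reasoning gives "urem/srem undef, 1" a result of 0.
; Worked one-liner:
;
;   %r = mul i64 undef, 6   ; low bit of %r is always 0 -> fold to i64 0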
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index aed51a1..2115dd3 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -18,7 +18,7 @@ bb13: ; preds = %bb
br label %bb
bb110: ; preds = %bb
- %mrv_gr124 = getresult %struct.system__secondary_stack__mark_id %tmp120, 1 ; <i64> [#uses=0]
+ %mrv_gr124 = extractvalue %struct.system__secondary_stack__mark_id %tmp120, 1 ; <i64> [#uses=0]
unreachable
}
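; The getresult -> extractvalue rewrite above (repeated in LoopRotate/crash.ll
; below) migrates from the retired multiple-return-value syntax to first-class
; aggregates, where a struct result is decomposed with extractvalue.
; Illustrative sketch, assuming a hypothetical %struct.pair-returning @f:
;
;   %pair = call %struct.pair @f()
;   %snd = extractvalue %struct.pair %pair, 1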
diff --git a/test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll b/test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll
deleted file mode 100644
index 91740cf..0000000
--- a/test/Transforms/LICM/2005-03-24-LICM-Aggregate-Crash.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: opt < %s -licm -disable-output
-
-define void @test({ i32 }* %P) {
- br label %Loop
-Loop: ; preds = %Loop, %0
- free { i32 }* %P
- br label %Loop
-}
-
diff --git a/test/Transforms/LICM/2011-07-06-Alignment.ll b/test/Transforms/LICM/2011-07-06-Alignment.ll
new file mode 100644
index 0000000..f97b701
--- /dev/null
+++ b/test/Transforms/LICM/2011-07-06-Alignment.ll
@@ -0,0 +1,26 @@
+; RUN: opt -licm -S %s | FileCheck %s
+
+@A = common global [1024 x float] zeroinitializer, align 4
+
+define i32 @main() nounwind {
+entry:
+ br label %for.cond
+
+for.cond:
+ %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr [1024 x float]* @A, i64 0, i64 3
+ %vecidx = bitcast float* %arrayidx to <4 x float>*
+ store <4 x float> zeroinitializer, <4 x float>* %vecidx, align 4
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp ne i64 %indvar, 1024
+ br i1 %exitcond, label %for.body, label %for.end
+
+for.body:
+ br label %for.cond
+
+for.end:
+ ret i32 0
+}
+
+; CHECK: store <4 x float> {{.*}} align 4
+
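; The new alignment test above pins the reason for its CHECK line: @A is only
; 4-byte aligned, so when LICM sinks the loop-invariant vector store it must
; preserve the original "align 4" rather than assume the natural 16-byte
; alignment of <4 x float>. Illustrative contrast:
;
;   store <4 x float> zeroinitializer, <4 x float>* %p, align 4   ; correct here
;   store <4 x float> zeroinitializer, <4 x float>* %p, align 16  ; would overstate alignment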
diff --git a/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
new file mode 100644
index 0000000..40c6629
--- /dev/null
+++ b/test/Transforms/LoopDeletion/2011-06-21-phioperands.ll
@@ -0,0 +1,182 @@
+; RUN: opt %s -loop-deletion -disable-output
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%0 = type { %"class.llvm::SmallVectorImpl", [1 x %"union.llvm::SmallVectorBase::U"] }
+%"class.clang::SourceLocation" = type { i32 }
+%"class.clang::driver::Arg" = type { %"class.clang::driver::Option"*, %"class.clang::driver::Arg"*, i32, i8, %0 }
+%"class.clang::driver::Option" = type { i32 (...)**, i32, %"class.clang::SourceLocation", i8*, %"class.clang::driver::OptionGroup"*, %"class.clang::driver::Option"*, i8 }
+%"class.clang::driver::OptionGroup" = type { %"class.clang::driver::Option" }
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8*, %"union.llvm::SmallVectorBase::U" }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase" }
+%"class.llvm::SmallVectorTemplateBase" = type { %"class.llvm::SmallVectorTemplateCommon" }
+%"class.llvm::SmallVectorTemplateCommon" = type { %"class.llvm::SmallVectorBase" }
+%"union.llvm::SmallVectorBase::U" = type { x86_fp80 }
+
+define void @_ZNK5clang6driver7ArgList20AddAllArgsTranslatedERN4llvm11SmallVectorIPKcLj16EEENS0_12OptSpecifierES5_b(i1 zeroext %Joined) nounwind align 2 {
+entry:
+ br i1 undef, label %entry.split.us, label %entry.entry.split_crit_edge
+
+entry.entry.split_crit_edge: ; preds = %entry
+ br label %entry.split
+
+entry.split.us: ; preds = %entry
+ br label %for.cond.i14.us
+
+for.cond.i14.us: ; preds = %for.inc.i38.us, %entry.split.us
+ br i1 true, label %for.cond.i50.us-lcssa.us, label %if.end.i23.us
+
+for.inc.i38.us: ; preds = %if.end.i23.us
+ br label %for.cond.i14.us
+
+if.end.i23.us: ; preds = %for.cond.i14.us
+ br i1 true, label %for.cond.i50.us-lcssa.us, label %for.inc.i38.us
+
+for.cond.i50.us-lcssa.us: ; preds = %if.end.i23.us, %for.cond.i14.us
+ br label %for.cond.i50
+
+entry.split: ; preds = %entry.entry.split_crit_edge
+ br label %for.cond.i14
+
+for.cond.i14: ; preds = %for.inc.i38, %entry.split
+ br i1 undef, label %for.cond.i50.us-lcssa, label %if.end.i23
+
+if.end.i23: ; preds = %for.cond.i14
+ br i1 undef, label %for.cond.i50.us-lcssa, label %for.inc.i38
+
+for.inc.i38: ; preds = %if.end.i23
+ br label %for.cond.i14
+
+for.cond.i50.us-lcssa: ; preds = %if.end.i23, %for.cond.i14
+ br label %for.cond.i50
+
+for.cond.i50: ; preds = %for.cond.i50.us-lcssa, %for.cond.i50.us-lcssa.us
+ br label %for.cond
+
+for.cond.loopexit.us-lcssa: ; preds = %if.end.i, %for.cond.i
+ br label %for.cond.loopexit
+
+for.cond.loopexit: ; preds = %for.cond.loopexit.us-lcssa.us, %for.cond.loopexit.us-lcssa
+ br label %for.cond
+
+for.cond: ; preds = %for.cond.loopexit, %for.cond.i50
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.cond
+ br i1 %Joined, label %if.then, label %if.else
+
+if.then: ; preds = %for.body
+ br i1 undef, label %cond.false.i.i, label %_ZN4llvm9StringRefC1EPKc.exit
+
+cond.false.i.i: ; preds = %if.then
+ unreachable
+
+_ZN4llvm9StringRefC1EPKc.exit: ; preds = %if.then
+ br i1 undef, label %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit, label %cond.false.i.i91
+
+cond.false.i.i91: ; preds = %_ZN4llvm9StringRefC1EPKc.exit
+ unreachable
+
+_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit: ; preds = %_ZN4llvm9StringRefC1EPKc.exit
+ br i1 undef, label %cond.false.i.i.i, label %if.end13.i.i.i.i
+
+if.end13.i.i.i.i: ; preds = %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit
+ br i1 undef, label %land.lhs.true16.i.i.i.i, label %if.end19.i.i.i.i
+
+land.lhs.true16.i.i.i.i: ; preds = %if.end13.i.i.i.i
+ br i1 undef, label %cond.false.i.i.i, label %_ZNK4llvm5Twine8isBinaryEv.exit8.i.i.i.i
+
+_ZNK4llvm5Twine8isBinaryEv.exit8.i.i.i.i: ; preds = %land.lhs.true16.i.i.i.i
+ br i1 undef, label %cond.false.i.i.i, label %if.end19.i.i.i.i
+
+if.end19.i.i.i.i: ; preds = %_ZNK4llvm5Twine8isBinaryEv.exit8.i.i.i.i, %if.end13.i.i.i.i
+ br i1 undef, label %land.lhs.true22.i.i.i.i, label %_ZN4llvmplERKNS_9StringRefEPKc.exit
+
+land.lhs.true22.i.i.i.i: ; preds = %if.end19.i.i.i.i
+ br i1 undef, label %cond.false.i.i.i, label %_ZNK4llvm5Twine8isBinaryEv.exit.i.i.i.i
+
+_ZNK4llvm5Twine8isBinaryEv.exit.i.i.i.i: ; preds = %land.lhs.true22.i.i.i.i
+ br i1 undef, label %cond.false.i.i.i, label %_ZN4llvmplERKNS_9StringRefEPKc.exit
+
+cond.false.i.i.i: ; preds = %_ZNK4llvm5Twine8isBinaryEv.exit.i.i.i.i, %land.lhs.true22.i.i.i.i, %_ZNK4llvm5Twine8isBinaryEv.exit8.i.i.i.i, %land.lhs.true16.i.i.i.i, %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit
+ unreachable
+
+_ZN4llvmplERKNS_9StringRefEPKc.exit: ; preds = %_ZNK4llvm5Twine8isBinaryEv.exit.i.i.i.i, %if.end19.i.i.i.i
+ br i1 undef, label %Retry.i, label %if.end.i99
+
+Retry.i: ; preds = %if.end.i99, %_ZN4llvmplERKNS_9StringRefEPKc.exit
+ br i1 undef, label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit, label %new.notnull.i
+
+new.notnull.i: ; preds = %Retry.i
+ br label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit
+
+if.end.i99: ; preds = %_ZN4llvmplERKNS_9StringRefEPKc.exit
+ br label %Retry.i
+
+_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit: ; preds = %new.notnull.i, %Retry.i
+ br label %for.cond.i.preheader
+
+if.else: ; preds = %for.body
+ br i1 undef, label %Retry.i108, label %if.end.i113
+
+Retry.i108: ; preds = %if.end.i113, %if.else
+ br i1 undef, label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114, label %new.notnull.i110
+
+new.notnull.i110: ; preds = %Retry.i108
+ br label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114
+
+if.end.i113: ; preds = %if.else
+ br label %Retry.i108
+
+_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114: ; preds = %new.notnull.i110, %Retry.i108
+ br i1 undef, label %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit125, label %cond.false.i.i123
+
+cond.false.i.i123: ; preds = %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114
+ unreachable
+
+_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit125: ; preds = %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit114
+ br i1 undef, label %Retry.i134, label %if.end.i139
+
+Retry.i134: ; preds = %if.end.i139, %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit125
+ br i1 undef, label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit140, label %new.notnull.i136
+
+new.notnull.i136: ; preds = %Retry.i134
+ br label %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit140
+
+if.end.i139: ; preds = %_ZNK5clang6driver3Arg8getValueERKNS0_7ArgListEj.exit125
+ br label %Retry.i134
+
+_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit140: ; preds = %new.notnull.i136, %Retry.i134
+ br label %for.cond.i.preheader
+
+for.cond.i.preheader: ; preds = %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit140, %_ZN4llvm15SmallVectorImplIPKcE9push_backERKS2_.exit
+ br i1 undef, label %for.cond.i.preheader.split.us, label %for.cond.i.preheader.for.cond.i.preheader.split_crit_edge
+
+for.cond.i.preheader.for.cond.i.preheader.split_crit_edge: ; preds = %for.cond.i.preheader
+ br label %for.cond.i.preheader.split
+
+for.cond.i.preheader.split.us: ; preds = %for.cond.i.preheader
+ br label %for.cond.i.us
+
+for.cond.i.us: ; preds = %if.end.i.us, %for.cond.i.preheader.split.us
+ br i1 true, label %for.cond.loopexit.us-lcssa.us, label %if.end.i.us
+
+if.end.i.us: ; preds = %for.cond.i.us
+ br i1 true, label %for.cond.loopexit.us-lcssa.us, label %for.cond.i.us
+
+for.cond.loopexit.us-lcssa.us: ; preds = %if.end.i.us, %for.cond.i.us
+ %tmp178218.us.lcssa = phi %"class.clang::driver::Arg"** [ undef, %if.end.i.us ], [ undef, %for.cond.i.us ]
+ br label %for.cond.loopexit
+
+for.cond.i.preheader.split: ; preds = %for.cond.i.preheader.for.cond.i.preheader.split_crit_edge
+ br label %for.cond.i
+
+for.cond.i: ; preds = %if.end.i, %for.cond.i.preheader.split
+ br i1 undef, label %for.cond.loopexit.us-lcssa, label %if.end.i
+
+if.end.i: ; preds = %for.cond.i
+ br i1 undef, label %for.cond.loopexit.us-lcssa, label %for.cond.i
+
+for.end: ; preds = %for.cond
+ ret void
+}
diff --git a/test/Transforms/LoopIdiom/memset_noidiom.ll b/test/Transforms/LoopIdiom/memset_noidiom.ll
new file mode 100644
index 0000000..168eb95
--- /dev/null
+++ b/test/Transforms/LoopIdiom/memset_noidiom.ll
@@ -0,0 +1,30 @@
+; RUN: opt -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.0.0"
+
+; CHECK: @memset
+; CHECK-NOT: llvm.memset
+define i8* @memset(i8* %b, i32 %c, i64 %len) nounwind uwtable ssp {
+entry:
+ %cmp1 = icmp ult i64 0, %len
+ br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %conv6 = trunc i32 %c to i8
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %indvar = phi i64 [ 0, %for.body.lr.ph ], [ %indvar.next, %for.body ]
+ %p.02 = getelementptr i8* %b, i64 %indvar
+ store i8 %conv6, i8* %p.02, align 1
+ %indvar.next = add i64 %indvar, 1
+ %exitcond = icmp ne i64 %indvar.next, %len
+ br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.cond.for.end_crit_edge, %entry
+ ret i8* %b
+}
+
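; The test above guards -loop-idiom against self-recursion: the byte-store
; loop is the body of a function named memset, so recognizing it as the
; memset idiom would make the routine effectively call itself. The CHECK-NOT
; line verifies that no llvm.memset call is introduced; had the transform
; fired, the body would collapse to something like the (undesired) sketch:
;
;   call void @llvm.memset.p0i8.i64(i8* %b, i8 %conv6, i64 %len, i32 1, i1 false)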
diff --git a/test/Transforms/LoopRotate/PhiRename-1.ll b/test/Transforms/LoopRotate/PhiRename-1.ll
index 74426a8..9cb55b4 100644
--- a/test/Transforms/LoopRotate/PhiRename-1.ll
+++ b/test/Transforms/LoopRotate/PhiRename-1.ll
@@ -33,7 +33,6 @@ entry:
%c = alloca i32, align 4 ; <i32*> [#uses=4]
%l = alloca %struct.list*, align 4 ; <%struct.list**> [#uses=5]
%op = alloca %struct.operator*, align 4 ; <%struct.operator**> [#uses=3]
- "alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
store i32 %arity, i32* %arity_addr
store i32 0, i32* %c
%tmp1 = load %struct.list** @operators ; <%struct.list*> [#uses=1]
diff --git a/test/Transforms/LoopRotate/crash.ll b/test/Transforms/LoopRotate/crash.ll
index 16a6868..954b834 100644
--- a/test/Transforms/LoopRotate/crash.ll
+++ b/test/Transforms/LoopRotate/crash.ll
@@ -113,7 +113,7 @@ bb116: ; preds = %bb131, %entry
br i1 false, label %bb141, label %bb131
bb131: ; preds = %bb116
- %mrv_gr125 = getresult %struct.NSRange %tmp123, 1 ; <i64> [#uses=0]
+ %mrv_gr125 = extractvalue %struct.NSRange %tmp123, 1 ; <i64> [#uses=0]
br label %bb116
bb141: ; preds = %bb116
diff --git a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
index e73fff1..e7d0f84 100644
--- a/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
+++ b/test/Transforms/LoopSimplify/2007-10-28-InvokeCrash.ll
@@ -278,7 +278,7 @@ invcont: ; preds = %bb_main
br label %bb_main
invcont.fragment: ; preds = %bb_main
- invoke void @_ZN9Fibonacci10get_numberEj( %struct.BigInt* null sret , %struct.Fibonacci* %this_this, i32 %n_i_n_i )
+ invoke void @_ZN9Fibonacci10get_numberEj( %struct.BigInt* sret null , %struct.Fibonacci* %this_this, i32 %n_i_n_i )
to label %invcont14 unwind label %meshBB37
invcont.unwind10_crit_edge: ; preds = %bb_main
@@ -304,7 +304,7 @@ invcont14: ; preds = %invcont.fragment, %bb_main
br label %bb_main
invcont14.normaldest: ; No predecessors!
- invoke %"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"* @___ZN9__gnu_cxx17__normal_iteratorIP6BigIntSt6vectorIS1_SaIS1_EEEppEv___ZNSt6vectorImSaImEED1Ev___ZN6BigIntD1Ev___ZN9__gnu_cxx13new_allocatorI6BigIntE7destroyEPS1____ZSt8_DestroyIP6BigIntSaIS0_EEvT_S3_T0_( i32 14, %"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"* null, %"struct.std::vector<ulong,std::allocator<ulong> >"* null, %struct.BigInt* null, %struct.__false_type* null, %struct.BigInt* null, %struct.__false_type* null noalias )
+ invoke %"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"* @___ZN9__gnu_cxx17__normal_iteratorIP6BigIntSt6vectorIS1_SaIS1_EEEppEv___ZNSt6vectorImSaImEED1Ev___ZN6BigIntD1Ev___ZN9__gnu_cxx13new_allocatorI6BigIntE7destroyEPS1____ZSt8_DestroyIP6BigIntSaIS0_EEvT_S3_T0_( i32 14, %"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"* null, %"struct.std::vector<ulong,std::allocator<ulong> >"* null, %struct.BigInt* null, %struct.__false_type* null, %struct.BigInt* null, %struct.__false_type* noalias null )
to label %invcont15 unwind label %meshBB345 ; <%"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"*>:0 [#uses=0]
invcont14.unwind10_crit_edge: ; preds = %bb_main
@@ -372,7 +372,7 @@ invcont.cond_next_crit_edge: ; preds = %bb_main
br label %bb_main
cond_true: ; preds = %bb_main
- invoke void @_ZN9Fibonacci10get_numberEj( %struct.BigInt* null sret , %struct.Fibonacci* %this_this, i32 %n_i_n_i )
+ invoke void @_ZN9Fibonacci10get_numberEj( %struct.BigInt* sret null , %struct.Fibonacci* %this_this, i32 %n_i_n_i )
to label %meshBB323 unwind label %cond_true.unwind_crit_edge
cond_true.unwind_crit_edge: ; preds = %cond_true, %bb_main
@@ -385,7 +385,7 @@ invcont12: ; preds = %bb_main
br label %bb_main
invcont12.fragment: ; preds = %bb_main
- invoke %"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"* @___ZN9__gnu_cxx17__normal_iteratorIP6BigIntSt6vectorIS1_SaIS1_EEEppEv___ZNSt6vectorImSaImEED1Ev___ZN6BigIntD1Ev___ZN9__gnu_cxx13new_allocatorI6BigIntE7destroyEPS1____ZSt8_DestroyIP6BigIntSaIS0_EEvT_S3_T0_( i32 14, %"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"* null, %"struct.std::vector<ulong,std::allocator<ulong> >"* null, %struct.BigInt* null, %struct.__false_type* null, %struct.BigInt* null, %struct.__false_type* null noalias )
+ invoke %"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"* @___ZN9__gnu_cxx17__normal_iteratorIP6BigIntSt6vectorIS1_SaIS1_EEEppEv___ZNSt6vectorImSaImEED1Ev___ZN6BigIntD1Ev___ZN9__gnu_cxx13new_allocatorI6BigIntE7destroyEPS1____ZSt8_DestroyIP6BigIntSaIS0_EEvT_S3_T0_( i32 14, %"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"* null, %"struct.std::vector<ulong,std::allocator<ulong> >"* null, %struct.BigInt* null, %struct.__false_type* null, %struct.BigInt* null, %struct.__false_type* noalias null )
to label %meshBB30 unwind label %meshBB337 ; <%"struct.__gnu_cxx::__normal_iterator<BigInt*,std::vector<BigInt, std::allocator<BigInt> > >"*>:1 [#uses=0]
invcont12.unwind_crit_edge: ; preds = %bb_main
@@ -467,7 +467,7 @@ invcont30.unwind_crit_edge.unwinddest: ; No predecessors!
br label %bb_main
invcont33: ; preds = %bb_main
- invoke void @_ZNKSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE3strEv( %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* null sret , %"struct.std::ostringstream"* null )
+ invoke void @_ZNKSt19basic_ostringstreamIcSt11char_traitsIcESaIcEE3strEv( %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* sret null , %"struct.std::ostringstream"* null )
to label %invcont36 unwind label %invcont33.unwind_crit_edge
invcont33.unwind_crit_edge: ; preds = %invcont33, %bb_main
diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll
new file mode 100644
index 0000000..c00127e
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -0,0 +1,251 @@
+; RUN: opt -lower-expect -strip-dead-prototypes -S -o - < %s | FileCheck %s
+
+; CHECK: @test1
+define i32 @test1(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %cmp = icmp sgt i32 %tmp, 1
+ %conv = zext i1 %cmp to i32
+ %conv1 = sext i32 %conv to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1)
+ %tobool = icmp ne i64 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+declare i64 @llvm.expect.i64(i64, i64) nounwind readnone
+
+declare i32 @f(...)
+
+; CHECK: @test2
+define i32 @test2(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %conv = sext i32 %tmp to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+ %tobool = icmp ne i64 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+; CHECK: @test3
+define i32 @test3(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %tobool = icmp ne i32 %tmp, 0
+ %lnot = xor i1 %tobool, true
+ %lnot.ext = zext i1 %lnot to i32
+ %conv = sext i32 %lnot.ext to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+ %tobool1 = icmp ne i64 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+ br i1 %tobool1, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+; CHECK: @test4
+define i32 @test4(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %tobool = icmp ne i32 %tmp, 0
+ %lnot = xor i1 %tobool, true
+ %lnot1 = xor i1 %lnot, true
+ %lnot.ext = zext i1 %lnot1 to i32
+ %conv = sext i32 %lnot.ext to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+ %tobool2 = icmp ne i64 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+ br i1 %tobool2, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+; CHECK: @test5
+define i32 @test5(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %cmp = icmp slt i32 %tmp, 0
+ %conv = zext i1 %cmp to i32
+ %conv1 = sext i32 %conv to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 0)
+ %tobool = icmp ne i64 %expval, 0
+; CHECK: !prof !1
+; CHECK-NOT: @llvm.expect
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+; CHECK: @test6
+define i32 @test6(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %conv = sext i32 %tmp to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+; CHECK: !prof !2
+; CHECK-NOT: @llvm.expect
+ switch i64 %expval, label %sw.epilog [
+ i64 1, label %sw.bb
+ i64 2, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry
+ store i32 0, i32* %retval
+ br label %return
+
+sw.epilog: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %sw.epilog, %sw.bb
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+; CHECK: @test7
+define i32 @test7(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %conv = sext i32 %tmp to i64
+ %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
+; CHECK: !prof !3
+; CHECK-NOT: @llvm.expect
+ switch i64 %expval, label %sw.epilog [
+ i64 2, label %sw.bb
+ i64 3, label %sw.bb
+ ]
+
+sw.bb: ; preds = %entry, %entry
+ %tmp1 = load i32* %x.addr, align 4
+ store i32 %tmp1, i32* %retval
+ br label %return
+
+sw.epilog: ; preds = %entry
+ store i32 0, i32* %retval
+ br label %return
+
+return: ; preds = %sw.epilog, %sw.bb
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+; CHECK: @test8
+define i32 @test8(i32 %x) nounwind uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %x.addr = alloca i32, align 4
+ store i32 %x, i32* %x.addr, align 4
+ %tmp = load i32* %x.addr, align 4
+ %cmp = icmp sgt i32 %tmp, 1
+ %conv = zext i1 %cmp to i32
+ %expval = call i32 @llvm.expect.i32(i32 %conv, i32 1)
+ %tobool = icmp ne i32 %expval, 0
+; CHECK: !prof !0
+; CHECK-NOT: @llvm.expect
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %call = call i32 (...)* @f()
+ store i32 %call, i32* %retval
+ br label %return
+
+if.end: ; preds = %entry
+ store i32 1, i32* %retval
+ br label %return
+
+return: ; preds = %if.end, %if.then
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+declare i32 @llvm.expect.i32(i32, i32) nounwind readnone
+
+; CHECK: !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+; CHECK: !1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+; CHECK: !2 = metadata !{metadata !"branch_weights", i32 4, i32 64, i32 4}
+; CHECK: !3 = metadata !{metadata !"branch_weights", i32 64, i32 4, i32 4}
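; How the pass rewrites the tests above: each llvm.expect call is erased and
; its hint reattached to the dependent br or switch as "branch_weights"
; profile metadata, with the expected edge weighted 64 against 4 for the
; others (matching the CHECK lines for !0 through !3). Illustrative sketch:
;
;   %expval = call i64 @llvm.expect.i64(i64 %conv, i64 1)
;   br i1 %tobool, label %if.then, label %if.end
;
; becomes
;
;   br i1 %tobool, label %if.then, label %if.end, !prof !0
;   !0 = metadata !{metadata !"branch_weights", i32 64, i32 4}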
diff --git a/test/Transforms/LowerExpectIntrinsic/dg.exp b/test/Transforms/LowerExpectIntrinsic/dg.exp
new file mode 100644
index 0000000..f200589
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/LowerSetJmp/simpletest.ll b/test/Transforms/LowerSetJmp/simpletest.ll
index 1430dff..dece840 100644
--- a/test/Transforms/LowerSetJmp/simpletest.ll
+++ b/test/Transforms/LowerSetJmp/simpletest.ll
@@ -1,6 +1,5 @@
; RUN: opt < %s -lowersetjmp -S | grep invoke
- %JmpBuf = type i32
@.str_1 = internal constant [13 x i8] c"returned %d\0A\00" ; <[13 x i8]*> [#uses=1]
declare void @llvm.longjmp(i32*, i32)
diff --git a/test/Transforms/Mem2Reg/ignore-lifetime.ll b/test/Transforms/Mem2Reg/ignore-lifetime.ll
new file mode 100644
index 0000000..5e4f9bf
--- /dev/null
+++ b/test/Transforms/Mem2Reg/ignore-lifetime.ll
@@ -0,0 +1,26 @@
+; RUN: opt -mem2reg -S -o - < %s | FileCheck %s
+
+declare void @llvm.lifetime.start(i64 %size, i8* nocapture %ptr)
+declare void @llvm.lifetime.end(i64 %size, i8* nocapture %ptr)
+
+define void @test1() {
+; CHECK: test1
+; CHECK-NOT: alloca
+ %A = alloca i32
+ %B = bitcast i32* %A to i8*
+ call void @llvm.lifetime.start(i64 2, i8* %B)
+ store i32 1, i32* %A
+ call void @llvm.lifetime.end(i64 2, i8* %B)
+ ret void
+}
+
+define void @test2() {
+; CHECK: test2
+; CHECK-NOT: alloca
+ %A = alloca {i8, i16}
+ %B = getelementptr {i8, i16}* %A, i32 0, i32 0
+ call void @llvm.lifetime.start(i64 2, i8* %B)
+ store {i8, i16} zeroinitializer, {i8, i16}* %A
+ call void @llvm.lifetime.end(i64 2, i8* %B)
+ ret void
+}
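; What the new test above relies on: llvm.lifetime.start/end only delimit
; when an alloca's contents are meaningful, so -mem2reg may treat them as
; droppable uses and still promote the alloca to SSA form, as the
; CHECK-NOT: alloca lines require. Sketch of the pattern being promoted:
;
;   %A = alloca i32                                  ; promoted away
;   call void @llvm.lifetime.start(i64 2, i8* %B)    ; ignored marker use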
diff --git a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
index 9f1e280..b95ad91 100644
--- a/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
+++ b/test/Transforms/MemCpyOpt/2008-02-24-MultipleUseofSRet.ll
@@ -4,31 +4,33 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
target triple = "i386-pc-linux-gnu"
-define internal fastcc void @initialize({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
+%0 = type { x86_fp80, x86_fp80 }
+
+define internal fastcc void @initialize(%0* noalias sret %agg.result) nounwind {
entry:
- %agg.result.03 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
- store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
- %agg.result.15 = getelementptr { x86_fp80, x86_fp80 }* %agg.result, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
- store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
- ret void
+ %agg.result.03 = getelementptr %0* %agg.result, i32 0, i32 0
+ store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.03
+ %agg.result.15 = getelementptr %0* %agg.result, i32 0, i32 1
+ store x86_fp80 0xK00000000000000000000, x86_fp80* %agg.result.15
+ ret void
}
-declare fastcc x86_fp80 @passed_uninitialized({ x86_fp80, x86_fp80 }* %x) nounwind
+declare fastcc x86_fp80 @passed_uninitialized(%0*) nounwind
-define fastcc void @badly_optimized() nounwind {
+define fastcc void @badly_optimized() nounwind {
entry:
- %z = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %tmp = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %memtmp = alloca { x86_fp80, x86_fp80 }, align 8 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- call fastcc void @initialize( { x86_fp80, x86_fp80 }* noalias sret %memtmp )
- %tmp1 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1]
- %memtmp2 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp1, i8* %memtmp2, i32 24, i32 8 )
- %z3 = bitcast { x86_fp80, x86_fp80 }* %z to i8* ; <i8*> [#uses=1]
- %tmp4 = bitcast { x86_fp80, x86_fp80 }* %tmp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %z3, i8* %tmp4, i32 24, i32 8 )
- %tmp5 = call fastcc x86_fp80 @passed_uninitialized( { x86_fp80, x86_fp80 }* %z ) ; <x86_fp80> [#uses=0]
- ret void
+ %z = alloca %0
+ %tmp = alloca %0
+ %memtmp = alloca %0, align 8
+ call fastcc void @initialize(%0* noalias sret %memtmp)
+ %tmp1 = bitcast %0* %tmp to i8*
+ %memtmp2 = bitcast %0* %memtmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %memtmp2, i32 24, i32 8, i1 false)
+ %z3 = bitcast %0* %z to i8*
+ %tmp4 = bitcast %0* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z3, i8* %tmp4, i32 24, i32 8, i1 false)
+ %tmp5 = call fastcc x86_fp80 @passed_uninitialized(%0* %z)
+ ret void
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 418761e..24cf576 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -1,20 +1,22 @@
; RUN: opt < %s -basicaa -memcpyopt -S | not grep {call.*memcpy.}
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
- %a = type { i32 }
- %b = type { float }
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+%a = type { i32 }
+%b = type { float }
+
declare void @g(%a*)
define float @f() {
entry:
- %a_var = alloca %a
- %b_var = alloca %b
- call void @g(%a *%a_var)
- %a_i8 = bitcast %a* %a_var to i8*
- %b_i8 = bitcast %b* %b_var to i8*
- call void @llvm.memcpy.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4)
- %tmp1 = getelementptr %b* %b_var, i32 0, i32 0
- %tmp2 = load float* %tmp1
- ret float %tmp2
+ %a_var = alloca %a
+ %b_var = alloca %b
+ call void @g(%a* %a_var)
+ %a_i8 = bitcast %a* %a_var to i8*
+ %b_i8 = bitcast %b* %b_var to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4, i1 false)
+ %tmp1 = getelementptr %b* %b_var, i32 0, i32 0
+ %tmp2 = load float* %tmp1
+ ret float %tmp2
}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/memcpy.ll b/test/Transforms/MemCpyOpt/memcpy.ll
index 5c6a94c..71d4d4e 100644
--- a/test/Transforms/MemCpyOpt/memcpy.ll
+++ b/test/Transforms/MemCpyOpt/memcpy.ll
@@ -3,17 +3,21 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
-define void @test1({ x86_fp80, x86_fp80 }* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind {
+%0 = type { x86_fp80, x86_fp80 }
+%1 = type { i32, i32 }
+
+define void @test1(%0* sret %agg.result, x86_fp80 %z.0, x86_fp80 %z.1) nounwind {
entry:
- %tmp2 = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=1]
- %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1 ; <x86_fp80> [#uses=1]
- call void @ccoshl( { x86_fp80, x86_fp80 }* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0 ) nounwind
- %tmp219 = bitcast { x86_fp80, x86_fp80 }* %tmp2 to i8* ; <i8*> [#uses=2]
- %memtmp20 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp219, i8* %memtmp20, i32 32, i32 16 )
- %agg.result21 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %agg.result21, i8* %tmp219, i32 32, i32 16 )
+ %tmp2 = alloca %0
+ %memtmp = alloca %0, align 16
+ %tmp5 = fsub x86_fp80 0xK80000000000000000000, %z.1
+ call void @ccoshl(%0* sret %memtmp, x86_fp80 %tmp5, x86_fp80 %z.0) nounwind
+ %tmp219 = bitcast %0* %tmp2 to i8*
+ %memtmp20 = bitcast %0* %memtmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp219, i8* %memtmp20, i32 32, i32 16, i1 false)
+ %agg.result21 = bitcast %0* %agg.result to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result21, i8* %tmp219, i32 32, i32 16, i1 false)
+ ret void
; Check that one of the memcpy calls is removed.
;; FIXME: PR8643 We should be able to eliminate the last memcpy here.
@@ -23,22 +27,19 @@ entry:
; CHECK: call void @llvm.memcpy
; CHECK-NOT: llvm.memcpy
; CHECK: ret void
- ret void
}
-declare void @ccoshl({ x86_fp80, x86_fp80 }* sret , x86_fp80, x86_fp80) nounwind
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @ccoshl(%0* sret , x86_fp80, x86_fp80) nounwind
; The intermediate alloca and one of the memcpy calls should be eliminated;
; the other should be replaced with a memmove.
define void @test2(i8* %P, i8* %Q) nounwind {
- %memtmp = alloca { x86_fp80, x86_fp80 }, align 16
- %R = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8*
- call void @llvm.memcpy.i32( i8* %R, i8* %P, i32 32, i32 16 )
- call void @llvm.memcpy.i32( i8* %Q, i8* %R, i32 32, i32 16 )
- ret void
+ %memtmp = alloca %0, align 16
+ %R = bitcast %0* %memtmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %R, i8* %P, i32 32, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %Q, i8* %R, i32 32, i32 16, i1 false)
+ ret void
; CHECK: @test2
; CHECK-NEXT: call void @llvm.memmove{{.*}}(i8* %Q, i8* %P
@@ -48,15 +49,15 @@ define void @test2(i8* %P, i8* %Q) nounwind {
-@x = external global { x86_fp80, x86_fp80 }
+@x = external global %0
-define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
- %x.0 = alloca { x86_fp80, x86_fp80 }
- %x.01 = bitcast { x86_fp80, x86_fp80 }* %x.0 to i8*
- call void @llvm.memcpy.i32( i8* %x.01, i8* bitcast ({ x86_fp80, x86_fp80 }* @x to i8*), i32 32, i32 16 )
- %agg.result2 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8*
- call void @llvm.memcpy.i32( i8* %agg.result2, i8* %x.01, i32 32, i32 16 )
- ret void
+define void @test3(%0* noalias sret %agg.result) nounwind {
+ %x.0 = alloca %0
+ %x.01 = bitcast %0* %x.0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %x.01, i8* bitcast (%0* @x to i8*), i32 32, i32 16, i1 false)
+ %agg.result2 = bitcast %0* %agg.result to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result2, i8* %x.01, i32 32, i32 16, i1 false)
+ ret void
; CHECK: @test3
; CHECK-NEXT: %agg.result2 = bitcast
; CHECK-NEXT: call void @llvm.memcpy
@@ -66,10 +67,10 @@ define void @test3({ x86_fp80, x86_fp80 }* noalias sret %agg.result) nounwind {
; PR8644
define void @test4(i8 *%P) {
- %A = alloca {i32, i32}
- %a = bitcast {i32, i32}* %A to i8*
+ %A = alloca %1
+ %a = bitcast %1* %A to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %P, i64 8, i32 4, i1 false)
- call void @test4a(i8* byval align 1 %a)
+ call void @test4a(i8* byval align 1 %a)
ret void
; CHECK: @test4
; CHECK-NEXT: call void @test4a(
@@ -84,7 +85,6 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32,
declare void @test5a(%struct.S* byval align 16) nounwind ssp
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
; rdar://8713376 - This memcpy can't be eliminated.
define i32 @test5(i32 %x) nounwind ssp {
@@ -128,4 +128,5 @@ entry:
declare i32 @g(%struct.p* byval align 8)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/MemCpyOpt/memmove.ll b/test/Transforms/MemCpyOpt/memmove.ll
index 8babb04..7f1667a 100644
--- a/test/Transforms/MemCpyOpt/memmove.ll
+++ b/test/Transforms/MemCpyOpt/memmove.ll
@@ -4,25 +4,28 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.0"
-declare void @llvm.memmove.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
define i8* @test1(i8* nocapture %src) nounwind {
entry:
; CHECK: @test1
; CHECK: call void @llvm.memcpy
- %call3 = malloc [13 x i8] ; <[13 x i8]*> [#uses=1]
- %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0 ; <i8*> [#uses=2]
- tail call void @llvm.memmove.i64(i8* %call3.sub, i8* %src, i64 13, i32 1)
+ %malloccall = tail call i8* @malloc(i32 trunc (i64 mul nuw (i64 ptrtoint (i8* getelementptr (i8* null, i32 1) to i64), i64 13) to i32))
+ %call3 = bitcast i8* %malloccall to [13 x i8]*
+ %call3.sub = getelementptr inbounds [13 x i8]* %call3, i64 0, i64 0
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %call3.sub, i8* %src, i64 13, i32 1, i1 false)
ret i8* %call3.sub
}
+declare noalias i8* @malloc(i32)
+
define void @test2(i8* %P) nounwind {
entry:
; CHECK: @test2
; CHECK: call void @llvm.memcpy
- %add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1]
- tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 16, i32 1)
+ %add.ptr = getelementptr i8* %P, i64 16
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 16, i32 1, i1 false)
ret void
}
@@ -31,7 +34,7 @@ define void @test3(i8* %P) nounwind {
entry:
; CHECK: @test3
; CHECK: call void @llvm.memmove
- %add.ptr = getelementptr i8* %P, i64 16 ; <i8*> [#uses=1]
- tail call void @llvm.memmove.i64(i8* %P, i8* %add.ptr, i64 17, i32 1)
+ %add.ptr = getelementptr i8* %P, i64 16
+ tail call void @llvm.memmove.p0i8.p0i8.i64(i8* %P, i8* %add.ptr, i64 17, i32 1, i1 false)
ret void
}
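; The memmove -> memcpy rewrites checked above rest on provable non-overlap:
; in @test1 the destination is a fresh malloc that cannot alias %src; in
; @test2 the ranges [%P, %P+16) and [%P+16, %P+32) are disjoint; in @test3
; the length rises to 17, the ranges share a byte, and the memmove must stay.
; Illustrative contrast (arguments abbreviated):
;
;   llvm.memmove(%P, %P+16, 16)   ; disjoint -> becomes memcpy
;   llvm.memmove(%P, %P+16, 17)   ; overlaps -> stays memmove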
diff --git a/test/Transforms/MemCpyOpt/sret.ll b/test/Transforms/MemCpyOpt/sret.ll
index ddfd0fd..8eac7da 100644
--- a/test/Transforms/MemCpyOpt/sret.ll
+++ b/test/Transforms/MemCpyOpt/sret.ll
@@ -3,26 +3,28 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin9"
-define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 8 %z) nounwind {
+%0 = type { x86_fp80, x86_fp80 }
+
+define void @ccosl(%0* noalias sret %agg.result, %0* byval align 8 %z) nounwind {
entry:
- %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3]
- %memtmp = alloca { x86_fp80, x86_fp80 }, align 16 ; <{ x86_fp80, x86_fp80 }*> [#uses=2]
- %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
- %tmp2 = load x86_fp80* %tmp1, align 16 ; <x86_fp80> [#uses=1]
- %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2 ; <x86_fp80> [#uses=1]
- %tmp4 = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 1 ; <x86_fp80*> [#uses=1]
- %real = getelementptr { x86_fp80, x86_fp80 }* %iz, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
- %tmp7 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 0 ; <x86_fp80*> [#uses=1]
- %tmp8 = load x86_fp80* %tmp7, align 16 ; <x86_fp80> [#uses=1]
- store x86_fp80 %tmp3, x86_fp80* %real, align 16
- store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
- call void @ccoshl( { x86_fp80, x86_fp80 }* noalias sret %memtmp, { x86_fp80, x86_fp80 }* byval align 8 %iz ) nounwind
- %memtmp14 = bitcast { x86_fp80, x86_fp80 }* %memtmp to i8* ; <i8*> [#uses=1]
- %agg.result15 = bitcast { x86_fp80, x86_fp80 }* %agg.result to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %agg.result15, i8* %memtmp14, i32 32, i32 16 )
- ret void
+ %iz = alloca %0
+ %memtmp = alloca %0, align 16
+ %tmp1 = getelementptr %0* %z, i32 0, i32 1
+ %tmp2 = load x86_fp80* %tmp1, align 16
+ %tmp3 = fsub x86_fp80 0xK80000000000000000000, %tmp2
+ %tmp4 = getelementptr %0* %iz, i32 0, i32 1
+ %real = getelementptr %0* %iz, i32 0, i32 0
+ %tmp7 = getelementptr %0* %z, i32 0, i32 0
+ %tmp8 = load x86_fp80* %tmp7, align 16
+ store x86_fp80 %tmp3, x86_fp80* %real, align 16
+ store x86_fp80 %tmp8, x86_fp80* %tmp4, align 16
+ call void @ccoshl(%0* noalias sret %memtmp, %0* byval align 8 %iz) nounwind
+ %memtmp14 = bitcast %0* %memtmp to i8*
+ %agg.result15 = bitcast %0* %agg.result to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %agg.result15, i8* %memtmp14, i32 32, i32 16, i1 false)
+ ret void
}
-declare void @ccoshl({ x86_fp80, x86_fp80 }* noalias sret , { x86_fp80, x86_fp80 }* byval ) nounwind
+declare void @ccoshl(%0* noalias sret, %0* byval) nounwind
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
new file mode 100644
index 0000000..a6bbf86
--- /dev/null
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -0,0 +1,1898 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i8* @objc_retain(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+declare void @objc_autoreleasePoolPop(i8*)
+declare void @objc_autoreleasePoolPush()
+declare i8* @objc_retainBlock(i8*)
+
+declare i8* @objc_retainedObject(i8*)
+declare i8* @objc_unretainedObject(i8*)
+declare i8* @objc_unretainedPointer(i8*)
+
+declare void @use_pointer(i8*)
+declare void @callee()
+declare void @callee_fnptr(void ()*)
+declare void @invokee()
+declare i8* @returner()
+
+declare void @llvm.dbg.value(metadata, i64, metadata)
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+; Simple retain+release pair deletion, with some intervening control
+; flow and harmless instructions.
+
+; CHECK: define void @test0(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test0(i32* %x, i1 %p) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ br i1 %p, label %t, label %f
+
+t:
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %return
+
+f:
+ store i32 7, i32* %x
+ br label %return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+}
+
+; Like test0 but the release isn't always executed when the retain is,
+; so the optimization is not safe.
+
+; TODO: Make the objc_release's argument be %0.
+
+; CHECK: define void @test1(
+; CHECK: @objc_retain(i8* %a)
+; CHECK: @objc_release
+; CHECK: }
+define void @test1(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ br i1 %p, label %t, label %f
+
+t:
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %return
+
+f:
+ store i32 7, i32* %x
+ call void @callee()
+ br i1 %q, label %return, label %alt_return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+
+alt_return:
+ ret void
+}
+
+; Like test0 but the pointer is passed to an intervening call,
+; so the optimization is not safe.
+
+; CHECK: define void @test2(
+; CHECK: @objc_retain(i8* %a)
+; CHECK: @objc_release
+; CHECK: }
+define void @test2(i32* %x, i1 %p) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ br i1 %p, label %t, label %f
+
+t:
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %return
+
+f:
+ store i32 7, i32* %x
+ call void @use_pointer(i8* %0)
+ %d = bitcast i32* %x to float*
+ store float 3.0, float* %d
+ br label %return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+}
+
+; Like test0 but the release is in a loop,
+; so the optimization is not safe.
+
+; TODO: For now, assume this can't happen.
+
+; CHECK: define void @test3(
+; TODO: @objc_retain(i8* %a)
+; TODO: @objc_release
+; CHECK: }
+define void @test3(i32* %x, i1* %q) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ br label %loop
+
+loop:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ %j = volatile load i1* %q
+ br i1 %j, label %loop, label %return
+
+return:
+ ret void
+}
+
+; TODO: For now, assume this can't happen.
+
+; Like test0 but the retain is in a loop,
+; so the optimization is not safe.
+
+; CHECK: define void @test4(
+; TODO: @objc_retain(i8* %a)
+; TODO: @objc_release
+; CHECK: }
+define void @test4(i32* %x, i1* %q) nounwind {
+entry:
+ br label %loop
+
+loop:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ %j = volatile load i1* %q
+ br i1 %j, label %loop, label %return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+}
+
+; Like test0 but the pointer is conditionally passed to an intervening call,
+; so the optimization is not safe.
+
+; CHECK: define void @test5(
+; CHECK: @objc_retain(i8*
+; CHECK: @objc_release
+; CHECK: }
+define void @test5(i32* %x, i1 %q, i8* %y) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ %s = select i1 %q, i8* %y, i8* %0
+ call void @use_pointer(i8* %s)
+ store i32 7, i32* %x
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+}
+
+; retain+release pair deletion, where the release happens on two different
+; flow paths.
+
+; CHECK: define void @test6(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test6(i32* %x, i1 %p) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ br i1 %p, label %t, label %f
+
+t:
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ %ct = bitcast i32* %x to i8*
+ call void @objc_release(i8* %ct) nounwind
+ br label %return
+
+f:
+ store i32 7, i32* %x
+ call void @callee()
+ %cf = bitcast i32* %x to i8*
+ call void @objc_release(i8* %cf) nounwind
+ br label %return
+
+return:
+ ret void
+}
+
+; retain+release pair deletion, where the retain happens on two different
+; flow paths.
+
+; CHECK: define void @test7(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test7(i32* %x, i1 %p) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ br i1 %p, label %t, label %f
+
+t:
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %return
+
+f:
+ %1 = call i8* @objc_retain(i8* %a) nounwind
+ store i32 7, i32* %x
+ call void @callee()
+ br label %return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+}
+
+; Like test7, but there's a retain/retainBlock mismatch. Don't delete!
+
+; CHECK: define void @test7b
+; CHECK: t:
+; CHECK: call i8* @objc_retainBlock
+; CHECK: f:
+; CHECK: call i8* @objc_retain
+; CHECK: return:
+; CHECK: call void @objc_release
+; CHECK: }
+define void @test7b(i32* %x, i1 %p) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ br i1 %p, label %t, label %f
+
+t:
+ %0 = call i8* @objc_retainBlock(i8* %a) nounwind
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %return
+
+f:
+ %1 = call i8* @objc_retain(i8* %a) nounwind
+ store i32 7, i32* %x
+ call void @callee()
+ br label %return
+
+return:
+ %c = bitcast i32* %x to i8*
+ call void @objc_release(i8* %c) nounwind
+ ret void
+}
+
+; retain+release pair deletion, where the retain and release both happen on
+; different flow paths. Wild!
+
+; CHECK: define void @test8(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test8(i32* %x, i1 %p, i1 %q) nounwind {
+entry:
+ %a = bitcast i32* %x to i8*
+ br i1 %p, label %t, label %f
+
+t:
+ %0 = call i8* @objc_retain(i8* %a) nounwind
+ store i8 3, i8* %a
+ %b = bitcast i32* %x to float*
+ store float 2.0, float* %b
+ br label %mid
+
+f:
+ %1 = call i8* @objc_retain(i8* %a) nounwind
+ store i32 7, i32* %x
+ br label %mid
+
+mid:
+ br i1 %q, label %u, label %g
+
+u:
+ call void @callee()
+ %cu = bitcast i32* %x to i8*
+ call void @objc_release(i8* %cu) nounwind
+ br label %return
+
+g:
+ %cg = bitcast i32* %x to i8*
+ call void @objc_release(i8* %cg) nounwind
+ br label %return
+
+return:
+ ret void
+}
+
+; Trivial retain+release pair deletion.
+
+; CHECK: define void @test9(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test9(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ call void @objc_release(i8* %0) nounwind
+ ret void
+}
+
+; Retain+release pair, but on an unknown pointer relationship. Don't delete!
+
+; CHECK: define void @test9b
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_release(i8* %s)
+; CHECK: }
+define void @test9b(i8* %x, i1 %j, i8* %p) nounwind {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ %s = select i1 %j, i8* %x, i8* %p
+ call void @objc_release(i8* %s) nounwind
+ ret void
+}
+
+; Trivial retain+release pair with intervening calls - don't delete!
+
+; CHECK: define void @test10(
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @use_pointer
+; CHECK: @objc_release
+; CHECK: }
+define void @test10(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ call void @use_pointer(i8* %x)
+ call void @objc_release(i8* %0) nounwind
+ ret void
+}
+
+; Trivial retain+autorelease pair. Don't delete!
+; Also, add a tail keyword, since objc_retain can never be passed
+; a stack argument.
+
+; CHECK: define void @test11(
+; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: }
+define void @test11(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ call i8* @objc_autorelease(i8* %0) nounwind
+ call void @use_pointer(i8* %x)
+ ret void
+}
+
+; Same as test11 but with no use_pointer call. Delete the pair!
+
+; CHECK: define void @test11a(
+; CHECK: entry:
+; CHECK-NEXT: ret void
+; CHECK: }
+define void @test11a(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ call i8* @objc_autorelease(i8* %0) nounwind
+ ret void
+}
+
+; Same as test11 but the value is returned. Do an RV optimization.
+
+; CHECK: define i8* @test11b(
+; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+; CHECK: }
+define i8* @test11b(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ call i8* @objc_autorelease(i8* %0) nounwind
+ ret i8* %x
+}
+
+; Trivial retain,release pair with intervening call, but it's dominated
+; by another retain - delete!
+
+; CHECK: define void @test12(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_retain(i8* %x)
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test12(i8* %x, i64 %n) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ call void @use_pointer(i8* %x)
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Trivial retain,autorelease pair. Don't delete!
+
+; CHECK: define void @test13(
+; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK: @use_pointer(i8* %x)
+; CHECK: tail call i8* @objc_autorelease(i8* %x) nounwind
+; CHECK: }
+define void @test13(i8* %x, i64 %n) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ call i8* @objc_autorelease(i8* %x) nounwind
+ ret void
+}
+
+; Delete the retain+release pair.
+
+; CHECK: define void @test13b
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_retain(i8* %x)
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: ret void
+define void @test13b(i8* %x, i64 %n) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ call void @use_pointer(i8* %x)
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Don't delete the retain+release pair because there's an
+; autoreleasePoolPop in the way.
+
+; CHECK: define void @test13c
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @objc_autoreleasePoolPop
+; CHECK: @objc_retain(i8* %x)
+; CHECK: @use_pointer
+; CHECK: @objc_release
+; CHECK: }
+define void @test13c(i8* %x, i64 %n) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @objc_autoreleasePoolPop(i8* undef)
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ call void @use_pointer(i8* %x)
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Like test13c, but with an autoreleasePoolPush in the way instead, which
+; doesn't matter.
+
+; CHECK: define void @test13d
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_retain(i8* %x)
+; CHECK-NEXT: @objc_autoreleasePoolPush
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: ret void
+define void @test13d(i8* %x, i64 %n) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @objc_autoreleasePoolPush()
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ call void @use_pointer(i8* %x)
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Trivial retain,release pair with intervening call, but it's post-dominated
+; by another release - delete!
+
+; CHECK: define void @test14(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: @objc_release
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test14(i8* %x, i64 %n) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ call void @use_pointer(i8* %x)
+ call void @objc_release(i8* %x) nounwind
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Trivial retain,autorelease pair with intervening call, but it's post-dominated
+; by another release. Don't delete anything.
+
+; CHECK: define void @test15(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_retain(i8* %x)
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: @objc_autorelease(i8* %x)
+; CHECK-NEXT: @objc_release
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test15(i8* %x, i64 %n) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ call i8* @objc_autorelease(i8* %x) nounwind
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Trivial retain,autorelease pair, post-dominated
+; by another release. Delete the retain and release.
+
+; CHECK: define void @test15b
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_autorelease
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test15b(i8* %x, i64 %n) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ call i8* @objc_autorelease(i8* %x) nounwind
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Retain+release pairs in diamonds, all dominated by a retain.
+
+; CHECK: define void @test16(
+; CHECK: @objc_retain(i8* %x)
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test16(i1 %a, i1 %b, i8* %x) {
+entry:
+ call i8* @objc_retain(i8* %x) nounwind
+ br i1 %a, label %red, label %orange
+
+red:
+ call i8* @objc_retain(i8* %x) nounwind
+ br label %yellow
+
+orange:
+ call i8* @objc_retain(i8* %x) nounwind
+ br label %yellow
+
+yellow:
+ call void @use_pointer(i8* %x)
+ call void @use_pointer(i8* %x)
+ br i1 %b, label %green, label %blue
+
+green:
+ call void @objc_release(i8* %x) nounwind
+ br label %purple
+
+blue:
+ call void @objc_release(i8* %x) nounwind
+ br label %purple
+
+purple:
+ ret void
+}
+
+; Retain+release pairs in diamonds, all post-dominated by a release.
+
+; CHECK: define void @test17(
+; CHECK-NOT: @objc_
+; CHECK: purple:
+; CHECK: @objc_release
+; CHECK: }
+define void @test17(i1 %a, i1 %b, i8* %x) {
+entry:
+ br i1 %a, label %red, label %orange
+
+red:
+ call i8* @objc_retain(i8* %x) nounwind
+ br label %yellow
+
+orange:
+ call i8* @objc_retain(i8* %x) nounwind
+ br label %yellow
+
+yellow:
+ call void @use_pointer(i8* %x)
+ call void @use_pointer(i8* %x)
+ br i1 %b, label %green, label %blue
+
+green:
+ call void @objc_release(i8* %x) nounwind
+ br label %purple
+
+blue:
+ call void @objc_release(i8* %x) nounwind
+ br label %purple
+
+purple:
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Delete no-ops.
+
+; CHECK: define void @test18(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test18() {
+ call i8* @objc_retain(i8* null)
+ call void @objc_release(i8* null)
+ call i8* @objc_autorelease(i8* null)
+ ret void
+}
+
+; Delete no-ops where undef can be assumed to be null.
+
+; CHECK: define void @test18b
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test18b() {
+ call i8* @objc_retain(i8* undef)
+ call void @objc_release(i8* undef)
+ call i8* @objc_autorelease(i8* undef)
+ ret void
+}
+
+; Replace uses of arguments with uses of return values, to reduce
+; register pressure.
+
+; CHECK: define void @test19(i32* %y) {
+; CHECK: %z = bitcast i32* %y to i8*
+; CHECK: %0 = bitcast i32* %y to i8*
+; CHECK: %1 = tail call i8* @objc_retain(i8* %0)
+; CHECK: call void @use_pointer(i8* %z)
+; CHECK: call void @use_pointer(i8* %z)
+; CHECK: %2 = bitcast i32* %y to i8*
+; CHECK: call void @objc_release(i8* %2)
+; CHECK: ret void
+; CHECK: }
+define void @test19(i32* %y) {
+entry:
+ %x = bitcast i32* %y to i8*
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ %z = bitcast i32* %y to i8*
+ call void @use_pointer(i8* %z)
+ call void @use_pointer(i8* %z)
+ call void @objc_release(i8* %x)
+ ret void
+}
+
+; Bitcast insertion
+
+; CHECK: define void @test20(
+; CHECK: %tmp1 = tail call i8* @objc_retain(i8* %tmp) nounwind
+; CHECK-NEXT: invoke
+define void @test20(double* %self) {
+if.then12:
+ %tmp = bitcast double* %self to i8*
+ %tmp1 = call i8* @objc_retain(i8* %tmp) nounwind
+ invoke void @invokee()
+ to label %invoke.cont23 unwind label %lpad20
+
+invoke.cont23: ; preds = %if.then12
+ invoke void @invokee()
+ to label %if.end unwind label %lpad20
+
+lpad20: ; preds = %invoke.cont23, %if.then12
+ %tmp502 = phi double* [ undef, %invoke.cont23 ], [ %self, %if.then12 ]
+ unreachable
+
+if.end: ; preds = %invoke.cont23
+ ret void
+}
+
+; Delete a redundant retain,autorelease when forwarding a call result
+; directly to a return value.
+
+; CHECK: define i8* @test21(
+; CHECK: call i8* @returner()
+; CHECK-NEXT: ret i8* %call
+define i8* @test21() {
+entry:
+ %call = call i8* @returner()
+ %0 = call i8* @objc_retain(i8* %call) nounwind
+ %1 = call i8* @objc_autorelease(i8* %0) nounwind
+ ret i8* %1
+}
+
+; Move an objc call up through a phi that has null operands.
+
+; CHECK: define void @test22(
+; CHECK: B:
+; CHECK: %1 = bitcast double* %p to i8*
+; CHECK: call void @objc_release(i8* %1)
+; CHECK: br label %C
+; CHECK: C: ; preds = %B, %A
+; CHECK-NOT: @objc_release
+; CHECK: }
+define void @test22(double* %p, i1 %a) {
+ br i1 %a, label %A, label %B
+A:
+ br label %C
+B:
+ br label %C
+C:
+ %h = phi double* [ null, %A ], [ %p, %B ]
+ %c = bitcast double* %h to i8*
+ call void @objc_release(i8* %c)
+ ret void
+}
+
+; Optimize objc_retainBlock.
+
+; CHECK: define void @test23(
+; CHECK-NOT: @objc_
+; CHECK: }
+%block0 = type { i64, i64, i8*, i8* }
+%block1 = type { i8**, i32, i32, i32 (%struct.__block_literal_1*)*, %block0* }
+%struct.__block_descriptor = type { i64, i64 }
+%struct.__block_literal_1 = type { i8**, i32, i32, i8**, %struct.__block_descriptor* }
+@__block_holder_tmp_1 = external constant %block1
+define void @test23() {
+entry:
+ %0 = call i8* @objc_retainBlock(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+ call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+ call void @bar(i32 ()* bitcast (%block1* @__block_holder_tmp_1 to i32 ()*))
+ call void @objc_release(i8* bitcast (%block1* @__block_holder_tmp_1 to i8*)) nounwind
+ ret void
+}
+
+; Don't optimize objc_retainBlock.
+
+; CHECK: define void @test23b
+; CHECK: @objc_retainBlock
+; CHECK: @objc_release
+; CHECK: }
+define void @test23b(i8* %p) {
+entry:
+ %0 = call i8* @objc_retainBlock(i8* %p) nounwind
+ call void @use_pointer(i8* %p)
+ call void @use_pointer(i8* %p)
+ call void @objc_release(i8* %p) nounwind
+ ret void
+}
+
+; Any call can decrement a retain count.
+
+; CHECK: define void @test24(
+; CHECK: @objc_retain(i8* %a)
+; CHECK: @objc_release
+; CHECK: }
+define void @test24(i8* %r, i8* %a) {
+ call i8* @objc_retain(i8* %a)
+ call void @use_pointer(i8* %r)
+ %q = load i8* %a
+ call void @objc_release(i8* %a)
+ ret void
+}
+
+; Don't move a retain/release pair if the release can be moved
+; but the retain can't be moved to balance it.
+
+; CHECK: define void @test25(
+; CHECK: entry:
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: true:
+; CHECK: done:
+; CHECK: call void @objc_release(i8* %p)
+; CHECK: }
+define void @test25(i8* %p, i1 %x) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ call void @callee()
+ br i1 %x, label %true, label %done
+
+true:
+ store i8 0, i8* %p
+ br label %done
+
+done:
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Don't move a retain/release pair if the retain can be moved
+; but the release can't be moved to balance it.
+
+; CHECK: define void @test26(
+; CHECK: entry:
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: true:
+; CHECK: done:
+; CHECK: call void @objc_release(i8* %p)
+; CHECK: }
+define void @test26(i8* %p, i1 %x) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ call void @callee()
+ br label %done
+
+done:
+ store i8 0, i8* %p
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Don't sink the retain,release into the loop.
+
+; CHECK: define void @test27(
+; CHECK: entry:
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: loop:
+; CHECK-NOT: @objc_
+; CHECK: done:
+; CHECK: call void @objc_release
+; CHECK: }
+define void @test27(i8* %p, i1 %x, i1 %y) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %loop, label %done
+
+loop:
+ call void @callee()
+ store i8 0, i8* %p
+ br i1 %y, label %done, label %loop
+
+done:
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Trivial code motion case: Triangle.
+
+; CHECK: define void @test28(
+; CHECK-NOT: @objc_
+; CHECK: true:
+; CHECK: call i8* @objc_retain(
+; CHECK: call void @callee()
+; CHECK: store
+; CHECK: call void @objc_release
+; CHECK: done:
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test28(i8* %p, i1 %x) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ call void @callee()
+ store i8 0, i8* %p
+ br label %done
+
+done:
+ call void @objc_release(i8* %p), !clang.imprecise_release !0
+ ret void
+}
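+
+; Note: !clang.imprecise_release (here the empty metadata node !0, defined
+; at the end of this file) marks a release that need not be precise, which
+; permits moving it past unrelated memory references; test28b and test28c
+; below contrast the behavior without and with the metadata.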
+
+; Trivial code motion case: Triangle, but no metadata. Don't move past
+; unrelated memory references!
+
+; CHECK: define void @test28b
+; CHECK: call i8* @objc_retain(
+; CHECK: true:
+; CHECK-NOT: @objc_
+; CHECK: call void @callee()
+; CHECK-NOT: @objc_
+; CHECK: store
+; CHECK-NOT: @objc_
+; CHECK: done:
+; CHECK: @objc_release
+; CHECK: }
+define void @test28b(i8* %p, i1 %x, i8* noalias %t) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ call void @callee()
+ store i8 0, i8* %p
+ br label %done
+
+done:
+ store i8 0, i8* %t
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Trivial code motion case: Triangle, with metadata. Do move past
+; unrelated memory references! And preserve the metadata.
+
+; CHECK: define void @test28c
+; CHECK-NOT: @objc_
+; CHECK: true:
+; CHECK: call i8* @objc_retain(
+; CHECK: call void @callee()
+; CHECK: store
+; CHECK: call void @objc_release(i8* %p) nounwind, !clang.imprecise_release
+; CHECK: done:
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test28c(i8* %p, i1 %x, i8* noalias %t) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ call void @callee()
+ store i8 0, i8* %p
+ br label %done
+
+done:
+ store i8 0, i8* %t
+ call void @objc_release(i8* %p), !clang.imprecise_release !0
+ ret void
+}
+
+; Like test28, but with two releases.
+
+; CHECK: define void @test29(
+; CHECK-NOT: @objc_
+; CHECK: true:
+; CHECK: call i8* @objc_retain(
+; CHECK: call void @callee()
+; CHECK: store
+; CHECK: call void @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: done:
+; CHECK-NOT: @objc_
+; CHECK: ohno:
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test29(i8* %p, i1 %x, i1 %y) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ call void @callee()
+ store i8 0, i8* %p
+ br i1 %y, label %done, label %ohno
+
+done:
+ call void @objc_release(i8* %p)
+ ret void
+
+ohno:
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Basic case with the use and call in a diamond
+; with an extra release.
+
+; CHECK: define void @test30(
+; CHECK-NOT: @objc_
+; CHECK: true:
+; CHECK: call i8* @objc_retain(
+; CHECK: call void @callee()
+; CHECK: store
+; CHECK: call void @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: false:
+; CHECK-NOT: @objc_
+; CHECK: done:
+; CHECK-NOT: @objc_
+; CHECK: ohno:
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test30(i8* %p, i1 %x, i1 %y, i1 %z) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %false
+
+true:
+ call void @callee()
+ store i8 0, i8* %p
+ br i1 %y, label %done, label %ohno
+
+false:
+ br i1 %z, label %done, label %ohno
+
+done:
+ call void @objc_release(i8* %p)
+ ret void
+
+ohno:
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Basic case with a mergeable release.
+
+; CHECK: define void @test31(
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: call void @callee()
+; CHECK: store
+; CHECK: call void @objc_release
+; CHECK-NOT: @objc_release
+; CHECK: true:
+; CHECK-NOT: @objc_release
+; CHECK: false:
+; CHECK-NOT: @objc_release
+; CHECK: ret void
+; CHECK-NOT: @objc_release
+; CHECK: }
+define void @test31(i8* %p, i1 %x) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ call void @callee()
+ store i8 0, i8* %p
+ br i1 %x, label %true, label %false
+true:
+ call void @objc_release(i8* %p)
+ ret void
+false:
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Don't consider bitcasts or getelementptrs direct uses.
+
+; CHECK: define void @test32(
+; CHECK-NOT: @objc_
+; CHECK: true:
+; CHECK: call i8* @objc_retain(
+; CHECK: call void @callee()
+; CHECK: store
+; CHECK: call void @objc_release
+; CHECK: done:
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test32(i8* %p, i1 %x) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ call void @callee()
+ store i8 0, i8* %p
+ br label %done
+
+done:
+ %g = bitcast i8* %p to i8*
+ %h = getelementptr i8* %g, i64 0
+ call void @objc_release(i8* %g)
+ ret void
+}
+
+; Do consider icmps to be direct uses.
+
+; CHECK: define void @test33(
+; CHECK-NOT: @objc_
+; CHECK: true:
+; CHECK: call i8* @objc_retain(
+; CHECK: call void @callee()
+; CHECK: icmp
+; CHECK: call void @objc_release
+; CHECK: done:
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test33(i8* %p, i1 %x, i8* %y) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ call void @callee()
+ %v = icmp eq i8* %p, %y
+ br label %done
+
+done:
+ %g = bitcast i8* %p to i8*
+ %h = getelementptr i8* %g, i64 0
+ call void @objc_release(i8* %g)
+ ret void
+}
+
+; Delete retain,release if there's just a possible dec.
+
+; CHECK: define void @test34(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test34(i8* %p, i1 %x, i8* %y) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ call void @callee()
+ br label %done
+
+done:
+ %g = bitcast i8* %p to i8*
+ %h = getelementptr i8* %g, i64 0
+ call void @objc_release(i8* %g)
+ ret void
+}
+
+; Delete retain,release if there's just a use.
+
+; CHECK: define void @test35(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test35(i8* %p, i1 %x, i8* %y) {
+entry:
+ %f0 = call i8* @objc_retain(i8* %p)
+ br i1 %x, label %true, label %done
+
+true:
+ %v = icmp eq i8* %p, %y
+ br label %done
+
+done:
+ %g = bitcast i8* %p to i8*
+ %h = getelementptr i8* %g, i64 0
+ call void @objc_release(i8* %g)
+ ret void
+}
+
+; Delete a retain,release if there's no actual use.
+
+; CHECK: define void @test36(
+; CHECK-NOT: @objc_
+; CHECK: call void @callee()
+; CHECK-NOT: @objc_
+; CHECK: call void @callee()
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test36(i8* %p) {
+entry:
+ call i8* @objc_retain(i8* %p)
+ call void @callee()
+ call void @callee()
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Like test36, but with metadata.
+
+; CHECK: define void @test37(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test37(i8* %p) {
+entry:
+ call i8* @objc_retain(i8* %p)
+ call void @callee()
+ call void @callee()
+ call void @objc_release(i8* %p), !clang.imprecise_release !0
+ ret void
+}
+
+; Be aggressive about analyzing phis to eliminate possible uses.
+
+; CHECK: define void @test38(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test38(i8* %p, i1 %u, i1 %m, i8* %z, i8* %y, i8* %x, i8* %w) {
+entry:
+ call i8* @objc_retain(i8* %p)
+ br i1 %u, label %true, label %false
+true:
+ br i1 %m, label %a, label %b
+false:
+ br i1 %m, label %c, label %d
+a:
+ br label %e
+b:
+ br label %e
+c:
+ br label %f
+d:
+ br label %f
+e:
+ %j = phi i8* [ %z, %a ], [ %y, %b ]
+ br label %g
+f:
+ %k = phi i8* [ %w, %c ], [ %x, %d ]
+ br label %g
+g:
+ %h = phi i8* [ %j, %e ], [ %k, %f ]
+ call void @use_pointer(i8* %h)
+ call void @objc_release(i8* %p), !clang.imprecise_release !0
+ ret void
+}
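+
+; For illustration: every phi input above (%z, %y, %x, %w) is a pointer
+; unrelated to %p, so the phi web carries no use of the retained pointer
+; and the retain+release pair around it is removable.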
+
+; Delete retain,release pairs around loops.
+
+; CHECK: define void @test39(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test39(i8* %p) {
+entry:
+ %0 = call i8* @objc_retain(i8* %p)
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ br i1 undef, label %loop, label %exit
+
+exit: ; preds = %loop
+ call void @objc_release(i8* %0), !clang.imprecise_release !0
+ ret void
+}
+
+; Delete retain,release pairs around loops containing uses.
+
+; CHECK: define void @test39b(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test39b(i8* %p) {
+entry:
+ %0 = call i8* @objc_retain(i8* %p)
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ store i8 0, i8* %0
+ br i1 undef, label %loop, label %exit
+
+exit: ; preds = %loop
+ call void @objc_release(i8* %0), !clang.imprecise_release !0
+ ret void
+}
+
+; Delete retain,release pairs around loops containing potential decrements.
+
+; CHECK: define void @test39c(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test39c(i8* %p) {
+entry:
+ %0 = call i8* @objc_retain(i8* %p)
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call void @use_pointer(i8* %0)
+ br i1 undef, label %loop, label %exit
+
+exit: ; preds = %loop
+ call void @objc_release(i8* %0), !clang.imprecise_release !0
+ ret void
+}
+
+; Delete retain,release pairs around loops even if
+; the successors are in a different order.
+
+; CHECK: define void @test40(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test40(i8* %p) {
+entry:
+ %0 = call i8* @objc_retain(i8* %p)
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ call void @use_pointer(i8* %0)
+ br i1 undef, label %exit, label %loop
+
+exit: ; preds = %loop
+ call void @objc_release(i8* %0), !clang.imprecise_release !0
+ ret void
+}
+
+; Do the known-incremented retain+release elimination even if the pointer
+; is also autoreleased.
+
+; CHECK: define void @test42(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
+; CHECK-NEXT: call void @use_pointer(i8* %p)
+; CHECK-NEXT: call void @use_pointer(i8* %p)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test42(i8* %p) {
+entry:
+ call i8* @objc_retain(i8* %p)
+ call i8* @objc_autorelease(i8* %p)
+ call i8* @objc_retain(i8* %p)
+ call void @use_pointer(i8* %p)
+ call void @use_pointer(i8* %p)
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Don't do the known-incremented retain+release elimination if the pointer
+; is autoreleased and there's an autoreleasePoolPop.
+
+; CHECK: define void @test43(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
+; CHECK-NEXT: call i8* @objc_retain
+; CHECK-NEXT: call void @use_pointer(i8* %p)
+; CHECK-NEXT: call void @use_pointer(i8* %p)
+; CHECK-NEXT: call void @objc_autoreleasePoolPop(i8* undef)
+; CHECK-NEXT: call void @objc_release
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test43(i8* %p) {
+entry:
+ call i8* @objc_retain(i8* %p)
+ call i8* @objc_autorelease(i8* %p)
+ call i8* @objc_retain(i8* %p)
+ call void @use_pointer(i8* %p)
+ call void @use_pointer(i8* %p)
+ call void @objc_autoreleasePoolPop(i8* undef)
+ call void @objc_release(i8* %p)
+ ret void
+}
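+
+; A sketch of the reasoning (inferred from the contrast with test42 and
+; test43b): objc_autoreleasePoolPop may run the pending autorelease of %p,
+; decrementing its count, so the inner retain+release pair must be kept to
+; cover the use_pointer calls.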
+
+; Do the known-incremented retain+release elimination if the pointer is
+; autoreleased and there's an autoreleasePoolPush.
+
+; CHECK: define void @test43b
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
+; CHECK-NEXT: call void @use_pointer(i8* %p)
+; CHECK-NEXT: call void @use_pointer(i8* %p)
+; CHECK-NEXT: call void @objc_autoreleasePoolPush()
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test43b(i8* %p) {
+entry:
+ call i8* @objc_retain(i8* %p)
+ call i8* @objc_autorelease(i8* %p)
+ call i8* @objc_retain(i8* %p)
+ call void @use_pointer(i8* %p)
+ call void @use_pointer(i8* %p)
+ call void @objc_autoreleasePoolPush()
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Do retain+release elimination for non-provenance pointers.
+
+; CHECK: define void @test44(
+; CHECK-NOT: objc_
+; CHECK: }
+define void @test44(i8** %pp) {
+ %p = load i8** %pp
+ %q = call i8* @objc_retain(i8* %p)
+ call void @objc_release(i8* %q)
+ ret void
+}
+
+; Don't delete retain+release with an unknown-provenance
+; may-alias objc_release between them.
+
+; CHECK: define void @test45(
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: call void @objc_release(i8* %q)
+; CHECK: call void @use_pointer(i8* %p)
+; CHECK: call void @objc_release(i8* %p)
+define void @test45(i8** %pp, i8** %qq) {
+ %p = load i8** %pp
+ %q = load i8** %qq
+ call i8* @objc_retain(i8* %p)
+ call void @objc_release(i8* %q)
+ call void @use_pointer(i8* %p)
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Don't delete retain and autorelease here.
+
+; CHECK: define void @test46(
+; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: true:
+; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+define void @test46(i8* %p, i1 %a) {
+entry:
+ call i8* @objc_retain(i8* %p)
+ br i1 %a, label %true, label %false
+
+true:
+ call i8* @objc_autorelease(i8* %p)
+ call void @use_pointer(i8* %p)
+ ret void
+
+false:
+ ret void
+}
+
+; Delete no-op cast calls.
+
+; CHECK: define i8* @test47(
+; CHECK-NOT: call
+; CHECK: ret i8* %p
+define i8* @test47(i8* %p) nounwind {
+ %x = call i8* @objc_retainedObject(i8* %p)
+ ret i8* %x
+}
+
+; Delete no-op cast calls.
+
+; CHECK: define i8* @test48(
+; CHECK-NOT: call
+; CHECK: ret i8* %p
+define i8* @test48(i8* %p) nounwind {
+ %x = call i8* @objc_unretainedObject(i8* %p)
+ ret i8* %x
+}
+
+; Delete no-op cast calls.
+
+; CHECK: define i8* @test49(
+; CHECK-NOT: call
+; CHECK: ret i8* %p
+define i8* @test49(i8* %p) nounwind {
+ %x = call i8* @objc_unretainedPointer(i8* %p)
+ ret i8* %x
+}
+
+; Do delete retain+release with intervening stores of the
+; address value.
+
+; CHECK: define void @test50(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test50(i8* %p, i8** %pp) {
+ call i8* @objc_retain(i8* %p)
+ call void @callee()
+ store i8* %p, i8** %pp
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Don't delete retain+release with intervening stores through the
+; address value.
+
+; CHECK: define void @test51(
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: call void @objc_release(i8* %p)
+define void @test51(i8* %p) {
+ call i8* @objc_retain(i8* %p)
+ call void @callee()
+ store i8 0, i8* %p
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Don't delete retain+release with intervening use of a pointer of
+; unknown provenance.
+
+; CHECK: define void @test52(
+; CHECK: call i8* @objc_retain
+; CHECK: call void @callee()
+; CHECK: call void @use_pointer(i8* %z)
+; CHECK: call void @objc_release
+define void @test52(i8** %zz, i8** %pp) {
+ %p = load i8** %pp
+ %1 = call i8* @objc_retain(i8* %p)
+ call void @callee()
+ %z = load i8** %zz
+ call void @use_pointer(i8* %z)
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Like test52, but the pointer has function type, so it's assumed
+; not to be reference counted.
+
+; CHECK: define void @test53(
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test53(void ()** %zz, i8** %pp) {
+ %p = load i8** %pp
+ %1 = call i8* @objc_retain(i8* %p)
+ call void @callee()
+ %z = load void ()** %zz
+ call void @callee_fnptr(void ()* %z)
+ call void @objc_release(i8* %p)
+ ret void
+}
+
+; Convert autorelease to release if the value is unused.
+
+; CHECK: define void @test54(
+; CHECK: call i8* @returner()
+; CHECK-NEXT: call void @objc_release(i8* %t) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: ret void
+define void @test54() {
+ %t = call i8* @returner()
+ call i8* @objc_autorelease(i8* %t)
+ ret void
+}
+
+; Nested retain+release pairs. Delete them both.
+
+; CHECK: define void @test55(
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test55(i8* %x) {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ %1 = call i8* @objc_retain(i8* %x) nounwind
+ call void @objc_release(i8* %x) nounwind
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Nested retain+release pairs where the inner pair depends
+; on the outer pair to be removed, and then the outer pair
+; can be partially eliminated. Plus an extra outer pair to
+; eliminate, for fun.
+
+; CHECK: define void @test56(
+; CHECK-NOT: @objc
+; CHECK: if.then:
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %x) nounwind
+; CHECK-NEXT: tail call void @use_pointer(i8* %x)
+; CHECK-NEXT: tail call void @use_pointer(i8* %x)
+; CHECK-NEXT: tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+; CHECK-NEXT: br label %if.end
+; CHECK-NOT: @objc
+; CHECK: }
+define void @test56(i8* %x, i32 %n) {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %x) nounwind
+ %1 = tail call i8* @objc_retain(i8* %0) nounwind
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %2 = tail call i8* @objc_retain(i8* %1) nounwind
+ tail call void @use_pointer(i8* %2)
+ tail call void @use_pointer(i8* %2)
+ tail call void @objc_release(i8* %2) nounwind, !clang.imprecise_release !0
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ tail call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+ tail call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+declare void @bar(i32 ()*)
+
+; A few real-world testcases.
+
+@.str4 = private unnamed_addr constant [33 x i8] c"-[A z] = { %f, %f, { %f, %f } }\0A\00"
+@"OBJC_IVAR_$_A.myZ" = global i64 20, section "__DATA, __objc_const", align 8
+declare i32 @printf(i8* nocapture, ...) nounwind
+declare i32 @puts(i8* nocapture) nounwind
+@str = internal constant [16 x i8] c"-[ Top0 _getX ]\00"
+
+; CHECK: @"\01-[A z]"
+; CHECK-NOT: @objc_
+; CHECK: }
+
+define {<2 x float>, <2 x float>} @"\01-[A z]"({}* %self, i8* nocapture %_cmd) nounwind {
+invoke.cont:
+ %0 = bitcast {}* %self to i8*
+ %1 = tail call i8* @objc_retain(i8* %0) nounwind
+ tail call void @llvm.dbg.value(metadata !{{}* %self}, i64 0, metadata !0)
+ tail call void @llvm.dbg.value(metadata !{{}* %self}, i64 0, metadata !0)
+ %ivar = load i64* @"OBJC_IVAR_$_A.myZ", align 8
+ %add.ptr = getelementptr i8* %0, i64 %ivar
+ %tmp1 = bitcast i8* %add.ptr to float*
+ %tmp2 = load float* %tmp1, align 4
+ %conv = fpext float %tmp2 to double
+ %add.ptr.sum = add i64 %ivar, 4
+ %tmp6 = getelementptr inbounds i8* %0, i64 %add.ptr.sum
+ %2 = bitcast i8* %tmp6 to float*
+ %tmp7 = load float* %2, align 4
+ %conv8 = fpext float %tmp7 to double
+ %add.ptr.sum36 = add i64 %ivar, 8
+ %tmp12 = getelementptr inbounds i8* %0, i64 %add.ptr.sum36
+ %arrayidx = bitcast i8* %tmp12 to float*
+ %tmp13 = load float* %arrayidx, align 4
+ %conv14 = fpext float %tmp13 to double
+ %tmp12.sum = add i64 %ivar, 12
+ %arrayidx19 = getelementptr inbounds i8* %0, i64 %tmp12.sum
+ %3 = bitcast i8* %arrayidx19 to float*
+ %tmp20 = load float* %3, align 4
+ %conv21 = fpext float %tmp20 to double
+ %call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([33 x i8]* @.str4, i64 0, i64 0), double %conv, double %conv8, double %conv14, double %conv21)
+ %ivar23 = load i64* @"OBJC_IVAR_$_A.myZ", align 8
+ %add.ptr24 = getelementptr i8* %0, i64 %ivar23
+ %4 = bitcast i8* %add.ptr24 to i128*
+ %srcval = load i128* %4, align 4
+ tail call void @objc_release(i8* %0) nounwind
+ %tmp29 = trunc i128 %srcval to i64
+ %tmp30 = bitcast i64 %tmp29 to <2 x float>
+ %tmp31 = insertvalue {<2 x float>, <2 x float>} undef, <2 x float> %tmp30, 0
+ %tmp32 = lshr i128 %srcval, 64
+ %tmp33 = trunc i128 %tmp32 to i64
+ %tmp34 = bitcast i64 %tmp33 to <2 x float>
+ %tmp35 = insertvalue {<2 x float>, <2 x float>} %tmp31, <2 x float> %tmp34, 1
+ ret {<2 x float>, <2 x float>} %tmp35
+}
+
+; CHECK: @"\01-[Top0 _getX]"
+; CHECK-NOT: @objc_
+; CHECK: }
+
+define i32 @"\01-[Top0 _getX]"({}* %self, i8* nocapture %_cmd) nounwind {
+invoke.cont:
+ %0 = bitcast {}* %self to i8*
+ %1 = tail call i8* @objc_retain(i8* %0) nounwind
+ %puts = tail call i32 @puts(i8* getelementptr inbounds ([16 x i8]* @str, i64 0, i64 0))
+ tail call void @objc_release(i8* %0) nounwind
+ ret i32 0
+}
+
+@"\01L_OBJC_METH_VAR_NAME_" = internal global [5 x i8] c"frob\00", section "__TEXT,__cstring,cstring_literals", align 1@"\01L_OBJC_SELECTOR_REFERENCES_" = internal global i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i64 0, i64 0), section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
+@llvm.used = appending global [3 x i8*] [i8* getelementptr inbounds ([5 x i8]* @"\01L_OBJC_METH_VAR_NAME_", i32 0, i32 0), i8* bitcast (i8** @"\01L_OBJC_SELECTOR_REFERENCES_" to i8*), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+
+; A simple loop. Eliminate the retain and release inside of it!
+
+; CHECK: define void @loop
+; CHECK: for.body:
+; CHECK-NOT: @objc_
+; CHECK: @objc_msgSend
+; CHECK-NOT: @objc_
+; CHECK: for.end:
+define void @loop(i8* %x, i64 %n) {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %x) nounwind
+ %cmp9 = icmp sgt i64 %n, 0
+ br i1 %cmp9, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %i.010 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
+ %1 = tail call i8* @objc_retain(i8* %x) nounwind
+ %tmp5 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_", align 8
+ %call = tail call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %1, i8* %tmp5)
+ tail call void @objc_release(i8* %1) nounwind, !clang.imprecise_release !0
+ %inc = add nsw i64 %i.010, 1
+ %exitcond = icmp eq i64 %inc, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ tail call void @objc_release(i8* %x) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; ObjCARCOpt can delete the retain,release on self.
+
+; CHECK: define void @TextEditTest
+; CHECK-NOT: call i8* @objc_retain(i8* %tmp7)
+; CHECK: }
+
+%0 = type { i8* (i8*, %struct._message_ref_t*, ...)*, i8* }
+%1 = type opaque
+%2 = type opaque
+%3 = type opaque
+%4 = type opaque
+%5 = type opaque
+%struct.NSConstantString = type { i32*, i32, i8*, i64 }
+%struct._NSRange = type { i64, i64 }
+%struct.__CFString = type opaque
+%struct.__method_list_t = type { i32, i32, [0 x %struct._objc_method] }
+%struct._class_ro_t = type { i32, i32, i32, i8*, i8*, %struct.__method_list_t*, %struct._objc_protocol_list*, %struct._ivar_list_t*, i8*, %struct._prop_list_t* }
+%struct._class_t = type { %struct._class_t*, %struct._class_t*, %struct._objc_cache*, i8* (i8*, i8*)**, %struct._class_ro_t* }
+%struct._ivar_list_t = type { i32, i32, [0 x %struct._ivar_t] }
+%struct._ivar_t = type { i64*, i8*, i8*, i32, i32 }
+%struct._message_ref_t = type { i8*, i8* }
+%struct._objc_cache = type opaque
+%struct._objc_method = type { i8*, i8*, i8* }
+%struct._objc_protocol_list = type { i64, [0 x %struct._protocol_t*] }
+%struct._prop_list_t = type { i32, i32, [0 x %struct._message_ref_t] }
+%struct._protocol_t = type { i8*, i8*, %struct._objc_protocol_list*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct.__method_list_t*, %struct._prop_list_t*, i32, i32 }
+
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_17" = external hidden global %struct._class_t*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@kUTTypePlainText = external constant %struct.__CFString*
+@"\01L_OBJC_SELECTOR_REFERENCES_19" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_21" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_23" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_25" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_26" = external hidden global %struct._class_t*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_SELECTOR_REFERENCES_28" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_29" = external hidden global %struct._class_t*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_SELECTOR_REFERENCES_31" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_33" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_35" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_37" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_38" = external hidden global %struct._class_t*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_SELECTOR_REFERENCES_40" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_42" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@_unnamed_cfstring_44 = external hidden constant %struct.NSConstantString, section "__DATA,__cfstring"
+@"\01L_OBJC_SELECTOR_REFERENCES_46" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_48" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01l_objc_msgSend_fixup_isEqual_" = external hidden global %0, section "__DATA, __objc_msgrefs, coalesced", align 16
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_50" = external hidden global %struct._class_t*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@NSCocoaErrorDomain = external constant %1*
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_51" = external hidden global %struct._class_t*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@NSFilePathErrorKey = external constant %1*
+@"\01L_OBJC_SELECTOR_REFERENCES_53" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_55" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_56" = external hidden global %struct._class_t*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_SELECTOR_REFERENCES_58" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_60" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+declare %1* @truncatedString(%1*, i64)
+define void @TextEditTest(%2* %self, %3* %pboard) {
+entry:
+ %err = alloca %4*, align 8
+ %tmp7 = bitcast %2* %self to i8*
+ %tmp8 = call i8* @objc_retain(i8* %tmp7) nounwind
+ store %4* null, %4** %err, align 8
+ %tmp1 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_17", align 8
+ %tmp2 = load %struct.__CFString** @kUTTypePlainText, align 8
+ %tmp3 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_19", align 8
+ %tmp4 = bitcast %struct._class_t* %tmp1 to i8*
+ %call5 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp4, i8* %tmp3, %struct.__CFString* %tmp2)
+ %tmp5 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_21", align 8
+ %tmp6 = bitcast %3* %pboard to i8*
+ %call76 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp6, i8* %tmp5, i8* %call5)
+ %tmp9 = call i8* @objc_retain(i8* %call76) nounwind
+ %tobool = icmp eq i8* %tmp9, null
+ br i1 %tobool, label %end, label %land.lhs.true
+
+land.lhs.true: ; preds = %entry
+ %tmp11 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_23", align 8
+ %call137 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp6, i8* %tmp11, i8* %tmp9)
+ %tmp = bitcast i8* %call137 to %1*
+ %tmp10 = call i8* @objc_retain(i8* %call137) nounwind
+ call void @objc_release(i8* null) nounwind
+ %tmp12 = call i8* @objc_retain(i8* %call137) nounwind
+ call void @objc_release(i8* null) nounwind
+ %tobool16 = icmp eq i8* %call137, null
+ br i1 %tobool16, label %end, label %if.then
+
+if.then: ; preds = %land.lhs.true
+ %tmp19 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_25", align 8
+ %call21 = call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* %call137, i8* %tmp19)
+ %tobool22 = icmp eq i8 %call21, 0
+ br i1 %tobool22, label %if.then44, label %land.lhs.true23
+
+land.lhs.true23: ; preds = %if.then
+ %tmp24 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_26", align 8
+ %tmp26 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_28", align 8
+ %tmp27 = bitcast %struct._class_t* %tmp24 to i8*
+ %call2822 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp27, i8* %tmp26, i8* %call137)
+ %tmp13 = bitcast i8* %call2822 to %5*
+ %tmp14 = call i8* @objc_retain(i8* %call2822) nounwind
+ call void @objc_release(i8* null) nounwind
+ %tobool30 = icmp eq i8* %call2822, null
+ br i1 %tobool30, label %if.then44, label %if.end
+
+if.end: ; preds = %land.lhs.true23
+ %tmp32 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_29", align 8
+ %tmp33 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_31", align 8
+ %tmp34 = bitcast %struct._class_t* %tmp32 to i8*
+ %call35 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp34, i8* %tmp33)
+ %tmp37 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_33", align 8
+ %call3923 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call35, i8* %tmp37, i8* %call2822, i32 signext 1, %4** %err)
+ %cmp = icmp eq i8* %call3923, null
+ br i1 %cmp, label %if.then44, label %end
+
+if.then44: ; preds = %if.end, %land.lhs.true23, %if.then
+ %url.025 = phi %5* [ %tmp13, %if.end ], [ %tmp13, %land.lhs.true23 ], [ null, %if.then ]
+ %tmp49 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_35", align 8
+ %call51 = call %struct._NSRange bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %struct._NSRange (i8*, i8*, i64, i64)*)(i8* %call137, i8* %tmp49, i64 0, i64 0)
+ %call513 = extractvalue %struct._NSRange %call51, 0
+ %call514 = extractvalue %struct._NSRange %call51, 1
+ %tmp52 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_37", align 8
+ %call548 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call137, i8* %tmp52, i64 %call513, i64 %call514)
+ %tmp55 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_38", align 8
+ %tmp56 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_40", align 8
+ %tmp57 = bitcast %struct._class_t* %tmp55 to i8*
+ %call58 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp57, i8* %tmp56)
+ %tmp59 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_42", align 8
+ %call6110 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call548, i8* %tmp59, i8* %call58)
+ %tmp15 = call i8* @objc_retain(i8* %call6110) nounwind
+ call void @objc_release(i8* %call137) nounwind
+ %tmp64 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_46", align 8
+ %call66 = call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, %1*)*)(i8* %call6110, i8* %tmp64, %1* bitcast (%struct.NSConstantString* @_unnamed_cfstring_44 to %1*))
+ %tobool67 = icmp eq i8 %call66, 0
+ br i1 %tobool67, label %if.end74, label %if.then68
+
+if.then68: ; preds = %if.then44
+ %tmp70 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_48", align 8
+ %call7220 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call6110, i8* %tmp70)
+ %tmp16 = call i8* @objc_retain(i8* %call7220) nounwind
+ call void @objc_release(i8* %call6110) nounwind
+ br label %if.end74
+
+if.end74: ; preds = %if.then68, %if.then44
+ %filename.0.in = phi i8* [ %call7220, %if.then68 ], [ %call6110, %if.then44 ]
+ %filename.0 = bitcast i8* %filename.0.in to %1*
+ %tmp17 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_isEqual_" to i8**), align 16
+ %tmp18 = bitcast i8* %tmp17 to i8 (i8*, %struct._message_ref_t*, i8*, ...)*
+ %call78 = call signext i8 (i8*, %struct._message_ref_t*, i8*, ...)* %tmp18(i8* %call137, %struct._message_ref_t* bitcast (%0* @"\01l_objc_msgSend_fixup_isEqual_" to %struct._message_ref_t*), i8* %filename.0.in)
+ %tobool79 = icmp eq i8 %call78, 0
+ br i1 %tobool79, label %land.lhs.true80, label %if.then109
+
+land.lhs.true80: ; preds = %if.end74
+ %tmp82 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_25", align 8
+ %call84 = call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* %filename.0.in, i8* %tmp82)
+ %tobool86 = icmp eq i8 %call84, 0
+ br i1 %tobool86, label %if.then109, label %if.end106
+
+if.end106: ; preds = %land.lhs.true80
+ %tmp88 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_26", align 8
+ %tmp90 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_28", align 8
+ %tmp91 = bitcast %struct._class_t* %tmp88 to i8*
+ %call9218 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp91, i8* %tmp90, i8* %filename.0.in)
+ %tmp20 = bitcast i8* %call9218 to %5*
+ %tmp21 = call i8* @objc_retain(i8* %call9218) nounwind
+ %tmp22 = bitcast %5* %url.025 to i8*
+ call void @objc_release(i8* %tmp22) nounwind
+ %tmp94 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_29", align 8
+ %tmp95 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_31", align 8
+ %tmp96 = bitcast %struct._class_t* %tmp94 to i8*
+ %call97 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp96, i8* %tmp95)
+ %tmp99 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_33", align 8
+ %call10119 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call97, i8* %tmp99, i8* %call9218, i32 signext 1, %4** %err)
+ %phitmp = icmp eq i8* %call10119, null
+ br i1 %phitmp, label %if.then109, label %end
+
+if.then109: ; preds = %if.end106, %land.lhs.true80, %if.end74
+ %url.129 = phi %5* [ %tmp20, %if.end106 ], [ %url.025, %if.end74 ], [ %url.025, %land.lhs.true80 ]
+ %tmp110 = load %4** %err, align 8
+ %tobool111 = icmp eq %4* %tmp110, null
+ br i1 %tobool111, label %if.then112, label %if.end125
+
+if.then112: ; preds = %if.then109
+ %tmp113 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_50", align 8
+ %tmp114 = load %1** @NSCocoaErrorDomain, align 8
+ %tmp115 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_51", align 8
+ %call117 = call %1* @truncatedString(%1* %filename.0, i64 1034)
+ %tmp118 = load %1** @NSFilePathErrorKey, align 8
+ %tmp119 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_53", align 8
+ %tmp120 = bitcast %struct._class_t* %tmp115 to i8*
+ %call12113 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp120, i8* %tmp119, %1* %call117, %1* %tmp118, i8* null)
+ %tmp122 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_55", align 8
+ %tmp123 = bitcast %struct._class_t* %tmp113 to i8*
+ %call12414 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp123, i8* %tmp122, %1* %tmp114, i64 258, i8* %call12113)
+ %tmp23 = call i8* @objc_retain(i8* %call12414) nounwind
+ %tmp25 = call i8* @objc_autorelease(i8* %tmp23) nounwind
+ %tmp28 = bitcast i8* %tmp25 to %4*
+ store %4* %tmp28, %4** %err, align 8
+ br label %if.end125
+
+if.end125: ; preds = %if.then112, %if.then109
+ %tmp127 = phi %4* [ %tmp110, %if.then109 ], [ %tmp28, %if.then112 ]
+ %tmp126 = load %struct._class_t** @"\01L_OBJC_CLASSLIST_REFERENCES_$_56", align 8
+ %tmp128 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_58", align 8
+ %tmp129 = bitcast %struct._class_t* %tmp126 to i8*
+ %call13015 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %tmp129, i8* %tmp128, %4* %tmp127)
+ %tmp131 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_60", align 8
+ %call13317 = call i8* (i8*, i8*, ...)* @objc_msgSend(i8* %call13015, i8* %tmp131)
+ br label %end
+
+end: ; preds = %if.end125, %if.end106, %if.end, %land.lhs.true, %entry
+ %filename.2 = phi %1* [ %filename.0, %if.end106 ], [ %filename.0, %if.end125 ], [ %tmp, %land.lhs.true ], [ null, %entry ], [ %tmp, %if.end ]
+ %origFilename.0 = phi %1* [ %tmp, %if.end106 ], [ %tmp, %if.end125 ], [ %tmp, %land.lhs.true ], [ null, %entry ], [ %tmp, %if.end ]
+ %url.2 = phi %5* [ %tmp20, %if.end106 ], [ %url.129, %if.end125 ], [ null, %land.lhs.true ], [ null, %entry ], [ %tmp13, %if.end ]
+ call void @objc_release(i8* %tmp9) nounwind, !clang.imprecise_release !0
+ %tmp29 = bitcast %5* %url.2 to i8*
+ call void @objc_release(i8* %tmp29) nounwind, !clang.imprecise_release !0
+ %tmp30 = bitcast %1* %origFilename.0 to i8*
+ call void @objc_release(i8* %tmp30) nounwind, !clang.imprecise_release !0
+ %tmp31 = bitcast %1* %filename.2 to i8*
+ call void @objc_release(i8* %tmp31) nounwind, !clang.imprecise_release !0
+ call void @objc_release(i8* %tmp7) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/cfg-hazards.ll b/test/Transforms/ObjCARC/cfg-hazards.ll
new file mode 100644
index 0000000..e3624df
--- /dev/null
+++ b/test/Transforms/ObjCARC/cfg-hazards.ll
@@ -0,0 +1,86 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+; rdar://9503416
+
+; Detect loop boundaries and don't move retains and releases
+; across them.
+
+declare void @use_pointer(i8*)
+declare i8* @objc_retain(i8*)
+declare void @objc_release(i8*)
+
+; CHECK: define void @test0(
+; CHECK: call i8* @objc_retain(
+; CHECK: for.body:
+; CHECK-NOT: @objc
+; CHECK: for.end:
+; CHECK: call void @objc_release(
+; CHECK: }
+define void @test0(i8* %digits) {
+entry:
+ %tmp1 = call i8* @objc_retain(i8* %digits) nounwind
+ call void @use_pointer(i8* %tmp1)
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %upcDigitIndex.01 = phi i64 [ 2, %entry ], [ %inc, %for.body ]
+ call void @use_pointer(i8* %tmp1)
+ %inc = add i64 %upcDigitIndex.01, 1
+ %cmp = icmp ult i64 %inc, 12
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
+ ret void
+}
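+
+; For illustration: sinking the release into for.body would release %digits
+; once per iteration against a single retain, and hoisting the retain into
+; the loop is the mirror hazard, so the loop boundary must block the motion.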
+
+; CHECK: define void @test1(
+; CHECK: call i8* @objc_retain(
+; CHECK: for.body:
+; CHECK-NOT: @objc
+; CHECK: for.end:
+; CHECK: void @objc_release(
+; CHECK: }
+define void @test1(i8* %digits) {
+entry:
+ %tmp1 = call i8* @objc_retain(i8* %digits) nounwind
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %upcDigitIndex.01 = phi i64 [ 2, %entry ], [ %inc, %for.body ]
+ call void @use_pointer(i8* %tmp1)
+ call void @use_pointer(i8* %tmp1)
+ %inc = add i64 %upcDigitIndex.01, 1
+ %cmp = icmp ult i64 %inc, 12
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; CHECK: define void @test2(
+; CHECK: call i8* @objc_retain(
+; CHECK: for.body:
+; CHECK-NOT: @objc
+; CHECK: for.end:
+; CHECK: void @objc_release(
+; CHECK: }
+define void @test2(i8* %digits) {
+entry:
+ %tmp1 = call i8* @objc_retain(i8* %digits) nounwind
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %upcDigitIndex.01 = phi i64 [ 2, %entry ], [ %inc, %for.body ]
+ call void @use_pointer(i8* %tmp1)
+ %inc = add i64 %upcDigitIndex.01, 1
+ %cmp = icmp ult i64 %inc, 12
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ call void @use_pointer(i8* %tmp1)
+ call void @objc_release(i8* %tmp1) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/contract-marker.ll b/test/Transforms/ObjCARC/contract-marker.ll
new file mode 100644
index 0000000..01d978a
--- /dev/null
+++ b/test/Transforms/ObjCARC/contract-marker.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -objc-arc-contract < %s | FileCheck %s
+
+; CHECK: %call = tail call i32* @qux()
+; CHECK-NEXT: %tcall = bitcast i32* %call to i8*
+; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) nounwind
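+;
+; The asm string in the CHECK lines above comes from the
+; !clang.arc.retainAutoreleasedReturnValueMarker module metadata at the
+; bottom of this file; the contract pass emits it verbatim between the call
+; and the retainRV so the Objective-C runtime can recognize the sequence.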
+
+define void @foo() {
+entry:
+ %call = tail call i32* @qux()
+ %tcall = bitcast i32* %call to i8*
+ %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) nounwind
+ tail call void @bar(i8* %0)
+ ret void
+}
+
+declare i32* @qux()
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare void @bar(i8*)
+
+!clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
+
+!0 = metadata !{metadata !"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
diff --git a/test/Transforms/ObjCARC/contract-storestrong-ivar.ll b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
new file mode 100644
index 0000000..4ad78e7
--- /dev/null
+++ b/test/Transforms/ObjCARC/contract-storestrong-ivar.ll
@@ -0,0 +1,31 @@
+; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
+
+; CHECK: call void @objc_storeStrong(i8**
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.0"
+
+%0 = type opaque
+%1 = type opaque
+
+@"OBJC_IVAR_$_Controller.preferencesController" = external global i64, section "__DATA, __objc_const", align 8
+
+declare i8* @objc_retain(i8*)
+
+declare void @objc_release(i8*)
+
+define hidden void @y(%0* nocapture %self, %1* %preferencesController) nounwind {
+entry:
+ %ivar = load i64* @"OBJC_IVAR_$_Controller.preferencesController", align 8
+ %tmp = bitcast %0* %self to i8*
+ %add.ptr = getelementptr inbounds i8* %tmp, i64 %ivar
+ %tmp1 = bitcast i8* %add.ptr to %1**
+ %tmp2 = load %1** %tmp1, align 8
+ %tmp3 = bitcast %1* %preferencesController to i8*
+ %tmp4 = tail call i8* @objc_retain(i8* %tmp3) nounwind
+ %tmp5 = bitcast %1* %tmp2 to i8*
+ tail call void @objc_release(i8* %tmp5) nounwind
+ %tmp6 = bitcast i8* %tmp4 to %1*
+ store %1* %tmp6, %1** %tmp1, align 8
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
new file mode 100644
index 0000000..50ed260
--- /dev/null
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -0,0 +1,59 @@
+; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i8* @objc_retain(i8*)
+declare void @objc_release(i8*)
+
+@x = external global i8*
+
+; CHECK: define void @test0(
+; CHECK: entry:
+; CHECK-NEXT: call void @objc_storeStrong(i8** @x, i8* %p) nounwind
+; CHECK-NEXT: ret void
+define void @test0(i8* %p) {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %p) nounwind
+ %tmp = load i8** @x, align 8
+ store i8* %0, i8** @x, align 8
+ tail call void @objc_release(i8* %tmp) nounwind
+ ret void
+}
+
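+; The sequence in test0 -- objc_retain of the new value, load of the old
+; value, store of the new value, objc_release of the old value -- is
+; exactly the semantics of objc_storeStrong, so the contract pass can
+; collapse all four instructions into the single call checked for above.
+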
+; Don't do this if the load is volatile.
+
+; CHECK: define void @test1(i8* %p) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: %tmp = volatile load i8** @x, align 8
+; CHECK-NEXT: store i8* %0, i8** @x, align 8
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test1(i8* %p) {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %p) nounwind
+ %tmp = volatile load i8** @x, align 8
+ store i8* %0, i8** @x, align 8
+ tail call void @objc_release(i8* %tmp) nounwind
+ ret void
+}
+
+; Don't do this if the store is volatile.
+
+; CHECK: define void @test2(i8* %p) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: %tmp = load i8** @x, align 8
+; CHECK-NEXT: volatile store i8* %0, i8** @x, align 8
+; CHECK-NEXT: tail call void @objc_release(i8* %tmp) nounwind
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test2(i8* %p) {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %p) nounwind
+ %tmp = load i8** @x, align 8
+ volatile store i8* %0, i8** @x, align 8
+ tail call void @objc_release(i8* %tmp) nounwind
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/contract-testcases.ll b/test/Transforms/ObjCARC/contract-testcases.ll
new file mode 100644
index 0000000..69fa837
--- /dev/null
+++ b/test/Transforms/ObjCARC/contract-testcases.ll
@@ -0,0 +1,63 @@
+; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
+; rdar://9511608
+
+%0 = type opaque
+%1 = type opaque
+%2 = type { i64, i64 }
+%3 = type { i8*, i8* }
+%4 = type opaque
+
+declare %0* @"\01-[NSAttributedString(Terminal) pathAtIndex:effectiveRange:]"(%1*, i8* nocapture, i64, %2*) optsize
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare i8* @objc_msgSend_fixup(i8*, %3*, ...)
+declare void @objc_release(i8*)
+declare %2 @NSUnionRange(i64, i64, i64, i64) optsize
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare i8* @objc_autorelease(i8*)
+declare i8* @objc_msgSend() nonlazybind
+
+; Don't get in trouble on bugpointed code.
+
+; CHECK: define void @test0(
+define void @test0() {
+bb:
+ %tmp = bitcast %4* undef to i8*
+ %tmp1 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tmp) nounwind
+ br label %bb3
+
+bb3: ; preds = %bb
+ br i1 undef, label %bb6, label %bb4
+
+bb4: ; preds = %bb3
+ switch i64 undef, label %bb5 [
+ i64 9223372036854775807, label %bb6
+ i64 0, label %bb6
+ ]
+
+bb5: ; preds = %bb4
+ br label %bb6
+
+bb6: ; preds = %bb5, %bb4, %bb4, %bb3
+ %tmp7 = phi %4* [ undef, %bb5 ], [ undef, %bb4 ], [ undef, %bb3 ], [ undef, %bb4 ]
+ unreachable
+}
+
+; When rewriting operands for a phi which has multiple operands
+; for the same block, use exactly the same value in each block.
+
+; CHECK: define void @test1(
+; CHECK: %0 = bitcast i8* %tmp3 to %0*
+; CHECK: br i1 undef, label %bb7, label %bb7
+; CHECK: bb7:
+; CHECK: %tmp8 = phi %0* [ %0, %bb ], [ %0, %bb ]
+define void @test1() {
+bb:
+ %tmp = tail call %0* bitcast (i8* ()* @objc_msgSend to %0* ()*)()
+ %tmp2 = bitcast %0* %tmp to i8*
+ %tmp3 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tmp2) nounwind
+ br i1 undef, label %bb7, label %bb7
+
+bb7: ; preds = %bb, %bb
+ %tmp8 = phi %0* [ %tmp, %bb ], [ %tmp, %bb ]
+ unreachable
+}
diff --git a/test/Transforms/ObjCARC/contract.ll b/test/Transforms/ObjCARC/contract.ll
new file mode 100644
index 0000000..04ae3ca
--- /dev/null
+++ b/test/Transforms/ObjCARC/contract.ll
@@ -0,0 +1,145 @@
+; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i8* @objc_retain(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+
+declare void @use_pointer(i8*)
+declare i8* @returner()
+
+; CHECK: define void @test0
+; CHECK: call void @use_pointer(i8* %0)
+; CHECK: }
+define void @test0(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ ret void
+}
+
+; CHECK: define void @test1
+; CHECK: call void @use_pointer(i8* %0)
+; CHECK: }
+define void @test1(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_autorelease(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ ret void
+}
+
+; Merge objc_retain and objc_autorelease into objc_retainAutorelease.
+
+; CHECK: define void @test2(
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: }
+define void @test2(i8* %x) nounwind {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %x) nounwind
+ tail call i8* @objc_autorelease(i8* %0) nounwind
+ call void @use_pointer(i8* %x)
+ ret void
+}
+
+; Same as test2 but the value is returned. Do an RV optimization.
+
+; CHECK: define i8* @test2b(
+; CHECK: tail call i8* @objc_retainAutoreleaseReturnValue(i8* %x) nounwind
+; CHECK: }
+define i8* @test2b(i8* %x) nounwind {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %x) nounwind
+ tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+ ret i8* %x
+}
+
+; Merge a retain,autorelease pair around a call.
+
+; CHECK: define void @test3(
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %x) nounwind
+; CHECK: @use_pointer(i8* %0)
+; CHECK: }
+define void @test3(i8* %x, i64 %n) {
+entry:
+ tail call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ tail call i8* @objc_autorelease(i8* %x) nounwind
+ ret void
+}
+
+; A trivial retain,autorelease pair with an intervening call, post-dominated
+; by another release. The retain and autorelease can still be merged.
+
+; CHECK: define void @test4(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: @objc_retainAutorelease(i8* %x) nounwind
+; CHECK-NEXT: @use_pointer
+; CHECK-NEXT: @objc_release
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test4(i8* %x, i64 %n) {
+entry:
+ tail call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ tail call i8* @objc_autorelease(i8* %x) nounwind
+ tail call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Don't merge retain and autorelease if they're not control-equivalent.
+
+; CHECK: define void @test5(
+; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: true:
+; CHECK: tail call i8* @objc_autorelease(i8* %0) nounwind
+; CHECK: }
+define void @test5(i8* %p, i1 %a) {
+entry:
+ tail call i8* @objc_retain(i8* %p) nounwind
+ br i1 %a, label %true, label %false
+
+true:
+ tail call i8* @objc_autorelease(i8* %p) nounwind
+ call void @use_pointer(i8* %p)
+ ret void
+
+false:
+ ret void
+}
+
+; Don't eliminate objc_retainAutoreleasedReturnValue by merging it into
+; an objc_autorelease.
+; TODO? Merge objc_retainAutoreleasedReturnValue and objc_autorelease into
+; objc_retainAutoreleasedReturnValueAutorelease and merge
+; objc_retainAutoreleasedReturnValue and objc_autoreleaseReturnValue
+; into objc_retainAutoreleasedReturnValueAutoreleaseReturnValue?
+; Those entrypoints don't exist yet though.
+
+; CHECK: define i8* @test6(
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p) nounwind
+; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %1) nounwind
+; CHECK: }
+define i8* @test6() {
+ %p = call i8* @returner()
+ tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p) nounwind
+ %t = tail call i8* @objc_autoreleaseReturnValue(i8* %p) nounwind
+ call void @use_pointer(i8* %t)
+ ret i8* %t
+}
+
+; Don't spoil the RV optimization.
+
+; CHECK: define i8* @test7(i8* %p)
+; CHECK: tail call i8* @objc_retain(i8* %p)
+; CHECK: call void @use_pointer(i8* %1)
+; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %1)
+; CHECK: ret i8* %2
+define i8* @test7(i8* %p) {
+ %1 = tail call i8* @objc_retain(i8* %p)
+ call void @use_pointer(i8* %p)
+ %2 = tail call i8* @objc_autoreleaseReturnValue(i8* %p)
+ ret i8* %p
+}
diff --git a/test/Transforms/ObjCARC/dg.exp b/test/Transforms/ObjCARC/dg.exp
new file mode 100644
index 0000000..f200589
--- /dev/null
+++ b/test/Transforms/ObjCARC/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
diff --git a/test/Transforms/ObjCARC/expand.ll b/test/Transforms/ObjCARC/expand.ll
new file mode 100644
index 0000000..5388673
--- /dev/null
+++ b/test/Transforms/ObjCARC/expand.ll
@@ -0,0 +1,28 @@
+; RUN: opt -objc-arc-expand -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_autorelease(i8*)
+
+declare void @use_pointer(i8*)
+
+; CHECK: define void @test0
+; CHECK: call void @use_pointer(i8* %x)
+; CHECK: }
+define void @test0(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_retain(i8* %x) nounwind
+ call void @use_pointer(i8* %0)
+ ret void
+}
+
+; CHECK: define void @test1
+; CHECK: call void @use_pointer(i8* %x)
+; CHECK: }
+define void @test1(i8* %x) nounwind {
+entry:
+ %0 = call i8* @objc_autorelease(i8* %x) nounwind
+ call void @use_pointer(i8* %x)
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/gvn.ll b/test/Transforms/ObjCARC/gvn.ll
new file mode 100644
index 0000000..6917b02
--- /dev/null
+++ b/test/Transforms/ObjCARC/gvn.ll
@@ -0,0 +1,21 @@
+; RUN: opt -S -basicaa -objc-arc -gvn < %s | FileCheck %s
+
+@x = common global i8* null, align 8
+
+declare i8* @objc_retain(i8*)
+
+; GVN should be able to eliminate this redundant load, with ARC-specific
+; alias analysis.
+
+; CHECK: @foo
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %s = load i8** @x
+; CHECK-NOT: load
+; CHECK: ret i8* %s
+define i8* @foo(i32 %n) nounwind {
+entry:
+ %s = load i8** @x
+ %0 = tail call i8* @objc_retain(i8* %s) nounwind
+ %t = load i8** @x
+ ret i8* %s
+}
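+
+; With the ARC-aware alias analysis, objc_retain is known not to modify the
+; memory at @x, so the second load is redundant and GVN forwards %s to %t.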
diff --git a/test/Transforms/ObjCARC/invoke.ll b/test/Transforms/ObjCARC/invoke.ll
new file mode 100644
index 0000000..a1b87d2
--- /dev/null
+++ b/test/Transforms/ObjCARC/invoke.ll
@@ -0,0 +1,67 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+declare i8* @objc_retain(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_msgSend(i8*, i8*, ...)
+declare void @use_pointer(i8*)
+declare void @callee()
+
+; ARCOpt shouldn't try to move the releases to the block containing the invoke.
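+; (Any point in the invoke's own block precedes the call itself, since an
+; invoke terminates its block, so hoisting the releases there would release
+; %zipFile before the objc_msgSend that may still use it.)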
+
+; CHECK: define void @test0(
+; CHECK: invoke.cont:
+; CHECK: call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK: ret void
+; CHECK: lpad:
+; CHECK: call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK: ret void
+define void @test0(i8* %zipFile) {
+entry:
+ call i8* @objc_retain(i8* %zipFile) nounwind
+ call void @use_pointer(i8* %zipFile)
+ invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*)*)(i8* %zipFile)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+ ret void
+
+lpad: ; preds = %entry
+ call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+; ARCOpt should move the release up into both of the invoke's successors,
+; placing it ahead of the calls to @callee.
+
+; CHECK: define void @test1(
+; CHECK: invoke.cont:
+; CHECK: call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK: call void @callee()
+; CHECK: br label %done
+; CHECK: lpad:
+; CHECK: call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+; CHECK: call void @callee()
+; CHECK: br label %done
+; CHECK: done:
+; CHECK-NEXT: ret void
+define void @test1(i8* %zipFile) {
+entry:
+ call i8* @objc_retain(i8* %zipFile) nounwind
+ call void @use_pointer(i8* %zipFile)
+ invoke void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*)*)(i8* %zipFile)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ call void @callee()
+ br label %done
+
+lpad: ; preds = %entry
+ call void @callee()
+ br label %done
+
+done:
+ call void @objc_release(i8* %zipFile) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
new file mode 100644
index 0000000..170d0a9
--- /dev/null
+++ b/test/Transforms/ObjCARC/move-and-form-retain-autorelease.ll
@@ -0,0 +1,221 @@
+; RUN: opt -S -objc-arc-contract < %s | FileCheck %s
+
+; The optimizer should be able to move the autorelease past a control
+; triangle and various scary-looking constructs, folding it into an
+; objc_retainAutorelease.
+
+; CHECK: bb57:
+; CHECK: tail call i8* @objc_retainAutorelease(i8* %tmp71x) nounwind
+; CHECK: bb99:
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.0"
+
+%0 = type { i8* (i8*, %1*, ...)*, i8* }
+%1 = type { i8*, i8* }
+%2 = type { %2*, %2*, %3*, i8* (i8*, i8*)**, %4* }
+%3 = type opaque
+%4 = type { i32, i32, i32, i8*, i8*, %5*, %7*, %10*, i8*, %9* }
+%5 = type { i32, i32, [0 x %6] }
+%6 = type { i8*, i8*, i8* }
+%7 = type { i64, [0 x %8*] }
+%8 = type { i8*, i8*, %7*, %5*, %5*, %5*, %5*, %9*, i32, i32 }
+%9 = type { i32, i32, [0 x %1] }
+%10 = type { i32, i32, [0 x %11] }
+%11 = type { i64*, i8*, i8*, i32, i32 }
+%12 = type { i32*, i32, i8*, i64 }
+%13 = type opaque
+%14 = type opaque
+%15 = type opaque
+%16 = type opaque
+%17 = type opaque
+%18 = type opaque
+%19 = type opaque
+%20 = type opaque
+%21 = type opaque
+%22 = type opaque
+%23 = type opaque
+%24 = type opaque
+%25 = type opaque
+
+@"\01l_objc_msgSend_fixup_alloc" = external hidden global %0, section "__DATA, __objc_msgrefs, coalesced", align 16
+@"\01L_OBJC_SELECTOR_REFERENCES_8" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_3725" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_40" = external hidden global %2*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_SELECTOR_REFERENCES_4227" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_4631" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_70" = external hidden global %2*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_SELECTOR_REFERENCES_148" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_159" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_188" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_328" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01l_objc_msgSend_fixup_objectAtIndex_" = external hidden global %0, section "__DATA, __objc_msgrefs, coalesced", align 16
+@_unnamed_cfstring_386 = external hidden constant %12, section "__DATA,__cfstring"
+@"\01l_objc_msgSend_fixup_count" = external hidden global %0, section "__DATA, __objc_msgrefs, coalesced", align 16
+@"\01L_OBJC_SELECTOR_REFERENCES_389" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_391" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_393" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@NSPrintHeaderAndFooter = external constant %13*
+@"\01L_OBJC_SELECTOR_REFERENCES_395" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_396" = external hidden global %2*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_SELECTOR_REFERENCES_398" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_400" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_402" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_404" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_406" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_408" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_CLASSLIST_REFERENCES_$_409" = external hidden global %2*, section "__DATA, __objc_classrefs, regular, no_dead_strip", align 8
+@"\01L_OBJC_SELECTOR_REFERENCES_411" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_413" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_415" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare i8* @objc_retain(i8*)
+
+declare void @objc_release(i8*)
+
+declare i8* @objc_autorelease(i8*)
+
+declare i8* @objc_explicit_autorelease(i8*)
+
+define hidden %14* @foo(%15* %arg, %16* %arg2) {
+bb:
+ %tmp = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_3725", align 8
+ %tmp4 = bitcast %15* %arg to i8*
+ %tmp5 = tail call %18* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %18* (i8*, i8*)*)(i8* %tmp4, i8* %tmp)
+ %tmp6 = bitcast %18* %tmp5 to i8*
+ %tmp7 = tail call i8* @objc_retain(i8* %tmp6) nounwind
+ %tmp8 = load %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_40", align 8
+ %tmp9 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_4227", align 8
+ %tmp10 = bitcast %2* %tmp8 to i8*
+ %tmp11 = tail call %19* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %19* (i8*, i8*)*)(i8* %tmp10, i8* %tmp9)
+ %tmp12 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_4631", align 8
+ %tmp13 = bitcast %19* %tmp11 to i8*
+ %tmp14 = tail call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*, %13*)*)(i8* %tmp13, i8* %tmp12, %13* bitcast (%12* @_unnamed_cfstring_386 to %13*))
+ %tmp15 = bitcast %16* %arg2 to i8*
+ %tmp16 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_count" to i8**), align 16
+ %tmp17 = bitcast i8* %tmp16 to i64 (i8*, %1*)*
+ %tmp18 = tail call i64 %tmp17(i8* %tmp15, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_count" to %1*))
+ %tmp19 = icmp eq i64 %tmp18, 0
+ br i1 %tmp19, label %bb22, label %bb20
+
+bb20: ; preds = %bb
+ %tmp21 = icmp eq i8 %tmp14, 0
+ br label %bb25
+
+bb22: ; preds = %bb
+ %tmp23 = bitcast i8* %tmp7 to %18*
+ %tmp24 = icmp eq i8 %tmp14, 0
+ br i1 %tmp24, label %bb46, label %bb25
+
+bb25: ; preds = %bb22, %bb20
+ %tmp26 = phi i1 [ %tmp21, %bb20 ], [ false, %bb22 ]
+ %tmp27 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_188", align 8
+ %tmp28 = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp7, i8* %tmp27)
+ %tmp29 = tail call i8* @objc_explicit_autorelease(i8* %tmp28) nounwind
+ %tmp30 = bitcast i8* %tmp29 to %18*
+ tail call void @objc_release(i8* %tmp7) nounwind
+ %tmp31 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_389", align 8
+ %tmp32 = tail call %20* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %20* (i8*, i8*)*)(i8* %tmp29, i8* %tmp31)
+ %tmp33 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_391", align 8
+ %tmp34 = bitcast %20* %tmp32 to i8*
+ tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %16*)*)(i8* %tmp34, i8* %tmp33, %16* %arg2)
+ br i1 %tmp26, label %bb46, label %bb35
+
+bb35: ; preds = %bb25
+ %tmp36 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_389", align 8
+ %tmp37 = tail call %20* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %20* (i8*, i8*)*)(i8* %tmp29, i8* %tmp36)
+ %tmp38 = load %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_70", align 8
+ %tmp39 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_393", align 8
+ %tmp40 = bitcast %2* %tmp38 to i8*
+ %tmp41 = tail call %21* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %21* (i8*, i8*, i8)*)(i8* %tmp40, i8* %tmp39, i8 signext 1)
+ %tmp42 = bitcast %21* %tmp41 to i8*
+ %tmp43 = load %13** @NSPrintHeaderAndFooter, align 8
+ %tmp44 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_159", align 8
+ %tmp45 = bitcast %20* %tmp37 to i8*
+ tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8*, %13*)*)(i8* %tmp45, i8* %tmp44, i8* %tmp42, %13* %tmp43)
+ br label %bb46
+
+bb46: ; preds = %bb35, %bb25, %bb22
+ %tmp47 = phi %18* [ %tmp30, %bb35 ], [ %tmp30, %bb25 ], [ %tmp23, %bb22 ]
+ %tmp48 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
+ %tmp49 = tail call %22* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %22* (i8*, i8*)*)(i8* %tmp4, i8* %tmp48)
+ %tmp50 = bitcast %22* %tmp49 to i8*
+ %tmp51 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_count" to i8**), align 16
+ %tmp52 = bitcast i8* %tmp51 to i64 (i8*, %1*)*
+ %tmp53 = tail call i64 %tmp52(i8* %tmp50, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_count" to %1*))
+ %tmp54 = icmp eq i64 %tmp53, 0
+ br i1 %tmp54, label %bb55, label %bb57
+
+bb55: ; preds = %bb46
+ %tmp56 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_395", align 8
+ tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*)*)(i8* %tmp4, i8* %tmp56)
+ br label %bb57
+
+bb57: ; preds = %bb55, %bb46
+ %tmp58 = load %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_396", align 8
+ %tmp59 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
+ %tmp60 = tail call %22* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %22* (i8*, i8*)*)(i8* %tmp4, i8* %tmp59)
+ %tmp61 = bitcast %22* %tmp60 to i8*
+ %tmp62 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to i8**), align 16
+ %tmp63 = bitcast i8* %tmp62 to i8* (i8*, %1*, i64)*
+ %tmp64 = tail call i8* %tmp63(i8* %tmp61, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to %1*), i64 0)
+ %tmp65 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_398", align 8
+ %tmp66 = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp64, i8* %tmp65)
+ %tmp67 = bitcast i8* %tmp66 to %23*
+ %tmp68 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_400", align 8
+ %tmp69 = bitcast %2* %tmp58 to i8*
+ %tmp70 = tail call %14* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %14* (i8*, i8*, %23*, %18*)*)(i8* %tmp69, i8* %tmp68, %23* %tmp67, %18* %tmp47)
+ %tmp71 = bitcast %14* %tmp70 to i8*
+ ; Hack to prevent the optimizer from using objc_retainAutoreleasedReturnValue.
+ %tmp71x = getelementptr i8* %tmp71, i64 1
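+ ; (The gep offsets the pointer so it is no longer the immediate call
+ ; result, which keeps this retain in its plain objc_retain form.)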
+ %tmp72 = tail call i8* @objc_retain(i8* %tmp71x) nounwind
+ %tmp73 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_402", align 8
+ tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8)*)(i8* %tmp72, i8* %tmp73, i8 signext 1)
+ %tmp74 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_404", align 8
+ tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i8)*)(i8* %tmp72, i8* %tmp74, i8 signext 1)
+ %tmp75 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_328", align 8
+ %tmp76 = tail call %22* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %22* (i8*, i8*)*)(i8* %tmp4, i8* %tmp75)
+ %tmp77 = bitcast %22* %tmp76 to i8*
+ %tmp78 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to i8**), align 16
+ %tmp79 = bitcast i8* %tmp78 to i8* (i8*, %1*, i64)*
+ %tmp80 = tail call i8* %tmp79(i8* %tmp77, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_objectAtIndex_" to %1*), i64 0)
+ %tmp81 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_406", align 8
+ tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i64)*)(i8* %tmp80, i8* %tmp81, i64 9223372036854775807)
+ %tmp82 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_408", align 8
+ %tmp83 = tail call %24* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %24* (i8*, i8*)*)(i8* %tmp72, i8* %tmp82)
+ %tmp84 = bitcast %24* %tmp83 to i8*
+ %tmp85 = tail call i8* @objc_retain(i8* %tmp84) nounwind
+ %tmp86 = load %2** @"\01L_OBJC_CLASSLIST_REFERENCES_$_409", align 8
+ %tmp87 = bitcast %2* %tmp86 to i8*
+ %tmp88 = load i8** bitcast (%0* @"\01l_objc_msgSend_fixup_alloc" to i8**), align 16
+ %tmp89 = bitcast i8* %tmp88 to i8* (i8*, %1*)*
+ %tmp90 = tail call i8* %tmp89(i8* %tmp87, %1* bitcast (%0* @"\01l_objc_msgSend_fixup_alloc" to %1*))
+ %tmp91 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_8", align 8
+ %tmp92 = tail call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp90, i8* %tmp91)
+ %tmp93 = tail call i8* @objc_explicit_autorelease(i8* %tmp92) nounwind
+ %tmp94 = bitcast i8* %tmp93 to %25*
+ %tmp95 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_411", align 8
+ tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, %25*)*)(i8* %tmp85, i8* %tmp95, %25* %tmp94)
+ tail call void @objc_release(i8* %tmp93) nounwind
+ %tmp96 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_148", align 8
+ %tmp97 = tail call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* %tmp4, i8* %tmp96)
+ %tmp98 = icmp eq i8 %tmp97, 0
+ br i1 %tmp98, label %bb99, label %bb104
+
+bb99: ; preds = %bb57
+ %tmp100 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_413", align 8
+ %tmp101 = tail call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*)*)(i8* %tmp85, i8* %tmp100)
+ %tmp102 = or i64 %tmp101, 12
+ %tmp103 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_415", align 8
+ tail call void bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to void (i8*, i8*, i64)*)(i8* %tmp85, i8* %tmp103, i64 %tmp102)
+ br label %bb104
+
+bb104: ; preds = %bb99, %bb57
+ %tmp105 = tail call i8* @objc_autorelease(i8* %tmp72) nounwind
+ %tmp106 = bitcast i8* %tmp105 to %14*
+ tail call void @objc_release(i8* %tmp85) nounwind
+ %tmp107 = bitcast %18* %tmp47 to i8*
+ tail call void @objc_release(i8* %tmp107) nounwind
+ ret %14* %tmp106
+}
diff --git a/test/Transforms/ObjCARC/move-and-merge-autorelease.ll b/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
new file mode 100644
index 0000000..8462c70
--- /dev/null
+++ b/test/Transforms/ObjCARC/move-and-merge-autorelease.ll
@@ -0,0 +1,108 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+; The optimizer should be able to move the autorelease past two phi nodes
+; and fold it with the release in bb65.
+
+; CHECK: bb65:
+; CHECK: call i8* @objc_retainAutorelease
+; CHECK: br label %bb76
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.0"
+
+%0 = type opaque
+%1 = type opaque
+%2 = type opaque
+%3 = type opaque
+%4 = type opaque
+%5 = type opaque
+
+@"\01L_OBJC_SELECTOR_REFERENCES_11" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_421455" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_598" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_620" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_622" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_624" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+@"\01L_OBJC_SELECTOR_REFERENCES_626" = external hidden global i8*, section "__DATA, __objc_selrefs, literal_pointers, no_dead_strip"
+
+declare i8* @objc_msgSend(i8*, i8*, ...)
+
+declare i8* @objc_retain(i8*)
+
+declare void @objc_release(i8*)
+
+declare i8* @objc_autorelease(i8*)
+
+define hidden %0* @foo(%1* %arg, %3* %arg3) {
+bb:
+ %tmp16 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_620", align 8
+ %tmp17 = bitcast %3* %arg3 to i8*
+ %tmp18 = call %4* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %4* (i8*, i8*)*)(i8* %tmp17, i8* %tmp16)
+ %tmp19 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_622", align 8
+ %tmp20 = bitcast %4* %tmp18 to i8*
+ %tmp21 = call %5* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %5* (i8*, i8*)*)(i8* %tmp20, i8* %tmp19)
+ %tmp22 = bitcast %5* %tmp21 to i8*
+ %tmp23 = call i8* @objc_retain(i8* %tmp22) nounwind
+ %tmp24 = bitcast i8* %tmp23 to %5*
+ %tmp26 = icmp eq i8* %tmp23, null
+ br i1 %tmp26, label %bb81, label %bb27
+
+bb27: ; preds = %bb
+ %tmp29 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_11", align 8
+ %tmp30 = bitcast %1* %arg to i8*
+ %tmp31 = call i8* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8* (i8*, i8*)*)(i8* %tmp30, i8* %tmp29)
+ %tmp34 = call i8* @objc_retain(i8* %tmp31) nounwind
+ %tmp37 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_421455", align 8
+ %tmp39 = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*)*)(i8* %tmp34, i8* %tmp37)
+ %tmp40 = bitcast %0* %tmp39 to i8*
+ %tmp41 = call i8* @objc_retain(i8* %tmp40) nounwind
+ %tmp42 = bitcast i8* %tmp41 to %0*
+ %tmp44 = icmp eq i8* %tmp41, null
+ br i1 %tmp44, label %bb45, label %bb55
+
+bb45: ; preds = %bb27
+ %tmp47 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_624", align 8
+ %tmp49 = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*)*)(i8* %tmp34, i8* %tmp47)
+ %tmp51 = bitcast %0* %tmp49 to i8*
+ %tmp52 = call i8* @objc_retain(i8* %tmp51) nounwind
+ call void @objc_release(i8* %tmp41) nounwind
+ br label %bb55
+
+bb55: ; preds = %bb27, %bb45
+ %tmp13.0 = phi %0* [ %tmp42, %bb27 ], [ %tmp49, %bb45 ]
+ %tmp57 = icmp eq %0* %tmp13.0, null
+ br i1 %tmp57, label %bb76, label %bb58
+
+bb58: ; preds = %bb55
+ %tmp60 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_598", align 8
+ %tmp61 = bitcast %0* %tmp13.0 to i8*
+ %tmp62 = call signext i8 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i8 (i8*, i8*)*)(i8* %tmp61, i8* %tmp60)
+ %tmp64 = icmp eq i8 %tmp62, 0
+ br i1 %tmp64, label %bb76, label %bb65
+
+bb65: ; preds = %bb58
+ %tmp68 = load i8** @"\01L_OBJC_SELECTOR_REFERENCES_626", align 8
+ %tmp69 = bitcast %0* %tmp13.0 to i8*
+ %tmp70 = call %0* bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to %0* (i8*, i8*, %5*)*)(i8* %tmp69, i8* %tmp68, %5* %tmp24)
+ %tmp72 = bitcast %0* %tmp70 to i8*
+ %tmp73 = call i8* @objc_retain(i8* %tmp72) nounwind
+ br label %bb76
+
+bb76: ; preds = %bb58, %bb55, %bb65
+ %tmp10.0 = phi %0* [ %tmp70, %bb65 ], [ null, %bb58 ], [ null, %bb55 ]
+ %tmp78 = bitcast %0* %tmp13.0 to i8*
+ call void @objc_release(i8* %tmp78) nounwind
+ call void @objc_release(i8* %tmp34) nounwind
+ br label %bb81
+
+bb81: ; preds = %bb, %bb76
+ %tmp10.1 = phi %0* [ %tmp10.0, %bb76 ], [ null, %bb ]
+ %tmp83 = bitcast %0* %tmp10.1 to i8*
+ %tmp84 = call i8* @objc_retain(i8* %tmp83) nounwind
+ call void @objc_release(i8* %tmp23) nounwind
+ %tmp87 = call i8* @objc_autorelease(i8* %tmp84) nounwind
+ %tmp88 = bitcast i8* %tmp87 to %0*
+ %tmp92 = bitcast %0* %tmp10.1 to i8*
+ call void @objc_release(i8* %tmp92) nounwind
+ ret %0* %tmp88
+}
diff --git a/test/Transforms/ObjCARC/post-inlining.ll b/test/Transforms/ObjCARC/post-inlining.ll
new file mode 100644
index 0000000..ad69ccd
--- /dev/null
+++ b/test/Transforms/ObjCARC/post-inlining.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -objc-arc < %s | FileCheck %s
+
+declare void @use_pointer(i8*)
+declare i8* @returner()
+declare i8* @objc_retain(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+
+; Clean up residue left behind after inlining.
+
+; CHECK: define void @test0(
+; CHECK: entry:
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test0(i8* %call.i) {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %call.i) nounwind
+ %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+ ret void
+}
+
+; Same as test0, but with slightly different use arrangements.
+
+; CHECK: define void @test1(
+; CHECK: entry:
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test1(i8* %call.i) {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %call.i) nounwind
+ %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %call.i) nounwind
+ ret void
+}
+
+; Delete a retainRV+autoreleaseRV even if the pointer is used.
+
+; CHECK: define void @test24(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @use_pointer(i8* %p)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test24(i8* %p) {
+entry:
+ call i8* @objc_autoreleaseReturnValue(i8* %p) nounwind
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %p) nounwind
+ call void @use_pointer(i8* %p)
+ ret void
+}
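+
+; The autoreleaseRV+retainRV pair hands ownership out of and back into the
+; caller with no net effect, which is why it can be deleted even though %p
+; has another use.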
diff --git a/test/Transforms/ObjCARC/retain-not-declared.ll b/test/Transforms/ObjCARC/retain-not-declared.ll
new file mode 100644
index 0000000..e1fe117
--- /dev/null
+++ b/test/Transforms/ObjCARC/retain-not-declared.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -objc-arc -objc-arc-contract < %s | FileCheck %s
+
+; Test that the optimizer can create an objc_retainAutoreleaseReturnValue
+; declaration even if no objc_retain declaration exists.
+; rdar://9401303
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+declare i8* @objc_unretainedObject(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+
+; CHECK: define i8* @foo(i8* %p) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleaseReturnValue(i8* %p) nounwind
+; CHECK-NEXT: ret i8* %0
+; CHECK-NEXT: }
+
+define i8* @foo(i8* %p) {
+entry:
+ %call = tail call i8* @objc_unretainedObject(i8* %p)
+ %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+ %1 = tail call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+ ret i8* %1
+}
+
diff --git a/test/Transforms/ObjCARC/rle-s2l.ll b/test/Transforms/ObjCARC/rle-s2l.ll
new file mode 100644
index 0000000..8f8d5c0
--- /dev/null
+++ b/test/Transforms/ObjCARC/rle-s2l.ll
@@ -0,0 +1,135 @@
+; RUN: opt -S -basicaa -objc-arc < %s | FileCheck %s
+
+declare i8* @objc_loadWeak(i8**)
+declare i8* @objc_loadWeakRetained(i8**)
+declare i8* @objc_storeWeak(i8**, i8*)
+declare i8* @objc_initWeak(i8**, i8*)
+declare void @use_pointer(i8*)
+declare void @callee()
+
+; Basic redundant @objc_loadWeak elimination.
+
+; CHECK: define void @test0(i8** %p) {
+; CHECK-NEXT: %y = call i8* @objc_loadWeak(i8** %p)
+; CHECK-NEXT: call void @use_pointer(i8* %y)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test0(i8** %p) {
+ %x = call i8* @objc_loadWeak(i8** %p)
+ %y = call i8* @objc_loadWeak(i8** %p)
+ call void @use_pointer(i8* %y)
+ ret void
+}
+
+; DCE the @objc_loadWeak.
+
+; CHECK: define void @test1(i8** %p) {
+; CHECK-NEXT: %y = call i8* @objc_loadWeakRetained(i8** %p)
+; CHECK-NEXT: call void @use_pointer(i8* %y)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test1(i8** %p) {
+ %x = call i8* @objc_loadWeak(i8** %p)
+ %y = call i8* @objc_loadWeakRetained(i8** %p)
+ call void @use_pointer(i8* %y)
+ ret void
+}
+
+; Basic redundant @objc_loadWeakRetained elimination.
+
+; CHECK: define void @test2(i8** %p) {
+; CHECK-NEXT: %x = call i8* @objc_loadWeak(i8** %p)
+; CHECK-NEXT: store i8 3, i8* %x
+; CHECK-NEXT: %1 = tail call i8* @objc_retain(i8* %x)
+; CHECK-NEXT: call void @use_pointer(i8* %x)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test2(i8** %p) {
+ %x = call i8* @objc_loadWeak(i8** %p)
+ store i8 3, i8* %x
+ %y = call i8* @objc_loadWeakRetained(i8** %p)
+ call void @use_pointer(i8* %y)
+ ret void
+}
+
+; Basic redundant @objc_loadWeakRetained elimination, this time
+; with a readonly call instead of a store.
+
+; CHECK: define void @test3(i8** %p) {
+; CHECK-NEXT: %x = call i8* @objc_loadWeak(i8** %p)
+; CHECK-NEXT: call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT: %1 = tail call i8* @objc_retain(i8* %x)
+; CHECK-NEXT: call void @use_pointer(i8* %x)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test3(i8** %p) {
+ %x = call i8* @objc_loadWeak(i8** %p)
+ call void @use_pointer(i8* %x) readonly
+ %y = call i8* @objc_loadWeakRetained(i8** %p)
+ call void @use_pointer(i8* %y)
+ ret void
+}
+
+; A regular call blocks redundant weak load elimination.
+
+; CHECK: define void @test4(i8** %p) {
+; CHECK-NEXT: %x = call i8* @objc_loadWeak(i8** %p)
+; CHECK-NEXT: call void @use_pointer(i8* %x) readonly
+; CHECK-NEXT: call void @callee()
+; CHECK-NEXT: %y = call i8* @objc_loadWeak(i8** %p)
+; CHECK-NEXT: call void @use_pointer(i8* %y)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test4(i8** %p) {
+ %x = call i8* @objc_loadWeak(i8** %p)
+ call void @use_pointer(i8* %x) readonly
+ call void @callee()
+ %y = call i8* @objc_loadWeak(i8** %p)
+ call void @use_pointer(i8* %y)
+ ret void
+}
+
+; Store to load forwarding.
+
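+; The weak slot %p has just been written with %n, so the subsequent
+; objc_loadWeak is replaced by %n itself:
+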
+; CHECK: define void @test5(i8** %p, i8* %n) {
+; CHECK-NEXT: %1 = call i8* @objc_storeWeak(i8** %p, i8* %n)
+; CHECK-NEXT: call void @use_pointer(i8* %n)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test5(i8** %p, i8* %n) {
+ call i8* @objc_storeWeak(i8** %p, i8* %n)
+ %y = call i8* @objc_loadWeak(i8** %p)
+ call void @use_pointer(i8* %y)
+ ret void
+}
+
+; Store to load forwarding with objc_initWeak.
+
+; CHECK: define void @test6(i8** %p, i8* %n) {
+; CHECK-NEXT: %1 = call i8* @objc_initWeak(i8** %p, i8* %n)
+; CHECK-NEXT: call void @use_pointer(i8* %n)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test6(i8** %p, i8* %n) {
+ call i8* @objc_initWeak(i8** %p, i8* %n)
+ %y = call i8* @objc_loadWeak(i8** %p)
+ call void @use_pointer(i8* %y)
+ ret void
+}
+
+; Don't forward if there's a may-alias store in the way.
+
+; CHECK: define void @test7(i8** %p, i8* %n, i8** %q, i8* %m) {
+; CHECK-NEXT: call i8* @objc_initWeak(i8** %p, i8* %n)
+; CHECK-NEXT: call i8* @objc_storeWeak(i8** %q, i8* %m)
+; CHECK-NEXT: %y = call i8* @objc_loadWeak(i8** %p)
+; CHECK-NEXT: call void @use_pointer(i8* %y)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test7(i8** %p, i8* %n, i8** %q, i8* %m) {
+ call i8* @objc_initWeak(i8** %p, i8* %n)
+ call i8* @objc_storeWeak(i8** %q, i8* %m)
+ %y = call i8* @objc_loadWeak(i8** %p)
+ call void @use_pointer(i8* %y)
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/rv.ll b/test/Transforms/ObjCARC/rv.ll
new file mode 100644
index 0000000..da53a86
--- /dev/null
+++ b/test/Transforms/ObjCARC/rv.ll
@@ -0,0 +1,331 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64"
+
+declare i8* @objc_retain(i8*)
+declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare void @objc_release(i8*)
+declare i8* @objc_autorelease(i8*)
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare i8* @objc_retainAutoreleaseReturnValue(i8*)
+declare void @objc_autoreleasePoolPop(i8*)
+declare void @objc_autoreleasePoolPush()
+declare i8* @objc_retainBlock(i8*)
+
+declare i8* @objc_retainedObject(i8*)
+declare i8* @objc_unretainedObject(i8*)
+declare i8* @objc_unretainedPointer(i8*)
+
+declare void @use_pointer(i8*)
+declare void @callee()
+declare void @callee_fnptr(void ()*)
+declare void @invokee()
+declare i8* @returner()
+
+; Test that retain+release elimination is suppressed when the
+; retain is an objc_retainAutoreleasedReturnValue, since it's
+; better to do the RV optimization.
+
+; CHECK: define void @test0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %x = call i8* @returner
+; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %x) nounwind
+; CHECK: t:
+; CHECK-NOT: @objc_
+; CHECK: return:
+; CHECK-NEXT: call void @objc_release(i8* %x)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test0(i1 %p) nounwind {
+entry:
+ %x = call i8* @returner()
+ %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+ br i1 %p, label %t, label %return
+
+t:
+ call void @use_pointer(i8* %x)
+ store i8 0, i8* %x
+ br label %return
+
+return:
+ call void @objc_release(i8* %x) nounwind
+ ret void
+}
+
+; Delete no-ops.
+
+; CHECK: define void @test2
+; CHECK-NOT: @objc_
+; CHECK: }
+define void @test2() {
+ call i8* @objc_retainAutoreleasedReturnValue(i8* null)
+ call i8* @objc_autoreleaseReturnValue(i8* null)
+ ; call i8* @objc_retainAutoreleaseReturnValue(i8* null) ; TODO
+ ret void
+}
+
+; Delete a redundant retainRV,autoreleaseRV when forwarding a call result
+; directly to a return value.
+
+; CHECK: define i8* @test3
+; CHECK: call i8* @returner()
+; CHECK-NEXT: ret i8* %call
+define i8* @test3() {
+entry:
+ %call = call i8* @returner()
+ %0 = call i8* @objc_retainAutoreleasedReturnValue(i8* %call) nounwind
+ %1 = call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+ ret i8* %1
+}
+
+; Delete a redundant retain,autoreleaseRV when forwarding a call result
+; directly to a return value.
+
+; CHECK: define i8* @test4
+; CHECK: call i8* @returner()
+; CHECK-NEXT: ret i8* %call
+define i8* @test4() {
+entry:
+ %call = call i8* @returner()
+ %0 = call i8* @objc_retain(i8* %call) nounwind
+ %1 = call i8* @objc_autoreleaseReturnValue(i8* %0) nounwind
+ ret i8* %1
+}
+
+; Delete a redundant fused retain+autoreleaseRV when forwarding a call result
+; directly to a return value.
+
+; TODO
+; HECK: define i8* @test5
+; HECK: call i8* @returner()
+; HECK-NEXT: ret i8* %call
+;define i8* @test5() {
+;entry:
+; %call = call i8* @returner()
+; %0 = call i8* @objc_retainAutoreleaseReturnValue(i8* %call) nounwind
+; ret i8* %0
+;}
+
+; Don't eliminate objc_retainAutoreleasedReturnValue by merging it into
+; an objc_autorelease.
+; TODO? Merge objc_retainAutoreleasedReturnValue and objc_autorelease into
+; objc_retainAutoreleasedReturnValueAutorelease and merge
+; objc_retainAutoreleasedReturnValue and objc_autoreleaseReturnValue
+; into objc_retainAutoreleasedReturnValueAutoreleaseReturnValue?
+; Those entrypoints don't exist yet though.
+
+; CHECK: define i8* @test7(
+; CHECK: call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %p)
+define i8* @test7() {
+ %p = call i8* @returner()
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+ %t = call i8* @objc_autoreleaseReturnValue(i8* %p)
+ call void @use_pointer(i8* %t)
+ ret i8* %t
+}
+
+; CHECK: define i8* @test7b(
+; CHECK: call i8* @objc_retain(i8* %p)
+; CHECK: %t = tail call i8* @objc_autoreleaseReturnValue(i8* %p)
+define i8* @test7b() {
+ %p = call i8* @returner()
+ call void @use_pointer(i8* %p)
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+ %t = call i8* @objc_autoreleaseReturnValue(i8* %p)
+ ret i8* %t
+}
+
+; Turn objc_retain into objc_retainAutoreleasedReturnValue if its operand
+; is a return value.
+
+; CHECK: define void @test8()
+; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+define void @test8() {
+ %p = call i8* @returner()
+ call i8* @objc_retain(i8* %p)
+ ret void
+}
+
+; Don't apply the RV optimization to autorelease if there's no retain.
+
+; CHECK: define i8* @test9(i8* %p)
+; CHECK: tail call i8* @objc_autorelease(i8* %p)
+define i8* @test9(i8* %p) {
+ call i8* @objc_autorelease(i8* %p)
+ ret i8* %p
+}
+
+; Apply the RV optimization.
+
+; CHECK: define i8* @test10(i8* %p)
+; CHECK: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p) nounwind
+; CHECK-NEXT: ret i8* %p
+define i8* @test10(i8* %p) {
+ %1 = call i8* @objc_retain(i8* %p)
+ %2 = call i8* @objc_autorelease(i8* %p)
+ ret i8* %p
+}
+
+; Don't do the autoreleaseRV optimization because @use_pointer
+; could undo the retain.
+
+; CHECK: define i8* @test11(i8* %p)
+; CHECK: tail call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: call void @use_pointer(i8* %p)
+; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK-NEXT: ret i8* %p
+define i8* @test11(i8* %p) {
+ %1 = call i8* @objc_retain(i8* %p)
+ call void @use_pointer(i8* %p)
+ %2 = call i8* @objc_autorelease(i8* %p)
+ ret i8* %p
+}
+
+; Don't spoil the RV optimization.
+
+; CHECK: define i8* @test12(i8* %p)
+; CHECK: tail call i8* @objc_retain(i8* %p)
+; CHECK: call void @use_pointer(i8* %p)
+; CHECK: tail call i8* @objc_autoreleaseReturnValue(i8* %p)
+; CHECK: ret i8* %p
+define i8* @test12(i8* %p) {
+ %1 = call i8* @objc_retain(i8* %p)
+ call void @use_pointer(i8* %p)
+ %2 = call i8* @objc_autoreleaseReturnValue(i8* %p)
+ ret i8* %p
+}
+
+; Don't zap the objc_retainAutoreleasedReturnValue.
+
+; CHECK: define i8* @test13(
+; CHECK: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+; CHECK: tail call i8* @objc_autorelease(i8* %p)
+; CHECK: ret i8* %p
+define i8* @test13() {
+ %p = call i8* @returner()
+ %1 = call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+ call void @callee()
+ %2 = call i8* @objc_autorelease(i8* %p)
+ ret i8* %p
+}
+
+; Convert objc_retainAutoreleasedReturnValue to objc_retain if its
+; argument is not a return value.
+
+; CHECK: define void @test14(
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %p) nounwind
+; CHECK-NEXT: ret void
+define void @test14(i8* %p) {
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+ ret void
+}
+
+; Don't convert objc_retainAutoreleasedReturnValue to objc_retain if its
+; argument is a return value.
+
+; CHECK: define void @test15(
+; CHECK-NEXT: %y = call i8* @returner()
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: ret void
+define void @test15() {
+ %y = call i8* @returner()
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %y)
+ ret void
+}
+
+; Convert objc_retain to objc_retainAutoreleasedReturnValue if its
+; argument is a return value.
+
+; CHECK: define void @test16(
+; CHECK-NEXT: %y = call i8* @returner()
+; CHECK-NEXT: tail call i8* @objc_retainAutoreleasedReturnValue(i8* %y) nounwind
+; CHECK-NEXT: ret void
+define void @test16() {
+ %y = call i8* @returner()
+ call i8* @objc_retain(i8* %y)
+ ret void
+}
+
+; Don't convert objc_retain to objc_retainAutoreleasedReturnValue if its
+; argument is not a return value.
+
+; CHECK: define void @test17(
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: ret void
+define void @test17(i8* %y) {
+ call i8* @objc_retain(i8* %y)
+ ret void
+}
+
+; Don't convert objc_retain to objc_retainAutoreleasedReturnValue if it
+; isn't next to the call providing its return value.
+
+; CHECK: define void @test18(
+; CHECK-NEXT: %y = call i8* @returner()
+; CHECK-NEXT: call void @callee()
+; CHECK-NEXT: tail call i8* @objc_retain(i8* %y) nounwind
+; CHECK-NEXT: ret void
+define void @test18() {
+ %y = call i8* @returner()
+ call void @callee()
+ call i8* @objc_retain(i8* %y)
+ ret void
+}
+
+; Delete autoreleaseRV+retainRV pairs.
+
+; CHECK: define i8* @test19(i8* %p) {
+; CHECK-NEXT: ret i8* %p
+define i8* @test19(i8* %p) {
+ call i8* @objc_autoreleaseReturnValue(i8* %p)
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+ ret i8* %p
+}
+
+; Like test19 but with plain autorelease.
+
+; CHECK: define i8* @test20(i8* %p) {
+; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
+; CHECK-NEXT: call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: ret i8* %p
+define i8* @test20(i8* %p) {
+ call i8* @objc_autorelease(i8* %p)
+ call i8* @objc_retainAutoreleasedReturnValue(i8* %p)
+ ret i8* %p
+}
+
+; Like test19 but with plain retain.
+
+; CHECK: define i8* @test21(i8* %p) {
+; CHECK-NEXT: call i8* @objc_autoreleaseReturnValue(i8* %p)
+; CHECK-NEXT: call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: ret i8* %p
+define i8* @test21(i8* %p) {
+ call i8* @objc_autoreleaseReturnValue(i8* %p)
+ call i8* @objc_retain(i8* %p)
+ ret i8* %p
+}
+
+; Like test19 but with plain retain and autorelease.
+
+; CHECK: define i8* @test22(i8* %p) {
+; CHECK-NEXT: call i8* @objc_autorelease(i8* %p)
+; CHECK-NEXT: call i8* @objc_retain(i8* %p)
+; CHECK-NEXT: ret i8* %p
+define i8* @test22(i8* %p) {
+ call i8* @objc_autorelease(i8* %p)
+ call i8* @objc_retain(i8* %p)
+ ret i8* %p
+}
+
+; Convert autoreleaseRV to autorelease.
+
+; CHECK: define void @test23(
+; CHECK: tail call i8* @objc_autorelease(i8* %p) nounwind
+define void @test23(i8* %p) {
+ store i8 0, i8* %p
+ call i8* @objc_autoreleaseReturnValue(i8* %p)
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/weak-contract.ll b/test/Transforms/ObjCARC/weak-contract.ll
new file mode 100644
index 0000000..ca69c70
--- /dev/null
+++ b/test/Transforms/ObjCARC/weak-contract.ll
@@ -0,0 +1,14 @@
+; RUN: opt -objc-arc-contract -S < %s | FileCheck %s
+
+declare i8* @objc_initWeak(i8**, i8*)
+
+; Convert objc_initWeak(p, null) to *p = null.
+
+; CHECK: define i8* @test0(i8** %p) {
+; CHECK-NEXT: store i8* null, i8** %p
+; CHECK-NEXT: ret i8* null
+; CHECK-NEXT: }
+define i8* @test0(i8** %p) {
+ %t = call i8* @objc_initWeak(i8** %p, i8* null)
+ ret i8* %t
+}
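+
+; Initializing a weak reference to null requires no registration with the
+; runtime, so the call reduces to the plain null store checked for above.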
diff --git a/test/Transforms/ObjCARC/weak-copies.ll b/test/Transforms/ObjCARC/weak-copies.ll
new file mode 100644
index 0000000..e1a94bb
--- /dev/null
+++ b/test/Transforms/ObjCARC/weak-copies.ll
@@ -0,0 +1,87 @@
+; RUN: opt -S -basicaa -objc-arc < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin11.0.0"
+
+%0 = type { i64, i64, i8*, i8*, i8*, i8* }
+%1 = type <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i8* }>
+%struct.__block_descriptor = type { i64, i64 }
+
+@_NSConcreteStackBlock = external global i8*
+@.str = private unnamed_addr constant [6 x i8] c"v8@?0\00"
+@"\01L_OBJC_CLASS_NAME_" = internal global [3 x i8] c"\01@\00", section "__TEXT,__objc_classname,cstring_literals", align 1
+@__block_descriptor_tmp = internal constant %0 { i64 0, i64 40, i8* bitcast (void (i8*, i8*)* @__copy_helper_block_ to i8*), i8* bitcast (void (i8*)* @__destroy_helper_block_ to i8*), i8* getelementptr inbounds ([6 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([3 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0) }
+@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__DATA, __objc_imageinfo, regular, no_dead_strip"
+@llvm.used = appending global [2 x i8*] [i8* getelementptr inbounds ([3 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*)], section "llvm.metadata"
+
+; Eliminate unnecessary weak pointer copies.
+
+; CHECK: define void @foo() {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %call = call i8* @bar()
+; CHECK-NEXT: call void @use(i8* %call) nounwind
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @foo() {
+entry:
+ %w = alloca i8*, align 8
+ %x = alloca i8*, align 8
+ %call = call i8* @bar()
+ %0 = call i8* @objc_initWeak(i8** %w, i8* %call) nounwind
+ %1 = call i8* @objc_loadWeak(i8** %w) nounwind
+ %2 = call i8* @objc_initWeak(i8** %x, i8* %1) nounwind
+ %3 = call i8* @objc_loadWeak(i8** %x) nounwind
+ call void @use(i8* %3) nounwind
+ call void @objc_destroyWeak(i8** %x) nounwind
+ call void @objc_destroyWeak(i8** %w) nounwind
+ ret void
+}
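+
+; Both weak slots above are written, read back, and destroyed with nothing
+; in between that could clear them, so the loads forward %call through and
+; the slots themselves become dead.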
+
+; Eliminate unnecessary weak pointer copies in a block initialization.
+
+; CHECK: define void @qux(i8* %me) nounwind {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %block = alloca %1, align 8
+; CHECK-NOT: alloca
+; CHECK: }
+define void @qux(i8* %me) nounwind {
+entry:
+ %w = alloca i8*, align 8
+ %block = alloca %1, align 8
+ %0 = call i8* @objc_retain(i8* %me) nounwind
+ %1 = call i8* @objc_initWeak(i8** %w, i8* %0) nounwind
+ %block.isa = getelementptr inbounds %1* %block, i64 0, i32 0
+ store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+ %block.flags = getelementptr inbounds %1* %block, i64 0, i32 1
+ store i32 1107296256, i32* %block.flags, align 8
+ %block.reserved = getelementptr inbounds %1* %block, i64 0, i32 2
+ store i32 0, i32* %block.reserved, align 4
+ %block.invoke = getelementptr inbounds %1* %block, i64 0, i32 3
+ store i8* bitcast (void (i8*)* @__qux_block_invoke_0 to i8*), i8** %block.invoke, align 8
+ %block.descriptor = getelementptr inbounds %1* %block, i64 0, i32 4
+ store %struct.__block_descriptor* bitcast (%0* @__block_descriptor_tmp to %struct.__block_descriptor*), %struct.__block_descriptor** %block.descriptor, align 8
+ %block.captured = getelementptr inbounds %1* %block, i64 0, i32 5
+ %2 = call i8* @objc_loadWeak(i8** %w) nounwind
+ %3 = call i8* @objc_initWeak(i8** %block.captured, i8* %2) nounwind
+ %4 = bitcast %1* %block to void ()*
+ call void @use_block(void ()* %4) nounwind
+ call void @objc_destroyWeak(i8** %block.captured) nounwind
+ call void @objc_destroyWeak(i8** %w) nounwind
+ call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+declare i8* @objc_retain(i8*)
+declare void @use_block(void ()*) nounwind
+declare void @__qux_block_invoke_0(i8* %.block_descriptor) nounwind
+declare void @__copy_helper_block_(i8*, i8*) nounwind
+declare void @objc_copyWeak(i8**, i8**)
+declare void @__destroy_helper_block_(i8*) nounwind
+declare void @objc_release(i8*)
+declare i8* @bar()
+declare i8* @objc_initWeak(i8**, i8*)
+declare i8* @objc_loadWeak(i8**)
+declare void @use(i8*) nounwind
+declare void @objc_destroyWeak(i8**)
+
+!0 = metadata !{}
diff --git a/test/Transforms/ObjCARC/weak.ll b/test/Transforms/ObjCARC/weak.ll
new file mode 100644
index 0000000..85a290c
--- /dev/null
+++ b/test/Transforms/ObjCARC/weak.ll
@@ -0,0 +1,57 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+declare i8* @objc_initWeak(i8**, i8*)
+declare i8* @objc_storeWeak(i8**, i8*)
+declare i8* @objc_loadWeak(i8**)
+declare void @objc_destroyWeak(i8**)
+declare i8* @objc_loadWeakRetained(i8**)
+declare void @objc_moveWeak(i8**, i8**)
+declare void @objc_copyWeak(i8**, i8**)
+
+; If the pointer-to-weak-pointer is null, it's undefined behavior.
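+;
+; Illustrative reading (inferred from the CHECK lines below): each weak
+; entry point whose i8** operand is null or undef may be canonicalized
+; into a trapping marker store, one per offending call site, e.g.
+;
+;   call i8* @objc_storeWeak(i8** null, i8* %p)
+;     ==>  store i8* undef, i8** null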
+
+; CHECK: define void @test0(
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: store i8* undef, i8** null
+; CHECK: ret void
+define void @test0(i8* %p, i8** %q) {
+entry:
+ call i8* @objc_storeWeak(i8** null, i8* %p)
+ call i8* @objc_storeWeak(i8** undef, i8* %p)
+ call i8* @objc_loadWeakRetained(i8** null)
+ call i8* @objc_loadWeakRetained(i8** undef)
+ call i8* @objc_loadWeak(i8** null)
+ call i8* @objc_loadWeak(i8** undef)
+ call i8* @objc_initWeak(i8** null, i8* %p)
+ call i8* @objc_initWeak(i8** undef, i8* %p)
+ call void @objc_destroyWeak(i8** null)
+ call void @objc_destroyWeak(i8** undef)
+
+ call void @objc_copyWeak(i8** null, i8** %q)
+ call void @objc_copyWeak(i8** undef, i8** %q)
+ call void @objc_copyWeak(i8** %q, i8** null)
+ call void @objc_copyWeak(i8** %q, i8** undef)
+
+ call void @objc_moveWeak(i8** null, i8** %q)
+ call void @objc_moveWeak(i8** undef, i8** %q)
+ call void @objc_moveWeak(i8** %q, i8** null)
+ call void @objc_moveWeak(i8** %q, i8** undef)
+
+ ret void
+}
diff --git a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll b/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
deleted file mode 100644
index 33e0cfa..0000000
--- a/test/Transforms/PruneEH/2008-09-05-CGUpdate.ll
+++ /dev/null
@@ -1,1445 +0,0 @@
-; RUN: opt < %s -prune-eh -inline -print-callgraph \
-; RUN: -disable-output |& \
-; RUN: grep {calls.*ce3806g__fxio__put__put_int64__4.1339} | count 2
- %struct.FRAME.ce3806g = type { %struct.string___XUB, %struct.string___XUB, %struct.string___XUB, %struct.string___XUB }
- %struct.FRAME.ce3806g__fxio__put__4 = type { i32, i32, i32, %struct.system__file_control_block__pstring*, i32, i32, i8 }
- %struct.RETURN = type { i8, i32 }
- %struct.ada__streams__root_stream_type = type { %struct.ada__tags__dispatch_table* }
- %struct.ada__tags__dispatch_table = type { [1 x i32] }
- %struct.ada__tags__select_specific_data = type { i32, %struct.ada__tags__select_specific_data_element }
- %struct.ada__tags__select_specific_data_element = type { i32, i8 }
- %struct.ada__tags__type_specific_data = type { i32, i32, [2147483647 x i8]*, [2147483647 x i8]*, %struct.ada__tags__dispatch_table*, i8, i32, i32, i32, i32, [2 x %struct.ada__tags__dispatch_table*] }
- %struct.ada__text_io__text_afcb = type { %struct.system__file_control_block__afcb, i32, i32, i32, i32, i32, %struct.ada__text_io__text_afcb*, i8, i8 }
- %struct.exception = type { i8, i8, i32, i8*, i8*, i32, i8* }
- %struct.long_long_float___PAD = type { x86_fp80, [1 x i32] }
- %struct.string___XUB = type { i32, i32 }
- %struct.system__file_control_block__afcb = type { %struct.ada__streams__root_stream_type, i32, %struct.system__file_control_block__pstring, %struct.system__file_control_block__pstring, i8, i8, i8, i8, i8, i8, i8, %struct.system__file_control_block__afcb*, %struct.system__file_control_block__afcb* }
- %struct.system__file_control_block__pstring = type { i8*, %struct.string___XUB* }
- %struct.system__finalization_implementation__limited_record_controller = type { %struct.system__finalization_root__root_controlled, %struct.system__finalization_root__root_controlled* }
- %struct.system__finalization_implementation__record_controller = type { %struct.system__finalization_implementation__limited_record_controller, i32 }
- %struct.system__finalization_root__empty_root_controlled = type { %struct.ada__tags__dispatch_table* }
- %struct.system__finalization_root__root_controlled = type { %struct.ada__streams__root_stream_type, %struct.system__finalization_root__root_controlled*, %struct.system__finalization_root__root_controlled* }
- %struct.system__secondary_stack__mark_id = type { i32, i32 }
- %struct.system__standard_library__exception_data = type { i8, i8, i32, i32, %struct.system__standard_library__exception_data*, i32, void ()* }
-@.str = internal constant [12 x i8] c"system.ads\00\00" ; <[12 x i8]*> [#uses=1]
-@.str1 = internal constant [14 x i8] c"a-tifiio.adb\00\00" ; <[14 x i8]*> [#uses=1]
-@system__soft_links__abort_undefer = external global void ()* ; <void ()**> [#uses=6]
-@.str2 = internal constant [47 x i8] c"a-tifiio.adb:327 instantiated at ce3806g.adb:52" ; <[47 x i8]*> [#uses=1]
-@C.354.2200 = internal constant %struct.string___XUB { i32 1, i32 47 } ; <%struct.string___XUB*> [#uses=1]
-@ada__io_exceptions__data_error = external global %struct.exception ; <%struct.exception*> [#uses=1]
-@constraint_error = external global %struct.exception ; <%struct.exception*> [#uses=2]
-@__gnat_all_others_value = external constant i32 ; <i32*> [#uses=21]
-@.str3 = internal constant [10 x i8] c"0123456789" ; <[10 x i8]*> [#uses=2]
-@ada__text_io__current_out = external global %struct.ada__text_io__text_afcb* ; <%struct.ada__text_io__text_afcb**> [#uses=1]
-@.str4 = internal constant [126 x i8] c"CHECK THAT FIXED_IO PUT OPERATES ON FILES OF MODE OUT_FILE AND IF NO FILE IS SPECIFIED THE CURRENT DEFAULT OUTPUT FILE IS USED" ; <[126 x i8]*> [#uses=1]
-@C.131.1559 = internal constant %struct.string___XUB { i32 1, i32 126 } ; <%struct.string___XUB*> [#uses=1]
-@.str5 = internal constant [7 x i8] c"CE3806G" ; <[7 x i8]*> [#uses=1]
-@C.132.1562 = internal constant %struct.string___XUB { i32 1, i32 7 } ; <%struct.string___XUB*> [#uses=1]
-@incompleteF.1176.b = internal global i1 false ; <i1*> [#uses=2]
-@incomplete.1177 = internal global %struct.exception { i8 0, i8 65, i32 23, i8* getelementptr ([23 x i8]* @incompleteE.1174, i32 0, i32 0), i8* null, i32 0, i8* null } ; <%struct.exception*> [#uses=15]
-@incompleteE.1174 = internal global [23 x i8] c"CE3806G.B_1.INCOMPLETE\00" ; <[23 x i8]*> [#uses=1]
-@.str6 = internal constant [0 x i8] zeroinitializer ; <[0 x i8]*> [#uses=1]
-@C.136.1568 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@C.137.1571 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@.str7 = internal constant [50 x i8] c"USE_ERROR RAISED ON TEXT CREATE WITH OUT_FILE MODE" ; <[50 x i8]*> [#uses=1]
-@C.139.1577 = internal constant %struct.string___XUB { i32 1, i32 50 } ; <%struct.string___XUB*> [#uses=1]
-@.str8 = internal constant [14 x i8] c"ce3806g.adb:65" ; <[14 x i8]*> [#uses=1]
-@C.140.1580 = internal constant %struct.string___XUB { i32 1, i32 14 } ; <%struct.string___XUB*> [#uses=1]
-@.str9 = internal constant [51 x i8] c"NAME_ERROR RAISED ON TEXT CREATE WITH OUT_FILE MODE" ; <[51 x i8]*> [#uses=1]
-@C.143.1585 = internal constant %struct.string___XUB { i32 1, i32 51 } ; <%struct.string___XUB*> [#uses=1]
-@.str10 = internal constant [14 x i8] c"ce3806g.adb:69" ; <[14 x i8]*> [#uses=1]
-@C.144.1588 = internal constant %struct.string___XUB { i32 1, i32 14 } ; <%struct.string___XUB*> [#uses=1]
-@C.146.1592 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@C.147.1595 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@C.153.1609 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@C.154.1612 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@.str12 = internal constant [47 x i8] c"USE_ERROR RAISED ON TEXT OPEN WITH IN_FILE MODE" ; <[47 x i8]*> [#uses=1]
-@C.156.1618 = internal constant %struct.string___XUB { i32 1, i32 47 } ; <%struct.string___XUB*> [#uses=1]
-@.str13 = internal constant [14 x i8] c"ce3806g.adb:88" ; <[14 x i8]*> [#uses=1]
-@C.157.1621 = internal constant %struct.string___XUB { i32 1, i32 14 } ; <%struct.string___XUB*> [#uses=1]
-@C.159.1627 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@C.160.1630 = internal constant %struct.string___XUB { i32 1, i32 0 } ; <%struct.string___XUB*> [#uses=1]
-@.str14 = internal constant [33 x i8] c"VALUE INCORRECT - FIXED FROM FILE" ; <[33 x i8]*> [#uses=1]
-@C.162.1637 = internal constant %struct.string___XUB { i32 1, i32 33 } ; <%struct.string___XUB*> [#uses=1]
-@.str15 = internal constant [36 x i8] c"VALUE INCORRECT - FIXED FROM DEFAULT" ; <[36 x i8]*> [#uses=1]
-@C.164.1642 = internal constant %struct.string___XUB { i32 1, i32 36 } ; <%struct.string___XUB*> [#uses=1]
-@ada__io_exceptions__use_error = external global %struct.exception ; <%struct.exception*> [#uses=4]
-@ada__io_exceptions__name_error = external global %struct.exception ; <%struct.exception*> [#uses=2]
-
-define void @_ada_ce3806g() {
-entry:
- %0 = alloca %struct.system__file_control_block__pstring, align 8 ; <%struct.system__file_control_block__pstring*> [#uses=3]
- %1 = alloca %struct.system__file_control_block__pstring, align 8 ; <%struct.system__file_control_block__pstring*> [#uses=3]
- %2 = alloca %struct.system__file_control_block__pstring, align 8 ; <%struct.system__file_control_block__pstring*> [#uses=3]
- %3 = alloca %struct.system__file_control_block__pstring, align 8 ; <%struct.system__file_control_block__pstring*> [#uses=3]
- %FRAME.356 = alloca %struct.FRAME.ce3806g ; <%struct.FRAME.ce3806g*> [#uses=20]
- call void @report__test( i8* getelementptr ([7 x i8]* @.str5, i32 0, i32 0), %struct.string___XUB* @C.132.1562, i8* getelementptr ([126 x i8]* @.str4, i32 0, i32 0), %struct.string___XUB* @C.131.1559 )
- %4 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 3 ; <%struct.string___XUB*> [#uses=1]
- call void @system__secondary_stack__ss_mark( %struct.string___XUB* noalias sret %4 )
- %.b = load i1* @incompleteF.1176.b ; <i1> [#uses=1]
- br i1 %.b, label %bb11, label %bb
-
-bb: ; preds = %entry
- invoke void @system__exception_table__register_exception( %struct.system__standard_library__exception_data* bitcast (%struct.exception* @incomplete.1177 to %struct.system__standard_library__exception_data*) )
- to label %invcont unwind label %lpad
-
-invcont: ; preds = %bb
- store i1 true, i1* @incompleteF.1176.b
- br label %bb11
-
-bb11: ; preds = %entry, %invcont
- %5 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 2 ; <%struct.string___XUB*> [#uses=1]
- invoke void @system__secondary_stack__ss_mark( %struct.string___XUB* noalias sret %5 )
- to label %invcont12 unwind label %lpad228
-
-invcont12: ; preds = %bb11
- invoke void @report__legal_file_name( %struct.system__file_control_block__pstring* noalias sret %3, i32 1, i8* getelementptr ([0 x i8]* @.str6, i32 0, i32 0), %struct.string___XUB* @C.137.1571 )
- to label %invcont17 unwind label %lpad232
-
-invcont17: ; preds = %invcont12
- %elt18 = getelementptr %struct.system__file_control_block__pstring* %3, i32 0, i32 0 ; <i8**> [#uses=1]
- %val19 = load i8** %elt18, align 8 ; <i8*> [#uses=1]
- %elt20 = getelementptr %struct.system__file_control_block__pstring* %3, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %val21 = load %struct.string___XUB** %elt20 ; <%struct.string___XUB*> [#uses=1]
- %6 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__create( %struct.ada__text_io__text_afcb* null, i8 2, i8* %val19, %struct.string___XUB* %val21, i8* getelementptr ([0 x i8]* @.str6, i32 0, i32 0), %struct.string___XUB* @C.136.1568 )
- to label %invcont26 unwind label %lpad232 ; <%struct.ada__text_io__text_afcb*> [#uses=2]
-
-invcont26: ; preds = %invcont17
- %7 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 2, i32 0 ; <i32*> [#uses=1]
- %8 = load i32* %7, align 8 ; <i32> [#uses=1]
- %9 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 2, i32 1 ; <i32*> [#uses=1]
- %10 = load i32* %9, align 4 ; <i32> [#uses=1]
- invoke void @system__secondary_stack__ss_release( i32 %8, i32 %10 )
- to label %bb73 unwind label %lpad228
-
-bb32: ; preds = %lpad232
- call void @__gnat_begin_handler( i8* %eh_ptr233 ) nounwind
- %11 = load void ()** @system__soft_links__abort_undefer, align 4 ; <void ()*> [#uses=1]
- invoke void %11( )
- to label %invcont33 unwind label %lpad240
-
-invcont33: ; preds = %bb32
- invoke void @report__not_applicable( i8* getelementptr ([50 x i8]* @.str7, i32 0, i32 0), %struct.string___XUB* @C.139.1577 )
- to label %invcont38 unwind label %lpad240
-
-invcont38: ; preds = %invcont33
- invoke void @__gnat_raise_exception( %struct.system__standard_library__exception_data* bitcast (%struct.exception* @incomplete.1177 to %struct.system__standard_library__exception_data*), i8* getelementptr ([14 x i8]* @.str8, i32 0, i32 0), %struct.string___XUB* @C.140.1580 ) noreturn
- to label %invcont43 unwind label %lpad240
-
-invcont43: ; preds = %invcont38
- unreachable
-
-bb47: ; preds = %ppad291
- call void @__gnat_begin_handler( i8* %eh_ptr233 ) nounwind
- %12 = load void ()** @system__soft_links__abort_undefer, align 4 ; <void ()*> [#uses=1]
- invoke void %12( )
- to label %invcont49 unwind label %lpad248
-
-invcont49: ; preds = %bb47
- invoke void @report__not_applicable( i8* getelementptr ([51 x i8]* @.str9, i32 0, i32 0), %struct.string___XUB* @C.143.1585 )
- to label %invcont54 unwind label %lpad248
-
-invcont54: ; preds = %invcont49
- invoke void @__gnat_raise_exception( %struct.system__standard_library__exception_data* bitcast (%struct.exception* @incomplete.1177 to %struct.system__standard_library__exception_data*), i8* getelementptr ([14 x i8]* @.str10, i32 0, i32 0), %struct.string___XUB* @C.144.1588 ) noreturn
- to label %invcont59 unwind label %lpad248
-
-invcont59: ; preds = %invcont54
- unreachable
-
-bb73: ; preds = %invcont26
- invoke void @report__legal_file_name( %struct.system__file_control_block__pstring* noalias sret %2, i32 2, i8* getelementptr ([0 x i8]* @.str6, i32 0, i32 0), %struct.string___XUB* @C.147.1595 )
- to label %invcont78 unwind label %lpad228
-
-invcont78: ; preds = %bb73
- %elt79 = getelementptr %struct.system__file_control_block__pstring* %2, i32 0, i32 0 ; <i8**> [#uses=1]
- %val80 = load i8** %elt79, align 8 ; <i8*> [#uses=1]
- %elt81 = getelementptr %struct.system__file_control_block__pstring* %2, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %val82 = load %struct.string___XUB** %elt81 ; <%struct.string___XUB*> [#uses=1]
- %13 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__create( %struct.ada__text_io__text_afcb* null, i8 2, i8* %val80, %struct.string___XUB* %val82, i8* getelementptr ([0 x i8]* @.str6, i32 0, i32 0), %struct.string___XUB* @C.146.1592 )
- to label %invcont87 unwind label %lpad228 ; <%struct.ada__text_io__text_afcb*> [#uses=2]
-
-invcont87: ; preds = %invcont78
- invoke void @ada__text_io__set_output( %struct.ada__text_io__text_afcb* %13 )
- to label %invcont88 unwind label %lpad228
-
-invcont88: ; preds = %invcont87
- %14 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 1 ; <%struct.string___XUB*> [#uses=1]
- invoke void @system__secondary_stack__ss_mark( %struct.string___XUB* noalias sret %14 )
- to label %invcont89 unwind label %lpad228
-
-invcont89: ; preds = %invcont88
- invoke fastcc void @ce3806g__fxio__put.1149( %struct.ada__text_io__text_afcb* %6 )
- to label %bb94 unwind label %lpad252
-
-bb94: ; preds = %invcont89
- invoke fastcc void @ce3806g__fxio__put__2.1155( )
- to label %invcont95 unwind label %lpad252
-
-invcont95: ; preds = %bb94
- %15 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__close( %struct.ada__text_io__text_afcb* %6 )
- to label %invcont96 unwind label %lpad252 ; <%struct.ada__text_io__text_afcb*> [#uses=1]
-
-invcont96: ; preds = %invcont95
- %16 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 0 ; <%struct.string___XUB*> [#uses=1]
- invoke void @system__secondary_stack__ss_mark( %struct.string___XUB* noalias sret %16 )
- to label %invcont97 unwind label %lpad252
-
-invcont97: ; preds = %invcont96
- invoke void @report__legal_file_name( %struct.system__file_control_block__pstring* noalias sret %1, i32 1, i8* getelementptr ([0 x i8]* @.str6, i32 0, i32 0), %struct.string___XUB* @C.154.1612 )
- to label %invcont102 unwind label %lpad256
-
-invcont102: ; preds = %invcont97
- %elt103 = getelementptr %struct.system__file_control_block__pstring* %1, i32 0, i32 0 ; <i8**> [#uses=1]
- %val104 = load i8** %elt103, align 8 ; <i8*> [#uses=1]
- %elt105 = getelementptr %struct.system__file_control_block__pstring* %1, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %val106 = load %struct.string___XUB** %elt105 ; <%struct.string___XUB*> [#uses=1]
- %17 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__open( %struct.ada__text_io__text_afcb* %15, i8 0, i8* %val104, %struct.string___XUB* %val106, i8* getelementptr ([0 x i8]* @.str6, i32 0, i32 0), %struct.string___XUB* @C.153.1609 )
- to label %invcont111 unwind label %lpad256 ; <%struct.ada__text_io__text_afcb*> [#uses=2]
-
-invcont111: ; preds = %invcont102
- %18 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- %19 = load i32* %18, align 8 ; <i32> [#uses=1]
- %20 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
- %21 = load i32* %20, align 4 ; <i32> [#uses=1]
- invoke void @system__secondary_stack__ss_release( i32 %19, i32 %21 )
- to label %bb143 unwind label %lpad252
-
-bb117: ; preds = %lpad256
- call void @__gnat_begin_handler( i8* %eh_ptr257 ) nounwind
- %22 = load void ()** @system__soft_links__abort_undefer, align 4 ; <void ()*> [#uses=1]
- invoke void %22( )
- to label %invcont119 unwind label %lpad264
-
-invcont119: ; preds = %bb117
- invoke void @report__not_applicable( i8* getelementptr ([47 x i8]* @.str12, i32 0, i32 0), %struct.string___XUB* @C.156.1618 )
- to label %invcont124 unwind label %lpad264
-
-invcont124: ; preds = %invcont119
- invoke void @__gnat_raise_exception( %struct.system__standard_library__exception_data* bitcast (%struct.exception* @incomplete.1177 to %struct.system__standard_library__exception_data*), i8* getelementptr ([14 x i8]* @.str13, i32 0, i32 0), %struct.string___XUB* @C.157.1621 ) noreturn
- to label %invcont129 unwind label %lpad264
-
-invcont129: ; preds = %invcont124
- unreachable
-
-bb143: ; preds = %invcont111
- %23 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__standard_output( )
- to label %invcont144 unwind label %lpad252 ; <%struct.ada__text_io__text_afcb*> [#uses=1]
-
-invcont144: ; preds = %bb143
- invoke void @ada__text_io__set_output( %struct.ada__text_io__text_afcb* %23 )
- to label %invcont145 unwind label %lpad252
-
-invcont145: ; preds = %invcont144
- %24 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__close( %struct.ada__text_io__text_afcb* %13 )
- to label %invcont146 unwind label %lpad252 ; <%struct.ada__text_io__text_afcb*> [#uses=1]
-
-invcont146: ; preds = %invcont145
- invoke void @report__legal_file_name( %struct.system__file_control_block__pstring* noalias sret %0, i32 2, i8* getelementptr ([0 x i8]* @.str6, i32 0, i32 0), %struct.string___XUB* @C.160.1630 )
- to label %invcont151 unwind label %lpad252
-
-invcont151: ; preds = %invcont146
- %elt152 = getelementptr %struct.system__file_control_block__pstring* %0, i32 0, i32 0 ; <i8**> [#uses=1]
- %val153 = load i8** %elt152, align 8 ; <i8*> [#uses=1]
- %elt154 = getelementptr %struct.system__file_control_block__pstring* %0, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %val155 = load %struct.string___XUB** %elt154 ; <%struct.string___XUB*> [#uses=1]
- %25 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__open( %struct.ada__text_io__text_afcb* %24, i8 0, i8* %val153, %struct.string___XUB* %val155, i8* getelementptr ([0 x i8]* @.str6, i32 0, i32 0), %struct.string___XUB* @C.159.1627 )
- to label %invcont160 unwind label %lpad252 ; <%struct.ada__text_io__text_afcb*> [#uses=2]
-
-invcont160: ; preds = %invcont151
- %26 = invoke fastcc i8 @ce3806g__fxio__get.1137( %struct.ada__text_io__text_afcb* %17 ) signext
- to label %invcont161 unwind label %lpad252 ; <i8> [#uses=1]
-
-invcont161: ; preds = %invcont160
- %27 = icmp eq i8 %26, -3 ; <i1> [#uses=1]
- br i1 %27, label %bb169, label %bb163
-
-bb163: ; preds = %invcont161
- invoke void @report__failed( i8* getelementptr ([33 x i8]* @.str14, i32 0, i32 0), %struct.string___XUB* @C.162.1637 )
- to label %bb169 unwind label %lpad252
-
-bb169: ; preds = %invcont161, %bb163
- %28 = invoke fastcc i8 @ce3806g__fxio__get.1137( %struct.ada__text_io__text_afcb* %25 ) signext
- to label %invcont170 unwind label %lpad252 ; <i8> [#uses=1]
-
-invcont170: ; preds = %bb169
- %29 = icmp eq i8 %28, -1 ; <i1> [#uses=1]
- br i1 %29, label %bb187, label %bb172
-
-bb172: ; preds = %invcont170
- invoke void @report__failed( i8* getelementptr ([36 x i8]* @.str15, i32 0, i32 0), %struct.string___XUB* @C.164.1642 )
- to label %bb187 unwind label %lpad252
-
-bb187: ; preds = %invcont170, %bb172
- %30 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 1, i32 0 ; <i32*> [#uses=1]
- %31 = load i32* %30, align 8 ; <i32> [#uses=1]
- %32 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 1, i32 1 ; <i32*> [#uses=1]
- %33 = load i32* %32, align 4 ; <i32> [#uses=1]
- invoke void @system__secondary_stack__ss_release( i32 %31, i32 %33 )
- to label %bb193 unwind label %lpad228
-
-bb193: ; preds = %bb187
- %34 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__delete( %struct.ada__text_io__text_afcb* %17 )
- to label %invcont194 unwind label %lpad268 ; <%struct.ada__text_io__text_afcb*> [#uses=0]
-
-invcont194: ; preds = %bb193
- %35 = invoke %struct.ada__text_io__text_afcb* @ada__text_io__delete( %struct.ada__text_io__text_afcb* %25 )
- to label %bb221 unwind label %lpad268 ; <%struct.ada__text_io__text_afcb*> [#uses=0]
-
-bb196: ; preds = %lpad268
- call void @__gnat_begin_handler( i8* %eh_ptr269 ) nounwind
- %36 = load void ()** @system__soft_links__abort_undefer, align 4 ; <void ()*> [#uses=1]
- invoke void %36( )
- to label %bb203 unwind label %lpad276
-
-bb203: ; preds = %bb196
- invoke void @__gnat_end_handler( i8* %eh_ptr269 )
- to label %bb221 unwind label %lpad272
-
-bb205: ; preds = %ppad304
- call void @__gnat_begin_handler( i8* %eh_exception.1 ) nounwind
- %37 = load void ()** @system__soft_links__abort_undefer, align 4 ; <void ()*> [#uses=1]
- invoke void %37( )
- to label %bb212 unwind label %lpad284
-
-bb212: ; preds = %bb205
- invoke void @__gnat_end_handler( i8* %eh_exception.1 )
- to label %bb221 unwind label %lpad280
-
-bb221: ; preds = %invcont194, %bb212, %bb203
- %38 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 3, i32 0 ; <i32*> [#uses=1]
- %39 = load i32* %38, align 8 ; <i32> [#uses=1]
- %40 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 3, i32 1 ; <i32*> [#uses=1]
- %41 = load i32* %40, align 4 ; <i32> [#uses=1]
- call void @system__secondary_stack__ss_release( i32 %39, i32 %41 )
- call void @report__result( )
- ret void
-
-lpad: ; preds = %bb
- %eh_ptr = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select227 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i32* @__gnat_all_others_value ) ; <i32> [#uses=0]
- br label %ppad
-
-lpad228: ; preds = %bb187, %ppad294, %invcont88, %invcont87, %invcont78, %bb73, %ppad288, %invcont26, %bb11
- %eh_ptr229 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select231 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr229, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- br label %ppad304
-
-lpad232: ; preds = %invcont17, %invcont12
- %eh_ptr233 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=6]
- %eh_select235 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr233, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @ada__io_exceptions__use_error, %struct.exception* @ada__io_exceptions__name_error, %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=3]
- %eh_typeid = call i32 @llvm.eh.typeid.for.i32( i8* getelementptr (%struct.exception* @ada__io_exceptions__use_error, i32 0, i32 0) ) ; <i32> [#uses=1]
- %42 = icmp eq i32 %eh_select235, %eh_typeid ; <i1> [#uses=1]
- br i1 %42, label %bb32, label %ppad291
-
-lpad236: ; preds = %lpad240
- %eh_ptr237 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select239 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr237, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- br label %ppad288
-
-lpad240: ; preds = %invcont38, %invcont33, %bb32
- %eh_ptr241 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select243 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr241, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- invoke void @__gnat_end_handler( i8* %eh_ptr233 )
- to label %ppad288 unwind label %lpad236
-
-lpad244: ; preds = %lpad248
- %eh_ptr245 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select247 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr245, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- br label %ppad288
-
-lpad248: ; preds = %invcont54, %invcont49, %bb47
- %eh_ptr249 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select251 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr249, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- invoke void @__gnat_end_handler( i8* %eh_ptr233 )
- to label %ppad288 unwind label %lpad244
-
-lpad252: ; preds = %bb94, %invcont89, %invcont160, %bb169, %bb172, %bb163, %invcont151, %invcont146, %invcont145, %invcont144, %bb143, %ppad295, %invcont111, %invcont96, %invcont95
- %eh_ptr253 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select255 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr253, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- br label %ppad294
-
-lpad256: ; preds = %invcont102, %invcont97
- %eh_ptr257 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=4]
- %eh_select259 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr257, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @ada__io_exceptions__use_error, %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=2]
- %eh_typeid297 = call i32 @llvm.eh.typeid.for.i32( i8* getelementptr (%struct.exception* @ada__io_exceptions__use_error, i32 0, i32 0) ) ; <i32> [#uses=1]
- %43 = icmp eq i32 %eh_select259, %eh_typeid297 ; <i1> [#uses=1]
- br i1 %43, label %bb117, label %ppad295
-
-lpad260: ; preds = %lpad264
- %eh_ptr261 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select263 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr261, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- br label %ppad295
-
-lpad264: ; preds = %invcont124, %invcont119, %bb117
- %eh_ptr265 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select267 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr265, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- invoke void @__gnat_end_handler( i8* %eh_ptr257 )
- to label %ppad295 unwind label %lpad260
-
-lpad268: ; preds = %invcont194, %bb193
- %eh_ptr269 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=5]
- %eh_select271 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr269, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @ada__io_exceptions__use_error, %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=2]
- %eh_typeid301 = call i32 @llvm.eh.typeid.for.i32( i8* getelementptr (%struct.exception* @ada__io_exceptions__use_error, i32 0, i32 0) ) ; <i32> [#uses=1]
- %44 = icmp eq i32 %eh_select271, %eh_typeid301 ; <i1> [#uses=1]
- br i1 %44, label %bb196, label %ppad304
-
-lpad272: ; preds = %bb203, %lpad276
- %eh_ptr273 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select275 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr273, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- br label %ppad304
-
-lpad276: ; preds = %bb196
- %eh_ptr277 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select279 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr277, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @incomplete.1177, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- invoke void @__gnat_end_handler( i8* %eh_ptr269 )
- to label %ppad304 unwind label %lpad272
-
-lpad280: ; preds = %bb212, %lpad284
- %eh_ptr281 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select283 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr281, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i32* @__gnat_all_others_value ) ; <i32> [#uses=0]
- br label %ppad
-
-lpad284: ; preds = %bb205
- %eh_ptr285 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select287 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr285, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i32* @__gnat_all_others_value ) ; <i32> [#uses=0]
- invoke void @__gnat_end_handler( i8* %eh_exception.1 )
- to label %ppad unwind label %lpad280
-
-ppad: ; preds = %lpad284, %ppad304, %lpad280, %lpad
- %eh_exception.2 = phi i8* [ %eh_exception.1, %ppad304 ], [ %eh_ptr281, %lpad280 ], [ %eh_ptr, %lpad ], [ %eh_ptr285, %lpad284 ] ; <i8*> [#uses=1]
- %45 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 3, i32 0 ; <i32*> [#uses=1]
- %46 = load i32* %45, align 8 ; <i32> [#uses=1]
- %47 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 3, i32 1 ; <i32*> [#uses=1]
- %48 = load i32* %47, align 4 ; <i32> [#uses=1]
- call void @system__secondary_stack__ss_release( i32 %46, i32 %48 )
- %49 = call i32 (...)* @_Unwind_Resume( i8* %eh_exception.2 ) ; <i32> [#uses=0]
- unreachable
-
-ppad288: ; preds = %lpad248, %lpad240, %ppad291, %lpad244, %lpad236
- %eh_exception.0 = phi i8* [ %eh_ptr233, %ppad291 ], [ %eh_ptr245, %lpad244 ], [ %eh_ptr237, %lpad236 ], [ %eh_ptr241, %lpad240 ], [ %eh_ptr249, %lpad248 ] ; <i8*> [#uses=1]
- %eh_selector.0 = phi i32 [ %eh_select235, %ppad291 ], [ %eh_select247, %lpad244 ], [ %eh_select239, %lpad236 ], [ %eh_select243, %lpad240 ], [ %eh_select251, %lpad248 ] ; <i32> [#uses=1]
- %50 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 2, i32 0 ; <i32*> [#uses=1]
- %51 = load i32* %50, align 8 ; <i32> [#uses=1]
- %52 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 2, i32 1 ; <i32*> [#uses=1]
- %53 = load i32* %52, align 4 ; <i32> [#uses=1]
- invoke void @system__secondary_stack__ss_release( i32 %51, i32 %53 )
- to label %ppad304 unwind label %lpad228
-
-ppad291: ; preds = %lpad232
- %eh_typeid292 = call i32 @llvm.eh.typeid.for.i32( i8* getelementptr (%struct.exception* @ada__io_exceptions__name_error, i32 0, i32 0) ) ; <i32> [#uses=1]
- %54 = icmp eq i32 %eh_select235, %eh_typeid292 ; <i1> [#uses=1]
- br i1 %54, label %bb47, label %ppad288
-
-ppad294: ; preds = %ppad295, %lpad252
- %eh_exception.4 = phi i8* [ %eh_ptr253, %lpad252 ], [ %eh_exception.3, %ppad295 ] ; <i8*> [#uses=1]
- %eh_selector.4 = phi i32 [ %eh_select255, %lpad252 ], [ %eh_selector.3, %ppad295 ] ; <i32> [#uses=1]
- %55 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 1, i32 0 ; <i32*> [#uses=1]
- %56 = load i32* %55, align 8 ; <i32> [#uses=1]
- %57 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 1, i32 1 ; <i32*> [#uses=1]
- %58 = load i32* %57, align 4 ; <i32> [#uses=1]
- invoke void @system__secondary_stack__ss_release( i32 %56, i32 %58 )
- to label %ppad304 unwind label %lpad228
-
-ppad295: ; preds = %lpad264, %lpad256, %lpad260
- %eh_exception.3 = phi i8* [ %eh_ptr257, %lpad256 ], [ %eh_ptr261, %lpad260 ], [ %eh_ptr265, %lpad264 ] ; <i8*> [#uses=1]
- %eh_selector.3 = phi i32 [ %eh_select259, %lpad256 ], [ %eh_select263, %lpad260 ], [ %eh_select267, %lpad264 ] ; <i32> [#uses=1]
- %59 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- %60 = load i32* %59, align 8 ; <i32> [#uses=1]
- %61 = getelementptr %struct.FRAME.ce3806g* %FRAME.356, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
- %62 = load i32* %61, align 4 ; <i32> [#uses=1]
- invoke void @system__secondary_stack__ss_release( i32 %60, i32 %62 )
- to label %ppad294 unwind label %lpad252
-
-ppad304: ; preds = %lpad276, %ppad294, %ppad288, %lpad268, %lpad272, %lpad228
- %eh_exception.1 = phi i8* [ %eh_ptr229, %lpad228 ], [ %eh_ptr269, %lpad268 ], [ %eh_ptr273, %lpad272 ], [ %eh_exception.0, %ppad288 ], [ %eh_exception.4, %ppad294 ], [ %eh_ptr277, %lpad276 ] ; <i8*> [#uses=4]
- %eh_selector.1 = phi i32 [ %eh_select231, %lpad228 ], [ %eh_select271, %lpad268 ], [ %eh_select275, %lpad272 ], [ %eh_selector.0, %ppad288 ], [ %eh_selector.4, %ppad294 ], [ %eh_select279, %lpad276 ] ; <i32> [#uses=1]
- %eh_typeid305 = call i32 @llvm.eh.typeid.for.i32( i8* getelementptr (%struct.exception* @incomplete.1177, i32 0, i32 0) ) ; <i32> [#uses=1]
- %63 = icmp eq i32 %eh_selector.1, %eh_typeid305 ; <i1> [#uses=1]
- br i1 %63, label %bb205, label %ppad
-}
-
-define internal fastcc i8 @ce3806g__fxio__get.1137(%struct.ada__text_io__text_afcb* %file) signext {
-entry:
- %0 = invoke x86_fp80 @ada__text_io__float_aux__get( %struct.ada__text_io__text_afcb* %file, i32 0 )
- to label %invcont unwind label %lpad ; <x86_fp80> [#uses=5]
-
-invcont: ; preds = %entry
- %1 = fcmp ult x86_fp80 %0, 0xKFFFEFFFFFFFFFFFFFFFF ; <i1> [#uses=1]
- %2 = fcmp ugt x86_fp80 %0, 0xK7FFEFFFFFFFFFFFFFFFF ; <i1> [#uses=1]
- %or.cond = or i1 %1, %2 ; <i1> [#uses=1]
- br i1 %or.cond, label %bb2, label %bb4
-
-bb2: ; preds = %invcont
- invoke void @__gnat_rcheck_12( i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0), i32 1 ) noreturn
- to label %invcont3 unwind label %lpad
-
-invcont3: ; preds = %bb2
- unreachable
-
-bb4: ; preds = %invcont
- %3 = fmul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=1]
- %4 = fcmp ult x86_fp80 %3, 0xKC0068000000000000000 ; <i1> [#uses=1]
- br i1 %4, label %bb8, label %bb6
-
-bb6: ; preds = %bb4
- %5 = fmul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=1]
- %6 = fcmp ugt x86_fp80 %5, 0xK4005FE00000000000000 ; <i1> [#uses=1]
- br i1 %6, label %bb8, label %bb10
-
-bb8: ; preds = %bb4, %bb6
- invoke void @__gnat_rcheck_10( i8* getelementptr ([14 x i8]* @.str1, i32 0, i32 0), i32 324 ) noreturn
- to label %invcont9 unwind label %lpad
-
-invcont9: ; preds = %bb8
- unreachable
-
-bb10: ; preds = %bb6
- %7 = fmul x86_fp80 %0, 0xK40008000000000000000 ; <x86_fp80> [#uses=3]
- %8 = fcmp ult x86_fp80 %7, 0xK00000000000000000000 ; <i1> [#uses=1]
- br i1 %8, label %bb13, label %bb12
-
-bb12: ; preds = %bb10
- %9 = fadd x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF ; <x86_fp80> [#uses=1]
- br label %bb14
-
-bb13: ; preds = %bb10
- %10 = fsub x86_fp80 %7, 0xK3FFDFFFFFFFFFFFFFFFF ; <x86_fp80> [#uses=1]
- br label %bb14
-
-bb14: ; preds = %bb13, %bb12
- %iftmp.339.0.in = phi x86_fp80 [ %10, %bb13 ], [ %9, %bb12 ] ; <x86_fp80> [#uses=1]
- %iftmp.339.0 = fptosi x86_fp80 %iftmp.339.0.in to i8 ; <i8> [#uses=3]
- %11 = add i8 %iftmp.339.0, 20 ; <i8> [#uses=1]
- %12 = icmp ugt i8 %11, 40 ; <i1> [#uses=1]
- br i1 %12, label %bb16, label %bb18
-
-bb16: ; preds = %bb14
- invoke void @__gnat_rcheck_12( i8* getelementptr ([14 x i8]* @.str1, i32 0, i32 0), i32 324 ) noreturn
- to label %invcont17 unwind label %lpad
-
-invcont17: ; preds = %bb16
- unreachable
-
-bb18: ; preds = %bb14
- %13 = add i8 %iftmp.339.0, 20 ; <i8> [#uses=1]
- %14 = icmp ugt i8 %13, 40 ; <i1> [#uses=1]
- br i1 %14, label %bb20, label %bb22
-
-bb20: ; preds = %bb18
- invoke void @__gnat_rcheck_12( i8* getelementptr ([14 x i8]* @.str1, i32 0, i32 0), i32 324 ) noreturn
- to label %invcont21 unwind label %lpad
-
-invcont21: ; preds = %bb20
- unreachable
-
-bb22: ; preds = %bb18
- ret i8 %iftmp.339.0
-
-bb23: ; preds = %lpad
- call void @__gnat_begin_handler( i8* %eh_ptr ) nounwind
- %15 = load void ()** @system__soft_links__abort_undefer, align 4 ; <void ()*> [#uses=1]
- invoke void %15( )
- to label %invcont24 unwind label %lpad33
-
-invcont24: ; preds = %bb23
- invoke void @__gnat_raise_exception( %struct.system__standard_library__exception_data* bitcast (%struct.exception* @ada__io_exceptions__data_error to %struct.system__standard_library__exception_data*), i8* getelementptr ([47 x i8]* @.str2, i32 0, i32 0), %struct.string___XUB* @C.354.2200 ) noreturn
- to label %invcont27 unwind label %lpad33
-
-invcont27: ; preds = %invcont24
- unreachable
-
-lpad: ; preds = %bb20, %bb16, %bb8, %bb2, %entry
- %eh_ptr = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=4]
- %eh_select32 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), %struct.exception* @constraint_error, i32* @__gnat_all_others_value ) ; <i32> [#uses=1]
- %eh_typeid = call i32 @llvm.eh.typeid.for.i32( i8* getelementptr (%struct.exception* @constraint_error, i32 0, i32 0) ) ; <i32> [#uses=1]
- %16 = icmp eq i32 %eh_select32, %eh_typeid ; <i1> [#uses=1]
- br i1 %16, label %bb23, label %Unwind
-
-lpad33: ; preds = %invcont24, %bb23
- %eh_ptr34 = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select36 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr34, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i32* @__gnat_all_others_value ) ; <i32> [#uses=0]
- call void @__gnat_end_handler( i8* %eh_ptr )
- br label %Unwind
-
-Unwind: ; preds = %lpad, %lpad33
- %eh_exception.0 = phi i8* [ %eh_ptr, %lpad ], [ %eh_ptr34, %lpad33 ] ; <i8*> [#uses=1]
- %17 = call i32 (...)* @_Unwind_Resume( i8* %eh_exception.0 ) ; <i32> [#uses=0]
- unreachable
-}
-
-define internal fastcc void @ce3806g__fxio__put.1149(%struct.ada__text_io__text_afcb* %file) {
-entry:
- %A.301 = alloca %struct.string___XUB ; <%struct.string___XUB*> [#uses=3]
- %A.292 = alloca %struct.string___XUB ; <%struct.string___XUB*> [#uses=3]
- %0 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
- %1 = alloca [12 x i8] ; <[12 x i8]*> [#uses=1]
- %.sub = getelementptr [12 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
- %2 = getelementptr %struct.string___XUB* %A.292, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %2, align 8
- %3 = getelementptr %struct.string___XUB* %A.292, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 12, i32* %3, align 4
- %4 = invoke fastcc i32 @ce3806g__fxio__put__4.1215( i8* %.sub, %struct.string___XUB* %A.292, i8 signext -3 )
- to label %invcont unwind label %lpad ; <i32> [#uses=1]
-
-invcont: ; preds = %entry
- %5 = getelementptr %struct.string___XUB* %A.301, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %5, align 8
- %6 = getelementptr %struct.string___XUB* %A.301, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %4, i32* %6, align 4
- invoke void @ada__text_io__generic_aux__put_item( %struct.ada__text_io__text_afcb* %file, i8* %.sub, %struct.string___XUB* %A.301 )
- to label %bb60 unwind label %lpad
-
-bb60: ; preds = %invcont
- ret void
-
-lpad: ; preds = %entry, %invcont
- %eh_ptr = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select62 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i32* @__gnat_all_others_value ) ; <i32> [#uses=0]
- call void @llvm.stackrestore( i8* %0 )
- %7 = call i32 (...)* @_Unwind_Resume( i8* %eh_ptr ) ; <i32> [#uses=0]
- unreachable
-}
-
-define internal fastcc void @ce3806g__fxio__put__2.1155() {
-entry:
- %A.266 = alloca %struct.string___XUB ; <%struct.string___XUB*> [#uses=3]
- %A.257 = alloca %struct.string___XUB ; <%struct.string___XUB*> [#uses=3]
- %0 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
- %1 = alloca [12 x i8] ; <[12 x i8]*> [#uses=1]
- %.sub = getelementptr [12 x i8]* %1, i32 0, i32 0 ; <i8*> [#uses=2]
- %2 = getelementptr %struct.string___XUB* %A.257, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %2, align 8
- %3 = getelementptr %struct.string___XUB* %A.257, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 12, i32* %3, align 4
- %4 = invoke fastcc i32 @ce3806g__fxio__put__4.1215( i8* %.sub, %struct.string___XUB* %A.257, i8 signext -1 )
- to label %invcont unwind label %lpad ; <i32> [#uses=1]
-
-invcont: ; preds = %entry
- %5 = getelementptr %struct.string___XUB* %A.266, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %5, align 8
- %6 = getelementptr %struct.string___XUB* %A.266, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %4, i32* %6, align 4
- %7 = load %struct.ada__text_io__text_afcb** @ada__text_io__current_out, align 4 ; <%struct.ada__text_io__text_afcb*> [#uses=1]
- invoke void @ada__text_io__generic_aux__put_item( %struct.ada__text_io__text_afcb* %7, i8* %.sub, %struct.string___XUB* %A.266 )
- to label %bb60 unwind label %lpad
-
-bb60: ; preds = %invcont
- ret void
-
-lpad: ; preds = %entry, %invcont
- %eh_ptr = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select62 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i32* @__gnat_all_others_value ) ; <i32> [#uses=0]
- call void @llvm.stackrestore( i8* %0 )
- %8 = call i32 (...)* @_Unwind_Resume( i8* %eh_ptr ) ; <i32> [#uses=0]
- unreachable
-}
-
-define internal fastcc i32 @ce3806g__fxio__put__4.1215(i8* %to.0, %struct.string___XUB* %to.1, i8 signext %item) {
-entry:
- %P0 = load i32* @__gnat_all_others_value, align 4 ; <i32> [#uses=1]
- %P = alloca i32, i32 %P0 ; <i32*> [#uses=1]
- call void @ext( i32* %P )
- %to_addr = alloca %struct.system__file_control_block__pstring ; <%struct.system__file_control_block__pstring*> [#uses=4]
- %FRAME.358 = alloca %struct.FRAME.ce3806g__fxio__put__4 ; <%struct.FRAME.ce3806g__fxio__put__4*> [#uses=65]
- %0 = getelementptr %struct.system__file_control_block__pstring* %to_addr, i32 0, i32 0 ; <i8**> [#uses=1]
- store i8* %to.0, i8** %0, align 8
- %1 = getelementptr %struct.system__file_control_block__pstring* %to_addr, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- store %struct.string___XUB* %to.1, %struct.string___XUB** %1
- %2 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- store %struct.system__file_control_block__pstring* %to_addr, %struct.system__file_control_block__pstring** %2, align 4
- %3 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 3, i32* %3, align 8
- %4 = getelementptr %struct.system__file_control_block__pstring* %to_addr, i32 0, i32 1 ; <%struct.string___XUB**> [#uses=1]
- %5 = load %struct.string___XUB** %4, align 4 ; <%struct.string___XUB*> [#uses=1]
- %6 = getelementptr %struct.string___XUB* %5, i32 0, i32 0 ; <i32*> [#uses=1]
- %7 = load i32* %6, align 4 ; <i32> [#uses=1]
- %8 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 2 ; <i32*> [#uses=1]
- store i32 %7, i32* %8, align 8
- %9 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 2 ; <i32*> [#uses=1]
- %10 = load i32* %9, align 8 ; <i32> [#uses=1]
- %11 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- store i32 %10, i32* %11, align 8
- %item.lobit = lshr i8 %item, 7 ; <i8> [#uses=1]
- %12 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 6 ; <i8*> [#uses=1]
- store i8 %item.lobit, i8* %12, align 8
- %13 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 2 ; <i32*> [#uses=1]
- %14 = load i32* %13, align 8 ; <i32> [#uses=1]
- %15 = add i32 %14, -1 ; <i32> [#uses=1]
- %16 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %15, i32* %16, align 4
- %17 = sext i8 %item to i64 ; <i64> [#uses=1]
- %18 = call i64 @system__exn_lli__exn_long_long_integer( i64 10, i32 1 ) readnone ; <i64> [#uses=1]
- %19 = sub i64 0, %18 ; <i64> [#uses=1]
- %20 = call i64 @system__exn_lli__exn_long_long_integer( i64 10, i32 0 ) readnone ; <i64> [#uses=1]
- %21 = mul i64 %20, -2 ; <i64> [#uses=1]
- call fastcc void @ce3806g__fxio__put__put_scaled__4.1346( %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i64 %17, i64 %19, i64 %21, i32 0, i32 -1 )
- %22 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %23 = load i32* %22, align 4 ; <i32> [#uses=1]
- %24 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 2 ; <i32*> [#uses=1]
- %25 = load i32* %24, align 8 ; <i32> [#uses=1]
- %26 = icmp slt i32 %23, %25 ; <i1> [#uses=1]
- br i1 %26, label %bb71, label %bb72
-
-bb71: ; preds = %entry
- %27 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 0, i32* %27, align 4
- br label %bb72
-
-bb72: ; preds = %entry, %bb102, %bb71
- %28 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 1 ; <i32*> [#uses=1]
- %29 = load i32* %28, align 4 ; <i32> [#uses=1]
- %30 = icmp slt i32 %29, -1 ; <i1> [#uses=1]
- %31 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %32 = load i32* %31, align 4 ; <i32> [#uses=2]
- br i1 %30, label %bb103, label %bb74
-
-bb74: ; preds = %bb72
- %33 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 2 ; <i32*> [#uses=1]
- %34 = load i32* %33, align 8 ; <i32> [#uses=1]
- %35 = add i32 %34, -1 ; <i32> [#uses=1]
- %36 = icmp eq i32 %32, %35 ; <i1> [#uses=1]
- %37 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 1 ; <i32*> [#uses=1]
- %38 = load i32* %37, align 4 ; <i32> [#uses=2]
- br i1 %36, label %bb76, label %bb98
-
-bb76: ; preds = %bb74
- %39 = icmp slt i32 %38, 1 ; <i1> [#uses=1]
- br i1 %39, label %bb80, label %bb102
-
-bb80: ; preds = %bb76
- %40 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 1 ; <i32*> [#uses=1]
- %41 = load i32* %40, align 4 ; <i32> [#uses=2]
- %42 = icmp sgt i32 %41, -1 ; <i1> [#uses=1]
- %.op = add i32 %41, 2 ; <i32> [#uses=1]
- %43 = select i1 %42, i32 %.op, i32 2 ; <i32> [#uses=1]
- %44 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 6 ; <i8*> [#uses=1]
- %45 = load i8* %44, align 8 ; <i8> [#uses=1]
- %46 = zext i8 %45 to i32 ; <i32> [#uses=1]
- %47 = add i32 %43, %46 ; <i32> [#uses=2]
- %48 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 0 ; <i32*> [#uses=1]
- %49 = load i32* %48, align 8 ; <i32> [#uses=1]
- %50 = icmp sgt i32 %47, %49 ; <i1> [#uses=1]
- br i1 %50, label %bb88, label %bb85
-
-bb85: ; preds = %bb80, %bb87
- %j.0 = phi i32 [ %68, %bb87 ], [ %47, %bb80 ] ; <i32> [#uses=2]
- %51 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %52 = load i32* %51, align 4 ; <i32> [#uses=1]
- %53 = add i32 %52, 1 ; <i32> [#uses=1]
- %54 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %53, i32* %54, align 4
- %55 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- %56 = load i32* %55, align 8 ; <i32> [#uses=1]
- %57 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %58 = load %struct.system__file_control_block__pstring** %57, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %59 = getelementptr %struct.system__file_control_block__pstring* %58, i32 0, i32 0 ; <i8**> [#uses=1]
- %60 = load i8** %59, align 4 ; <i8*> [#uses=1]
- %61 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %62 = load i32* %61, align 4 ; <i32> [#uses=1]
- %63 = sub i32 %62, %56 ; <i32> [#uses=1]
- %64 = getelementptr i8* %60, i32 %63 ; <i8*> [#uses=1]
- store i8 32, i8* %64, align 1
- %65 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 0 ; <i32*> [#uses=1]
- %66 = load i32* %65, align 8 ; <i32> [#uses=1]
- %67 = icmp eq i32 %66, %j.0 ; <i1> [#uses=1]
- br i1 %67, label %bb88, label %bb87
-
-bb87: ; preds = %bb85
- %68 = add i32 %j.0, 1 ; <i32> [#uses=1]
- br label %bb85
-
-bb88: ; preds = %bb80, %bb85
- %69 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 6 ; <i8*> [#uses=1]
- %70 = load i8* %69, align 8 ; <i8> [#uses=1]
- %toBool89 = icmp eq i8 %70, 0 ; <i1> [#uses=1]
- br i1 %toBool89, label %bb91, label %bb90
-
-bb90: ; preds = %bb88
- %71 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %72 = load i32* %71, align 4 ; <i32> [#uses=1]
- %73 = add i32 %72, 1 ; <i32> [#uses=1]
- %74 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %73, i32* %74, align 4
- %75 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- %76 = load i32* %75, align 8 ; <i32> [#uses=1]
- %77 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %78 = load %struct.system__file_control_block__pstring** %77, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %79 = getelementptr %struct.system__file_control_block__pstring* %78, i32 0, i32 0 ; <i8**> [#uses=1]
- %80 = load i8** %79, align 4 ; <i8*> [#uses=1]
- %81 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %82 = load i32* %81, align 4 ; <i32> [#uses=1]
- %83 = sub i32 %82, %76 ; <i32> [#uses=1]
- %84 = getelementptr i8* %80, i32 %83 ; <i8*> [#uses=1]
- store i8 45, i8* %84, align 1
- br label %bb91
-
-bb91: ; preds = %bb88, %bb90
- %85 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 1 ; <i32*> [#uses=1]
- %86 = load i32* %85, align 4 ; <i32> [#uses=1]
- %87 = icmp slt i32 %86, 0 ; <i1> [#uses=1]
- br i1 %87, label %bb93, label %bb97
-
-bb93: ; preds = %bb91
- %88 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %89 = load i32* %88, align 4 ; <i32> [#uses=1]
- %90 = add i32 %89, 1 ; <i32> [#uses=1]
- %91 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %90, i32* %91, align 4
- %92 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- %93 = load i32* %92, align 8 ; <i32> [#uses=1]
- %94 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %95 = load %struct.system__file_control_block__pstring** %94, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %96 = getelementptr %struct.system__file_control_block__pstring* %95, i32 0, i32 0 ; <i8**> [#uses=1]
- %97 = load i8** %96, align 4 ; <i8*> [#uses=1]
- %98 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %99 = load i32* %98, align 4 ; <i32> [#uses=1]
- %100 = sub i32 %99, %93 ; <i32> [#uses=1]
- %101 = getelementptr i8* %97, i32 %100 ; <i8*> [#uses=1]
- store i8 48, i8* %101, align 1
- %102 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %103 = load i32* %102, align 4 ; <i32> [#uses=1]
- %104 = add i32 %103, 1 ; <i32> [#uses=1]
- %105 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %104, i32* %105, align 4
- %106 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- %107 = load i32* %106, align 8 ; <i32> [#uses=1]
- %108 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %109 = load %struct.system__file_control_block__pstring** %108, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %110 = getelementptr %struct.system__file_control_block__pstring* %109, i32 0, i32 0 ; <i8**> [#uses=1]
- %111 = load i8** %110, align 4 ; <i8*> [#uses=1]
- %112 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %113 = load i32* %112, align 4 ; <i32> [#uses=1]
- %114 = sub i32 %113, %107 ; <i32> [#uses=1]
- %115 = getelementptr i8* %111, i32 %114 ; <i8*> [#uses=1]
- store i8 46, i8* %115, align 1
- %116 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 1 ; <i32*> [#uses=1]
- %117 = load i32* %116, align 4 ; <i32> [#uses=1]
- br label %bb94
-
-bb94: ; preds = %bb96, %bb93
- %j8.0 = phi i32 [ %117, %bb93 ], [ %133, %bb96 ] ; <i32> [#uses=2]
- %118 = icmp sgt i32 %j8.0, -2 ; <i1> [#uses=1]
- br i1 %118, label %bb97, label %bb96
-
-bb96: ; preds = %bb94
- %119 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %120 = load i32* %119, align 4 ; <i32> [#uses=1]
- %121 = add i32 %120, 1 ; <i32> [#uses=1]
- %122 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %121, i32* %122, align 4
- %123 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- %124 = load i32* %123, align 8 ; <i32> [#uses=1]
- %125 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %126 = load %struct.system__file_control_block__pstring** %125, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %127 = getelementptr %struct.system__file_control_block__pstring* %126, i32 0, i32 0 ; <i8**> [#uses=1]
- %128 = load i8** %127, align 4 ; <i8*> [#uses=1]
- %129 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %130 = load i32* %129, align 4 ; <i32> [#uses=1]
- %131 = sub i32 %130, %124 ; <i32> [#uses=1]
- %132 = getelementptr i8* %128, i32 %131 ; <i8*> [#uses=1]
- store i8 48, i8* %132, align 1
- %133 = add i32 %j8.0, 1 ; <i32> [#uses=1]
- br label %bb94
-
-bb97: ; preds = %bb91, %bb94
- %134 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %135 = load i32* %134, align 4 ; <i32> [#uses=1]
- %136 = add i32 %135, 1 ; <i32> [#uses=1]
- %137 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %136, i32* %137, align 4
- %138 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- %139 = load i32* %138, align 8 ; <i32> [#uses=1]
- %140 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %141 = load %struct.system__file_control_block__pstring** %140, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %142 = getelementptr %struct.system__file_control_block__pstring* %141, i32 0, i32 0 ; <i8**> [#uses=1]
- %143 = load i8** %142, align 4 ; <i8*> [#uses=1]
- %144 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %145 = load i32* %144, align 4 ; <i32> [#uses=1]
- %146 = sub i32 %145, %139 ; <i32> [#uses=1]
- %147 = getelementptr i8* %143, i32 %146 ; <i8*> [#uses=1]
- store i8 48, i8* %147, align 1
- br label %bb102
-
-bb98: ; preds = %bb74
- %148 = icmp eq i32 %38, -1 ; <i1> [#uses=1]
- br i1 %148, label %bb100, label %bb101
-
-bb100: ; preds = %bb98
- %149 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %150 = load i32* %149, align 4 ; <i32> [#uses=1]
- %151 = add i32 %150, 1 ; <i32> [#uses=1]
- %152 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %151, i32* %152, align 4
- %153 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- %154 = load i32* %153, align 8 ; <i32> [#uses=1]
- %155 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %156 = load %struct.system__file_control_block__pstring** %155, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %157 = getelementptr %struct.system__file_control_block__pstring* %156, i32 0, i32 0 ; <i8**> [#uses=1]
- %158 = load i8** %157, align 4 ; <i8*> [#uses=1]
- %159 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %160 = load i32* %159, align 4 ; <i32> [#uses=1]
- %161 = sub i32 %160, %154 ; <i32> [#uses=1]
- %162 = getelementptr i8* %158, i32 %161 ; <i8*> [#uses=1]
- store i8 46, i8* %162, align 1
- br label %bb101
-
-bb101: ; preds = %bb98, %bb100
- %163 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %164 = load i32* %163, align 4 ; <i32> [#uses=1]
- %165 = add i32 %164, 1 ; <i32> [#uses=1]
- %166 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %165, i32* %166, align 4
- %167 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 4 ; <i32*> [#uses=1]
- %168 = load i32* %167, align 8 ; <i32> [#uses=1]
- %169 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %170 = load %struct.system__file_control_block__pstring** %169, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %171 = getelementptr %struct.system__file_control_block__pstring* %170, i32 0, i32 0 ; <i8**> [#uses=1]
- %172 = load i8** %171, align 4 ; <i8*> [#uses=1]
- %173 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 5 ; <i32*> [#uses=1]
- %174 = load i32* %173, align 4 ; <i32> [#uses=1]
- %175 = sub i32 %174, %168 ; <i32> [#uses=1]
- %176 = getelementptr i8* %172, i32 %175 ; <i8*> [#uses=1]
- store i8 48, i8* %176, align 1
- br label %bb102
-
-bb102: ; preds = %bb76, %bb101, %bb97
- %177 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 1 ; <i32*> [#uses=1]
- %178 = load i32* %177, align 4 ; <i32> [#uses=1]
- %179 = add i32 %178, -1 ; <i32> [#uses=1]
- %180 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %FRAME.358, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %179, i32* %180, align 4
- br label %bb72
-
-bb103: ; preds = %bb72
- ret i32 %32
-}
-
-declare x86_fp80 @ada__text_io__float_aux__get(%struct.ada__text_io__text_afcb*, i32)
-
-declare void @__gnat_rcheck_12(i8*, i32) noreturn
-
-declare void @__gnat_rcheck_10(i8*, i32) noreturn
-
-declare i8* @llvm.eh.exception() nounwind
-
-declare i32 @llvm.eh.selector.i32(i8*, i8*, ...) nounwind
-
-declare i32 @llvm.eh.typeid.for.i32(i8*) nounwind
-
-declare void @__gnat_begin_handler(i8*) nounwind
-
-declare void @__gnat_raise_exception(%struct.system__standard_library__exception_data*, i8*, %struct.string___XUB*) noreturn
-
-declare void @__gnat_end_handler(i8*)
-
-declare i32 @__gnat_eh_personality(...)
-
-declare i32 @_Unwind_Resume(...)
-
-define internal fastcc void @ce3806g__fxio__put__put_int64__4.1339(%struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i64 %x, i32 %scale) {
-entry:
- %0 = icmp eq i64 %x, 0 ; <i1> [#uses=1]
- br i1 %0, label %return, label %bb
-
-bb: ; preds = %entry
- %1 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %scale, i32* %1, align 4
- %2 = add i64 %x, 9 ; <i64> [#uses=1]
- %3 = icmp ugt i64 %2, 18 ; <i1> [#uses=1]
- br i1 %3, label %bb18, label %bb19
-
-bb18: ; preds = %bb
- %4 = add i32 %scale, 1 ; <i32> [#uses=1]
- %5 = sdiv i64 %x, 10 ; <i64> [#uses=1]
- call fastcc void @ce3806g__fxio__put__put_int64__4.1339( %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i64 %5, i32 %4 )
- br label %bb19
-
-bb19: ; preds = %bb, %bb18
- %6 = srem i64 %x, 10 ; <i64> [#uses=3]
- %neg = sub i64 0, %6 ; <i64> [#uses=1]
- %abscond = icmp sgt i64 %6, -1 ; <i1> [#uses=1]
- %abs = select i1 %abscond, i64 %6, i64 %neg ; <i64> [#uses=3]
- %7 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %8 = load i32* %7, align 4 ; <i32> [#uses=1]
- %9 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 2 ; <i32*> [#uses=1]
- %10 = load i32* %9, align 4 ; <i32> [#uses=1]
- %11 = add i32 %10, -1 ; <i32> [#uses=1]
- %12 = icmp eq i32 %8, %11 ; <i1> [#uses=1]
- br i1 %12, label %bb23, label %bb44
-
-bb23: ; preds = %bb19
- %13 = icmp ne i64 %abs, 0 ; <i1> [#uses=1]
- %14 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 1 ; <i32*> [#uses=1]
- %15 = load i32* %14, align 4 ; <i32> [#uses=1]
- %16 = icmp slt i32 %15, 1 ; <i1> [#uses=1]
- %17 = or i1 %13, %16 ; <i1> [#uses=1]
- br i1 %17, label %bb27, label %bb48
-
-bb27: ; preds = %bb23
- %18 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 1 ; <i32*> [#uses=1]
- %19 = load i32* %18, align 4 ; <i32> [#uses=2]
- %20 = icmp sgt i32 %19, -1 ; <i1> [#uses=1]
- %.op = add i32 %19, 2 ; <i32> [#uses=1]
- %21 = select i1 %20, i32 %.op, i32 2 ; <i32> [#uses=1]
- %22 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 6 ; <i8*> [#uses=1]
- %23 = load i8* %22, align 1 ; <i8> [#uses=1]
- %24 = zext i8 %23 to i32 ; <i32> [#uses=1]
- %25 = add i32 %21, %24 ; <i32> [#uses=2]
- %26 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 0 ; <i32*> [#uses=1]
- %27 = load i32* %26, align 4 ; <i32> [#uses=1]
- %28 = icmp sgt i32 %25, %27 ; <i1> [#uses=1]
- br i1 %28, label %bb34, label %bb31
-
-bb31: ; preds = %bb27, %bb33
- %j.0 = phi i32 [ %46, %bb33 ], [ %25, %bb27 ] ; <i32> [#uses=2]
- %29 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %30 = load i32* %29, align 4 ; <i32> [#uses=1]
- %31 = add i32 %30, 1 ; <i32> [#uses=1]
- %32 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %31, i32* %32, align 4
- %33 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 4 ; <i32*> [#uses=1]
- %34 = load i32* %33, align 4 ; <i32> [#uses=1]
- %35 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %36 = load %struct.system__file_control_block__pstring** %35, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %37 = getelementptr %struct.system__file_control_block__pstring* %36, i32 0, i32 0 ; <i8**> [#uses=1]
- %38 = load i8** %37, align 4 ; <i8*> [#uses=1]
- %39 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %40 = load i32* %39, align 4 ; <i32> [#uses=1]
- %41 = sub i32 %40, %34 ; <i32> [#uses=1]
- %42 = getelementptr i8* %38, i32 %41 ; <i8*> [#uses=1]
- store i8 32, i8* %42, align 1
- %43 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 0 ; <i32*> [#uses=1]
- %44 = load i32* %43, align 4 ; <i32> [#uses=1]
- %45 = icmp eq i32 %44, %j.0 ; <i1> [#uses=1]
- br i1 %45, label %bb34, label %bb33
-
-bb33: ; preds = %bb31
- %46 = add i32 %j.0, 1 ; <i32> [#uses=1]
- br label %bb31
-
-bb34: ; preds = %bb27, %bb31
- %47 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 6 ; <i8*> [#uses=1]
- %48 = load i8* %47, align 1 ; <i8> [#uses=1]
- %toBool35 = icmp eq i8 %48, 0 ; <i1> [#uses=1]
- br i1 %toBool35, label %bb37, label %bb36
-
-bb36: ; preds = %bb34
- %49 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %50 = load i32* %49, align 4 ; <i32> [#uses=1]
- %51 = add i32 %50, 1 ; <i32> [#uses=1]
- %52 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %51, i32* %52, align 4
- %53 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 4 ; <i32*> [#uses=1]
- %54 = load i32* %53, align 4 ; <i32> [#uses=1]
- %55 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %56 = load %struct.system__file_control_block__pstring** %55, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %57 = getelementptr %struct.system__file_control_block__pstring* %56, i32 0, i32 0 ; <i8**> [#uses=1]
- %58 = load i8** %57, align 4 ; <i8*> [#uses=1]
- %59 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %60 = load i32* %59, align 4 ; <i32> [#uses=1]
- %61 = sub i32 %60, %54 ; <i32> [#uses=1]
- %62 = getelementptr i8* %58, i32 %61 ; <i8*> [#uses=1]
- store i8 45, i8* %62, align 1
- br label %bb37
-
-bb37: ; preds = %bb34, %bb36
- %63 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 1 ; <i32*> [#uses=1]
- %64 = load i32* %63, align 4 ; <i32> [#uses=1]
- %65 = icmp slt i32 %64, 0 ; <i1> [#uses=1]
- br i1 %65, label %bb39, label %bb43
-
-bb39: ; preds = %bb37
- %66 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %67 = load i32* %66, align 4 ; <i32> [#uses=1]
- %68 = add i32 %67, 1 ; <i32> [#uses=1]
- %69 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %68, i32* %69, align 4
- %70 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 4 ; <i32*> [#uses=1]
- %71 = load i32* %70, align 4 ; <i32> [#uses=1]
- %72 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %73 = load %struct.system__file_control_block__pstring** %72, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %74 = getelementptr %struct.system__file_control_block__pstring* %73, i32 0, i32 0 ; <i8**> [#uses=1]
- %75 = load i8** %74, align 4 ; <i8*> [#uses=1]
- %76 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %77 = load i32* %76, align 4 ; <i32> [#uses=1]
- %78 = sub i32 %77, %71 ; <i32> [#uses=1]
- %79 = getelementptr i8* %75, i32 %78 ; <i8*> [#uses=1]
- store i8 48, i8* %79, align 1
- %80 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %81 = load i32* %80, align 4 ; <i32> [#uses=1]
- %82 = add i32 %81, 1 ; <i32> [#uses=1]
- %83 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %82, i32* %83, align 4
- %84 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 4 ; <i32*> [#uses=1]
- %85 = load i32* %84, align 4 ; <i32> [#uses=1]
- %86 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %87 = load %struct.system__file_control_block__pstring** %86, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %88 = getelementptr %struct.system__file_control_block__pstring* %87, i32 0, i32 0 ; <i8**> [#uses=1]
- %89 = load i8** %88, align 4 ; <i8*> [#uses=1]
- %90 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %91 = load i32* %90, align 4 ; <i32> [#uses=1]
- %92 = sub i32 %91, %85 ; <i32> [#uses=1]
- %93 = getelementptr i8* %89, i32 %92 ; <i8*> [#uses=1]
- store i8 46, i8* %93, align 1
- %94 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 1 ; <i32*> [#uses=1]
- %95 = load i32* %94, align 4 ; <i32> [#uses=1]
- br label %bb40
-
-bb40: ; preds = %bb42, %bb39
- %j15.0 = phi i32 [ %95, %bb39 ], [ %111, %bb42 ] ; <i32> [#uses=2]
- %96 = icmp sgt i32 %j15.0, -2 ; <i1> [#uses=1]
- br i1 %96, label %bb43, label %bb42
-
-bb42: ; preds = %bb40
- %97 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %98 = load i32* %97, align 4 ; <i32> [#uses=1]
- %99 = add i32 %98, 1 ; <i32> [#uses=1]
- %100 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %99, i32* %100, align 4
- %101 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 4 ; <i32*> [#uses=1]
- %102 = load i32* %101, align 4 ; <i32> [#uses=1]
- %103 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %104 = load %struct.system__file_control_block__pstring** %103, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %105 = getelementptr %struct.system__file_control_block__pstring* %104, i32 0, i32 0 ; <i8**> [#uses=1]
- %106 = load i8** %105, align 4 ; <i8*> [#uses=1]
- %107 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %108 = load i32* %107, align 4 ; <i32> [#uses=1]
- %109 = sub i32 %108, %102 ; <i32> [#uses=1]
- %110 = getelementptr i8* %106, i32 %109 ; <i8*> [#uses=1]
- store i8 48, i8* %110, align 1
- %111 = add i32 %j15.0, 1 ; <i32> [#uses=1]
- br label %bb40
-
-bb43: ; preds = %bb37, %bb40
- %112 = trunc i64 %abs to i32 ; <i32> [#uses=1]
- %113 = getelementptr [10 x i8]* @.str3, i32 0, i32 %112 ; <i8*> [#uses=1]
- %114 = load i8* %113, align 1 ; <i8> [#uses=1]
- %115 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %116 = load i32* %115, align 4 ; <i32> [#uses=1]
- %117 = add i32 %116, 1 ; <i32> [#uses=1]
- %118 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %117, i32* %118, align 4
- %119 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 4 ; <i32*> [#uses=1]
- %120 = load i32* %119, align 4 ; <i32> [#uses=1]
- %121 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %122 = load %struct.system__file_control_block__pstring** %121, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %123 = getelementptr %struct.system__file_control_block__pstring* %122, i32 0, i32 0 ; <i8**> [#uses=1]
- %124 = load i8** %123, align 4 ; <i8*> [#uses=1]
- %125 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %126 = load i32* %125, align 4 ; <i32> [#uses=1]
- %127 = sub i32 %126, %120 ; <i32> [#uses=1]
- %128 = getelementptr i8* %124, i32 %127 ; <i8*> [#uses=1]
- store i8 %114, i8* %128, align 1
- br label %bb48
-
-bb44: ; preds = %bb19
- %129 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 1 ; <i32*> [#uses=1]
- %130 = load i32* %129, align 4 ; <i32> [#uses=1]
- %131 = icmp eq i32 %130, -1 ; <i1> [#uses=1]
- br i1 %131, label %bb46, label %bb47
-
-bb46: ; preds = %bb44
- %132 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %133 = load i32* %132, align 4 ; <i32> [#uses=1]
- %134 = add i32 %133, 1 ; <i32> [#uses=1]
- %135 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %134, i32* %135, align 4
- %136 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 4 ; <i32*> [#uses=1]
- %137 = load i32* %136, align 4 ; <i32> [#uses=1]
- %138 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %139 = load %struct.system__file_control_block__pstring** %138, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %140 = getelementptr %struct.system__file_control_block__pstring* %139, i32 0, i32 0 ; <i8**> [#uses=1]
- %141 = load i8** %140, align 4 ; <i8*> [#uses=1]
- %142 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %143 = load i32* %142, align 4 ; <i32> [#uses=1]
- %144 = sub i32 %143, %137 ; <i32> [#uses=1]
- %145 = getelementptr i8* %141, i32 %144 ; <i8*> [#uses=1]
- store i8 46, i8* %145, align 1
- br label %bb47
-
-bb47: ; preds = %bb44, %bb46
- %146 = trunc i64 %abs to i32 ; <i32> [#uses=1]
- %147 = getelementptr [10 x i8]* @.str3, i32 0, i32 %146 ; <i8*> [#uses=1]
- %148 = load i8* %147, align 1 ; <i8> [#uses=1]
- %149 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %150 = load i32* %149, align 4 ; <i32> [#uses=1]
- %151 = add i32 %150, 1 ; <i32> [#uses=1]
- %152 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- store i32 %151, i32* %152, align 4
- %153 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 4 ; <i32*> [#uses=1]
- %154 = load i32* %153, align 4 ; <i32> [#uses=1]
- %155 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 3 ; <%struct.system__file_control_block__pstring**> [#uses=1]
- %156 = load %struct.system__file_control_block__pstring** %155, align 4 ; <%struct.system__file_control_block__pstring*> [#uses=1]
- %157 = getelementptr %struct.system__file_control_block__pstring* %156, i32 0, i32 0 ; <i8**> [#uses=1]
- %158 = load i8** %157, align 4 ; <i8*> [#uses=1]
- %159 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 5 ; <i32*> [#uses=1]
- %160 = load i32* %159, align 4 ; <i32> [#uses=1]
- %161 = sub i32 %160, %154 ; <i32> [#uses=1]
- %162 = getelementptr i8* %158, i32 %161 ; <i8*> [#uses=1]
- store i8 %148, i8* %162, align 1
- br label %bb48
-
-bb48: ; preds = %bb23, %bb47, %bb43
- %163 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 1 ; <i32*> [#uses=1]
- %164 = load i32* %163, align 4 ; <i32> [#uses=1]
- %165 = add i32 %164, -1 ; <i32> [#uses=1]
- %166 = getelementptr %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.361, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 %165, i32* %166, align 4
- ret void
-
-return: ; preds = %entry
- ret void
-}
-
-define internal fastcc void @ce3806g__fxio__put__put_scaled__4.1346(%struct.FRAME.ce3806g__fxio__put__4* %CHAIN.365, i64 %x, i64 %y, i64 %z, i32 %a, i32 %e) {
-entry:
- %0 = alloca { i64, i64 } ; <{ i64, i64 }*> [#uses=3]
- %1 = call i8* @llvm.stacksave( ) ; <i8*> [#uses=1]
- %2 = add i32 %a, 17 ; <i32> [#uses=2]
- %3 = sdiv i32 %2, 18 ; <i32> [#uses=3]
- %4 = add i32 %3, 1 ; <i32> [#uses=7]
- %5 = icmp sgt i32 %4, -1 ; <i1> [#uses=1]
- %max53 = select i1 %5, i32 %4, i32 0 ; <i32> [#uses=1]
- %6 = alloca i64, i32 %max53 ; <i64*> [#uses=21]
- %7 = icmp sgt i32 %4, 0 ; <i1> [#uses=1]
- br i1 %7, label %bb55, label %bb58
-
-bb55: ; preds = %entry, %bb57
- %J60b.0 = phi i32 [ %11, %bb57 ], [ 1, %entry ] ; <i32> [#uses=3]
- %8 = add i32 %J60b.0, -1 ; <i32> [#uses=1]
- %9 = getelementptr i64* %6, i32 %8 ; <i64*> [#uses=1]
- store i64 0, i64* %9, align 8
- %10 = icmp eq i32 %4, %J60b.0 ; <i1> [#uses=1]
- br i1 %10, label %bb58, label %bb57
-
-bb57: ; preds = %bb55
- %11 = add i32 %J60b.0, 1 ; <i32> [#uses=1]
- br label %bb55
-
-bb58: ; preds = %entry, %bb55
- %12 = icmp sgt i32 %4, 0 ; <i1> [#uses=1]
- br i1 %12, label %bb61, label %bb91
-
-bb61: ; preds = %bb58, %bb90
- %j2.0 = phi i32 [ %88, %bb90 ], [ 1, %bb58 ] ; <i32> [#uses=11]
- %aa.0 = phi i32 [ %86, %bb90 ], [ %a, %bb58 ] ; <i32> [#uses=6]
- %yy.0 = phi i64 [ %84, %bb90 ], [ %y, %bb58 ] ; <i64> [#uses=3]
- %xx.0 = phi i64 [ %21, %bb90 ], [ %x, %bb58 ] ; <i64> [#uses=2]
- %13 = icmp eq i64 %xx.0, 0 ; <i1> [#uses=1]
- br i1 %13, label %bb91, label %bb63
-
-bb63: ; preds = %bb61
- %14 = icmp eq i32 %aa.0, 0 ; <i1> [#uses=1]
- %15 = zext i1 %14 to i8 ; <i8> [#uses=1]
- invoke void @system__arith_64__scaled_divide( { i64, i64 }* noalias sret %0, i64 %xx.0, i64 %yy.0, i64 %z, i8 %15 )
- to label %invcont unwind label %lpad
-
-invcont: ; preds = %bb63
- %16 = getelementptr { i64, i64 }* %0, i32 0, i32 0 ; <i64*> [#uses=1]
- %17 = load i64* %16, align 8 ; <i64> [#uses=1]
- %18 = add i32 %j2.0, -1 ; <i32> [#uses=1]
- %19 = getelementptr i64* %6, i32 %18 ; <i64*> [#uses=1]
- store i64 %17, i64* %19, align 8
- %20 = getelementptr { i64, i64 }* %0, i32 0, i32 1 ; <i64*> [#uses=1]
- %21 = load i64* %20, align 8 ; <i64> [#uses=1]
- %22 = add i32 %j2.0, -1 ; <i32> [#uses=1]
- %23 = getelementptr i64* %6, i32 %22 ; <i64*> [#uses=1]
- %24 = load i64* %23, align 8 ; <i64> [#uses=1]
- %25 = icmp eq i64 %24, %yy.0 ; <i1> [#uses=1]
- %26 = add i32 %j2.0, -1 ; <i32> [#uses=1]
- %27 = getelementptr i64* %6, i32 %26 ; <i64*> [#uses=1]
- %28 = load i64* %27, align 8 ; <i64> [#uses=1]
- %29 = sub i64 0, %28 ; <i64> [#uses=1]
- %30 = icmp eq i64 %yy.0, %29 ; <i1> [#uses=1]
- %31 = or i1 %25, %30 ; <i1> [#uses=1]
- %32 = icmp sgt i32 %j2.0, 1 ; <i1> [#uses=1]
- %or.cond = and i1 %31, %32 ; <i1> [#uses=1]
- br i1 %or.cond, label %bb69, label %bb83
-
-bb69: ; preds = %invcont
- %33 = add i32 %j2.0, -1 ; <i32> [#uses=1]
- %34 = getelementptr i64* %6, i32 %33 ; <i64*> [#uses=1]
- %35 = load i64* %34, align 8 ; <i64> [#uses=1]
- %36 = icmp slt i64 %35, 0 ; <i1> [#uses=1]
- %37 = add i32 %j2.0, -2 ; <i32> [#uses=1]
- %38 = getelementptr i64* %6, i32 %37 ; <i64*> [#uses=1]
- %39 = load i64* %38, align 8 ; <i64> [#uses=2]
- br i1 %36, label %bb71, label %bb72
-
-bb71: ; preds = %bb69
- %40 = add i64 %39, 1 ; <i64> [#uses=1]
- %41 = add i32 %j2.0, -2 ; <i32> [#uses=1]
- %42 = getelementptr i64* %6, i32 %41 ; <i64*> [#uses=1]
- store i64 %40, i64* %42, align 8
- br label %bb73
-
-bb72: ; preds = %bb69
- %43 = add i64 %39, -1 ; <i64> [#uses=1]
- %44 = add i32 %j2.0, -2 ; <i32> [#uses=1]
- %45 = getelementptr i64* %6, i32 %44 ; <i64*> [#uses=1]
- store i64 %43, i64* %45, align 8
- br label %bb73
-
-bb73: ; preds = %bb72, %bb71
- %46 = add i32 %j2.0, -1 ; <i32> [#uses=1]
- %47 = getelementptr i64* %6, i32 %46 ; <i64*> [#uses=1]
- store i64 0, i64* %47, align 8
- br label %bb74
-
-bb74: ; preds = %bb82, %bb73
- %j1.0 = phi i32 [ %4, %bb73 ], [ %81, %bb82 ] ; <i32> [#uses=12]
- %48 = icmp slt i32 %j1.0, 2 ; <i1> [#uses=1]
- br i1 %48, label %bb83, label %bb76
-
-bb76: ; preds = %bb74
- %49 = add i32 %j1.0, -1 ; <i32> [#uses=1]
- %50 = getelementptr i64* %6, i32 %49 ; <i64*> [#uses=1]
- %51 = load i64* %50, align 8 ; <i64> [#uses=1]
- %52 = icmp sgt i64 %51, 999999999999999999 ; <i1> [#uses=1]
- br i1 %52, label %bb78, label %bb79
-
-bb78: ; preds = %bb76
- %53 = add i32 %j1.0, -2 ; <i32> [#uses=1]
- %54 = getelementptr i64* %6, i32 %53 ; <i64*> [#uses=1]
- %55 = load i64* %54, align 8 ; <i64> [#uses=1]
- %56 = add i64 %55, 1 ; <i64> [#uses=1]
- %57 = add i32 %j1.0, -2 ; <i32> [#uses=1]
- %58 = getelementptr i64* %6, i32 %57 ; <i64*> [#uses=1]
- store i64 %56, i64* %58, align 8
- %59 = add i32 %j1.0, -1 ; <i32> [#uses=1]
- %60 = getelementptr i64* %6, i32 %59 ; <i64*> [#uses=1]
- %61 = load i64* %60, align 8 ; <i64> [#uses=1]
- %62 = add i64 %61, -1000000000000000000 ; <i64> [#uses=1]
- %63 = add i32 %j1.0, -1 ; <i32> [#uses=1]
- %64 = getelementptr i64* %6, i32 %63 ; <i64*> [#uses=1]
- store i64 %62, i64* %64, align 8
- br label %bb82
-
-bb79: ; preds = %bb76
- %65 = add i32 %j1.0, -1 ; <i32> [#uses=1]
- %66 = getelementptr i64* %6, i32 %65 ; <i64*> [#uses=1]
- %67 = load i64* %66, align 8 ; <i64> [#uses=1]
- %68 = icmp slt i64 %67, -999999999999999999 ; <i1> [#uses=1]
- br i1 %68, label %bb81, label %bb82
-
-bb81: ; preds = %bb79
- %69 = add i32 %j1.0, -2 ; <i32> [#uses=1]
- %70 = getelementptr i64* %6, i32 %69 ; <i64*> [#uses=1]
- %71 = load i64* %70, align 8 ; <i64> [#uses=1]
- %72 = add i64 %71, -1 ; <i64> [#uses=1]
- %73 = add i32 %j1.0, -2 ; <i32> [#uses=1]
- %74 = getelementptr i64* %6, i32 %73 ; <i64*> [#uses=1]
- store i64 %72, i64* %74, align 8
- %75 = add i32 %j1.0, -1 ; <i32> [#uses=1]
- %76 = getelementptr i64* %6, i32 %75 ; <i64*> [#uses=1]
- %77 = load i64* %76, align 8 ; <i64> [#uses=1]
- %78 = add i64 %77, 1000000000000000000 ; <i64> [#uses=1]
- %79 = add i32 %j1.0, -1 ; <i32> [#uses=1]
- %80 = getelementptr i64* %6, i32 %79 ; <i64*> [#uses=1]
- store i64 %78, i64* %80, align 8
- br label %bb82
-
-bb82: ; preds = %bb79, %bb81, %bb78
- %81 = add i32 %j1.0, -1 ; <i32> [#uses=1]
- br label %bb74
-
-bb83: ; preds = %invcont, %bb74
- %82 = icmp slt i32 %aa.0, 19 ; <i1> [#uses=1]
- %min = select i1 %82, i32 %aa.0, i32 18 ; <i32> [#uses=1]
- %83 = invoke i64 @system__exn_lli__exn_long_long_integer( i64 10, i32 %min ) readnone
- to label %invcont86 unwind label %lpad ; <i64> [#uses=1]
-
-invcont86: ; preds = %bb83
- %84 = sub i64 0, %83 ; <i64> [#uses=1]
- %85 = icmp slt i32 %aa.0, 19 ; <i1> [#uses=1]
- %min87 = select i1 %85, i32 %aa.0, i32 18 ; <i32> [#uses=1]
- %86 = sub i32 %aa.0, %min87 ; <i32> [#uses=1]
- %87 = icmp eq i32 %4, %j2.0 ; <i1> [#uses=1]
- br i1 %87, label %bb91, label %bb90
-
-bb90: ; preds = %invcont86
- %88 = add i32 %j2.0, 1 ; <i32> [#uses=1]
- br label %bb61
-
-bb91: ; preds = %bb58, %bb61, %invcont86
- %89 = icmp slt i32 %2, 18 ; <i1> [#uses=1]
- br i1 %89, label %bb98, label %bb94
-
-bb94: ; preds = %bb91, %bb97
- %j.0 = phi i32 [ %97, %bb97 ], [ 1, %bb91 ] ; <i32> [#uses=4]
- %90 = mul i32 %j.0, 18 ; <i32> [#uses=1]
- %91 = add i32 %90, -18 ; <i32> [#uses=1]
- %92 = sub i32 %e, %91 ; <i32> [#uses=1]
- %93 = add i32 %j.0, -1 ; <i32> [#uses=1]
- %94 = getelementptr i64* %6, i32 %93 ; <i64*> [#uses=1]
- %95 = load i64* %94, align 8 ; <i64> [#uses=1]
- invoke fastcc void @ce3806g__fxio__put__put_int64__4.1339( %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.365, i64 %95, i32 %92 )
- to label %invcont95 unwind label %lpad
-
-invcont95: ; preds = %bb94
- %96 = icmp eq i32 %3, %j.0 ; <i1> [#uses=1]
- br i1 %96, label %bb98, label %bb97
-
-bb97: ; preds = %invcont95
- %97 = add i32 %j.0, 1 ; <i32> [#uses=1]
- br label %bb94
-
-bb98: ; preds = %bb91, %invcont95
- %98 = sub i32 %e, %a ; <i32> [#uses=1]
- %99 = getelementptr i64* %6, i32 %3 ; <i64*> [#uses=1]
- %100 = load i64* %99, align 8 ; <i64> [#uses=1]
- invoke fastcc void @ce3806g__fxio__put__put_int64__4.1339( %struct.FRAME.ce3806g__fxio__put__4* %CHAIN.365, i64 %100, i32 %98 )
- to label %bb101 unwind label %lpad
-
-bb101: ; preds = %bb98
- ret void
-
-lpad: ; preds = %bb98, %bb94, %bb83, %bb63
- %eh_ptr = call i8* @llvm.eh.exception( ) ; <i8*> [#uses=2]
- %eh_select103 = call i32 (i8*, i8*, ...)* @llvm.eh.selector.i32( i8* %eh_ptr, i8* bitcast (i32 (...)* @__gnat_eh_personality to i8*), i32* @__gnat_all_others_value ) ; <i32> [#uses=0]
- call void @llvm.stackrestore( i8* %1 )
- %101 = call i32 (...)* @_Unwind_Resume( i8* %eh_ptr ) ; <i32> [#uses=0]
- unreachable
-}
-
-declare i8* @llvm.stacksave() nounwind
-
-declare void @system__arith_64__scaled_divide({ i64, i64 }* noalias sret, i64, i64, i64, i8)
-
-declare i64 @system__exn_lli__exn_long_long_integer(i64, i32) readnone
-
-declare void @llvm.stackrestore(i8*) nounwind
-
-declare i32 @system__img_real__set_image_real(x86_fp80, i8*, %struct.string___XUB*, i32, i32, i32, i32)
-
-declare void @ada__text_io__generic_aux__put_item(%struct.ada__text_io__text_afcb*, i8*, %struct.string___XUB*)
-
-declare void @report__test(i8*, %struct.string___XUB*, i8*, %struct.string___XUB*)
-
-declare void @system__secondary_stack__ss_mark(%struct.string___XUB* noalias sret)
-
-declare void @system__exception_table__register_exception(%struct.system__standard_library__exception_data*)
-
-declare void @report__legal_file_name(%struct.system__file_control_block__pstring* noalias sret, i32, i8*, %struct.string___XUB*)
-
-declare %struct.ada__text_io__text_afcb* @ada__text_io__create(%struct.ada__text_io__text_afcb*, i8, i8*, %struct.string___XUB*, i8*, %struct.string___XUB*)
-
-declare void @system__secondary_stack__ss_release(i32, i32)
-
-declare void @report__not_applicable(i8*, %struct.string___XUB*)
-
-declare void @ada__text_io__set_output(%struct.ada__text_io__text_afcb*)
-
-declare %struct.ada__text_io__text_afcb* @ada__text_io__close(%struct.ada__text_io__text_afcb*)
-
-declare %struct.ada__text_io__text_afcb* @ada__text_io__open(%struct.ada__text_io__text_afcb*, i8, i8*, %struct.string___XUB*, i8*, %struct.string___XUB*)
-
-declare %struct.ada__text_io__text_afcb* @ada__text_io__standard_output()
-
-declare void @report__failed(i8*, %struct.string___XUB*)
-
-declare void @ext(i32*)
-
-declare %struct.ada__text_io__text_afcb* @ada__text_io__delete(%struct.ada__text_io__text_afcb*)
-
-declare void @report__result()
diff --git a/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll b/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
index e6c76b3..003fbb1 100644
--- a/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
+++ b/test/Transforms/Reassociate/2011-01-26-UseAfterFree.ll
@@ -3,8 +3,6 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
target triple = "i386-gnu-linux"
-%ada__tags__T15s = type void ()
-
define void @exp_averages_intraday__deviation() {
entry:
%0 = load i32* undef, align 4
diff --git a/test/Transforms/SCCP/2002-05-02-EdgeFailure.ll b/test/Transforms/SCCP/2002-05-02-EdgeFailure.ll
deleted file mode 100644
index bb0cf04..0000000
--- a/test/Transforms/SCCP/2002-05-02-EdgeFailure.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; edgefailure - This function illustrates how SCCP is not doing its job. This
-; function should be optimized almost completely away: the loop should be
-; analyzed to detect that the body executes exactly once, and thus the branch
-; can be eliminated and the code becomes trivially dead. This is distilled from a
-; real benchmark (mst from the Olden benchmark suite, MakeGraph function). When SCCP is
-; fixed, this should be eliminated by a single SCCP application.
-;
-; RUN: opt < %s -sccp -S | not grep loop
-
-define i32* @test() {
-bb1:
- %A = malloc i32 ; <i32*> [#uses=2]
- br label %bb2
-bb2: ; preds = %bb2, %bb1
- ;; Always 0
- %i = phi i32 [ %i2, %bb2 ], [ 0, %bb1 ] ; <i32> [#uses=2]
- ;; Always 1
- %i2 = add i32 %i, 1 ; <i32> [#uses=2]
- store i32 %i, i32* %A
- ;; Always false
- %loop = icmp sle i32 %i2, 0 ; <i1> [#uses=1]
- br i1 %loop, label %bb2, label %bb3
-bb3: ; preds = %bb2
- ret i32* %A
-}
-
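For context on the deleted test above: once SCCP folds the phi to 0 and the
increment to 1, the back-edge condition (icmp sle i32 1, 0) is provably false,
so the loop body runs exactly once and both the branch and the induction
variable become dead. A minimal sketch of the fully-optimized form the test's
comment anticipates, written in the era-appropriate syntax (including the old
malloc instruction this test still used):

define i32* @test() {
bb1:
  %A = malloc i32       ; only the allocation must survive
  store i32 0, i32* %A  ; the single loop iteration stores %i == 0
  ret i32* %A
}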
diff --git a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
index 1b26ca9..a40455c 100644
--- a/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
+++ b/test/Transforms/SCCP/2008-04-22-multiple-ret-sccp.ll
@@ -5,7 +5,7 @@ declare {i32, i32} @bar(i32 %A)
define i32 @foo() {
%X = call {i32, i32} @bar(i32 17)
- %Y = getresult {i32, i32} %X, 0
+ %Y = extractvalue {i32, i32} %X, 0
%Z = add i32 %Y, %Y
ret i32 %Z
}
diff --git a/test/Transforms/SCCP/ipsccp-basic.ll b/test/Transforms/SCCP/ipsccp-basic.ll
index a3c7637..b9e3f42 100644
--- a/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/test/Transforms/SCCP/ipsccp-basic.ll
@@ -126,7 +126,7 @@ B:
; CHECK: define i64 @test5b()
; CHECK: A:
-; CHECK-NEXT: %c = call i64 @test5c(%0 %a)
+; CHECK-NEXT: %c = call i64 @test5c({ i64, i64 } %a)
; CHECK-NEXT: ret i64 5
define internal i64 @test5c({i64,i64} %a) {
@@ -153,7 +153,7 @@ define i64 @test6b() {
%T = type {i32,i32}
-define internal {i32, i32} @test7a(i32 %A) {
+define internal %T @test7a(i32 %A) {
%X = add i32 1, %A
%mrv0 = insertvalue %T undef, i32 %X, 0
%mrv1 = insertvalue %T %mrv0, i32 %A, 1
@@ -164,8 +164,8 @@ define internal {i32, i32} @test7a(i32 %A) {
}
define i32 @test7b() {
- %X = call {i32, i32} @test7a(i32 17)
- %Y = extractvalue {i32, i32} %X, 0
+ %X = call %T @test7a(i32 17)
+ %Y = extractvalue %T %X, 0
%Z = add i32 %Y, %Y
ret i32 %Z
; CHECK: define i32 @test7b
diff --git a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
index 24e6a31..d754987 100644
--- a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
+++ b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
@@ -6,10 +6,10 @@ declare i32 @.callback_1(i8*)
declare void @.iter_2(i32 (i8*)*, i8*)
define i32 @main() {
- %d = alloca { [80 x i8], i32, i32 } ; <{ [80 x i8], i32, i32 }*> [#uses=2]
- %tmp.0 = getelementptr { [80 x i8], i32, i32 }* %d, i64 0, i32 2 ; <i32*> [#uses=1]
+ %d = alloca %T ; <{ [80 x i8], i32, i32 }*> [#uses=2]
+ %tmp.0 = getelementptr %T* %d, i64 0, i32 2 ; <i32*> [#uses=1]
store i32 0, i32* %tmp.0
- %tmp.1 = getelementptr { [80 x i8], i32, i32 }* %d, i64 0, i32 0, i64 0 ; <i8*> [#uses=1]
+ %tmp.1 = getelementptr %T* %d, i64 0, i32 0, i64 0 ; <i8*> [#uses=1]
call void @.iter_2( i32 (i8*)* @.callback_1, i8* %tmp.1 )
ret i32 0
}
diff --git a/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll b/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
deleted file mode 100644
index e67b610..0000000
--- a/test/Transforms/ScalarRepl/2007-05-24-LargeAggregate.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep {alloca.*client_t}
-; PR1446
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-pc-linux-gnu"
-
- %struct.clientSnapshot_t = type { i32, [32 x i8], %struct.playerState_t, i32, i32, i32, i32, i32 }
- %struct.client_t = type { i32, [1024 x i8], [64 x [1024 x i8]], i32, i32, i32, i32, i32, i32, %struct.usercmd_t, i32, i32, [1024 x i8], %struct.sharedEntity_t*, [32 x i8], [64 x i8], i32, i32, i32, i32, i32, i32, [8 x i8*], [8 x i32], i32, i32, i32, i32, i32, i32, i32, i32, i32, [32 x %struct.clientSnapshot_t], i32, i32, i32, i32, i32, %struct.netchan_t, %struct.netchan_buffer_t*, %struct.netchan_buffer_t**, i32, [1025 x i32] }
- %struct.entityShared_t = type { %struct.entityState_t, i32, i32, i32, i32, i32, [3 x float], [3 x float], i32, [3 x float], [3 x float], [3 x float], [3 x float], i32 }
- %struct.entityState_t = type { i32, i32, i32, %struct.trajectory_t, %struct.trajectory_t, i32, i32, [3 x float], [3 x float], [3 x float], [3 x float], i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
- %struct.msg_t = type { i32, i32, i32, i8*, i32, i32, i32, i32 }
- %struct.netadr_t = type { i32, [4 x i8], [10 x i8], i16 }
- %struct.netchan_buffer_t = type { %struct.msg_t, [16384 x i8], %struct.netchan_buffer_t* }
- %struct.netchan_t = type { i32, i32, %struct.netadr_t, i32, i32, i32, i32, i32, [16384 x i8], i32, i32, i32, [16384 x i8] }
- %struct.playerState_t = type { i32, i32, i32, i32, i32, [3 x float], [3 x float], i32, i32, i32, [3 x i32], i32, i32, i32, i32, i32, i32, [3 x float], i32, i32, [2 x i32], [2 x i32], i32, i32, i32, i32, i32, i32, [3 x float], i32, i32, i32, i32, i32, [16 x i32], [16 x i32], [16 x i32], [16 x i32], i32, i32, i32, i32, i32, i32, i32 }
- %struct.sharedEntity_t = type { %struct.entityState_t, %struct.entityShared_t }
- %struct.trajectory_t = type { i32, i32, i32, [3 x float], [3 x float] }
- %struct.usercmd_t = type { i32, [3 x i32], i32, i8, i8, i8, i8 }
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-define void @SV_DirectConnect(i64 %from.0.0, i64 %from.0.1, i32 %from.1) {
-entry:
- %temp = alloca %struct.client_t, align 16 ; <%struct.client_t*> [#uses=1]
- %temp586 = bitcast %struct.client_t* %temp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* null, i8* %temp586, i32 121596, i32 0 )
- unreachable
-}
diff --git a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
index f1b8b80..cf96c4c 100644
--- a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
@@ -3,21 +3,22 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
- %struct.LongestMember = type { i8, i32 }
- %struct.MyString = type { i32 }
- %struct.UnionType = type { %struct.LongestMember }
+
+%struct.LongestMember = type { i8, i32 }
+%struct.MyString = type { i32 }
+%struct.UnionType = type { %struct.LongestMember }
define void @_Z4testP9UnionTypePS0_(%struct.UnionType* %p, %struct.UnionType** %pointerToUnion) {
entry:
- %tmp = alloca %struct.UnionType, align 8 ; <%struct.UnionType*> [#uses=2]
- %tmp2 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp13 = getelementptr %struct.UnionType* %p, i32 0, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp2, i8* %tmp13, i32 8, i32 0 )
- %tmp5 = load %struct.UnionType** %pointerToUnion ; <%struct.UnionType*> [#uses=1]
- %tmp56 = getelementptr %struct.UnionType* %tmp5, i32 0, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp7 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %tmp56, i8* %tmp7, i32 8, i32 0 )
- ret void
+ %tmp = alloca %struct.UnionType, align 8
+ %tmp2 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0
+ %tmp13 = getelementptr %struct.UnionType* %p, i32 0, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
+ %tmp5 = load %struct.UnionType** %pointerToUnion
+ %tmp56 = getelementptr %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
+ %tmp7 = getelementptr %struct.UnionType* %tmp, i32 0, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
+ ret void
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll b/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
index 81b6746..48abffe 100644
--- a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
+++ b/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
@@ -2,7 +2,7 @@
%struct.S = type { i16 }
-define i1 @f(i16 signext %b) zeroext {
+define zeroext i1 @f(i16 signext %b) {
entry:
%b_addr = alloca i16 ; <i16*> [#uses=2]
%retval = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
index b704727..71ba601 100644
--- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
@@ -4,14 +4,14 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
-define void @memtest1(i8* %dst, i8* %src) nounwind {
+define void @memtest1(i8* %dst, i8* %src) nounwind {
entry:
- %temp = alloca [200 x i8] ; <[100 x i8]*> [#uses=2]
- %temp1 = bitcast [200 x i8]* %temp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %temp1, i8* %src, i32 200, i32 1 )
- %temp3 = bitcast [200 x i8]* %temp to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %dst, i8* %temp3, i32 200, i32 1 )
- ret void
+ %temp = alloca [200 x i8]
+ %temp1 = bitcast [200 x i8]* %temp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false)
+ %temp3 = bitcast [200 x i8]* %temp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false)
+ ret void
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
index 1df01c1..7cccb19 100644
--- a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
@@ -2,21 +2,22 @@
; PR2423
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
- %struct.x = type { [1 x i32], i32, i32 }
+
+%struct.x = type { [1 x i32], i32, i32 }
define i32 @b() nounwind {
entry:
- %s = alloca %struct.x ; <%struct.x*> [#uses=2]
- %r = alloca %struct.x ; <%struct.x*> [#uses=2]
- call i32 @a( %struct.x* %s ) nounwind ; <i32>:0 [#uses=0]
- %r1 = bitcast %struct.x* %r to i8* ; <i8*> [#uses=1]
- %s2 = bitcast %struct.x* %s to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %r1, i8* %s2, i32 12, i32 8 )
- getelementptr %struct.x* %r, i32 0, i32 0, i32 1 ; <i32*>:1 [#uses=1]
- load i32* %1, align 4 ; <i32>:2 [#uses=1]
- ret i32 %2
+ %s = alloca %struct.x
+ %r = alloca %struct.x
+ %0 = call i32 @a(%struct.x* %s) nounwind
+ %r1 = bitcast %struct.x* %r to i8*
+ %s2 = bitcast %struct.x* %s to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
+ %1 = getelementptr %struct.x* %r, i32 0, i32 0, i32 1
+ %2 = load i32* %1, align 4
+ ret i32 %2
}
declare i32 @a(%struct.x*)
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
index e32e683..e7a58f1 100644
--- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
@@ -18,8 +18,8 @@ entry:
; because the type of the first element in %struct.two is i8.
%tmpS = getelementptr %struct.two* %S, i32 0, i32 0, i32 0
%tmpD = bitcast %struct.two* %D to i8*
- call void @llvm.memmove.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1)
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false)
ret void
}
-declare void @llvm.memmove.i32(i8*, i8*, i32, i32) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
\ No newline at end of file
diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
index 526457b..3218d59 100644
--- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
@@ -12,9 +12,8 @@ entry:
%0 = getelementptr %struct.st* %s, i32 0, i32 0 ; <i16*> [#uses=1]
store i16 1, i16* %0, align 4
%s1 = bitcast %struct.st* %s to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32(i8* %p, i8* %s1, i32 2, i32 1)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false)
ret void
}
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
index 50e7f9a..d71bcb9 100644
--- a/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
+++ b/test/Transforms/ScalarRepl/2009-03-05-Aggre2Scalar-dbg.ll
@@ -1,11 +1,11 @@
; RUN: opt < %s -scalarrepl -disable-output -stats |& grep "Number of aggregates converted to scalar"
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
- type { } ; type %0
- type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 } ; type %1
- type { i8*, i32 } ; type %2
- type opaque ; type %3
- type { i32 } ; type %4
+ %0 = type { } ; type %0
+ %1 = type { i8*, i32, i32, i16, i16, %2, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %2, %3*, i32, [3 x i8], [1 x i8], %2, i32, i64 } ; type %1
+ %2 = type { i8*, i32 } ; type %2
+ %3 = type opaque ; type %3
+ %4 = type { i32 } ; type %4
%llvm.dbg.anchor.type = type { i32, i32 }
%llvm.dbg.basictype.type = type { i32, %0*, i8*, %0*, i32, i64, i64, i64, i32, i32 }
%llvm.dbg.compile_unit.type = type { i32, %0*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
diff --git a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
index 31d9bae..1993e4f 100644
--- a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
@@ -13,54 +13,54 @@ define void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
; CHECK: @test
; CHECK-NOT: alloca
; CHECK: "alloca point"
+; CHECK: store <8 x i16>
+; CHECK: store <8 x i16>
+
entry:
- %tmp_addr = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=3]
- %dst_addr = alloca %struct.int16x8x2_t* ; <%struct.int16x8x2_t**> [#uses=2]
- %__rv = alloca %union..0anon ; <%union..0anon*> [#uses=2]
- %__bx = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2]
- %__ax = alloca %struct.int16x8_t ; <%struct.int16x8_t*> [#uses=2]
- %tmp2 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2]
- %0 = alloca %struct.int16x8x2_t ; <%struct.int16x8x2_t*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+ %tmp_addr = alloca %struct.int16x8_t
+ %dst_addr = alloca %struct.int16x8x2_t*
+ %__rv = alloca %union..0anon
+ %__bx = alloca %struct.int16x8_t
+ %__ax = alloca %struct.int16x8_t
+ %tmp2 = alloca %struct.int16x8x2_t
+ %0 = alloca %struct.int16x8x2_t
+ %"alloca point" = bitcast i32 0 to i32
+ %1 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
store <8 x i16> %tmp.0, <8 x i16>* %1
store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
- %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
- %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
- %4 = load <8 x i16>* %3, align 16 ; <<8 x i16>> [#uses=1]
+ %2 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0
+ %3 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
+ %4 = load <8 x i16>* %3, align 16
store <8 x i16> %4, <8 x i16>* %2, align 16
- %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
- %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
- %7 = load <8 x i16>* %6, align 16 ; <<8 x i16>> [#uses=1]
+ %5 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0
+ %6 = getelementptr inbounds %struct.int16x8_t* %tmp_addr, i32 0, i32 0
+ %7 = load <8 x i16>* %6, align 16
store <8 x i16> %7, <8 x i16>* %5, align 16
- %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
- %9 = load <8 x i16>* %8, align 16 ; <<8 x i16>> [#uses=2]
- %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
- %11 = load <8 x i16>* %10, align 16 ; <<8 x i16>> [#uses=2]
- %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
- %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t* ; <%struct.__neon_int16x8x2_t*> [#uses=2]
- %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> ; <<8 x i16>> [#uses=1]
- %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0 ; <<8 x i16>*> [#uses=1]
+ %8 = getelementptr inbounds %struct.int16x8_t* %__ax, i32 0, i32 0
+ %9 = load <8 x i16>* %8, align 16
+ %10 = getelementptr inbounds %struct.int16x8_t* %__bx, i32 0, i32 0
+ %11 = load <8 x i16>* %10, align 16
+ %12 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0
+ %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t*
+ %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+ %15 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 0
store <8 x i16> %14, <8 x i16>* %15
- %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> ; <<8 x i16>> [#uses=1]
- %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1 ; <<8 x i16>*> [#uses=1]
+ %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+ %17 = getelementptr inbounds %struct.__neon_int16x8x2_t* %13, i32 0, i32 1
store <8 x i16> %16, <8 x i16>* %17
- %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0 ; <%struct.int16x8x2_t*> [#uses=1]
- %19 = bitcast %struct.int16x8x2_t* %0 to i8* ; <i8*> [#uses=1]
- %20 = bitcast %struct.int16x8x2_t* %18 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32(i8* %19, i8* %20, i32 32, i32 16)
- %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
- %21 = bitcast %struct.int16x8x2_t* %0 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32(i8* %tmp21, i8* %21, i32 32, i32 16)
- %22 = load %struct.int16x8x2_t** %dst_addr, align 4 ; <%struct.int16x8x2_t*> [#uses=1]
- %23 = bitcast %struct.int16x8x2_t* %22 to i8* ; <i8*> [#uses=1]
- %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32(i8* %23, i8* %tmp22, i32 32, i32 16)
+ %18 = getelementptr inbounds %union..0anon* %__rv, i32 0, i32 0
+ %19 = bitcast %struct.int16x8x2_t* %0 to i8*
+ %20 = bitcast %struct.int16x8x2_t* %18 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false)
+ %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
+ %21 = bitcast %struct.int16x8x2_t* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
+ %22 = load %struct.int16x8x2_t** %dst_addr, align 4
+ %23 = bitcast %struct.int16x8x2_t* %22 to i8*
+ %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
br label %return
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-
return: ; preds = %entry
ret void
}
@@ -69,21 +69,22 @@ return: ; preds = %entry
%struct._NSRange = type { i64 }
define void @test_memcpy_self() nounwind {
-; CHECK: @test_memcpy_self
-; CHECK-NOT: alloca
-; CHECK: br i1
entry:
- %range = alloca %struct._NSRange ; <%struct._NSRange*> [#uses=2]
+ %range = alloca %struct._NSRange
br i1 undef, label %cond.true, label %cond.false
cond.true: ; preds = %entry
- %tmp3 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
- %tmp4 = bitcast %struct._NSRange* %range to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8)
+ %tmp3 = bitcast %struct._NSRange* %range to i8*
+ %tmp4 = bitcast %struct._NSRange* %range to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false)
ret void
cond.false: ; preds = %entry
ret void
+
+; CHECK: @test_memcpy_self
+; CHECK-NOT: alloca
+; CHECK: br i1
}
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
index 3aee399..52df6d5 100644
--- a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
+++ b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
@@ -3,7 +3,7 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-%struct.test = type { [3 x double ] }
+%struct.test = type { [3 x double] }
define void @test_memcpy_self() nounwind {
; CHECK: @test_memcpy_self
@@ -11,8 +11,8 @@ define void @test_memcpy_self() nounwind {
; CHECK: ret void
%1 = alloca %struct.test
%2 = bitcast %struct.test* %1 to i8*
- call void @llvm.memcpy.i32(i8* %2, i8* %2, i32 24, i32 4)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false)
ret void
}
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
index 32e67fb..98fa1c6 100644
--- a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
+++ b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
@@ -10,7 +10,8 @@ target triple = "x86_64-apple-macosx10.7.0"
; CHECK: main
; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer
+; CHECK: %[[A:[a-z0-9]*]] = and i128
+; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32
define void @main() uwtable ssp {
entry:
@@ -27,7 +28,8 @@ entry:
; CHECK: test1
; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer
+; CHECK: %[[A:[a-z0-9]*]] = and i128
+; CHECK: %[[B:[a-z0-9]*]] = trunc i128 %[[A]] to i32
define void @test1() uwtable ssp {
entry:
diff --git a/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll b/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
new file mode 100644
index 0000000..f8530d6
--- /dev/null
+++ b/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -scalarrepl -S | FileCheck %s
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
+target triple = "thumbv7-apple-darwin10"
+
+; CHECK: f
+; CHECK-NOT: alloca
+; CHECK: %[[A:[a-z0-9]*]] = and i128 undef, -16777216
+; CHECK: %[[B:[a-z0-9]*]] = bitcast i128 %[[A]] to <4 x float>
+; CHECK: %[[C:[a-z0-9]*]] = extractelement <4 x float> %[[B]], i32 0
+; CHECK: ret float %[[C]]
+
+define float @f() nounwind ssp {
+entry:
+ %a = alloca <4 x float>, align 16
+ %p = bitcast <4 x float>* %a to i8*
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false)
+ %vec = load <4 x float>* %a, align 8
+ %val = extractelement <4 x float> %vec, i32 0
+ ret float %val
+}
+
+; CHECK: g
+; CHECK-NOT: alloca
+; CHECK: and i128
+
+define void @g() nounwind ssp {
+entry:
+ %a = alloca { <4 x float> }, align 16
+ %p = bitcast { <4 x float> }* %a to i8*
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false)
+ %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]*
+ %arrayidx = getelementptr inbounds [2 x <2 x float>]* %q, i32 0, i32 0
+ store <2 x float> undef, <2 x float>* %arrayidx, align 8
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/badarray.ll b/test/Transforms/ScalarRepl/badarray.ll
index 3ec3c01..768fec6 100644
--- a/test/Transforms/ScalarRepl/badarray.ll
+++ b/test/Transforms/ScalarRepl/badarray.ll
@@ -48,10 +48,10 @@ entry:
%callret = call %padded *@test3f() ; <i32> [#uses=2]
%callretcast = bitcast %padded* %callret to i8* ; <i8*> [#uses=1]
%var_11 = bitcast %padded* %var_1 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4, i1 false)
ret void
}
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
declare %padded* @test3f()
diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll
index 997da4b..51ba810 100644
--- a/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ b/test/Transforms/ScalarRepl/copy-aggregate.ll
@@ -68,16 +68,15 @@ define i128 @test4(float %a, float %b) nounwind {
;; If the elements of a struct or array alloca contain padding, SROA can still
;; split up the alloca as long as there is no padding between the elements.
%padded = type { i16, i8 }
-%arr = type [4 x %padded]
-define void @test5(%arr* %p, %arr* %q) {
+define void @test5([4 x %padded]* %p, [4 x %padded]* %q) {
entry:
; CHECK: test5
; CHECK-NOT: i128
- %var = alloca %arr, align 4
- %vari8 = bitcast %arr* %var to i8*
- %pi8 = bitcast %arr* %p to i8*
+ %var = alloca [4 x %padded], align 4
+ %vari8 = bitcast [4 x %padded]* %var to i8*
+ %pi8 = bitcast [4 x %padded]* %p to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i32 4, i1 false)
- %qi8 = bitcast %arr* %q to i8*
+ %qi8 = bitcast [4 x %padded]* %q to i8*
call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i32 4, i1 false)
ret void
}
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
index 7b62f09..cd4dc32 100644
--- a/test/Transforms/ScalarRepl/crash.ll
+++ b/test/Transforms/ScalarRepl/crash.ll
@@ -143,7 +143,6 @@ entry:
%struct.anon = type { %struct.aal_spanarray_t }
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
define fastcc void @test7() {
entry:
@@ -158,7 +157,7 @@ cond_next114.i: ; preds = %cond_true
cond_next: ; preds = %cond_true
%SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %SB19, i8* null, i32 12, i32 0 )
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i32 0, i1 false)
br i1 false, label %cond_next34, label %cond_next79
cond_next34: ; preds = %cond_next
@@ -189,14 +188,14 @@ entry:
; rdar://6808691 - ZeroLengthMemSet
- type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }>
+ %0 = type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }>
define i32 @test9() {
entry:
%.compoundliteral = alloca %0
%tmp228 = getelementptr %0* %.compoundliteral, i32 0, i32 7
%tmp229 = bitcast [0 x i16]* %tmp228 to i8*
- call void @llvm.memset.i64(i8* %tmp229, i8 0, i64 0, i32 2)
+ call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i32 2, i1 false)
unreachable
}
@@ -245,10 +244,12 @@ entry:
; VLAs.
define void @test12() {
bb4.i:
- %0 = malloc [0 x %struct.Item] ; <[0 x %struct.Item]*> [#uses=1]
+ %malloccall = tail call i8* @malloc(i32 0)
+ %0 = bitcast i8* %malloccall to [0 x %struct.Item]*
%.sub.i.c.i = getelementptr [0 x %struct.Item]* %0, i32 0, i32 0 ; <%struct.Item*> [#uses=0]
unreachable
}
+declare noalias i8* @malloc(i32)
; PR8680
define void @test13() nounwind {
@@ -258,3 +259,6 @@ entry:
call void %0() nounwind
ret void
}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/ScalarRepl/memcpy-from-global.ll
index 5b25864..59475ad 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/ScalarRepl/memcpy-from-global.ll
@@ -93,4 +93,18 @@ define void @test4() {
ret void
}
+declare void @llvm.lifetime.start(i64, i8*)
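+; The lifetime.start marker must not stop the alloca from being replaced by a
+; direct reference to the constant global it is memcpy'd from.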
+define void @test5() {
+ %A = alloca %T
+ %a = bitcast %T* %A to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %a)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
+ call void @baz(i8* byval %a)
+; CHECK: @test5
+; CHECK-NEXT: %a = bitcast %T* @G to i8*
+; CHECK-NEXT: call void @baz(i8* byval %a)
+ ret void
+}
+
declare void @baz(i8* byval)
diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll
index 5aeefcd..42e7a0f 100644
--- a/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ b/test/Transforms/ScalarRepl/memset-aggregate.ll
@@ -14,31 +14,29 @@ entry:
%L = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2]
%L2 = bitcast %struct.foo* %L to i8* ; <i8*> [#uses=1]
%tmp13 = bitcast %struct.foo* %P to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %L2, i8* %tmp13, i32 8, i32 4 )
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false)
%tmp4 = getelementptr %struct.foo* %L, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp5 = load i32* %tmp4 ; <i32> [#uses=1]
ret i32 %tmp5
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
define i32 @test2() {
entry:
%L = alloca [4 x %struct.foo], align 16 ; <[4 x %struct.foo]*> [#uses=2]
%L12 = bitcast [4 x %struct.foo]* %L to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.i32( i8* %L12, i8 0, i32 32, i32 16 )
+ call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false)
%tmp4 = getelementptr [4 x %struct.foo]* %L, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp5 = load i32* %tmp4 ; <i32> [#uses=1]
ret i32 %tmp5
}
-declare void @llvm.memset.i32(i8*, i8, i32, i32)
define i32 @test3() {
entry:
%B = alloca %struct.bar, align 16 ; <%struct.bar*> [#uses=4]
%B1 = bitcast %struct.bar* %B to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.i32( i8* %B1, i8 1, i32 24, i32 16 )
+ call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i32 16, i1 false)
%tmp3 = getelementptr %struct.bar* %B, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 1, i32* %tmp3
%tmp4 = getelementptr %struct.bar* %B, i32 0, i32 2 ; <double*> [#uses=1]
@@ -58,9 +56,12 @@ entry:
store i32 1, i32* %0, align 8
%1 = getelementptr %struct.f* %A, i32 0, i32 1 ; <i32*> [#uses=1]
%2 = bitcast i32* %1 to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.i32(i8* %2, i8 2, i32 12, i32 4)
+ call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false)
%3 = getelementptr %struct.f* %A, i32 0, i32 2 ; <i32*> [#uses=1]
%4 = load i32* %3, align 8 ; <i32> [#uses=1]
%retval12 = trunc i32 %4 to i16 ; <i16> [#uses=1]
ret i16 %retval12
}
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind \ No newline at end of file
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
index fa3972d..ffe0b1d 100644
--- a/test/Transforms/ScalarRepl/phi-select.ll
+++ b/test/Transforms/ScalarRepl/phi-select.ll
@@ -65,9 +65,9 @@ entry:
%A = alloca %PairTy
; CHECK: @test4
; CHECK: %A = alloca %PairTy
- %B = getelementptr {i32, i32}* %A, i32 0, i32 0
+ %B = getelementptr %PairTy* %A, i32 0, i32 0
store i32 1, i32* %B
- %C = getelementptr {i32, i32}* %A, i32 0, i32 1
+ %C = getelementptr %PairTy* %A, i32 0, i32 1
store i32 2, i32* %B
%X = select i1 %c, i32* %B, i32* %C
diff --git a/test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll b/test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll
deleted file mode 100644
index 477c9c9..0000000
--- a/test/Transforms/SimplifyCFG/2005-08-03-PHIFactorCrash.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: opt < %s -simplifycfg -disable-output
-; END.
-
- %arraytype.1.Char = type { i32, [0 x i8] }
- %arraytype.4.Signed = type { i32, [0 x i32] }
- %functiontype.23 = type %structtype.Task* (%structtype.Task*, %structtype.Packet*, %structtype.FailedRun*)
- %functiontype.27 = type %structtype.object* ()
- %functiontype.28 = type i1 (%structtype.object*, %structtype.object_vtable*)
- %functiontype.39 = type i32 (%structtype.listiter*)
- %opaquetype.RuntimeTypeInfo = type i8* (i8*)
- %structtype.AssertionError_vtable = type { %structtype.FailedRun_vtable }
- %structtype.DeviceTask = type { %structtype.Task }
- %structtype.FailedRun = type { %structtype.object }
- %structtype.FailedRun_vtable = type { %structtype.object_vtable }
- %structtype.Packet = type { %structtype.object, %structtype.list.1*, i32, i32, i32, %structtype.Packet* }
- %structtype.Task = type { %structtype.TaskState, %structtype.FailedRun*, i32, %structtype.Packet*, %structtype.Task*, i32 }
- %structtype.TaskState = type { %structtype.object, i1, i1, i1 }
- %structtype.list.1 = type { %arraytype.4.Signed* }
- %structtype.listiter = type { %structtype.list.1*, i32 }
- %structtype.object = type { %structtype.object_vtable* }
- %structtype.object_vtable = type { %structtype.object_vtable*, %opaquetype.RuntimeTypeInfo*, %arraytype.1.Char*, %functiontype.27* }
-@structinstance.59 = external global %structtype.AssertionError_vtable ; <%structtype.AssertionError_vtable*> [#uses=0]
-
-declare fastcc i1 @ll_isinstance__objectPtr_object_vtablePtr()
-
-declare fastcc void @ll_listnext__listiterPtr()
-
-define fastcc void @WorkTask.fn() {
-block0:
- br label %block1
-block1: ; preds = %block0
- %v2542 = call fastcc i1 @ll_isinstance__objectPtr_object_vtablePtr( ) ; <i1> [#uses=1]
- br i1 %v2542, label %block4, label %block2
-block2: ; preds = %block1
- br label %block3
-block3: ; preds = %block2
- unwind
-block4: ; preds = %block1
- br label %block5
-block5: ; preds = %block4
- %v2565 = icmp eq %structtype.Packet* null, null ; <i1> [#uses=1]
- br i1 %v2565, label %block15, label %block6
-block6: ; preds = %block5
- %self_2575 = phi %structtype.DeviceTask* [ null, %block5 ] ; <%structtype.DeviceTask*> [#uses=1]
- br i1 false, label %block14, label %block7
-block7: ; preds = %block14, %block6
- %self_2635 = phi %structtype.DeviceTask* [ %self_2575, %block6 ], [ null, %block14 ] ; <%structtype.DeviceTask*> [#uses=1]
- %tmp.124 = getelementptr %structtype.Packet* null, i32 0, i32 2 ; <i32*> [#uses=0]
- br label %block8
-block8: ; preds = %block10, %block7
- %self_2672 = phi %structtype.DeviceTask* [ %self_2635, %block7 ], [ null, %block10 ] ; <%structtype.DeviceTask*> [#uses=0]
- invoke fastcc void @ll_listnext__listiterPtr( )
- to label %block9 unwind label %block8_exception_handling
-block8_exception_handling: ; preds = %block8
- br i1 false, label %block8_exception_found_branchto_block12, label %block8_not_exception_structinstance.10
-block8_not_exception_structinstance.10: ; preds = %block8_exception_handling
- unwind
-block8_exception_found_branchto_block12: ; preds = %block8_exception_handling
- br label %block12
-block9: ; preds = %block8
- br i1 false, label %block11, label %block10
-block10: ; preds = %block11, %block9
- br label %block8
-block11: ; preds = %block9
- br label %block10
-block12: ; preds = %block8_exception_found_branchto_block12
- br label %block13
-block13: ; preds = %block15, %block12
- ret void
-block14: ; preds = %block6
- br label %block7
-block15: ; preds = %block5
- %v2586 = phi %structtype.DeviceTask* [ null, %block5 ] ; <%structtype.DeviceTask*> [#uses=0]
- br label %block13
-}
diff --git a/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll b/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll
deleted file mode 100644
index 8e05a3c..0000000
--- a/test/Transforms/SimplifyCFG/2008-04-23-MergeMultipleResultRet.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; RUN: opt < %s -simplifycfg -disable-output
-; rdar://5882392
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-apple-darwin9"
- %struct.Py_complex = type { double, double }
-
-define %struct.Py_complex @_Py_c_pow(double %a.0, double %a.1, double %b.0, double %b.1) nounwind {
-entry:
- %tmp7 = fcmp une double %b.0, 0.000000e+00 ; <i1> [#uses=1]
- %tmp11 = fcmp une double %b.1, 0.000000e+00 ; <i1> [#uses=1]
- %bothcond = or i1 %tmp7, %tmp11 ; <i1> [#uses=1]
- br i1 %bothcond, label %bb15, label %bb53
-
-bb15: ; preds = %entry
- %tmp18 = fcmp une double %a.0, 0.000000e+00 ; <i1> [#uses=1]
- %tmp24 = fcmp une double %a.1, 0.000000e+00 ; <i1> [#uses=1]
- %bothcond1 = or i1 %tmp18, %tmp24 ; <i1> [#uses=1]
- br i1 %bothcond1, label %bb29, label %bb27
-
-bb27: ; preds = %bb15
- %tmp28 = call i32* @__error( ) nounwind ; <i32*> [#uses=1]
- store i32 33, i32* %tmp28, align 4
- ret double undef, double undef
-
-bb29: ; preds = %bb15
- %tmp36 = fcmp une double %b.1, 0.000000e+00 ; <i1> [#uses=1]
- br i1 %tmp36, label %bb39, label %bb47
-
-bb39: ; preds = %bb29
- br label %bb47
-
-bb47: ; preds = %bb39, %bb29
- ret double undef, double undef
-
-bb53: ; preds = %entry
- ret double undef, double undef
-}
-
-declare i32* @__error()
-
-declare double @pow(double, double) nounwind readonly
-
-declare double @cos(double) nounwind readonly
diff --git a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll b/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
deleted file mode 100644
index 9c15efc..0000000
--- a/test/Transforms/SimplifyCFG/2008-04-27-MultipleReturnCrash.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -simplifycfg -disable-output
-; PR2256
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-pc-mingw32"
-
-define { x86_fp80, x86_fp80 } @catanl({ x86_fp80, x86_fp80 }* byval %Z, i1 %cond) nounwind {
-bb: ; preds = %entry
- br i1 %cond, label %bb48, label %bb40
-
-bb40: ; preds = %bb
- store i32 34, i32* null, align 4
- br label %bb196
-
-bb48: ; preds = %bb.bb48_crit_edge, %entry.bb48_crit_edge
- %tmp53 = icmp eq i32 0, 1280 ; <i1> [#uses=1]
- br i1 %tmp53, label %bb56, label %bb174
-
-bb56: ; preds = %bb48
- %iftmp.0.0 = select i1 false, x86_fp80 0xK3FFFC90FDAA22168C235, x86_fp80 0xKBFFFC90FDAA22168C235 ; <x86_fp80> [#uses=0]
- br label %bb196
-
-
-bb174: ; preds = %bb144, %bb114
- %tmp191 = fmul x86_fp80 0xK00000000000000000000, 0xK3FFE8000000000000000 ; <x86_fp80> [#uses=1]
- br label %bb196
-
-bb196: ; preds = %bb174, %bb56, %bb40
- %Res.1.0 = phi x86_fp80 [ 0xK7FFF8000000000000000, %bb40 ], [ %tmp191, %bb174 ], [ 0xK00000000000000000000, %bb56 ] ; <x86_fp80> [#uses=1]
- ret x86_fp80 0xK00000000000000000000, x86_fp80 %Res.1.0
-}
diff --git a/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll b/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll
deleted file mode 100644
index db56fdb..0000000
--- a/test/Transforms/SimplifyCFG/2009-03-05-Speculative-Hoist-Dbg.ll
+++ /dev/null
@@ -1,108 +0,0 @@
-; RUN: opt < %s -simplifycfg -S | grep select
- %llvm.dbg.anchor.type = type { i32, i32 }
- %llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"
-
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata" ; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata" ; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 00)\00", section "llvm.metadata" ; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata" ; <%llvm.dbg.compile_unit.type*> [#uses=1]
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-external global <{ i8 }> ; <<{ i8 }>*>:0 [#uses=0]
-@__dso_handle = external global i8* ; <i8**> [#uses=0]
-@_ZSt3cin = external global { i32 (...)**, i32, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, { i32 (...)**, \3 }*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } } ; <{ i32 (...)**, i32, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, { i32 (...)**, \3 }*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*> [#uses=1]
-@_ZSt4cout = external global { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } } ; <{ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*> [#uses=1]
-external constant [2 x i8] ; <[2 x i8]*>:1 [#uses=1]
-@llvm.global_ctors = external global [1 x { i32, void ()* }] ; <[1 x { i32, void ()* }]*> [#uses=0]
-
-define i32 @main(i32, i8**) {
-; <label>:2
- %3 = alloca [4096 x i8], align 1 ; <[4096 x i8]*> [#uses=1]
- %4 = call { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }* @_ZNKSt9basic_iosIcSt11char_traitsIcEE5rdbufEv({ { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* }* getelementptr ({ i32 (...)**, i32, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, { i32 (...)**, \3 }*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZSt3cin, i32 0, i32 2)) ; <{ i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*> [#uses=1]
- %5 = getelementptr [4096 x i8]* %3, i32 0, i32 0 ; <i8*> [#uses=1]
- %6 = call { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }* @_ZNSt15basic_streambufIcSt11char_traitsIcEE9pubsetbufEPci({ i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }* %4, i8* %5, i32 4096) ; <{ i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*> [#uses=0]
- %7 = call { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }* @_ZNKSt9basic_iosIcSt11char_traitsIcEE5rdbufEv({ { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* }* getelementptr ({ i32 (...)**, i32, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, { i32 (...)**, \3 }*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZSt3cin, i32 0, i32 2)) ; <{ i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*> [#uses=1]
- br label %25
-
-; <label>:8 ; preds = %25
- %9 = trunc i32 %26 to i8 ; <i8> [#uses=4]
- %10 = add i32 %.02, 1 ; <i32> [#uses=3]
- %11 = icmp eq i8 %9, 10 ; <i1> [#uses=1]
- br i1 %11, label %12, label %14
-
-; <label>:12 ; preds = %8
- %13 = add i32 %.1, 1 ; <i32> [#uses=1]
- br label %14
-
-; <label>:14 ; preds = %12, %8
- %.0 = phi i32 [ %.1, %8 ], [ %13, %12 ] ; <i32> [#uses=3]
- %15 = icmp eq i8 %9, 32 ; <i1> [#uses=1]
- br i1 %15, label %20, label %16
-
-; <label>:16 ; preds = %14
- %17 = icmp eq i8 %9, 10 ; <i1> [#uses=1]
- br i1 %17, label %20, label %18
-
-; <label>:18 ; preds = %16
- %19 = icmp eq i8 %9, 9 ; <i1> [#uses=1]
- br i1 %19, label %20, label %21
-
-; <label>:20 ; preds = %18, %16, %14
- br label %25
-
-; <label>:21 ; preds = %18
- %22 = icmp eq i32 %.03, 0 ; <i1> [#uses=1]
- br i1 %22, label %23, label %25
-
-; <label>:23 ; preds = %21
- call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
- %24 = add i32 %.01, 1 ; <i32> [#uses=1]
- br label %25
-
-; <label>:25 ; preds = %23, %21, %20, %2
- %.03 = phi i32 [ 0, %2 ], [ %.03, %21 ], [ 1, %23 ], [ 0, %20 ] ; <i32> [#uses=2]
- %.02 = phi i32 [ 0, %2 ], [ %10, %21 ], [ %10, %23 ], [ %10, %20 ] ; <i32> [#uses=2]
- %.01 = phi i32 [ 0, %2 ], [ %.01, %21 ], [ %24, %23 ], [ %.01, %20 ] ; <i32> [#uses=4]
- %.1 = phi i32 [ 0, %2 ], [ %.0, %21 ], [ %.0, %23 ], [ %.0, %20 ] ; <i32> [#uses=3]
- %26 = call i32 @_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv({ i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }* %7) ; <i32> [#uses=2]
- %27 = icmp eq i32 %26, -1 ; <i1> [#uses=1]
- br i1 %27, label %28, label %8
-
-; <label>:28 ; preds = %25
- %29 = call { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZNSolsEi({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZSt4cout, i32 %.1) ; <{ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*> [#uses=1]
- %30 = call { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* %29, i8* getelementptr ([2 x i8]* @1, i32 0, i32 0)) ; <{ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*> [#uses=1]
- %31 = call { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZNSolsEi({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* %30, i32 %.01) ; <{ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*> [#uses=1]
- %32 = call { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* %31, i8* getelementptr ([2 x i8]* @1, i32 0, i32 0)) ; <{ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*> [#uses=1]
- %33 = call { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZNSolsEi({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* %32, i32 %.02) ; <{ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*> [#uses=1]
- %34 = call { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZNSolsEPFRSoS_E({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* %33, { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* ({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*)* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_) ; <{ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*> [#uses=0]
- ret i32 0
-}
-
-declare void @""() section "__TEXT,__StaticInit,regular,pure_instructions"
-
-declare fastcc void @""() section "__TEXT,__StaticInit,regular,pure_instructions"
-
-declare void @_ZNSt8ios_base4InitC1Ev(<{ i8 }>*)
-
-declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*) nounwind
-
-declare void @""(i8*)
-
-declare void @_ZNSt8ios_base4InitD1Ev(<{ i8 }>*)
-
-declare { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }* @_ZNKSt9basic_iosIcSt11char_traitsIcEE5rdbufEv({ { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* }*)
-
-declare { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }* @_ZNSt15basic_streambufIcSt11char_traitsIcEE9pubsetbufEPci({ i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, i8*, i32)
-
-declare i32 @_ZNSt15basic_streambufIcSt11char_traitsIcEE6sbumpcEv({ i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*)
-
-declare { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZNSolsEi({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*, i32)
-
-declare { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*, i8*)
-
-declare { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZNSolsEPFRSoS_E({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*, { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* ({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*)*)
-
-declare { i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_({ i32 (...)**, { { i32 (...)**, i32, i32, i32, i32, i32, { \2, void (i32, \6*, i32)*, i32, i32 }*, { i8*, i32 }, [8 x { i8*, i32 }], i32, { i8*, i32 }*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }, \3*, i8, i8, { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, { { i32, { i32 (...)**, i32 }**, i32, { i32 (...)**, i32 }**, i8** }* } }*, { { i32 (...)**, i32 }, i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }*, { { i32 (...)**, i32 } }*, { { i32 (...)**, i32 } }* } }*)
diff --git a/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll b/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
new file mode 100644
index 0000000..1b70c06
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/ForwardSwitchConditionToPHI.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -simplifycfg -S | \
+; RUN: not grep " switch"
+; PR10131
+
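+; Each switch case returns its own selector value (case N yields N), so
+; SimplifyCFG can forward the switch condition straight into the PHI and
+; drop the switch entirely.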
+; ModuleID = '<stdin>'
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-pc-linux-gnu"
+
+define i32 @t(i32 %m) nounwind readnone {
+entry:
+ switch i32 %m, label %sw.bb4 [
+ i32 0, label %sw.bb0
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb0: ; preds = %entry
+ br label %return
+
+sw.bb1: ; preds = %entry
+ br label %return
+
+sw.bb2: ; preds = %entry
+ br label %return
+
+sw.bb3: ; preds = %entry
+ br label %return
+
+sw.bb4: ; preds = %entry
+ br label %return
+
+return:                                          ; preds = %sw.bb0, %sw.bb1, %sw.bb2, %sw.bb3, %sw.bb4
+ %retval.0 = phi i32 [ 4, %sw.bb4 ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 1, %sw.bb1 ], [ 0, %sw.bb0 ]
+ ret i32 %retval.0
+}
diff --git a/test/Transforms/SimplifyCFG/lifetime.ll b/test/Transforms/SimplifyCFG/lifetime.ll
new file mode 100644
index 0000000..b794221
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/lifetime.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; Test that a lifetime intrinsic doesn't prevent us from simplifying this.
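+; The conditional lifetime.end in %bb0 is the only extra instruction there and
+; must not keep the branch from being folded away.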
+
+; CHECK: foo
+; CHECK: entry:
+; CHECK-NOT: bb0:
+; CHECK-NOT: bb1:
+; CHECK: ret
+define void @foo(i1 %x) {
+entry:
+ %a = alloca i8
+ call void @llvm.lifetime.start(i64 -1, i8* %a) nounwind
+ br i1 %x, label %bb0, label %bb1
+
+bb0:
+ call void @llvm.lifetime.end(i64 -1, i8* %a) nounwind
+ br label %bb1
+
+bb1:
+ call void @f()
+ ret void
+}
+
+declare void @f()
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
diff --git a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
index 7d7391a..343e169 100644
--- a/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
+++ b/test/Transforms/SimplifyCFG/switch_switch_fold_dbginfo.ll
@@ -1,5 +1,5 @@
; RUN: opt < %s -simplifycfg -S | \
-; RUN: grep switch | count 1
+; RUN: not grep " switch"
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/SimplifyLibCalls/MemCpy.ll b/test/Transforms/SimplifyLibCalls/MemCpy.ll
index 39662b1..c711178 100644
--- a/test/Transforms/SimplifyLibCalls/MemCpy.ll
+++ b/test/Transforms/SimplifyLibCalls/MemCpy.ll
@@ -4,17 +4,16 @@
@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=1]
@hello_u = constant [8 x i8] c"hello_u\00" ; <[8 x i8]*> [#uses=1]
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
define i32 @main() {
- %h_p = getelementptr [2 x i8]* @h, i32 0, i32 0 ; <i8*> [#uses=1]
- %hel_p = getelementptr [4 x i8]* @hel, i32 0, i32 0 ; <i8*> [#uses=1]
- %hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0 ; <i8*> [#uses=1]
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=3]
- call void @llvm.memcpy.i32( i8* %target_p, i8* %h_p, i32 2, i32 2 )
- call void @llvm.memcpy.i32( i8* %target_p, i8* %hel_p, i32 4, i32 4 )
- call void @llvm.memcpy.i32( i8* %target_p, i8* %hello_u_p, i32 8, i32 8 )
- ret i32 0
+ %h_p = getelementptr [2 x i8]* @h, i32 0, i32 0
+ %hel_p = getelementptr [4 x i8]* @hel, i32 0, i32 0
+ %hello_u_p = getelementptr [8 x i8]* @hello_u, i32 0, i32 0
+ %target = alloca [1024 x i8]
+ %target_p = getelementptr [1024 x i8]* %target, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %h_p, i32 2, i32 2, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hel_p, i32 4, i32 4, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %target_p, i8* %hello_u_p, i32 8, i32 8, i1 false)
+ ret i32 0
}
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/TailCallElim/inf-recursion.ll b/test/Transforms/TailCallElim/inf-recursion.ll
index e4ac928..c427869 100644
--- a/test/Transforms/TailCallElim/inf-recursion.ll
+++ b/test/Transforms/TailCallElim/inf-recursion.ll
@@ -30,5 +30,4 @@ define float @fabsf(float %f) {
ret float %t
}
-declare float @fabsf(float %f)
declare x86_fp80 @fabsl(x86_fp80 %f)
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index 3509296..ba4cbc5 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -30,14 +30,6 @@ if 'TEMP' in os.environ:
###
-# If necessary, point the dynamic loader at libLLVM.so.
-if config.enable_shared:
- shlibpath = config.environment.get(config.shlibpath_var,'')
- if shlibpath:
- shlibpath = os.pathsep + shlibpath
- shlibpath = config.shlibdir + shlibpath
- config.environment[config.shlibpath_var] = shlibpath
-
# Check that the object root is known.
if config.test_exec_root is None:
# Otherwise, we haven't loaded the site specific configuration (the user is
@@ -81,3 +73,11 @@ if config.test_exec_root is None:
lit.note('using out-of-tree build at %r' % llvm_obj_root)
lit.load_config(config, site_cfg)
raise SystemExit
+
+# If necessary, point the dynamic loader at libLLVM.so.
+if config.enable_shared:
+ shlibpath = config.environment.get(config.shlibpath_var,'')
+ if shlibpath:
+ shlibpath = os.pathsep + shlibpath
+ shlibpath = config.shlibdir + shlibpath
+ config.environment[config.shlibpath_var] = shlibpath
diff --git a/test/Verifier/2002-04-13-RetTypes.ll b/test/Verifier/2002-04-13-RetTypes.ll
index 197f5c2..4c1ddd1 100644
--- a/test/Verifier/2002-04-13-RetTypes.ll
+++ b/test/Verifier/2002-04-13-RetTypes.ll
@@ -1,10 +1,9 @@
-; RUN: not llvm-as < %s |& grep {return type does not match operand type}
+; RUN: not llvm-as < %s |& grep {value doesn't match function result type 'i32'}
; Verify that the operand type of the ret instructions in a function matches the
; declared return type of the function they live in.
;
-define i32 @testfunc()
-begin
+define i32 @testfunc() {
ret i32* null
-end
+}
diff --git a/test/Verifier/2005-03-21-UndefinedTypeReference.ll b/test/Verifier/2005-03-21-UndefinedTypeReference.ll
deleted file mode 100644
index 5299397..0000000
--- a/test/Verifier/2005-03-21-UndefinedTypeReference.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: not llvm-as < %s |& grep {use of undefined type named 'InvalidType'}
-
-define void @test() {
- malloc %InvalidType
- ret void
-}
-
diff --git a/test/Verifier/2008-11-15-RetVoid.ll b/test/Verifier/2008-11-15-RetVoid.ll
index dbdcae2..aaef703 100644
--- a/test/Verifier/2008-11-15-RetVoid.ll
+++ b/test/Verifier/2008-11-15-RetVoid.ll
@@ -1,4 +1,4 @@
-; RUN: not llvm-as < %s |& grep {returns non-void in Function of void return}
+; RUN: not llvm-as < %s |& grep {value doesn't match function result type 'void'}
define void @foo() {
ret i32 0
diff --git a/test/Verifier/byval-2.ll b/test/Verifier/byval-2.ll
deleted file mode 100644
index 1d03715..0000000
--- a/test/Verifier/byval-2.ll
+++ /dev/null
@@ -1,4 +0,0 @@
-; RUN: not llvm-as < %s >& /dev/null
-; PR2711
- %s = type opaque
-declare void @h(%s* byval %num)
diff --git a/test/lit.cfg b/test/lit.cfg
index 9a2f74c..ef56473 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -306,3 +306,6 @@ else:
if loadable_module:
config.available_features.add('loadable_module')
+
+if config.enable_assertions:
+ config.available_features.add('asserts')
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index 3588aa6..fe152ef 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -7,6 +7,7 @@ config.llvmgcc_dir = "@LLVMGCCDIR@"
config.lit_tools_dir = "@LLVM_LIT_TOOLS_DIR@"
config.python_executable = "@PYTHON_EXECUTABLE@"
config.enable_shared = @ENABLE_SHARED@
+config.enable_assertions = @ENABLE_ASSERTIONS@
# Support substitution of the tools_dir with user parameters. This is
# used when we can't determine the tool dir at configuration time.
diff --git a/tools/CMakeLists.txt b/tools/CMakeLists.txt
index c9072a7..8851a0b 100644
--- a/tools/CMakeLists.txt
+++ b/tools/CMakeLists.txt
@@ -59,7 +59,10 @@ if( LLVM_ENABLE_PIC )
endif()
if( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
- add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/clang )
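+  # Give users a way to opt out of building the in-tree Clang checkout.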
+ option(LLVM_BUILD_CLANG "Whether to build Clang as part of LLVM" ON)
+ if (${LLVM_BUILD_CLANG})
+ add_subdirectory( ${CMAKE_CURRENT_SOURCE_DIR}/clang )
+ endif()
endif( EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/clang/CMakeLists.txt )
set(LLVM_COMMON_DEPENDS ${LLVM_COMMON_DEPENDS} PARENT_SCOPE)
diff --git a/tools/bugpoint/ExtractFunction.cpp b/tools/bugpoint/ExtractFunction.cpp
index 593765c..9941add 100644
--- a/tools/bugpoint/ExtractFunction.cpp
+++ b/tools/bugpoint/ExtractFunction.cpp
@@ -116,8 +116,6 @@ Module *BugDriver::performFinalCleanups(Module *M, bool MayModifySemantics) {
else
CleanupPasses.push_back("deadargelim");
- CleanupPasses.push_back("deadtypeelim");
-
Module *New = runPassesOn(M, CleanupPasses);
if (New == 0) {
errs() << "Final cleanups failed. Sorry. :( Please report a bug!\n";
@@ -175,13 +173,16 @@ void llvm::DeleteFunctionBody(Function *F) {
static Constant *GetTorInit(std::vector<std::pair<Function*, int> > &TorList) {
assert(!TorList.empty() && "Don't create empty tor list!");
std::vector<Constant*> ArrayElts;
+ Type *Int32Ty = Type::getInt32Ty(TorList[0].first->getContext());
+
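+  // Every ctor/dtor list entry is a { i32 priority, function pointer } struct,
+  // so the struct type can be built once instead of per element.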
+ const StructType *STy =
+ StructType::get(Int32Ty, TorList[0].first->getType(), NULL);
for (unsigned i = 0, e = TorList.size(); i != e; ++i) {
- std::vector<Constant*> Elts;
- Elts.push_back(ConstantInt::get(
- Type::getInt32Ty(TorList[i].first->getContext()), TorList[i].second));
- Elts.push_back(TorList[i].first);
- ArrayElts.push_back(ConstantStruct::get(TorList[i].first->getContext(),
- Elts, false));
+ Constant *Elts[] = {
+ ConstantInt::get(Int32Ty, TorList[i].second),
+ TorList[i].first
+ };
+ ArrayElts.push_back(ConstantStruct::get(STy, Elts));
}
return ConstantArray::get(ArrayType::get(ArrayElts[0]->getType(),
ArrayElts.size()),
diff --git a/tools/bugpoint/Miscompilation.cpp b/tools/bugpoint/Miscompilation.cpp
index 1834fe1..d645dfb 100644
--- a/tools/bugpoint/Miscompilation.cpp
+++ b/tools/bugpoint/Miscompilation.cpp
@@ -794,8 +794,7 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// Call the old main function and return its result
BasicBlock *BB = BasicBlock::Create(Safe->getContext(), "entry", newMain);
- CallInst *call = CallInst::Create(oldMainProto, args.begin(), args.end(),
- "", BB);
+ CallInst *call = CallInst::Create(oldMainProto, args, "", BB);
// If the type of old function wasn't void, return value of call
ReturnInst::Create(Safe->getContext(), call, BB);
@@ -873,8 +872,7 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
//
// call resolver(GetElementPtr...)
CallInst *Resolver =
- CallInst::Create(resolverFunc, ResolverArgs.begin(),
- ResolverArgs.end(), "resolver", LookupBB);
+ CallInst::Create(resolverFunc, ResolverArgs, "resolver", LookupBB);
// Cast the result from the resolver to correctly-typed function.
CastInst *CastedResolver =
@@ -899,10 +897,10 @@ static void CleanupAndPrepareModules(BugDriver &BD, Module *&Test,
// Pass on the arguments to the real function, return its result
if (F->getReturnType()->isVoidTy()) {
- CallInst::Create(FuncPtr, Args.begin(), Args.end(), "", DoCallBB);
+ CallInst::Create(FuncPtr, Args, "", DoCallBB);
ReturnInst::Create(F->getContext(), DoCallBB);
} else {
- CallInst *Call = CallInst::Create(FuncPtr, Args.begin(), Args.end(),
+ CallInst *Call = CallInst::Create(FuncPtr, Args,
"retval", DoCallBB);
ReturnInst::Create(F->getContext(),Call, DoCallBB);
}
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 162d6c8..b36e941 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -22,6 +22,7 @@
#include "llvm/CodeGen/LinkAllAsmWriterComponents.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
#include "llvm/Config/config.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
@@ -31,7 +32,6 @@
#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Target/SubtargetFeature.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
@@ -201,6 +201,9 @@ int main(int argc, char **argv) {
// Initialize targets first, so that --version shows registered targets.
InitializeAllTargets();
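+  // The MC-layer tables (asm info, instruction info, subtarget info) now have
+  // to be registered explicitly as well.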
+ InitializeAllMCAsmInfos();
+ InitializeAllMCInstrInfos();
+ InitializeAllMCSubtargetInfos();
InitializeAllAsmPrinters();
InitializeAllAsmParsers();
@@ -261,16 +264,16 @@ int main(int argc, char **argv) {
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
- if (MCPU.size() || MAttrs.size()) {
+ if (MAttrs.size()) {
SubtargetFeatures Features;
- Features.setCPU(MCPU);
for (unsigned i = 0; i != MAttrs.size(); ++i)
Features.AddFeature(MAttrs[i]);
FeaturesStr = Features.getString();
}
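+  // The CPU name goes to createTargetMachine directly now; SubtargetFeatures
+  // only carries the -mattr flags.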
std::auto_ptr<TargetMachine>
- target(TheTarget->createTargetMachine(TheTriple.getTriple(), FeaturesStr));
+ target(TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU,
+ FeaturesStr));
assert(target.get() && "Could not allocate target machine!");
TargetMachine &Target = *target.get();
diff --git a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 9a92671..4ada64a 100644
--- a/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -102,10 +102,11 @@ static const char *GetBlockName(unsigned BlockID,
default: return 0;
case bitc::MODULE_BLOCK_ID: return "MODULE_BLOCK";
case bitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK";
- case bitc::TYPE_BLOCK_ID: return "TYPE_BLOCK";
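+    // Both the old and the new type block IDs are recognized so bitcode
+    // written before the type-system rewrite can still be dumped.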
+ case bitc::TYPE_BLOCK_ID_OLD: return "TYPE_BLOCK_ID_OLD";
+ case bitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID";
case bitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK";
case bitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK";
- case bitc::TYPE_SYMTAB_BLOCK_ID: return "TYPE_SYMTAB";
+ case bitc::TYPE_SYMTAB_BLOCK_ID_OLD: return "TYPE_SYMTAB_OLD";
case bitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB";
case bitc::METADATA_BLOCK_ID: return "METADATA_BLOCK";
case bitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK";
@@ -162,25 +163,29 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
default: return 0;
case bitc::PARAMATTR_CODE_ENTRY: return "ENTRY";
}
- case bitc::TYPE_BLOCK_ID:
+ case bitc::TYPE_BLOCK_ID_OLD:
+ case bitc::TYPE_BLOCK_ID_NEW:
switch (CodeID) {
default: return 0;
- case bitc::TYPE_CODE_NUMENTRY: return "NUMENTRY";
- case bitc::TYPE_CODE_VOID: return "VOID";
- case bitc::TYPE_CODE_FLOAT: return "FLOAT";
- case bitc::TYPE_CODE_DOUBLE: return "DOUBLE";
- case bitc::TYPE_CODE_LABEL: return "LABEL";
- case bitc::TYPE_CODE_OPAQUE: return "OPAQUE";
- case bitc::TYPE_CODE_INTEGER: return "INTEGER";
- case bitc::TYPE_CODE_POINTER: return "POINTER";
- case bitc::TYPE_CODE_FUNCTION: return "FUNCTION";
- case bitc::TYPE_CODE_STRUCT: return "STRUCT";
- case bitc::TYPE_CODE_ARRAY: return "ARRAY";
- case bitc::TYPE_CODE_VECTOR: return "VECTOR";
- case bitc::TYPE_CODE_X86_FP80: return "X86_FP80";
- case bitc::TYPE_CODE_FP128: return "FP128";
- case bitc::TYPE_CODE_PPC_FP128: return "PPC_FP128";
- case bitc::TYPE_CODE_METADATA: return "METADATA";
+ case bitc::TYPE_CODE_NUMENTRY: return "NUMENTRY";
+ case bitc::TYPE_CODE_VOID: return "VOID";
+ case bitc::TYPE_CODE_FLOAT: return "FLOAT";
+ case bitc::TYPE_CODE_DOUBLE: return "DOUBLE";
+ case bitc::TYPE_CODE_LABEL: return "LABEL";
+ case bitc::TYPE_CODE_OPAQUE: return "OPAQUE";
+ case bitc::TYPE_CODE_INTEGER: return "INTEGER";
+ case bitc::TYPE_CODE_POINTER: return "POINTER";
+ case bitc::TYPE_CODE_FUNCTION: return "FUNCTION";
+ case bitc::TYPE_CODE_STRUCT_OLD: return "STRUCT_OLD";
+ case bitc::TYPE_CODE_ARRAY: return "ARRAY";
+ case bitc::TYPE_CODE_VECTOR: return "VECTOR";
+ case bitc::TYPE_CODE_X86_FP80: return "X86_FP80";
+ case bitc::TYPE_CODE_FP128: return "FP128";
+ case bitc::TYPE_CODE_PPC_FP128: return "PPC_FP128";
+ case bitc::TYPE_CODE_METADATA: return "METADATA";
+ case bitc::TYPE_CODE_STRUCT_ANON: return "STRUCT_ANON";
+ case bitc::TYPE_CODE_STRUCT_NAME: return "STRUCT_NAME";
+ case bitc::TYPE_CODE_STRUCT_NAMED: return "STRUCT_NAMED";
}
case bitc::CONSTANTS_BLOCK_ID:
@@ -230,25 +235,19 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::FUNC_CODE_INST_UNREACHABLE: return "INST_UNREACHABLE";
case bitc::FUNC_CODE_INST_PHI: return "INST_PHI";
- case bitc::FUNC_CODE_INST_MALLOC: return "INST_MALLOC";
- case bitc::FUNC_CODE_INST_FREE: return "INST_FREE";
case bitc::FUNC_CODE_INST_ALLOCA: return "INST_ALLOCA";
case bitc::FUNC_CODE_INST_LOAD: return "INST_LOAD";
- case bitc::FUNC_CODE_INST_STORE: return "INST_STORE";
- case bitc::FUNC_CODE_INST_CALL: return "INST_CALL";
case bitc::FUNC_CODE_INST_VAARG: return "INST_VAARG";
- case bitc::FUNC_CODE_INST_STORE2: return "INST_STORE2";
- case bitc::FUNC_CODE_INST_GETRESULT: return "INST_GETRESULT";
+ case bitc::FUNC_CODE_INST_STORE: return "INST_STORE";
case bitc::FUNC_CODE_INST_EXTRACTVAL: return "INST_EXTRACTVAL";
case bitc::FUNC_CODE_INST_INSERTVAL: return "INST_INSERTVAL";
case bitc::FUNC_CODE_INST_CMP2: return "INST_CMP2";
case bitc::FUNC_CODE_INST_VSELECT: return "INST_VSELECT";
- case bitc::FUNC_CODE_DEBUG_LOC: return "DEBUG_LOC";
case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: return "DEBUG_LOC_AGAIN";
- case bitc::FUNC_CODE_INST_CALL2: return "INST_CALL2";
- case bitc::FUNC_CODE_DEBUG_LOC2: return "DEBUG_LOC2";
+ case bitc::FUNC_CODE_INST_CALL: return "INST_CALL";
+ case bitc::FUNC_CODE_DEBUG_LOC: return "DEBUG_LOC";
}
- case bitc::TYPE_SYMTAB_BLOCK_ID:
+ case bitc::TYPE_SYMTAB_BLOCK_ID_OLD:
switch (CodeID) {
default: return 0;
case bitc::TST_CODE_ENTRY: return "ENTRY";
@@ -262,22 +261,17 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
case bitc::METADATA_ATTACHMENT_ID:
switch(CodeID) {
default:return 0;
- case bitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT";
- case bitc::METADATA_ATTACHMENT2: return "METADATA_ATTACHMENT2";
+ case bitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT";
}
case bitc::METADATA_BLOCK_ID:
switch(CodeID) {
default:return 0;
case bitc::METADATA_STRING: return "METADATA_STRING";
+ case bitc::METADATA_NAME: return "METADATA_NAME";
+ case bitc::METADATA_KIND: return "METADATA_KIND";
case bitc::METADATA_NODE: return "METADATA_NODE";
case bitc::METADATA_FN_NODE: return "METADATA_FN_NODE";
- case bitc::METADATA_NAME: return "METADATA_NAME";
case bitc::METADATA_NAMED_NODE: return "METADATA_NAMED_NODE";
- case bitc::METADATA_KIND: return "METADATA_KIND";
- case bitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT";
- case bitc::METADATA_NODE2: return "METADATA_NODE2";
- case bitc::METADATA_FN_NODE2: return "METADATA_FN_NODE2";
- case bitc::METADATA_NAMED_NODE2: return "METADATA_NAMED_NODE2";
}
}
}
diff --git a/tools/llvm-dis/llvm-dis.cpp b/tools/llvm-dis/llvm-dis.cpp
index 7453d9e..9020a52 100644
--- a/tools/llvm-dis/llvm-dis.cpp
+++ b/tools/llvm-dis/llvm-dis.cpp
@@ -74,7 +74,7 @@ public:
if (!V.getType()->isVoidTy()) {
OS.PadToColumn(50);
Padded = true;
- OS << "; [#uses=" << V.getNumUses() << " type=" << V.getType()->getDescription() << "]"; // Output # uses and type
+ OS << "; [#uses=" << V.getNumUses() << " type=" << *V.getType() << "]"; // Output # uses and type
}
if (const Instruction *I = dyn_cast<Instruction>(&V)) {
const DebugLoc &DL = I->getDebugLoc();
diff --git a/tools/llvm-extract/llvm-extract.cpp b/tools/llvm-extract/llvm-extract.cpp
index 8c2f43a..a6c229f 100644
--- a/tools/llvm-extract/llvm-extract.cpp
+++ b/tools/llvm-extract/llvm-extract.cpp
@@ -149,7 +149,6 @@ int main(int argc, char **argv) {
if (!DeleteFn)
Passes.add(createGlobalDCEPass()); // Delete unreachable globals
Passes.add(createStripDeadDebugInfoPass()); // Remove dead debug info
- Passes.add(createDeadTypeEliminationPass()); // Remove dead types...
Passes.add(createStripDeadPrototypesPass()); // Remove dead func decls
std::string ErrorInfo;
diff --git a/tools/llvm-mc/Disassembler.cpp b/tools/llvm-mc/Disassembler.cpp
index 93b9723..c389f6a 100644
--- a/tools/llvm-mc/Disassembler.cpp
+++ b/tools/llvm-mc/Disassembler.cpp
@@ -127,12 +127,12 @@ static bool ByteArrayFromString(ByteArrayTy &ByteArray,
return false;
}
-int Disassembler::disassemble(const Target &T, TargetMachine &TM,
+int Disassembler::disassemble(const Target &T,
const std::string &Triple,
MemoryBuffer &Buffer,
raw_ostream &Out) {
// Set up disassembler.
- OwningPtr<const MCAsmInfo> AsmInfo(T.createAsmInfo(Triple));
+ OwningPtr<const MCAsmInfo> AsmInfo(T.createMCAsmInfo(Triple));
if (!AsmInfo) {
errs() << "error: no assembly info for target " << Triple << "\n";
@@ -146,7 +146,7 @@ int Disassembler::disassemble(const Target &T, TargetMachine &TM,
}
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
- OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(TM, AsmPrinterVariant,
+ OwningPtr<MCInstPrinter> IP(T.createMCInstPrinter(AsmPrinterVariant,
*AsmInfo));
if (!IP) {
errs() << "error: no instruction printer for target " << Triple << '\n';
diff --git a/tools/llvm-mc/Disassembler.h b/tools/llvm-mc/Disassembler.h
index d738ee7..433e71b 100644
--- a/tools/llvm-mc/Disassembler.h
+++ b/tools/llvm-mc/Disassembler.h
@@ -21,13 +21,11 @@ namespace llvm {
class MemoryBuffer;
class Target;
-class TargetMachine;
class raw_ostream;
class Disassembler {
public:
static int disassemble(const Target &target,
- TargetMachine &TM,
const std::string &tripleString,
MemoryBuffer &buffer,
raw_ostream &Out);
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index eb23a1a..334bf32 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -17,13 +17,15 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Target/TargetAsmBackend.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetRegistry.h"
-#include "llvm/Target/SubtargetFeature.h" // FIXME.
#include "llvm/Target/TargetAsmInfo.h" // FIXME.
#include "llvm/Target/TargetLowering.h" // FIXME.
#include "llvm/Target/TargetLoweringObjectFile.h" // FIXME.
@@ -97,7 +99,7 @@ IncludeDirs("I", cl::desc("Directory of include files"),
static cl::opt<std::string>
ArchName("arch", cl::desc("Target arch to assemble for, "
- "see -version for available targets"));
+ "see -version for available targets"));
static cl::opt<std::string>
TripleName("triple", cl::desc("Target triple to assemble for, "
@@ -110,12 +112,11 @@ MCPU("mcpu",
cl::init(""));
static cl::opt<bool>
-NoInitialTextSection("n", cl::desc(
- "Don't assume assembly file starts in the text section"));
+NoInitialTextSection("n", cl::desc("Don't assume assembly file starts "
+ "in the text section"));
static cl::opt<bool>
-SaveTempLabels("L", cl::desc(
- "Don't discard temporary labels"));
+SaveTempLabels("L", cl::desc("Don't discard temporary labels"));
enum ActionType {
AC_AsLex,
@@ -195,7 +196,7 @@ static int AsLexInput(const char *ProgName) {
if (!TheTarget)
return 1;
- llvm::OwningPtr<MCAsmInfo> MAI(TheTarget->createAsmInfo(TripleName));
+ llvm::OwningPtr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(TripleName));
assert(MAI && "Unable to create target asm info!");
AsmLexer Lexer(*MAI);
@@ -305,22 +306,18 @@ static int AssembleInput(const char *ProgName) {
SrcMgr.setIncludeDirs(IncludeDirs);
- llvm::OwningPtr<MCAsmInfo> MAI(TheTarget->createAsmInfo(TripleName));
+ llvm::OwningPtr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(TripleName));
assert(MAI && "Unable to create target asm info!");
// Package up features to be passed to target/subtarget
std::string FeaturesStr;
- if (MCPU.size()) {
- SubtargetFeatures Features;
- Features.setCPU(MCPU);
- FeaturesStr = Features.getString();
- }
// FIXME: We shouldn't need to do this (and link in codegen).
// When we split this out, we should do it in a way that makes
// it straightforward to switch subtargets on the fly (e.g.,
// the .cpu and .code16 directives).
OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName,
+ MCPU,
FeaturesStr));
if (!TM) {
@@ -345,14 +342,18 @@ static int AssembleInput(const char *ProgName) {
TM->getTargetLowering()->getObjFileLowering();
const_cast<TargetLoweringObjectFile&>(TLOF).Initialize(Ctx, *TM);
+ OwningPtr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
+ OwningPtr<MCSubtargetInfo>
+ STI(TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
+
// FIXME: There is a bit of code duplication with addPassesToEmitFile.
if (FileType == OFT_AssemblyFile) {
MCInstPrinter *IP =
- TheTarget->createMCInstPrinter(*TM, OutputAsmVariant, *MAI);
+ TheTarget->createMCInstPrinter(OutputAsmVariant, *MAI);
MCCodeEmitter *CE = 0;
TargetAsmBackend *TAB = 0;
if (ShowEncoding) {
- CE = TheTarget->createCodeEmitter(*TM, Ctx);
+ CE = TheTarget->createCodeEmitter(*MCII, *STI, Ctx);
TAB = TheTarget->createAsmBackend(TripleName);
}
Str.reset(TheTarget->createAsmStreamer(Ctx, FOS, /*asmverbose*/true,
@@ -363,7 +364,7 @@ static int AssembleInput(const char *ProgName) {
Str.reset(createNullStreamer(Ctx));
} else {
assert(FileType == OFT_ObjectFile && "Invalid file type!");
- MCCodeEmitter *CE = TheTarget->createCodeEmitter(*TM, Ctx);
+ MCCodeEmitter *CE = TheTarget->createCodeEmitter(*MCII, *STI, Ctx);
TargetAsmBackend *TAB = TheTarget->createAsmBackend(TripleName);
Str.reset(TheTarget->createObjectStreamer(TripleName, Ctx, *TAB,
FOS, CE, RelaxAll,
@@ -376,7 +377,7 @@ static int AssembleInput(const char *ProgName) {
OwningPtr<MCAsmParser> Parser(createMCAsmParser(*TheTarget, SrcMgr, Ctx,
*Str.get(), *MAI));
- OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(*Parser, *TM));
+ OwningPtr<TargetAsmParser> TAP(TheTarget->createAsmParser(*STI, *Parser));
if (!TAP) {
errs() << ProgName
<< ": error: this target does not support assembly parsing.\n";
@@ -414,28 +415,7 @@ static int DisassembleInput(const char *ProgName, bool Enhanced) {
Res =
Disassembler::disassembleEnhanced(TripleName, *Buffer.take(), Out->os());
} else {
- // Package up features to be passed to target/subtarget
- std::string FeaturesStr;
- if (MCPU.size()) {
- SubtargetFeatures Features;
- Features.setCPU(MCPU);
- FeaturesStr = Features.getString();
- }
-
- // FIXME: We shouldn't need to do this (and link in codegen).
- // When we split this out, we should do it in a way that makes
- // it straightforward to switch subtargets on the fly (.e.g,
- // the .cpu and .code16 directives).
- OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName,
- FeaturesStr));
-
- if (!TM) {
- errs() << ProgName << ": error: could not create target for triple '"
- << TripleName << "'.\n";
- return 1;
- }
-
- Res = Disassembler::disassemble(*TheTarget, *TM, TripleName,
+ Res = Disassembler::disassemble(*TheTarget, TripleName,
*Buffer.take(), Out->os());
}
@@ -456,6 +436,9 @@ int main(int argc, char **argv) {
llvm::InitializeAllTargetInfos();
// FIXME: We shouldn't need to initialize the Target(Machine)s.
llvm::InitializeAllTargets();
+ llvm::InitializeAllMCAsmInfos();
+ llvm::InitializeAllMCInstrInfos();
+ llvm::InitializeAllMCSubtargetInfos();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();
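
With these changes llvm-mc builds its code emitter and asm parser from MC-layer objects instead of a full TargetMachine. A condensed sketch of the new setup (assumes only the createMC* hooks visible in this patch; Ctx and the option strings are set up as in the tool above):

    #include "llvm/ADT/OwningPtr.h"
    #include "llvm/MC/MCCodeEmitter.h"
    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCInstrInfo.h"
    #include "llvm/MC/MCSubtargetInfo.h"
    #include "llvm/Target/TargetRegistry.h"
    #include <string>

    void setUpMC(const llvm::Target &T, const std::string &TripleName,
                 const std::string &MCPU, const std::string &FeaturesStr,
                 llvm::MCContext &Ctx) {
      llvm::OwningPtr<llvm::MCInstrInfo> MCII(T.createMCInstrInfo());
      llvm::OwningPtr<llvm::MCSubtargetInfo> STI(
          T.createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
      // MCII and STI are taken by reference and must outlive the emitter.
      llvm::OwningPtr<llvm::MCCodeEmitter> CE(
          T.createCodeEmitter(*MCII, *STI, Ctx));
      // ... hand CE to an MCStreamer here, as AssembleInput does above.
    }
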
diff --git a/tools/llvm-nm/llvm-nm.cpp b/tools/llvm-nm/llvm-nm.cpp
index 1afa503..014cb29 100644
--- a/tools/llvm-nm/llvm-nm.cpp
+++ b/tools/llvm-nm/llvm-nm.cpp
@@ -143,6 +143,14 @@ namespace {
StringRef CurrentFilename;
typedef std::vector<NMSymbol> SymbolListT;
SymbolListT SymbolList;
+
+ bool error(error_code ec) {
+ if (!ec) return false;
+
+ outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
+ outs().flush();
+ return true;
+ }
}
static void SortAndPrintSymbolList() {
@@ -261,23 +269,29 @@ static void DumpSymbolNamesFromModule(Module *M) {
}
static void DumpSymbolNamesFromObject(ObjectFile *obj) {
+ error_code ec;
for (ObjectFile::symbol_iterator i = obj->begin_symbols(),
- e = obj->end_symbols(); i != e; ++i) {
- if (!DebugSyms && i->isInternal())
+ e = obj->end_symbols();
+ i != e; i.increment(ec)) {
+ if (error(ec)) break;
+ bool internal;
+ if (error(i->isInternal(internal))) break;
+ if (!DebugSyms && internal)
continue;
NMSymbol s;
s.Size = object::UnknownAddressOrSize;
s.Address = object::UnknownAddressOrSize;
- if (PrintSize || SizeSort)
- s.Size = i->getSize();
+ if (PrintSize || SizeSort) {
+ if (error(i->getSize(s.Size))) break;
+ }
if (PrintAddress)
- s.Address = i->getAddress();
- s.TypeChar = i->getNMTypeChar();
- s.Name = i->getName();
+ if (error(i->getAddress(s.Address))) break;
+ if (error(i->getNMTypeChar(s.TypeChar))) break;
+ if (error(i->getName(s.Name))) break;
SymbolList.push_back(s);
}
- CurrentFilename = obj->getFilename();
+ CurrentFilename = obj->getFileName();
SortAndPrintSymbolList();
}
@@ -317,13 +331,13 @@ static void DumpSymbolNamesFromFile(std::string &Filename) {
MultipleFiles = true;
std::for_each (Modules.begin(), Modules.end(), DumpSymbolNamesFromModule);
} else if (aPath.isObjectFile()) {
- std::auto_ptr<ObjectFile> obj(ObjectFile::createObjectFile(aPath.str()));
- if (!obj.get()) {
- errs() << ToolName << ": " << Filename << ": "
- << "Failed to open object file\n";
+ OwningPtr<Binary> obj;
+ if (error_code ec = object::createBinary(aPath.str(), obj)) {
+ errs() << ToolName << ": " << Filename << ": " << ec.message() << ".\n";
return;
}
- DumpSymbolNamesFromObject(obj.get());
+ if (object::ObjectFile *o = dyn_cast<ObjectFile>(obj.get()))
+ DumpSymbolNamesFromObject(o);
} else {
errs() << ToolName << ": " << Filename << ": "
<< "unrecognizable file type\n";
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index a17624a..4079e4a 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -14,14 +14,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Object/ObjectFile.h"
-// This config must be included before llvm-config.h.
-#include "llvm/Config/config.h"
-#include "../../lib/MC/MCDisassembler/EDDisassembler.h"
-#include "../../lib/MC/MCDisassembler/EDInst.h"
-#include "../../lib/MC/MCDisassembler/EDOperand.h"
-#include "../../lib/MC/MCDisassembler/EDToken.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCDisassembler.h"
#include "llvm/MC/MCInst.h"
@@ -38,12 +33,9 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/system_error.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Target/TargetSelect.h"
#include <algorithm>
-#include <cctype>
-#include <cerrno>
#include <cstring>
using namespace llvm;
using namespace object;
@@ -69,6 +61,14 @@ namespace {
"see -version for available targets"));
StringRef ToolName;
+
+ bool error(error_code ec) {
+ if (!ec) return false;
+
+ outs() << ToolName << ": error reading file: " << ec.message() << ".\n";
+ outs().flush();
+ return true;
+ }
}
static const Target *GetTarget(const ObjectFile *Obj = NULL) {
@@ -159,17 +159,46 @@ static void DisassembleInput(const StringRef &Filename) {
outs() << '\n';
outs() << Filename
- << ":\tfile format " << Obj->getFileFormatName() << "\n\n\n";
+ << ":\tfile format " << Obj->getFileFormatName() << "\n\n";
+ error_code ec;
for (ObjectFile::section_iterator i = Obj->begin_sections(),
e = Obj->end_sections();
- i != e; ++i) {
- if (!i->isText())
- continue;
- outs() << "Disassembly of section " << i->getName() << ":\n\n";
+ i != e; i.increment(ec)) {
+ if (error(ec)) break;
+ bool text;
+ if (error(i->isText(text))) break;
+ if (!text) continue;
+
+ // Make a list of all the symbols in this section.
+ std::vector<std::pair<uint64_t, StringRef> > Symbols;
+ for (ObjectFile::symbol_iterator si = Obj->begin_symbols(),
+ se = Obj->end_symbols();
+ si != se; si.increment(ec)) {
+ bool contains;
+ if (!error(i->containsSymbol(*si, contains)) && contains) {
+ uint64_t Address;
+ if (error(si->getAddress(Address))) break;
+ StringRef Name;
+ if (error(si->getName(Name))) break;
+ Symbols.push_back(std::make_pair(Address, Name));
+ }
+ }
+
+ // Sort the symbols by address, just in case they didn't come in that way.
+ array_pod_sort(Symbols.begin(), Symbols.end());
+
+ StringRef name;
+ if (error(i->getName(name))) break;
+ outs() << "Disassembly of section " << name << ':';
+
+      // If the section has no symbols, just insert a dummy one and disassemble
+ // the whole section.
+ if (Symbols.empty())
+ Symbols.push_back(std::make_pair(0, name));
// Set up disassembler.
- OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createAsmInfo(TripleName));
+ OwningPtr<const MCAsmInfo> AsmInfo(TheTarget->createMCAsmInfo(TripleName));
if (!AsmInfo) {
errs() << "error: no assembly info for target " << TripleName << "\n";
@@ -182,49 +211,49 @@ static void DisassembleInput(const StringRef &Filename) {
return;
}
- // FIXME: We shouldn't need to do this (and link in codegen).
- // When we split this out, we should do it in a way that makes
- // it straightforward to switch subtargets on the fly (.e.g,
- // the .cpu and .code16 directives).
- std::string FeaturesStr;
- OwningPtr<TargetMachine> TM(TheTarget->createTargetMachine(TripleName,
- FeaturesStr));
- if (!TM) {
- errs() << "error: could not create target for triple " << TripleName << "\n";
- return;
- }
-
int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
OwningPtr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
- *TM, AsmPrinterVariant, *AsmInfo));
+ AsmPrinterVariant, *AsmInfo));
if (!IP) {
errs() << "error: no instruction printer for target " << TripleName << '\n';
return;
}
- StringRef Bytes = i->getContents();
+ StringRef Bytes;
+ if (error(i->getContents(Bytes))) break;
StringRefMemoryObject memoryObject(Bytes);
uint64_t Size;
uint64_t Index;
-
- for (Index = 0; Index < Bytes.size(); Index += Size) {
- MCInst Inst;
-
-# ifndef NDEBUG
- raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
-# else
- raw_ostream &DebugOut = nulls();
-# endif
-
- if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) {
- outs() << format("%8x:\t", i->getAddress() + Index);
- DumpBytes(StringRef(Bytes.data() + Index, Size));
- IP->printInst(&Inst, outs());
- outs() << "\n";
- } else {
- errs() << ToolName << ": warning: invalid instruction encoding\n";
- if (Size == 0)
- Size = 1; // skip illegible bytes
+ uint64_t SectSize;
+ if (error(i->getSize(SectSize))) break;
+
+ // Disassemble symbol by symbol.
+ for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
+ uint64_t Start = Symbols[si].first;
+ uint64_t End = si == se-1 ? SectSize : Symbols[si + 1].first - 1;
+ outs() << '\n' << Symbols[si].second << ":\n";
+
+ for (Index = Start; Index < End; Index += Size) {
+ MCInst Inst;
+
+#ifndef NDEBUG
+ raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
+#else
+ raw_ostream &DebugOut = nulls();
+#endif
+
+ if (DisAsm->getInstruction(Inst, Size, memoryObject, Index, DebugOut)) {
+ uint64_t addr;
+ if (error(i->getAddress(addr))) break;
+ outs() << format("%8x:\t", addr + Index);
+ DumpBytes(StringRef(Bytes.data() + Index, Size));
+ IP->printInst(&Inst, outs());
+ outs() << "\n";
+ } else {
+ errs() << ToolName << ": warning: invalid instruction encoding\n";
+ if (Size == 0)
+ Size = 1; // skip illegible bytes
+ }
}
}
}
@@ -240,6 +269,7 @@ int main(int argc, char **argv) {
llvm::InitializeAllTargetInfos();
// FIXME: We shouldn't need to initialize the Target(Machine)s.
llvm::InitializeAllTargets();
+ llvm::InitializeAllMCAsmInfos();
llvm::InitializeAllAsmPrinters();
llvm::InitializeAllAsmParsers();
llvm::InitializeAllDisassemblers();
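
The per-symbol loop above derives each symbol's byte range from the sorted (address, name) list: a symbol runs from its own address up to the next symbol's, and the last one runs to the end of the section. The range computation in isolation (this mirrors the committed loop, including the -1 on the upper bound):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include <stdint.h>
    #include <utility>
    #include <vector>

    typedef std::pair<uint64_t, llvm::StringRef> SymEntry;

    // For each symbol, compute [Start, End) to feed the disassembler.
    static void forEachRange(std::vector<SymEntry> &Symbols,
                             uint64_t SectSize) {
      llvm::array_pod_sort(Symbols.begin(), Symbols.end());
      for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
        uint64_t Start = Symbols[si].first;
        uint64_t End = (si == se - 1) ? SectSize : Symbols[si + 1].first - 1;
        (void)Start; (void)End; // disassemble section bytes in [Start, End)
      }
    }
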
diff --git a/tools/llvmc/src/Hooks.cpp b/tools/llvmc/src/Hooks.cpp
index 5aa250e..ddad08a 100644
--- a/tools/llvmc/src/Hooks.cpp
+++ b/tools/llvmc/src/Hooks.cpp
@@ -47,7 +47,6 @@ const unsigned MArchMapSize = NextHighestPowerOf2(MArchNumKeysARM);
const char* MArchMCpuKeysARM[] = { "iwmmxt", "ep9312" };
const char* MArchMCpuValuesARM[] = { "iwmmxt", "ep9312"};
const unsigned MArchMCpuNumKeysARM = NUM_KEYS(MArchMCpuKeysARM);
-const unsigned MArchMCpuMapSize = NextHighestPowerOf2(MArchMCpuNumKeysARM);
void FillInArgMap(ArgMap& Args, const char* Keys[],
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 3abd641..14594cf 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -26,8 +26,8 @@
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/SubtargetFeature.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -73,6 +73,8 @@ LTOCodeGenerator::LTOCodeGenerator()
_nativeObjectFile(NULL)
{
InitializeAllTargets();
+ InitializeAllMCAsmInfos();
+ InitializeAllMCSubtargetInfos();
InitializeAllAsmPrinters();
}
@@ -262,9 +264,9 @@ bool LTOCodeGenerator::determineTarget(std::string& errMsg)
// construct LTModule, hand over ownership of module and target
SubtargetFeatures Features;
- Features.getDefaultSubtargetFeatures(_mCpu, llvm::Triple(Triple));
+ Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
std::string FeatureStr = Features.getString();
- _target = march->createTargetMachine(Triple, FeatureStr);
+ _target = march->createTargetMachine(Triple, _mCpu, FeatureStr);
}
return false;
}
diff --git a/tools/lto/LTOModule.cpp b/tools/lto/LTOModule.cpp
index 8f2b1f4..dc99b94 100644
--- a/tools/lto/LTOModule.cpp
+++ b/tools/lto/LTOModule.cpp
@@ -29,14 +29,15 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/system_error.h"
#include "llvm/Target/Mangler.h"
-#include "llvm/Target/SubtargetFeature.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Target/TargetAsmParser.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegistry.h"
@@ -134,6 +135,8 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
static bool Initialized = false;
if (!Initialized) {
InitializeAllTargets();
+ InitializeAllMCAsmInfos();
+ InitializeAllMCSubtargetInfos();
InitializeAllAsmParsers();
Initialized = true;
}
@@ -157,9 +160,10 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer,
// construct LTOModule, hand over ownership of module and target
SubtargetFeatures Features;
- Features.getDefaultSubtargetFeatures("" /* cpu */, llvm::Triple(Triple));
+ Features.getDefaultSubtargetFeatures(llvm::Triple(Triple));
std::string FeatureStr = Features.getString();
- TargetMachine *target = march->createTargetMachine(Triple, FeatureStr);
+ std::string CPU;
+ TargetMachine *target = march->createTargetMachine(Triple, CPU, FeatureStr);
LTOModule *Ret = new LTOModule(m.take(), target);
bool Err = Ret->ParseSymbols();
if (Err) {
@@ -191,7 +195,7 @@ bool LTOModule::objcClassNameFromExpression(Constant *c, std::string &name) {
Constant *cn = gvn->getInitializer();
if (ConstantArray *ca = dyn_cast<ConstantArray>(cn)) {
if (ca->isCString()) {
- name = ".objc_class_name_" + ca->getAsString();
+ name = ".objc_class_name_" + ca->getAsCString();
return true;
}
}
@@ -208,7 +212,7 @@ void LTOModule::addObjCClass(GlobalVariable *clgv) {
if (objcClassNameFromExpression(c->getOperand(1), superclassName)) {
NameAndAttributes info;
StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(superclassName.c_str());
+ _undefines.GetOrCreateValue(superclassName);
if (!entry.getValue().name) {
const char *symbolName = entry.getKey().data();
info.name = symbolName;
@@ -220,7 +224,7 @@ void LTOModule::addObjCClass(GlobalVariable *clgv) {
std::string className;
if (objcClassNameFromExpression(c->getOperand(2), className)) {
StringSet::value_type &entry =
- _defines.GetOrCreateValue(className.c_str());
+ _defines.GetOrCreateValue(className);
entry.setValue(1);
NameAndAttributes info;
info.name = entry.getKey().data();
@@ -243,7 +247,7 @@ void LTOModule::addObjCCategory(GlobalVariable *clgv) {
NameAndAttributes info;
StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(targetclassName.c_str());
+ _undefines.GetOrCreateValue(targetclassName);
if (entry.getValue().name)
return;
@@ -264,7 +268,7 @@ void LTOModule::addObjCClassRef(GlobalVariable *clgv) {
NameAndAttributes info;
StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(targetclassName.c_str());
+ _undefines.GetOrCreateValue(targetclassName);
if (entry.getValue().name)
return;
@@ -375,7 +379,7 @@ void LTOModule::addDefinedSymbol(GlobalValue *def, Mangler &mangler,
// add to table of symbols
NameAndAttributes info;
- StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer.c_str());
+ StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer);
entry.setValue(1);
StringRef Name = entry.getKey();
@@ -436,7 +440,7 @@ void LTOModule::addPotentialUndefinedSymbol(GlobalValue *decl,
mangler.getNameWithPrefix(name, decl, false);
StringMap<NameAndAttributes>::value_type &entry =
- _undefines.GetOrCreateValue(name.c_str());
+ _undefines.GetOrCreateValue(name);
// we already have the symbol
if (entry.getValue().name)
@@ -595,7 +599,8 @@ namespace {
virtual void EmitFileDirective(StringRef Filename) {}
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
- const MCSymbol *Label) {}
+ const MCSymbol *Label,
+ unsigned PointerSize) {}
virtual void EmitInstruction(const MCInst &Inst) {
// Scan for values.
@@ -617,8 +622,12 @@ bool LTOModule::addAsmGlobalSymbols(MCContext &Context) {
OwningPtr<MCAsmParser> Parser(createMCAsmParser(_target->getTarget(), SrcMgr,
Context, *Streamer,
*_target->getMCAsmInfo()));
+ OwningPtr<MCSubtargetInfo> STI(_target->getTarget().
+ createMCSubtargetInfo(_target->getTargetTriple(),
+ _target->getTargetCPU(),
+ _target->getTargetFeatureString()));
OwningPtr<TargetAsmParser>
- TAP(_target->getTarget().createAsmParser(*Parser.get(), *_target.get()));
+ TAP(_target->getTarget().createAsmParser(*STI, *Parser.get()));
Parser->setTargetParser(*TAP);
int Res = Parser->Run(false);
if (Res)
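
Both LTO files now follow the same split: getDefaultSubtargetFeatures takes only the triple, and the CPU string is threaded into createTargetMachine on its own. Condensed (names as in LTOModule above; an empty CPU lets the backend pick its default):

    #include "llvm/ADT/Triple.h"
    #include "llvm/MC/SubtargetFeature.h"
    #include "llvm/Target/TargetMachine.h"
    #include "llvm/Target/TargetRegistry.h"
    #include <string>

    llvm::TargetMachine *makeTarget(const llvm::Target *march,
                                    const std::string &TripleStr) {
      llvm::SubtargetFeatures Features;
      Features.getDefaultSubtargetFeatures(llvm::Triple(TripleStr));
      std::string FeatureStr = Features.getString();
      std::string CPU; // empty: backend default
      return march->createTargetMachine(TripleStr, CPU, FeatureStr);
    }
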
diff --git a/unittests/ADT/APFloatTest.cpp b/unittests/ADT/APFloatTest.cpp
index 5f05b86..08ac2a0 100644
--- a/unittests/ADT/APFloatTest.cpp
+++ b/unittests/ADT/APFloatTest.cpp
@@ -12,6 +12,7 @@
#include "llvm/Support/raw_ostream.h"
#include "gtest/gtest.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
@@ -344,6 +345,54 @@ TEST(APFloatTest, toString) {
ASSERT_EQ("8.731834E+2", convertToString(873.1834, 0, 0));
}
+TEST(APFloatTest, toInteger) {
+ bool isExact = false;
+ APSInt result(5, /*isUnsigned=*/true);
+
+ EXPECT_EQ(APFloat::opOK,
+ APFloat(APFloat::IEEEdouble, "10")
+ .convertToInteger(result, APFloat::rmTowardZero, &isExact));
+ EXPECT_TRUE(isExact);
+ EXPECT_EQ(APSInt(APInt(5, 10), true), result);
+
+ EXPECT_EQ(APFloat::opInvalidOp,
+ APFloat(APFloat::IEEEdouble, "-10")
+ .convertToInteger(result, APFloat::rmTowardZero, &isExact));
+ EXPECT_FALSE(isExact);
+ EXPECT_EQ(APSInt::getMinValue(5, true), result);
+
+ EXPECT_EQ(APFloat::opInvalidOp,
+ APFloat(APFloat::IEEEdouble, "32")
+ .convertToInteger(result, APFloat::rmTowardZero, &isExact));
+ EXPECT_FALSE(isExact);
+ EXPECT_EQ(APSInt::getMaxValue(5, true), result);
+
+ EXPECT_EQ(APFloat::opInexact,
+ APFloat(APFloat::IEEEdouble, "7.9")
+ .convertToInteger(result, APFloat::rmTowardZero, &isExact));
+ EXPECT_FALSE(isExact);
+ EXPECT_EQ(APSInt(APInt(5, 7), true), result);
+
+ result.setIsUnsigned(false);
+ EXPECT_EQ(APFloat::opOK,
+ APFloat(APFloat::IEEEdouble, "-10")
+ .convertToInteger(result, APFloat::rmTowardZero, &isExact));
+ EXPECT_TRUE(isExact);
+ EXPECT_EQ(APSInt(APInt(5, -10, true), false), result);
+
+ EXPECT_EQ(APFloat::opInvalidOp,
+ APFloat(APFloat::IEEEdouble, "-17")
+ .convertToInteger(result, APFloat::rmTowardZero, &isExact));
+ EXPECT_FALSE(isExact);
+ EXPECT_EQ(APSInt::getMinValue(5, false), result);
+
+ EXPECT_EQ(APFloat::opInvalidOp,
+ APFloat(APFloat::IEEEdouble, "16")
+ .convertToInteger(result, APFloat::rmTowardZero, &isExact));
+ EXPECT_FALSE(isExact);
+ EXPECT_EQ(APSInt::getMaxValue(5, false), result);
+}
+
static APInt nanbits(const fltSemantics &Sem,
bool SNaN, bool Negative, uint64_t fill) {
APInt apfill(64, fill);
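
The new test pins down convertToInteger's edge behavior: in-range values convert exactly (opOK), out-of-range values report opInvalidOp and clamp to the destination type's min or max, and fractional values report opInexact and truncate toward zero. For instance, with a 5-bit unsigned destination (range 0-31):

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APSInt.h"

    using namespace llvm;

    void demoConvert() {
      bool isExact = false;
      APSInt R(5, /*isUnsigned=*/true);
      // 32 does not fit in 5 unsigned bits: opInvalidOp, and R clamps to 31.
      APFloat(APFloat::IEEEdouble, "32")
          .convertToInteger(R, APFloat::rmTowardZero, &isExact);
      // isExact == false here; R == APSInt::getMaxValue(5, true).
    }
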
diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp
index dbd0cb7..1f78cd3 100644
--- a/unittests/ADT/APIntTest.cpp
+++ b/unittests/ADT/APIntTest.cpp
@@ -320,6 +320,52 @@ TEST(APIntTest, StringBitsNeeded16) {
EXPECT_EQ(9U, APInt::getBitsNeeded("-20", 16));
}
+TEST(APIntTest, toString) {
+ SmallString<16> S;
+ bool isSigned;
+
+ APInt(8, 0).toString(S, 2, true, true);
+ EXPECT_EQ(S.str().str(), "0b0");
+ S.clear();
+ APInt(8, 0).toString(S, 8, true, true);
+ EXPECT_EQ(S.str().str(), "00");
+ S.clear();
+ APInt(8, 0).toString(S, 10, true, true);
+ EXPECT_EQ(S.str().str(), "0");
+ S.clear();
+ APInt(8, 0).toString(S, 16, true, true);
+ EXPECT_EQ(S.str().str(), "0x0");
+ S.clear();
+
+ isSigned = false;
+ APInt(8, 255, isSigned).toString(S, 2, isSigned, true);
+ EXPECT_EQ(S.str().str(), "0b11111111");
+ S.clear();
+ APInt(8, 255, isSigned).toString(S, 8, isSigned, true);
+ EXPECT_EQ(S.str().str(), "0377");
+ S.clear();
+ APInt(8, 255, isSigned).toString(S, 10, isSigned, true);
+ EXPECT_EQ(S.str().str(), "255");
+ S.clear();
+ APInt(8, 255, isSigned).toString(S, 16, isSigned, true);
+ EXPECT_EQ(S.str().str(), "0xFF");
+ S.clear();
+
+ isSigned = true;
+ APInt(8, 255, isSigned).toString(S, 2, isSigned, true);
+ EXPECT_EQ(S.str().str(), "-0b1");
+ S.clear();
+ APInt(8, 255, isSigned).toString(S, 8, isSigned, true);
+ EXPECT_EQ(S.str().str(), "-01");
+ S.clear();
+ APInt(8, 255, isSigned).toString(S, 10, isSigned, true);
+ EXPECT_EQ(S.str().str(), "-1");
+ S.clear();
+ APInt(8, 255, isSigned).toString(S, 16, isSigned, true);
+ EXPECT_EQ(S.str().str(), "-0x1");
+ S.clear();
+}
+
TEST(APIntTest, Log2) {
EXPECT_EQ(APInt(15, 7).logBase2(), 2U);
EXPECT_EQ(APInt(15, 7).ceilLogBase2(), 3U);
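
The signed cases in the new toString test follow from two's complement: the 8-bit pattern 0xFF is -1 when interpreted as signed, so signed formatting emits a minus sign plus the magnitude ("-0b1", "-01", "-0x1") rather than 255's digits. Isolated:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/SmallString.h"

    using namespace llvm;

    void demoToString() {
      SmallString<16> S;
      // 0xFF as a signed 8-bit value is -1, so this yields "-0x1".
      APInt(8, 255, /*isSigned=*/true)
          .toString(S, /*Radix=*/16, /*Signed=*/true,
                    /*formatAsCLiteral=*/true);
    }
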
diff --git a/unittests/ADT/PackedVectorTest.cpp b/unittests/ADT/PackedVectorTest.cpp
new file mode 100644
index 0000000..55b5d8d
--- /dev/null
+++ b/unittests/ADT/PackedVectorTest.cpp
@@ -0,0 +1,115 @@
+//===- llvm/unittest/ADT/PackedVectorTest.cpp - PackedVector tests --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// BitVectorTest tests fail on PowerPC for unknown reasons, so disable this
+// as well since it depends on a BitVector.
+#ifndef __ppc__
+
+#include "llvm/ADT/PackedVector.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(PackedVectorTest, Operation) {
+ PackedVector<unsigned, 2> Vec;
+ EXPECT_EQ(0U, Vec.size());
+ EXPECT_TRUE(Vec.empty());
+
+ Vec.resize(5);
+ EXPECT_EQ(5U, Vec.size());
+ EXPECT_FALSE(Vec.empty());
+
+ Vec.resize(11);
+ EXPECT_EQ(11U, Vec.size());
+ EXPECT_FALSE(Vec.empty());
+
+ PackedVector<unsigned, 2> Vec2(3);
+ EXPECT_EQ(3U, Vec2.size());
+ EXPECT_FALSE(Vec2.empty());
+
+ Vec.clear();
+ EXPECT_EQ(0U, Vec.size());
+ EXPECT_TRUE(Vec.empty());
+
+ Vec.push_back(2);
+ Vec.push_back(0);
+ Vec.push_back(1);
+ Vec.push_back(3);
+
+ EXPECT_EQ(2U, Vec[0]);
+ EXPECT_EQ(0U, Vec[1]);
+ EXPECT_EQ(1U, Vec[2]);
+ EXPECT_EQ(3U, Vec[3]);
+
+ EXPECT_FALSE(Vec == Vec2);
+ EXPECT_TRUE(Vec != Vec2);
+
+ Vec2.swap(Vec);
+ EXPECT_EQ(3U, Vec.size());
+ EXPECT_FALSE(Vec.empty());
+ EXPECT_EQ(0U, Vec[0]);
+ EXPECT_EQ(0U, Vec[1]);
+ EXPECT_EQ(0U, Vec[2]);
+
+ EXPECT_EQ(2U, Vec2[0]);
+ EXPECT_EQ(0U, Vec2[1]);
+ EXPECT_EQ(1U, Vec2[2]);
+ EXPECT_EQ(3U, Vec2[3]);
+
+ Vec = Vec2;
+ EXPECT_TRUE(Vec == Vec2);
+ EXPECT_FALSE(Vec != Vec2);
+
+ Vec[1] = 1;
+ Vec2[1] = 2;
+ Vec |= Vec2;
+ EXPECT_EQ(3U, Vec[1]);
+}
+
+#ifdef EXPECT_DEBUG_DEATH
+
+TEST(PackedVectorTest, UnsignedValues) {
+ PackedVector<unsigned, 2> Vec(1);
+ Vec[0] = 0;
+ Vec[0] = 1;
+ Vec[0] = 2;
+ Vec[0] = 3;
+ EXPECT_DEBUG_DEATH(Vec[0] = 4, "value is too big");
+ EXPECT_DEBUG_DEATH(Vec[0] = -1, "value is too big");
+ EXPECT_DEBUG_DEATH(Vec[0] = 0x100, "value is too big");
+
+ PackedVector<unsigned, 3> Vec2(1);
+ Vec2[0] = 0;
+ Vec2[0] = 7;
+  EXPECT_DEBUG_DEATH(Vec2[0] = 8, "value is too big");
+}
+
+TEST(PackedVectorTest, SignedValues) {
+ PackedVector<signed, 2> Vec(1);
+ Vec[0] = -2;
+ Vec[0] = -1;
+ Vec[0] = 0;
+ Vec[0] = 1;
+ EXPECT_DEBUG_DEATH(Vec[0] = -3, "value is too big");
+ EXPECT_DEBUG_DEATH(Vec[0] = 2, "value is too big");
+
+ PackedVector<signed, 3> Vec2(1);
+ Vec2[0] = -4;
+ Vec2[0] = 3;
+  EXPECT_DEBUG_DEATH(Vec2[0] = -5, "value is too big");
+  EXPECT_DEBUG_DEATH(Vec2[0] = 4, "value is too big");
+}
+
+#endif
+
+}
+
+#endif
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index f4da54d..0d3535d 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -35,26 +35,26 @@ public:
Constructable() : value(0) {
++numConstructorCalls;
}
-
+
Constructable(int val) : value(val) {
++numConstructorCalls;
}
-
+
Constructable(const Constructable & src) {
value = src.value;
++numConstructorCalls;
}
-
+
~Constructable() {
++numDestructorCalls;
}
-
+
Constructable & operator=(const Constructable & src) {
value = src.value;
++numAssignmentCalls;
return *this;
}
-
+
int getValue() const {
return abs(value);
}
@@ -64,7 +64,7 @@ public:
numDestructorCalls = 0;
numAssignmentCalls = 0;
}
-
+
static int getNumConstructorCalls() {
return numConstructorCalls;
}
@@ -91,10 +91,10 @@ int Constructable::numAssignmentCalls;
class SmallVectorTest : public testing::Test {
protected:
typedef SmallVector<Constructable, 4> VectorType;
-
+
VectorType theVector;
VectorType otherVector;
-
+
void SetUp() {
Constructable::reset();
}
@@ -111,7 +111,7 @@ protected:
// Assert that theVector contains the specified values, in order.
void assertValuesInOrder(VectorType & v, size_t size, ...) {
EXPECT_EQ(size, v.size());
-
+
va_list ap;
va_start(ap, size);
for (size_t i = 0; i < size; ++i) {
@@ -121,7 +121,7 @@ protected:
va_end(ap);
}
-
+
// Generate a sequence of values to initialize the vector.
void makeSequence(VectorType & v, int start, int end) {
for (int i = start; i <= end; ++i) {
@@ -155,18 +155,24 @@ TEST_F(SmallVectorTest, PushPopTest) {
theVector.push_back(Constructable(2));
assertValuesInOrder(theVector, 2u, 1, 2);
+ // Insert at beginning
+ theVector.insert(theVector.begin(), theVector[1]);
+ assertValuesInOrder(theVector, 3u, 2, 1, 2);
+
// Pop one element
theVector.pop_back();
- assertValuesInOrder(theVector, 1u, 1);
+ assertValuesInOrder(theVector, 2u, 2, 1);
- // Pop another element
+ // Pop remaining elements
+ theVector.pop_back();
theVector.pop_back();
assertEmpty(theVector);
-
+
// Check number of constructor calls. Should be 2 for each list element,
- // one for the argument to push_back, and one for the list element itself.
- EXPECT_EQ(4, Constructable::getNumConstructorCalls());
- EXPECT_EQ(4, Constructable::getNumDestructorCalls());
+ // one for the argument to push_back, one for the argument to insert,
+ // and one for the list element itself.
+ EXPECT_EQ(5, Constructable::getNumConstructorCalls());
+ EXPECT_EQ(5, Constructable::getNumDestructorCalls());
}
// Clear test.
@@ -198,7 +204,7 @@ TEST_F(SmallVectorTest, ResizeGrowTest) {
SCOPED_TRACE("ResizeGrowTest");
theVector.resize(2);
-
+
// The extra constructor/destructor calls come from the temporary object used
// to initialize the contents of the resized array (via copy construction).
EXPECT_EQ(3, Constructable::getNumConstructorCalls());
@@ -226,10 +232,10 @@ TEST_F(SmallVectorTest, OverflowTest) {
for (int i = 0; i < 10; ++i) {
EXPECT_EQ(i+1, theVector[i].getValue());
}
-
+
// Now resize back to fixed size.
theVector.resize(1);
-
+
assertValuesInOrder(theVector, 1u, 1);
}
@@ -364,13 +370,13 @@ TEST_F(SmallVectorTest, ComparisonTest) {
makeSequence(theVector, 1, 3);
makeSequence(otherVector, 1, 3);
-
+
EXPECT_TRUE(theVector == otherVector);
EXPECT_FALSE(theVector != otherVector);
otherVector.clear();
makeSequence(otherVector, 2, 4);
-
+
EXPECT_FALSE(theVector == otherVector);
EXPECT_TRUE(theVector != otherVector);
}
diff --git a/unittests/ADT/StringMapTest.cpp b/unittests/ADT/StringMapTest.cpp
index ea91348..2ae5820 100644
--- a/unittests/ADT/StringMapTest.cpp
+++ b/unittests/ADT/StringMapTest.cpp
@@ -51,7 +51,7 @@ protected:
// Iterator tests
StringMap<uint32_t>::iterator it = testMap.begin();
- EXPECT_STREQ(testKey, it->first());
+ EXPECT_STREQ(testKey, it->first().data());
EXPECT_EQ(testValue, it->second);
++it;
EXPECT_TRUE(it == testMap.end());
@@ -157,7 +157,7 @@ TEST_F(StringMapTest, IterationTest) {
it != testMap.end(); ++it) {
std::stringstream ss;
ss << "key_" << it->second;
- ASSERT_STREQ(ss.str().c_str(), it->first());
+ ASSERT_STREQ(ss.str().c_str(), it->first().data());
visited[it->second] = true;
}
@@ -189,7 +189,7 @@ TEST_F(StringMapTest, StringMapEntryTest) {
StringMap<uint32_t>::value_type* entry =
StringMap<uint32_t>::value_type::Create(
testKeyFirst, testKeyFirst + testKeyLength, 1u);
- EXPECT_STREQ(testKey, entry->first());
+ EXPECT_STREQ(testKey, entry->first().data());
EXPECT_EQ(1u, entry->second);
free(entry);
}
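
These test updates track StringMapEntry::first() now returning a StringRef instead of const char*; EXPECT_STREQ needs a C string, hence the .data() calls. Equivalently, the keys can be handled as StringRefs directly:

    #include "llvm/ADT/StringMap.h"
    #include "llvm/ADT/StringRef.h"

    using namespace llvm;

    void demoFirst() {
      StringMap<int> M;
      M.GetOrCreateValue("key").setValue(1);
      StringRef K = M.begin()->first(); // was: const char *K = ...->first();
      (void)K;
    }
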
diff --git a/unittests/Analysis/ScalarEvolutionTest.cpp b/unittests/Analysis/ScalarEvolutionTest.cpp
index b734160..39ced2a 100644
--- a/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -23,7 +23,7 @@ TEST(ScalarEvolutionsTest, SCEVUnknownRAUW) {
Module M("world", Context);
const FunctionType *FTy = FunctionType::get(Type::getVoidTy(Context),
- std::vector<const Type *>(), false);
+ std::vector<Type *>(), false);
Function *F = cast<Function>(M.getOrInsertFunction("f", FTy));
BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
ReturnInst::Create(Context, 0, BB);
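
The same one-character substitution — std::vector<const Type*> becoming std::vector<Type*> — repeats in the JIT, TypeBuilder, and PassManager tests below: with the type-system rewrite, FunctionType::get and friends take non-const type lists. The pattern in isolation:

    #include "llvm/DerivedTypes.h"
    #include "llvm/LLVMContext.h"
    #include <vector>

    using namespace llvm;

    FunctionType *voidFnTy(LLVMContext &Context) {
      std::vector<Type*> Params; // was: std::vector<const Type*>
      return FunctionType::get(Type::getVoidTy(Context), Params, false);
    }
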
diff --git a/unittests/CMakeLists.txt b/unittests/CMakeLists.txt
index fcf6109..81d7029 100644
--- a/unittests/CMakeLists.txt
+++ b/unittests/CMakeLists.txt
@@ -64,6 +64,7 @@ add_llvm_unittest(ADT
ADT/ImmutableSetTest.cpp
ADT/IntEqClassesTest.cpp
ADT/IntervalMapTest.cpp
+ ADT/PackedVectorTest.cpp
ADT/SmallBitVectorTest.cpp
ADT/SmallStringTest.cpp
ADT/SmallVectorTest.cpp
@@ -105,7 +106,6 @@ add_llvm_unittest(Transforms/Utils
set(VMCoreSources
VMCore/ConstantsTest.cpp
- VMCore/DerivedTypesTest.cpp
VMCore/InstructionsTest.cpp
VMCore/MetadataTest.cpp
VMCore/PassManagerTest.cpp
diff --git a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
index ff5af3b..039b5e0 100644
--- a/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITMemoryManagerTest.cpp
@@ -14,13 +14,14 @@
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/LLVMContext.h"
+#include "llvm/ADT/ArrayRef.h"
using namespace llvm;
namespace {
Function *makeFakeFunction() {
- std::vector<const Type*> params;
+ std::vector<Type*> params;
const FunctionType *FTy =
FunctionType::get(Type::getVoidTy(getGlobalContext()), params, false);
return Function::Create(FTy, GlobalValue::ExternalLinkage);
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index ceacbbe..9c001c4 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -37,7 +37,7 @@ using namespace llvm;
namespace {
Function *makeReturnGlobal(std::string Name, GlobalVariable *G, Module *M) {
- std::vector<const Type*> params;
+ std::vector<Type*> params;
const FunctionType *FTy = FunctionType::get(G->getType()->getElementType(),
params, false);
Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage, Name, M);
@@ -322,7 +322,7 @@ TEST_F(JITTest, NonLazyCompilationStillNeedsStubs) {
const FunctionType *Func1Ty =
cast<FunctionType>(TypeBuilder<void(void), false>::get(Context));
- std::vector<const Type*> arg_types;
+ std::vector<Type*> arg_types;
arg_types.push_back(Type::getInt1Ty(Context));
const FunctionType *FuncTy = FunctionType::get(
Type::getVoidTy(Context), arg_types, false);
diff --git a/unittests/Support/ConstantRangeTest.cpp b/unittests/Support/ConstantRangeTest.cpp
index 161e2cf..742bcb4 100644
--- a/unittests/Support/ConstantRangeTest.cpp
+++ b/unittests/Support/ConstantRangeTest.cpp
@@ -299,6 +299,8 @@ TEST_F(ConstantRangeTest, Sub) {
EXPECT_EQ(Empty.sub(APInt(16, 4)), Empty);
EXPECT_EQ(Some.sub(APInt(16, 4)),
ConstantRange(APInt(16, 0x6), APInt(16, 0xaa6)));
+ EXPECT_EQ(Some.sub(Some),
+ ConstantRange(APInt(16, 0xf561), APInt(16, 0xaa0)));
EXPECT_EQ(Wrap.sub(APInt(16, 4)),
ConstantRange(APInt(16, 0xaa6), APInt(16, 0x6)));
EXPECT_EQ(One.sub(APInt(16, 4)),
diff --git a/unittests/Support/TypeBuilderTest.cpp b/unittests/Support/TypeBuilderTest.cpp
index e805827..0609178 100644
--- a/unittests/Support/TypeBuilderTest.cpp
+++ b/unittests/Support/TypeBuilderTest.cpp
@@ -9,6 +9,7 @@
#include "llvm/Support/TypeBuilder.h"
#include "llvm/LLVMContext.h"
+#include "llvm/ADT/ArrayRef.h"
#include "gtest/gtest.h"
@@ -119,7 +120,7 @@ TEST(TypeBuilderTest, Derived) {
}
TEST(TypeBuilderTest, Functions) {
- std::vector<const Type*> params;
+ std::vector<Type*> params;
EXPECT_EQ(FunctionType::get(Type::getVoidTy(getGlobalContext()), params, false),
(TypeBuilder<void(), true>::get(getGlobalContext())));
EXPECT_EQ(FunctionType::get(Type::getInt8Ty(getGlobalContext()), params, true),
@@ -186,7 +187,7 @@ public:
static const StructType *get(LLVMContext &Context) {
// Using the static result variable ensures that the type is
// only looked up once.
- std::vector<const Type*> st;
+ std::vector<Type*> st;
st.push_back(TypeBuilder<int, cross>::get(Context));
st.push_back(TypeBuilder<int*, cross>::get(Context));
st.push_back(TypeBuilder<void*[], cross>::get(Context));
@@ -209,7 +210,7 @@ public:
static const StructType *get(LLVMContext &Context) {
// Using the static result variable ensures that the type is
// only looked up once.
- std::vector<const Type*> st;
+ std::vector<Type*> st;
st.push_back(TypeBuilder<types::i<32>, cross>::get(Context));
st.push_back(TypeBuilder<types::i<32>*, cross>::get(Context));
st.push_back(TypeBuilder<types::i<8>*[], cross>::get(Context));
@@ -230,19 +231,19 @@ public:
namespace {
TEST(TypeBuilderTest, Extensions) {
- EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(),
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(
TypeBuilder<int, false>::get(getGlobalContext()),
TypeBuilder<int*, false>::get(getGlobalContext()),
TypeBuilder<void*[], false>::get(getGlobalContext()),
NULL)),
(TypeBuilder<MyType*, false>::get(getGlobalContext())));
- EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(),
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(
TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
NULL)),
(TypeBuilder<MyPortableType*, false>::get(getGlobalContext())));
- EXPECT_EQ(PointerType::getUnqual(StructType::get(getGlobalContext(),
+ EXPECT_EQ(PointerType::getUnqual(StructType::get(
TypeBuilder<types::i<32>, false>::get(getGlobalContext()),
TypeBuilder<types::i<32>*, false>::get(getGlobalContext()),
TypeBuilder<types::i<8>*[], false>::get(getGlobalContext()),
diff --git a/unittests/VMCore/DerivedTypesTest.cpp b/unittests/VMCore/DerivedTypesTest.cpp
deleted file mode 100644
index 9562157..0000000
--- a/unittests/VMCore/DerivedTypesTest.cpp
+++ /dev/null
@@ -1,88 +0,0 @@
-//===- llvm/unittest/VMCore/DerivedTypesTest.cpp - Types unit tests -------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "gtest/gtest.h"
-#include "../lib/VMCore/LLVMContextImpl.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
-#include "llvm/Constants.h"
-#include "llvm/Support/ValueHandle.h"
-using namespace llvm;
-
-namespace {
-
-static void PR7658() {
- LLVMContext ctx;
-
- WeakVH NullPtr;
- PATypeHolder h1;
- {
- OpaqueType *o1 = OpaqueType::get(ctx);
- PointerType *p1 = PointerType::get(o1, 0);
-
- std::vector<const Type *> t1;
- t1.push_back(IntegerType::get(ctx, 32));
- t1.push_back(p1);
- NullPtr = ConstantPointerNull::get(p1);
- OpaqueType *o2 = OpaqueType::get (ctx);
- PointerType *p2 = PointerType::get (o2, 0);
- t1.push_back(p2);
-
-
- StructType *s1 = StructType::get(ctx, t1);
- h1 = s1;
- o1->refineAbstractTypeTo(s1);
- o2->refineAbstractTypeTo(h1.get()); // h1 = { i32, \2*, \2* }
- }
-
-
- OpaqueType *o3 = OpaqueType::get(ctx);
- PointerType *p3 = PointerType::get(o3, 0); // p3 = opaque*
-
- std::vector<const Type *> t2;
- t2.push_back(IntegerType::get(ctx, 32));
- t2.push_back(p3);
-
- std::vector<Constant *> v2;
- v2.push_back(ConstantInt::get(IntegerType::get(ctx, 32), 14));
- v2.push_back(ConstantPointerNull::get(p3));
-
- OpaqueType *o4 = OpaqueType::get(ctx);
- {
- PointerType *p4 = PointerType::get(o4, 0);
- t2.push_back(p4);
- v2.push_back(ConstantPointerNull::get(p4));
- }
-
- WeakVH CS = ConstantStruct::get(ctx, v2, false); // { i32 14, opaque* null, opaque* null}
-
- StructType *s2 = StructType::get(ctx, t2);
- PATypeHolder h2(s2);
- o3->refineAbstractTypeTo(s2);
- o4->refineAbstractTypeTo(h2.get());
-}
-
-
-TEST(OpaqueTypeTest, RegisterWithContext) {
- LLVMContext C;
- LLVMContextImpl *pImpl = C.pImpl;
-
- // 1 refers to the AlwaysOpaqueTy allocated in the Context's constructor and
- // destroyed in the destructor.
- EXPECT_EQ(1u, pImpl->OpaqueTypes.size());
- {
- PATypeHolder Type = OpaqueType::get(C);
- EXPECT_EQ(2u, pImpl->OpaqueTypes.size());
- }
- EXPECT_EQ(1u, pImpl->OpaqueTypes.size());
-
- PR7658();
-}
-
-} // namespace
diff --git a/unittests/VMCore/PassManagerTest.cpp b/unittests/VMCore/PassManagerTest.cpp
index 2f2a200..af845b0 100644
--- a/unittests/VMCore/PassManagerTest.cpp
+++ b/unittests/VMCore/PassManagerTest.cpp
@@ -405,13 +405,13 @@ namespace llvm {
mod->setTargetTriple("x86_64-unknown-linux-gnu");
// Type Definitions
- std::vector<const Type*>FuncTy_0_args;
+ std::vector<Type*>FuncTy_0_args;
FunctionType* FuncTy_0 = FunctionType::get(
/*Result=*/IntegerType::get(getGlobalContext(), 32),
/*Params=*/FuncTy_0_args,
/*isVarArg=*/false);
- std::vector<const Type*>FuncTy_2_args;
+ std::vector<Type*>FuncTy_2_args;
FuncTy_2_args.push_back(IntegerType::get(getGlobalContext(), 1));
FunctionType* FuncTy_2 = FunctionType::get(
/*Result=*/Type::getVoidTy(getGlobalContext()),
diff --git a/utils/TableGen/ARMDecoderEmitter.cpp b/utils/TableGen/ARMDecoderEmitter.cpp
index 62bd1c6..8a5dc8b 100644
--- a/utils/TableGen/ARMDecoderEmitter.cpp
+++ b/utils/TableGen/ARMDecoderEmitter.cpp
@@ -421,6 +421,9 @@ public:
protected:
// Populates the insn given the uid.
void insnWithID(insn_t &Insn, unsigned Opcode) const {
+ if (AllInstructions[Opcode]->isPseudo)
+ return;
+
BitsInit &Bits = getBitsField(*AllInstructions[Opcode]->TheDef, "Inst");
for (unsigned i = 0; i < BIT_WIDTH; ++i)
@@ -1592,10 +1595,6 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
// The following special cases are for conflict resolutions.
//
- // RSCSri and RSCSrs set the 's' bit, but are not predicated. We are
- // better off using the generic RSCri and RSCrs instructions.
- if (Name == "RSCSri" || Name == "RSCSrs") return false;
-
// A8-598: VEXT
// Vector Extract extracts elements from the bottom end of the second
// operand vector and the top end of the first, concatenates them and
@@ -1628,10 +1627,6 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
if (Name == "tBX_RET" || Name == "tBX_RET_vararg")
return false;
- // Ignore the TPsoft (TLS) instructions, which conflict with tBLr9.
- if (Name == "tTPsoft" || Name == "t2TPsoft")
- return false;
-
// Ignore tADR, prefer tADDrPCi.
if (Name == "tADR")
return false;
@@ -1644,12 +1639,8 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
// Ignore tADDrSP, tADDspr, and tPICADD, prefer the generic tADDhirr.
// Ignore t2SUBrSPs, prefer the t2SUB[S]r[r|s].
// Ignore t2ADDrSPs, prefer the t2ADD[S]r[r|s].
- // Ignore t2ADDrSPi/t2SUBrSPi, which have more generic couterparts.
- // Ignore t2ADDrSPi12/t2SUBrSPi12, which have more generic couterparts
if (Name == "tADDrSP" || Name == "tADDspr" || Name == "tPICADD" ||
- Name == "t2SUBrSPs" || Name == "t2ADDrSPs" ||
- Name == "t2ADDrSPi" || Name == "t2SUBrSPi" ||
- Name == "t2ADDrSPi12" || Name == "t2SUBrSPi12")
+ Name == "t2SUBrSPs" || Name == "t2ADDrSPs")
return false;
// FIXME: Use ldr.n to work around a Darwin assembler bug.
@@ -1664,17 +1655,15 @@ ARMDEBackend::populateInstruction(const CodeGenInstruction &CGI,
// Resolve conflicts:
//
// tBfar conflicts with tBLr9
- // tPOP_RET/t2LDMIA_RET conflict with tPOP/t2LDM (ditto)
+ // t2LDMIA_RET conflict with t2LDM (ditto)
// tMOVCCi conflicts with tMOVi8
// tMOVCCr conflicts with tMOVgpr2gpr
- // tSpill conflicts with tSTRspi
// tLDRcp conflicts with tLDRspi
- // tRestore conflicts with tLDRspi
// t2MOVCCi16 conflicts with tMOVi16
if (Name == "tBfar" ||
- Name == "tPOP_RET" || Name == "t2LDMIA_RET" ||
+ Name == "t2LDMIA_RET" ||
Name == "tMOVCCi" || Name == "tMOVCCr" ||
- Name == "tSpill" || Name == "tLDRcp" || Name == "tRestore" ||
+ Name == "tLDRcp" ||
Name == "t2MOVCCi16")
return false;
}
diff --git a/utils/TableGen/AsmMatcherEmitter.cpp b/utils/TableGen/AsmMatcherEmitter.cpp
index bbe4349..a6a4fec 100644
--- a/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/utils/TableGen/AsmMatcherEmitter.cpp
@@ -98,6 +98,7 @@
#include "AsmMatcherEmitter.h"
#include "CodeGenTarget.h"
+#include "Error.h"
#include "Record.h"
#include "StringMatcher.h"
#include "llvm/ADT/OwningPtr.h"
@@ -870,6 +871,31 @@ AsmMatcherInfo::getOperandClass(const CGIOperandList::OperandInfo &OI,
if (SubOpIdx != -1)
Rec = dynamic_cast<DefInit*>(OI.MIOperandInfo->getArg(SubOpIdx))->getDef();
+ if (Rec->isSubClassOf("RegisterOperand")) {
+ // RegisterOperand may have an associated ParserMatchClass. If it does,
+ // use it, else just fall back to the underlying register class.
+ const RecordVal *R = Rec->getValue("ParserMatchClass");
+ if (R == 0 || R->getValue() == 0)
+ throw "Record `" + Rec->getName() +
+ "' does not have a ParserMatchClass!\n";
+
+ if (DefInit *DI= dynamic_cast<DefInit*>(R->getValue())) {
+ Record *MatchClass = DI->getDef();
+ if (ClassInfo *CI = AsmOperandClasses[MatchClass])
+ return CI;
+ }
+
+ // No custom match class. Just use the register class.
+ Record *ClassRec = Rec->getValueAsDef("RegClass");
+ if (!ClassRec)
+ throw TGError(Rec->getLoc(), "RegisterOperand `" + Rec->getName() +
+ "' has no associated register class!\n");
+ if (ClassInfo *CI = RegisterClassClasses[ClassRec])
+ return CI;
+ throw TGError(Rec->getLoc(), "register class has no class info!");
+ }
+
+
if (Rec->isSubClassOf("RegisterClass")) {
if (ClassInfo *CI = RegisterClassClasses[Rec])
return CI;
@@ -886,7 +912,8 @@ AsmMatcherInfo::getOperandClass(const CGIOperandList::OperandInfo &OI,
void AsmMatcherInfo::
BuildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters) {
- const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+ const std::vector<CodeGenRegister*> &Registers =
+ Target.getRegBank().getRegisters();
const std::vector<CodeGenRegisterClass> &RegClassList =
Target.getRegisterClasses();
@@ -896,8 +923,8 @@ BuildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters) {
// Gather the defined sets.
for (std::vector<CodeGenRegisterClass>::const_iterator it =
RegClassList.begin(), ie = RegClassList.end(); it != ie; ++it)
- RegisterSets.insert(std::set<Record*>(it->Elements.begin(),
- it->Elements.end()));
+ RegisterSets.insert(std::set<Record*>(it->getOrder().begin(),
+ it->getOrder().end()));
// Add any required singleton sets.
for (SmallPtrSet<Record*, 16>::iterator it = SingletonRegisters.begin(),
@@ -910,9 +937,9 @@ BuildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters) {
// a unique register set class), and build the mapping of registers to the set
// they should classify to.
std::map<Record*, std::set<Record*> > RegisterMap;
- for (std::vector<CodeGenRegister>::const_iterator it = Registers.begin(),
+ for (std::vector<CodeGenRegister*>::const_iterator it = Registers.begin(),
ie = Registers.end(); it != ie; ++it) {
- const CodeGenRegister &CGR = *it;
+ const CodeGenRegister &CGR = **it;
// Compute the intersection of all sets containing this register.
std::set<Record*> ContainingSet;
@@ -971,8 +998,8 @@ BuildRegisterClasses(SmallPtrSet<Record*, 16> &SingletonRegisters) {
// Name the register classes which correspond to a user defined RegisterClass.
for (std::vector<CodeGenRegisterClass>::const_iterator
it = RegClassList.begin(), ie = RegClassList.end(); it != ie; ++it) {
- ClassInfo *CI = RegisterSetClasses[std::set<Record*>(it->Elements.begin(),
- it->Elements.end())];
+ ClassInfo *CI = RegisterSetClasses[std::set<Record*>(it->getOrder().begin(),
+ it->getOrder().end())];
if (CI->ValueName.empty()) {
CI->ClassName = it->getName();
CI->Name = "MCK_" + it->getName();
@@ -1126,7 +1153,7 @@ void AsmMatcherInfo::BuildInfo() {
assert(FeatureNo < 32 && "Too many subtarget features!");
}
- StringRef CommentDelimiter = AsmParser->getValueAsString("CommentDelimiter");
+ std::string CommentDelimiter = AsmParser->getValueAsString("CommentDelimiter");
// Parse the instructions; we need to do this first so that we can gather the
// singleton register classes.
@@ -1629,6 +1656,10 @@ static void EmitValidateOperandClass(AsmMatcherInfo &Info,
OS << " " << Info.Target.getName() << "Operand &Operand = *("
<< Info.Target.getName() << "Operand*)GOp;\n";
+  // The InvalidMatchClass must not match any operand.
+ OS << " if (Kind == InvalidMatchClass)\n";
+ OS << " return false;\n\n";
+
// Check for Token operands first.
OS << " if (Operand.isToken())\n";
OS << " return MatchTokenString(Operand.getToken()) == Kind;\n\n";
@@ -1745,14 +1776,16 @@ static void EmitMatchRegisterName(CodeGenTarget &Target, Record *AsmParser,
raw_ostream &OS) {
// Construct the match list.
std::vector<StringMatcher::StringPair> Matches;
- for (unsigned i = 0, e = Target.getRegisters().size(); i != e; ++i) {
- const CodeGenRegister &Reg = Target.getRegisters()[i];
- if (Reg.TheDef->getValueAsString("AsmName").empty())
+ const std::vector<CodeGenRegister*> &Regs =
+ Target.getRegBank().getRegisters();
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ const CodeGenRegister *Reg = Regs[i];
+ if (Reg->TheDef->getValueAsString("AsmName").empty())
continue;
Matches.push_back(StringMatcher::StringPair(
- Reg.TheDef->getValueAsString("AsmName"),
- "return " + utostr(i + 1) + ";"));
+ Reg->TheDef->getValueAsString("AsmName"),
+ "return " + utostr(Reg->EnumValue) + ";"));
}
OS << "static unsigned MatchRegisterName(StringRef Name) {\n";
@@ -1788,15 +1821,44 @@ static void EmitComputeAvailableFeatures(AsmMatcherInfo &Info,
Info.AsmParser->getValueAsString("AsmParserClassName");
OS << "unsigned " << Info.Target.getName() << ClassName << "::\n"
- << "ComputeAvailableFeatures(const " << Info.Target.getName()
- << "Subtarget *Subtarget) const {\n";
+ << "ComputeAvailableFeatures(uint64_t FB) const {\n";
OS << " unsigned Features = 0;\n";
for (std::map<Record*, SubtargetFeatureInfo*>::const_iterator
it = Info.SubtargetFeatures.begin(),
ie = Info.SubtargetFeatures.end(); it != ie; ++it) {
SubtargetFeatureInfo &SFI = *it->second;
- OS << " if (" << SFI.TheDef->getValueAsString("CondString")
- << ")\n";
+
+ OS << " if (";
+ std::string CondStorage = SFI.TheDef->getValueAsString("AssemblerCondString");
+ StringRef Conds = CondStorage;
+ std::pair<StringRef,StringRef> Comma = Conds.split(',');
+ bool First = true;
+ do {
+ if (!First)
+ OS << " && ";
+
+ bool Neg = false;
+ StringRef Cond = Comma.first;
+ if (Cond[0] == '!') {
+ Neg = true;
+ Cond = Cond.substr(1);
+ }
+
+ OS << "((FB & " << Info.Target.getName() << "::" << Cond << ")";
+ if (Neg)
+ OS << " == 0";
+ else
+ OS << " != 0";
+ OS << ")";
+
+ if (Comma.second.empty())
+ break;
+
+ First = false;
+ Comma = Comma.second.split(',');
+ } while (true);
+
+ OS << ")\n";
OS << " Features |= " << SFI.getEnumName() << ";\n";
}
OS << " return Features;\n";
@@ -2111,8 +2173,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << "#undef GET_ASSEMBLER_HEADER\n";
OS << " // This should be included into the middle of the declaration of\n";
OS << " // your subclasses implementation of TargetAsmParser.\n";
- OS << " unsigned ComputeAvailableFeatures(const " <<
- Target.getName() << "Subtarget *Subtarget) const;\n";
+ OS << " unsigned ComputeAvailableFeatures(uint64_t FeatureBits) const;\n";
OS << " enum MatchResultTy {\n";
OS << " Match_ConversionFail,\n";
OS << " Match_InvalidOperand,\n";
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index e6deb69..f44f050 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -14,9 +14,11 @@
#include "AsmWriterEmitter.h"
#include "AsmWriterInst.h"
+#include "Error.h"
#include "CodeGenTarget.h"
#include "Record.h"
#include "StringToOffsetTable.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
@@ -457,46 +459,107 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
O << "}\n";
}
+static void
+emitRegisterNameString(raw_ostream &O, StringRef AltName,
+ const std::vector<CodeGenRegister*> &Registers) {
+ StringToOffsetTable StringTable;
+ O << " static const unsigned RegAsmOffset" << AltName << "[] = {\n ";
+ for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+ const CodeGenRegister &Reg = *Registers[i];
+
+ std::string AsmName;
+ // "NoRegAltName" is special. We don't need to do a lookup for that,
+ // as it's just a reference to the default register name.
+ if (AltName == "" || AltName == "NoRegAltName") {
+ AsmName = Reg.TheDef->getValueAsString("AsmName");
+ if (AsmName.empty())
+ AsmName = Reg.getName();
+ } else {
+ // Make sure the register has an alternate name for this index.
+ std::vector<Record*> AltNameList =
+ Reg.TheDef->getValueAsListOfDefs("RegAltNameIndices");
+ unsigned Idx = 0, e;
+ for (e = AltNameList.size();
+ Idx < e && (AltNameList[Idx]->getName() != AltName);
+ ++Idx)
+ ;
+ // If the register has an alternate name for this index, use it.
+ // Otherwise, leave it empty as an error flag.
+ if (Idx < e) {
+ std::vector<std::string> AltNames =
+ Reg.TheDef->getValueAsListOfStrings("AltNames");
+ if (AltNames.size() <= Idx)
+ throw TGError(Reg.TheDef->getLoc(),
+ (Twine("Register definition missing alt name for '") +
+ AltName + "'.").str());
+ AsmName = AltNames[Idx];
+ }
+ }
+
+ O << StringTable.GetOrAddStringOffset(AsmName);
+ if (((i + 1) % 14) == 0)
+ O << ",\n ";
+ else
+ O << ", ";
+
+ }
+ O << "0\n"
+ << " };\n"
+ << "\n";
+
+ O << " const char *AsmStrs" << AltName << " =\n";
+ StringTable.EmitString(O);
+ O << ";\n";
+}
void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
CodeGenTarget Target(Records);
Record *AsmWriter = Target.getAsmWriter();
std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
- const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+ const std::vector<CodeGenRegister*> &Registers =
+ Target.getRegBank().getRegisters();
+ std::vector<Record*> AltNameIndices = Target.getRegAltNameIndices();
+ bool hasAltNames = AltNameIndices.size() > 1;
- StringToOffsetTable StringTable;
O <<
"\n\n/// getRegisterName - This method is automatically generated by tblgen\n"
"/// from the register set description. This returns the assembler name\n"
"/// for the specified register.\n"
- "const char *" << Target.getName() << ClassName
- << "::getRegisterName(unsigned RegNo) {\n"
- << " assert(RegNo && RegNo < " << (Registers.size()+1)
- << " && \"Invalid register number!\");\n"
- << "\n"
- << " static const unsigned RegAsmOffset[] = {";
- for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
- const CodeGenRegister &Reg = Registers[i];
-
- std::string AsmName = Reg.TheDef->getValueAsString("AsmName");
- if (AsmName.empty())
- AsmName = Reg.getName();
-
-
- if ((i % 14) == 0)
- O << "\n ";
-
- O << StringTable.GetOrAddStringOffset(AsmName) << ", ";
- }
- O << "0\n"
- << " };\n"
+ "const char *" << Target.getName() << ClassName << "::";
+ if (hasAltNames)
+ O << "\ngetRegisterName(unsigned RegNo, unsigned AltIdx) {\n";
+ else
+ O << "getRegisterName(unsigned RegNo) {\n";
+ O << " assert(RegNo && RegNo < " << (Registers.size()+1)
+ << " && \"Invalid register number!\");\n"
<< "\n";
- O << " const char *AsmStrs =\n";
- StringTable.EmitString(O);
- O << ";\n";
+ if (hasAltNames) {
+ for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i)
+ emitRegisterNameString(O, AltNameIndices[i]->getName(), Registers);
+ } else
+ emitRegisterNameString(O, "", Registers);
+
+ if (hasAltNames) {
+ O << " const unsigned *RegAsmOffset;\n"
+ << " const char *AsmStrs;\n"
+ << " switch(AltIdx) {\n"
+ << " default: assert(0 && \"Invalid register alt name index!\");\n";
+ for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i) {
+ StringRef Namespace = AltNameIndices[i]->getValueAsString("Namespace");
+ StringRef AltName(AltNameIndices[i]->getName());
+ O << " case " << Namespace << "::" << AltName
+ << ":\n"
+ << " AsmStrs = AsmStrs" << AltName << ";\n"
+ << " RegAsmOffset = RegAsmOffset" << AltName << ";\n"
+ << " break;\n";
+ }
+ O << "}\n";
+ }
- O << " return AsmStrs+RegAsmOffset[RegNo-1];\n"
+ O << " assert (*(AsmStrs+RegAsmOffset[RegNo-1]) &&\n"
+ << " \"Invalid alt name index for register!\");\n"
+ << " return AsmStrs+RegAsmOffset[RegNo-1];\n"
<< "}\n";
}
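With alternate names, the emitter produces one offset/string table pair per RegAltNameIndex and selects between them at runtime. For a hypothetical target Foo with indices NoRegAltName and RegNames32, the generated function looks roughly like this (table contents elided):

const char *FooInstPrinter::
getRegisterName(unsigned RegNo, unsigned AltIdx) {
  assert(RegNo && RegNo < 17 && "Invalid register number!");

  static const unsigned RegAsmOffsetNoRegAltName[] = { /* one offset per reg */ };
  const char *AsmStrsNoRegAltName = "r0\0r1\0"; // NUL-separated name blob
  // ... a second RegAsmOffset/AsmStrs pair for RegNames32 ...

  const unsigned *RegAsmOffset;
  const char *AsmStrs;
  switch (AltIdx) {
  default: assert(0 && "Invalid register alt name index!");
  case Foo::NoRegAltName:
    AsmStrs = AsmStrsNoRegAltName;
    RegAsmOffset = RegAsmOffsetNoRegAltName;
    break;
  case Foo::RegNames32:
    AsmStrs = AsmStrsRegNames32;
    RegAsmOffset = RegAsmOffsetRegNames32;
    break;
  }
  assert(*(AsmStrs+RegAsmOffset[RegNo-1]) &&
         "Invalid alt name index for register!");
  return AsmStrs+RegAsmOffset[RegNo-1];
}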
@@ -543,92 +606,29 @@ void AsmWriterEmitter::EmitGetInstructionName(raw_ostream &O) {
}
namespace {
-
-/// SubtargetFeatureInfo - Helper class for storing information on a subtarget
-/// feature which participates in instruction matching.
-struct SubtargetFeatureInfo {
- /// \brief The predicate record for this feature.
- const Record *TheDef;
-
- /// \brief An unique index assigned to represent this feature.
- unsigned Index;
-
- SubtargetFeatureInfo(const Record *D, unsigned Idx) : TheDef(D), Index(Idx) {}
-
- /// \brief The name of the enumerated constant identifying this feature.
- std::string getEnumName() const {
- return "Feature_" + TheDef->getName();
- }
-};
-
-struct AsmWriterInfo {
- /// Map of Predicate records to their subtarget information.
- std::map<const Record*, SubtargetFeatureInfo*> SubtargetFeatures;
-
- /// getSubtargetFeature - Lookup or create the subtarget feature info for the
- /// given operand.
- SubtargetFeatureInfo *getSubtargetFeature(const Record *Def) const {
- assert(Def->isSubClassOf("Predicate") && "Invalid predicate type!");
- std::map<const Record*, SubtargetFeatureInfo*>::const_iterator I =
- SubtargetFeatures.find(Def);
- return I == SubtargetFeatures.end() ? 0 : I->second;
- }
-
- void addReqFeatures(const std::vector<Record*> &Features) {
- for (std::vector<Record*>::const_iterator
- I = Features.begin(), E = Features.end(); I != E; ++I) {
- const Record *Pred = *I;
-
- // Ignore predicates that are not intended for the assembler.
- if (!Pred->getValueAsBit("AssemblerMatcherPredicate"))
- continue;
-
- if (Pred->getName().empty())
- throw TGError(Pred->getLoc(), "Predicate has no name!");
-
- // Don't add the predicate again.
- if (getSubtargetFeature(Pred))
- continue;
-
- unsigned FeatureNo = SubtargetFeatures.size();
- SubtargetFeatures[Pred] = new SubtargetFeatureInfo(Pred, FeatureNo);
- assert(FeatureNo < 32 && "Too many subtarget features!");
- }
- }
-
- const SubtargetFeatureInfo *getFeatureInfo(const Record *R) {
- return SubtargetFeatures[R];
- }
-};
-
// IAPrinter - Holds information about an InstAlias. Two InstAliases match if
// they both have the same conditionals, in which case we cannot print out the
// alias for that pattern.
class IAPrinter {
- AsmWriterInfo &AWI;
std::vector<std::string> Conds;
std::map<StringRef, unsigned> OpMap;
std::string Result;
std::string AsmString;
std::vector<Record*> ReqFeatures;
public:
- IAPrinter(AsmWriterInfo &Info, std::string R, std::string AS)
- : AWI(Info), Result(R), AsmString(AS) {}
+ IAPrinter(std::string R, std::string AS)
+ : Result(R), AsmString(AS) {}
void addCond(const std::string &C) { Conds.push_back(C); }
- void addReqFeatures(const std::vector<Record*> &Features) {
- AWI.addReqFeatures(Features);
- ReqFeatures = Features;
- }
void addOperand(StringRef Op, unsigned Idx) { OpMap[Op] = Idx; }
unsigned getOpIndex(StringRef Op) { return OpMap[Op]; }
bool isOpMapped(StringRef Op) { return OpMap.find(Op) != OpMap.end(); }
- bool print(raw_ostream &O) {
+ void print(raw_ostream &O) {
if (Conds.empty() && ReqFeatures.empty()) {
O.indent(6) << "return true;\n";
- return false;
+ return;
}
O << "if (";
@@ -643,27 +643,6 @@ public:
O << *I;
}
- if (!ReqFeatures.empty()) {
- if (Conds.begin() != Conds.end()) {
- O << " &&\n";
- O.indent(8);
- } else {
- O << "if (";
- }
-
- std::string Req;
- raw_string_ostream ReqO(Req);
-
- for (std::vector<Record*>::iterator
- I = ReqFeatures.begin(), E = ReqFeatures.end(); I != E; ++I) {
- if (I != ReqFeatures.begin()) ReqO << " | ";
- ReqO << AWI.getFeatureInfo(*I)->getEnumName();
- }
-
- O << "(AvailableFeatures & (" << ReqO.str() << ")) == ("
- << ReqO.str() << ')';
- }
-
O << ") {\n";
O.indent(6) << "// " << Result << "\n";
O.indent(6) << "AsmString = \"" << AsmString << "\";\n";
@@ -675,7 +654,6 @@ public:
O.indent(6) << "break;\n";
O.indent(4) << '}';
- return !ReqFeatures.empty();
}
bool operator==(const IAPrinter &RHS) {
@@ -707,53 +685,6 @@ public:
} // end anonymous namespace
-/// EmitSubtargetFeatureFlagEnumeration - Emit the subtarget feature flag
-/// definitions.
-static void EmitSubtargetFeatureFlagEnumeration(AsmWriterInfo &Info,
- raw_ostream &O) {
- O << "namespace {\n\n";
- O << "// Flags for subtarget features that participate in "
- << "alias instruction matching.\n";
- O << "enum SubtargetFeatureFlag {\n";
-
- for (std::map<const Record*, SubtargetFeatureInfo*>::const_iterator
- I = Info.SubtargetFeatures.begin(),
- E = Info.SubtargetFeatures.end(); I != E; ++I) {
- SubtargetFeatureInfo &SFI = *I->second;
- O << " " << SFI.getEnumName() << " = (1 << " << SFI.Index << "),\n";
- }
-
- O << " Feature_None = 0\n";
- O << "};\n\n";
- O << "} // end anonymous namespace\n\n";
-}
-
-/// EmitComputeAvailableFeatures - Emit the function to compute the list of
-/// available features given a subtarget.
-static void EmitComputeAvailableFeatures(AsmWriterInfo &Info,
- Record *AsmWriter,
- CodeGenTarget &Target,
- raw_ostream &O) {
- std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
-
- O << "unsigned " << Target.getName() << ClassName << "::\n"
- << "ComputeAvailableFeatures(const " << Target.getName()
- << "Subtarget *Subtarget) const {\n";
- O << " unsigned Features = 0;\n";
-
- for (std::map<const Record*, SubtargetFeatureInfo*>::const_iterator
- I = Info.SubtargetFeatures.begin(),
- E = Info.SubtargetFeatures.end(); I != E; ++I) {
- SubtargetFeatureInfo &SFI = *I->second;
- O << " if (" << SFI.TheDef->getValueAsString("CondString")
- << ")\n";
- O << " Features |= " << SFI.getEnumName() << ";\n";
- }
-
- O << " return Features;\n";
- O << "}\n\n";
-}
-
static void EmitGetMapOperandNumber(raw_ostream &O) {
O << "static unsigned getMapOperandNumber("
<< "const SmallVectorImpl<std::pair<StringRef, unsigned> > &OpMap,\n";
@@ -805,16 +736,16 @@ void AsmWriterEmitter::EmitRegIsInRegClass(raw_ostream &O) {
O << " case RC_" << Name << ":\n";
// Emit the register list now.
- unsigned IE = RC.Elements.size();
+ unsigned IE = RC.getOrder().size();
if (IE == 1) {
- O << " if (Reg == " << getQualifiedName(RC.Elements[0]) << ")\n";
+ O << " if (Reg == " << getQualifiedName(RC.getOrder()[0]) << ")\n";
O << " return true;\n";
} else {
O << " switch (Reg) {\n";
O << " default: break;\n";
for (unsigned II = 0; II != IE; ++II) {
- Record *Reg = RC.Elements[II];
+ Record *Reg = RC.getOrder()[II];
O << " case " << getQualifiedName(Reg) << ":\n";
}
@@ -830,10 +761,46 @@ void AsmWriterEmitter::EmitRegIsInRegClass(raw_ostream &O) {
O << "}\n\n";
}
+static unsigned CountNumOperands(StringRef AsmString) {
+ unsigned NumOps = 0;
+ std::pair<StringRef, StringRef> ASM = AsmString.split(' ');
+
+ while (!ASM.second.empty()) {
+ ++NumOps;
+ ASM = ASM.second.split(' ');
+ }
+
+ return NumOps;
+}
+
+static unsigned CountResultNumOperands(StringRef AsmString) {
+ unsigned NumOps = 0;
+ std::pair<StringRef, StringRef> ASM = AsmString.split('\t');
+
+ if (!ASM.second.empty()) {
+ size_t I = ASM.second.find('{');
+ StringRef Str = ASM.second;
+ if (I != StringRef::npos)
+ Str = ASM.second.substr(I, ASM.second.find('|', I));
+
+ ASM = Str.split(' ');
+
+ do {
+ ++NumOps;
+ ASM = ASM.second.split(' ');
+ } while (!ASM.second.empty());
+ }
+
+ return NumOps;
+}
+
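Both helpers count operands by splitting on spaces, so an asm string such as "mov $dst, $src" yields 2 (one field per space, since operands are written ", "-separated). A minimal standalone restatement of the core logic in plain std::string, with hypothetical inputs; CountResultNumOperands additionally strips the mnemonic at the '\t' and trims '{...|...}' asm-variant syntax, which is omitted here:

#include <cassert>
#include <string>

// Count the space-separated fields that follow the mnemonic, mirroring
// CountNumOperands above.
static unsigned countOperands(const std::string &S) {
  unsigned NumOps = 0;
  for (std::string::size_type Pos = S.find(' ');
       Pos != std::string::npos; Pos = S.find(' ', Pos + 1))
    ++NumOps;
  return NumOps;
}

int main() {
  assert(countOperands("mov $dst, $src") == 2); // two spaces -> two fields
  assert(countOperands("nop") == 0);            // no operands
  return 0;
}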
void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
CodeGenTarget Target(Records);
Record *AsmWriter = Target.getAsmWriter();
+ if (!AsmWriter->getValueAsBit("isMCAsmWriter"))
+ return;
+
O << "\n#ifdef PRINT_ALIAS_INSTR\n";
O << "#undef PRINT_ALIAS_INSTR\n\n";
@@ -842,9 +809,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
// Emit the method that prints the alias instruction.
std::string ClassName = AsmWriter->getValueAsString("AsmWriterClassName");
- bool isMC = AsmWriter->getValueAsBit("isMCAsmWriter");
- const char *MachineInstrClassName = isMC ? "MCInst" : "MachineInstr";
-
std::vector<Record*> AllInstAliases =
Records.getAllDerivedDefinitions("InstAlias");
@@ -864,7 +828,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
// A map of which conditions need to be met for each instruction operand
// before it can be matched to the mnemonic.
std::map<std::string, std::vector<IAPrinter*> > IAPrinterMap;
- AsmWriterInfo AWI;
for (std::map<std::string, std::vector<CodeGenInstAlias*> >::iterator
I = AliasMap.begin(), E = AliasMap.end(); I != E; ++I) {
@@ -873,12 +836,16 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
for (std::vector<CodeGenInstAlias*>::iterator
II = Aliases.begin(), IE = Aliases.end(); II != IE; ++II) {
const CodeGenInstAlias *CGA = *II;
- IAPrinter *IAP = new IAPrinter(AWI, CGA->Result->getAsString(),
- CGA->AsmString);
+ unsigned LastOpNo = CGA->ResultInstOperandIndex.size();
+ unsigned NumResultOps =
+ CountResultNumOperands(CGA->ResultInst->AsmString);
- IAP->addReqFeatures(CGA->TheDef->getValueAsListOfDefs("Predicates"));
+ // Don't emit the alias if it has more operands than what it's aliasing.
+ if (NumResultOps < CountNumOperands(CGA->AsmString))
+ continue;
- unsigned LastOpNo = CGA->ResultInstOperandIndex.size();
+ IAPrinter *IAP = new IAPrinter(CGA->Result->getAsString(),
+ CGA->AsmString);
std::string Cond;
Cond = std::string("MI->getNumOperands() == ") + llvm::utostr(LastOpNo);
@@ -896,6 +863,9 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
const Record *Rec = RO.getRecord();
StringRef ROName = RO.getName();
+
+ if (Rec->isSubClassOf("RegisterOperand"))
+ Rec = Rec->getValueAsDef("RegClass");
if (Rec->isSubClassOf("RegisterClass")) {
Cond = std::string("MI->getOperand(")+llvm::utostr(i)+").isReg()";
IAP->addCond(Cond);
@@ -914,7 +884,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
}
} else {
assert(Rec->isSubClassOf("Operand") && "Unexpected operand!");
- // FIXME: We need to handle these situations.
+ // FIXME: We may need to handle these situations.
delete IAP;
IAP = 0;
CantHandle = true;
@@ -945,19 +915,15 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
}
}
- EmitSubtargetFeatureFlagEnumeration(AWI, O);
- EmitComputeAvailableFeatures(AWI, AsmWriter, Target, O);
-
std::string Header;
raw_string_ostream HeaderO(Header);
HeaderO << "bool " << Target.getName() << ClassName
- << "::printAliasInstr(const " << MachineInstrClassName
+ << "::printAliasInstr(const MCInst"
<< " *MI, raw_ostream &OS) {\n";
std::string Cases;
raw_string_ostream CasesO(Cases);
- bool NeedAvailableFeatures = false;
for (std::map<std::string, std::vector<IAPrinter*> >::iterator
I = IAPrinterMap.begin(), E = IAPrinterMap.end(); I != E; ++I) {
@@ -988,14 +954,14 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
II = UniqueIAPs.begin(), IE = UniqueIAPs.end(); II != IE; ++II) {
IAPrinter *IAP = *II;
CasesO.indent(4);
- NeedAvailableFeatures |= IAP->print(CasesO);
+ IAP->print(CasesO);
CasesO << '\n';
}
CasesO.indent(4) << "return false;\n";
}
- if (CasesO.str().empty() || !isMC) {
+ if (CasesO.str().empty()) {
O << HeaderO.str();
O << " return false;\n";
O << "}\n\n";
@@ -1008,8 +974,6 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
O << HeaderO.str();
O.indent(2) << "StringRef AsmString;\n";
O.indent(2) << "SmallVector<std::pair<StringRef, unsigned>, 4> OpMap;\n";
- if (NeedAvailableFeatures)
- O.indent(2) << "unsigned AvailableFeatures = getAvailableFeatures();\n\n";
O.indent(2) << "switch (MI->getOpcode()) {\n";
O.indent(2) << "default: return false;\n";
O << CasesO.str();
diff --git a/utils/TableGen/CMakeLists.txt b/utils/TableGen/CMakeLists.txt
index a24c921..4d71259 100644
--- a/utils/TableGen/CMakeLists.txt
+++ b/utils/TableGen/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_utility(tblgen
DAGISelMatcher.cpp
DisassemblerEmitter.cpp
EDEmitter.cpp
+ Error.cpp
FastISelEmitter.cpp
FixedLenDecoderEmitter.cpp
InstrEnumEmitter.cpp
@@ -33,6 +34,7 @@ add_llvm_utility(tblgen
LLVMCConfigurationEmitter.cpp
NeonEmitter.cpp
OptParserEmitter.cpp
+ PseudoLoweringEmitter.cpp
Record.cpp
RegisterInfoEmitter.cpp
SetTheory.cpp
diff --git a/utils/TableGen/ClangDiagnosticsEmitter.cpp b/utils/TableGen/ClangDiagnosticsEmitter.cpp
index acaa1f8..0a48e75 100644
--- a/utils/TableGen/ClangDiagnosticsEmitter.cpp
+++ b/utils/TableGen/ClangDiagnosticsEmitter.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/VectorExtras.h"
#include <map>
#include <algorithm>
@@ -195,6 +196,15 @@ void ClangDiagsDefsEmitter::run(raw_ostream &OS) {
// Warning Group Tables generation
//===----------------------------------------------------------------------===//
+static std::string getDiagCategoryEnum(llvm::StringRef name) {
+ if (name.empty())
+ return "DiagCat_None";
+ llvm::SmallString<256> enumName = llvm::StringRef("DiagCat_");
+ for (llvm::StringRef::iterator I = name.begin(), E = name.end(); I != E; ++I)
+ enumName += isalnum(*I) ? *I : '_';
+ return enumName.str();
+}
+
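The sanitizer above turns a category name into a C identifier by prefixing "DiagCat_" and replacing every non-alphanumeric character with '_'. A quick check with hypothetical category names:

// getDiagCategoryEnum("")             == "DiagCat_None"
// getDiagCategoryEnum("Lambda Issue") == "DiagCat_Lambda_Issue"
// getDiagCategoryEnum("C++ Feature")  == "DiagCat_C___Feature" (two '+', one space)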
namespace {
struct GroupInfo {
std::vector<const Record*> DiagsInGroup;
@@ -301,7 +311,7 @@ void ClangDiagGroupsEmitter::run(raw_ostream &OS) {
OS << "\n#ifdef GET_CATEGORY_TABLE\n";
for (DiagCategoryIDMap::iterator I = CategoriesByID.begin(),
E = CategoriesByID.end(); I != E; ++I)
- OS << "CATEGORY(\"" << *I << "\")\n";
+ OS << "CATEGORY(\"" << *I << "\", " << getDiagCategoryEnum(*I) << ")\n";
OS << "#endif // GET_CATEGORY_TABLE\n\n";
}
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 9d4dc5c4..d828dfc 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -34,7 +34,8 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
for (std::vector<Record*>::iterator I = Insts.begin(), E = Insts.end();
I != E; ++I) {
Record *R = *I;
- if (R->getValueAsString("Namespace") == "TargetOpcode")
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo"))
continue;
BitsInit *BI = R->getValueAsBitsInit("Inst");
@@ -231,7 +232,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
const CodeGenInstruction *CGI = *IN;
Record *R = CGI->TheDef;
- if (R->getValueAsString("Namespace") == "TargetOpcode") {
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo")) {
o << " 0U,\n";
continue;
}
@@ -255,7 +257,8 @@ void CodeEmitterGen::run(raw_ostream &o) {
for (std::vector<Record*>::iterator IC = Insts.begin(), EC = Insts.end();
IC != EC; ++IC) {
Record *R = *IC;
- if (R->getValueAsString("Namespace") == "TargetOpcode")
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo"))
continue;
const std::string &InstName = R->getValueAsString("Namespace") + "::"
+ R->getName();
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index a08cde6..072893f 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "Error.h"
#include "Record.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
@@ -1241,6 +1242,16 @@ TreePatternNode *TreePatternNode::InlinePatternFragments(TreePattern &TP) {
///
static EEVT::TypeSet getImplicitType(Record *R, unsigned ResNo,
bool NotRegisters, TreePattern &TP) {
+ // Check to see if this is a register operand.
+ if (R->isSubClassOf("RegisterOperand")) {
+ assert(ResNo == 0 && "Regoperand ref only has one result!");
+ if (NotRegisters)
+ return EEVT::TypeSet(); // Unknown.
+ Record *RegClass = R->getValueAsDef("RegClass");
+ const CodeGenTarget &T = TP.getDAGPatterns().getTargetInfo();
+ return EEVT::TypeSet(T.getRegisterClass(RegClass).getValueTypes());
+ }
+
// Check to see if this is a register or a register class.
if (R->isSubClassOf("RegisterClass")) {
assert(ResNo == 0 && "Regclass ref only has one result!");
@@ -1523,6 +1534,11 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
if (ResultNode->isSubClassOf("PointerLikeRegClass")) {
MadeChange |= UpdateNodeType(ResNo, MVT::iPTR, TP);
+ } else if (ResultNode->isSubClassOf("RegisterOperand")) {
+ Record *RegClass = ResultNode->getValueAsDef("RegClass");
+ const CodeGenRegisterClass &RC =
+ CDP.getTargetInfo().getRegisterClass(RegClass);
+ MadeChange |= UpdateNodeType(ResNo, RC.getValueTypes(), TP);
} else if (ResultNode->getName() == "unknown") {
// Nothing to do.
} else {
@@ -1581,6 +1597,11 @@ bool TreePatternNode::ApplyTypeConstraints(TreePattern &TP, bool NotRegisters) {
const CodeGenRegisterClass &RC =
CDP.getTargetInfo().getRegisterClass(OperandNode);
MadeChange |= Child->UpdateNodeType(ChildResNo, RC.getValueTypes(), TP);
+ } else if (OperandNode->isSubClassOf("RegisterOperand")) {
+ Record *RegClass = OperandNode->getValueAsDef("RegClass");
+ const CodeGenRegisterClass &RC =
+ CDP.getTargetInfo().getRegisterClass(RegClass);
+ MadeChange |= Child->UpdateNodeType(ChildResNo, RC.getValueTypes(), TP);
} else if (OperandNode->isSubClassOf("Operand")) {
VT = getValueType(OperandNode->getValueAsDef("Type"));
MadeChange |= Child->UpdateNodeType(ChildResNo, VT, TP);
@@ -1723,7 +1744,7 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
Record *R = DI->getDef();
// Direct reference to a leaf DagNode or PatFrag? Turn it into a
- // TreePatternNode if its own. For example:
+ // TreePatternNode of its own. For example:
/// (foo GPR, imm) -> (foo GPR, (imm))
if (R->isSubClassOf("SDNode") || R->isSubClassOf("PatFrag"))
return ParseTreePattern(new DagInit(DI, "",
@@ -1927,7 +1948,8 @@ InferAllTypes(const StringMap<SmallVector<TreePatternNode*,1> > *InNamedTypes) {
// def : Pat<(v1i64 (bitconvert(v2i32 DPR:$src))), (v1i64 DPR:$src)>;
if (Nodes[i] == Trees[0] && Nodes[i]->isLeaf()) {
DefInit *DI = dynamic_cast<DefInit*>(Nodes[i]->getLeafValue());
- if (DI && DI->getDef()->isSubClassOf("RegisterClass"))
+ if (DI && (DI->getDef()->isSubClassOf("RegisterClass") ||
+ DI->getDef()->isSubClassOf("RegisterOperand")))
continue;
}
@@ -2210,7 +2232,8 @@ static bool HandleUse(TreePattern *I, TreePatternNode *Pat,
if (Pat->getName().empty()) {
if (Pat->isLeaf()) {
DefInit *DI = dynamic_cast<DefInit*>(Pat->getLeafValue());
- if (DI && DI->getDef()->isSubClassOf("RegisterClass"))
+ if (DI && (DI->getDef()->isSubClassOf("RegisterClass") ||
+ DI->getDef()->isSubClassOf("RegisterOperand")))
I->error("Input " + DI->getDef()->getName() + " must be named!");
}
return false;
@@ -2317,6 +2340,7 @@ FindPatternInputsAndOutputs(TreePattern *I, TreePatternNode *Pat,
I->error("set destination should be a register!");
if (Val->getDef()->isSubClassOf("RegisterClass") ||
+ Val->getDef()->isSubClassOf("RegisterOperand") ||
Val->getDef()->isSubClassOf("PointerLikeRegClass")) {
if (Dest->getName().empty())
I->error("set destination must have a name!");
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index e4e8574..936fd01 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -26,7 +26,7 @@
namespace llvm {
class Record;
- struct Init;
+ class Init;
class ListInit;
class DagInit;
class SDNodeInfo;
diff --git a/utils/TableGen/CodeGenInstruction.cpp b/utils/TableGen/CodeGenInstruction.cpp
index 5b0aedf..a52ce86 100644
--- a/utils/TableGen/CodeGenInstruction.cpp
+++ b/utils/TableGen/CodeGenInstruction.cpp
@@ -13,6 +13,7 @@
#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
+#include "Error.h"
#include "Record.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
@@ -66,10 +67,14 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
Record *Rec = Arg->getDef();
std::string PrintMethod = "printOperand";
std::string EncoderMethod;
+ std::string OperandType = "OPERAND_UNKNOWN";
unsigned NumOps = 1;
DagInit *MIOpInfo = 0;
- if (Rec->isSubClassOf("Operand")) {
+ if (Rec->isSubClassOf("RegisterOperand")) {
PrintMethod = Rec->getValueAsString("PrintMethod");
+ } else if (Rec->isSubClassOf("Operand")) {
+ PrintMethod = Rec->getValueAsString("PrintMethod");
+ OperandType = Rec->getValueAsString("OperandType");
// If there is an explicit encoder method, use it.
EncoderMethod = Rec->getValueAsString("EncoderMethod");
MIOpInfo = Rec->getValueAsDag("MIOperandInfo");
@@ -93,8 +98,9 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
} else if (Rec->getName() == "variable_ops") {
isVariadic = true;
continue;
- } else if (!Rec->isSubClassOf("RegisterClass") &&
- !Rec->isSubClassOf("PointerLikeRegClass") &&
+ } else if (Rec->isSubClassOf("RegisterClass")) {
+ OperandType = "OPERAND_REGISTER";
+ } else if (!Rec->isSubClassOf("PointerLikeRegClass") &&
Rec->getName() != "unknown")
throw "Unknown operand class '" + Rec->getName() +
"' in '" + R->getName() + "' instruction!";
@@ -108,7 +114,8 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
" has the same name as a previous operand!";
OperandList.push_back(OperandInfo(Rec, ArgName, PrintMethod, EncoderMethod,
- MIOperandNo, NumOps, MIOpInfo));
+ OperandType, MIOperandNo, NumOps,
+ MIOpInfo));
MIOperandNo += NumOps;
}
@@ -308,6 +315,8 @@ CodeGenInstruction::CodeGenInstruction(Record *R) : TheDef(R), Operands(R) {
isAsCheapAsAMove = R->getValueAsBit("isAsCheapAsAMove");
hasExtraSrcRegAllocReq = R->getValueAsBit("hasExtraSrcRegAllocReq");
hasExtraDefRegAllocReq = R->getValueAsBit("hasExtraDefRegAllocReq");
+ isCodeGenOnly = R->getValueAsBit("isCodeGenOnly");
+ isPseudo = R->getValueAsBit("isPseudo");
ImplicitDefs = R->getValueAsListOfDefs("Defs");
ImplicitUses = R->getValueAsListOfDefs("Uses");
@@ -414,10 +423,14 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
// Handle explicit registers.
if (ADI && ADI->getDef()->isSubClassOf("Register")) {
+ if (InstOpRec->isSubClassOf("RegisterOperand"))
+ InstOpRec = InstOpRec->getValueAsDef("RegClass");
+
if (!InstOpRec->isSubClassOf("RegisterClass"))
return false;
- if (!T.getRegisterClass(InstOpRec).containsRegister(ADI->getDef()))
+ if (!T.getRegisterClass(InstOpRec)
+ .contains(T.getRegBank().getReg(ADI->getDef())))
throw TGError(Loc, "fixed register " +ADI->getDef()->getName()
+ " is not a member of the " + InstOpRec->getName() +
" register class!");
diff --git a/utils/TableGen/CodeGenInstruction.h b/utils/TableGen/CodeGenInstruction.h
index 5f1e0be..8d7669a 100644
--- a/utils/TableGen/CodeGenInstruction.h
+++ b/utils/TableGen/CodeGenInstruction.h
@@ -78,6 +78,10 @@ namespace llvm {
/// for binary encoding. "getMachineOpValue" by default.
std::string EncoderMethodName;
+ /// OperandType - A value from MCOI::OperandType representing the type of
+ /// the operand.
+ std::string OperandType;
+
/// MIOperandNo - Currently (this is meant to be phased out), some logical
/// operands correspond to multiple MachineInstr operands. In the X86
/// target for example, one address operand is represented as 4
@@ -101,10 +105,11 @@ namespace llvm {
std::vector<ConstraintInfo> Constraints;
OperandInfo(Record *R, const std::string &N, const std::string &PMN,
- const std::string &EMN, unsigned MION, unsigned MINO,
- DagInit *MIOI)
+ const std::string &EMN, const std::string &OT, unsigned MION,
+ unsigned MINO, DagInit *MIOI)
: Rec(R), Name(N), PrinterMethodName(PMN), EncoderMethodName(EMN),
- MIOperandNo(MION), MINumOperands(MINO), MIOperandInfo(MIOI) {}
+ OperandType(OT), MIOperandNo(MION), MINumOperands(MINO),
+ MIOperandInfo(MIOI) {}
/// getTiedOperand - If this operand is tied to another one, return the
@@ -235,6 +240,8 @@ namespace llvm {
bool isAsCheapAsAMove;
bool hasExtraSrcRegAllocReq;
bool hasExtraDefRegAllocReq;
+ bool isCodeGenOnly;
+ bool isPseudo;
CodeGenInstruction(Record *R);
diff --git a/utils/TableGen/CodeGenRegisters.cpp b/utils/TableGen/CodeGenRegisters.cpp
index a4504e4..1acf3a8 100644
--- a/utils/TableGen/CodeGenRegisters.cpp
+++ b/utils/TableGen/CodeGenRegisters.cpp
@@ -14,6 +14,7 @@
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
+#include "Error.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -72,10 +73,21 @@ CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
for (unsigned i = 0, e = SubList.size(); i != e; ++i) {
CodeGenRegister *SR = RegBank.getReg(SubList[i]);
const SubRegMap &Map = SR->getSubRegs(RegBank);
+
+ // Add this as a super-register of SR now that all sub-registers are in the
+ // list. This creates a topological ordering; the exact order depends on the
+ // order in which getSubRegs is called on all registers.
+ SR->SuperRegs.push_back(this);
+
for (SubRegMap::const_iterator SI = Map.begin(), SE = Map.end(); SI != SE;
- ++SI)
+ ++SI) {
if (!SubRegs.insert(*SI).second)
Orphans.push_back(Orphan(SI->second, Indices[i], SI->first));
+
+ // Noop sub-register indexes are possible, so avoid duplicates.
+ if (SI->second != SR)
+ SI->second->SuperRegs.push_back(this);
+ }
}
// Process the composites.
@@ -128,11 +140,122 @@ CodeGenRegister::getSubRegs(CodeGenRegBank &RegBank) {
return SubRegs;
}
+void
+CodeGenRegister::addSubRegsPreOrder(SetVector<CodeGenRegister*> &OSet) const {
+ assert(SubRegsComplete && "Must precompute sub-registers");
+ std::vector<Record*> Indices = TheDef->getValueAsListOfDefs("SubRegIndices");
+ for (unsigned i = 0, e = Indices.size(); i != e; ++i) {
+ CodeGenRegister *SR = SubRegs.find(Indices[i])->second;
+ if (OSet.insert(SR))
+ SR->addSubRegsPreOrder(OSet);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// RegisterTuples
+//===----------------------------------------------------------------------===//
+
+// A RegisterTuples def is used to generate pseudo-registers from lists of
+// sub-registers. We provide a SetTheory expander class that returns the new
+// registers.
+namespace {
+struct TupleExpander : SetTheory::Expander {
+ void expand(SetTheory &ST, Record *Def, SetTheory::RecSet &Elts) {
+ std::vector<Record*> Indices = Def->getValueAsListOfDefs("SubRegIndices");
+ unsigned Dim = Indices.size();
+ ListInit *SubRegs = Def->getValueAsListInit("SubRegs");
+ if (Dim != SubRegs->getSize())
+ throw TGError(Def->getLoc(), "SubRegIndices and SubRegs size mismatch");
+ if (Dim < 2)
+ throw TGError(Def->getLoc(), "Tuples must have at least 2 sub-registers");
+
+ // Evaluate the sub-register lists to be zipped.
+ unsigned Length = ~0u;
+ SmallVector<SetTheory::RecSet, 4> Lists(Dim);
+ for (unsigned i = 0; i != Dim; ++i) {
+ ST.evaluate(SubRegs->getElement(i), Lists[i]);
+ Length = std::min(Length, unsigned(Lists[i].size()));
+ }
+
+ if (Length == 0)
+ return;
+
+ // Precompute some types.
+ Record *RegisterCl = Def->getRecords().getClass("Register");
+ RecTy *RegisterRecTy = new RecordRecTy(RegisterCl);
+ StringInit *BlankName = new StringInit("");
+
+ // Zip them up.
+ for (unsigned n = 0; n != Length; ++n) {
+ std::string Name;
+ Record *Proto = Lists[0][n];
+ std::vector<Init*> Tuple;
+ unsigned CostPerUse = 0;
+ for (unsigned i = 0; i != Dim; ++i) {
+ Record *Reg = Lists[i][n];
+ if (i) Name += '_';
+ Name += Reg->getName();
+ Tuple.push_back(new DefInit(Reg));
+ CostPerUse = std::max(CostPerUse,
+ unsigned(Reg->getValueAsInt("CostPerUse")));
+ }
+
+ // Create a new Record representing the synthesized register. This record
+ // is only for consumption by CodeGenRegister, it is not added to the
+ // RecordKeeper.
+ Record *NewReg = new Record(Name, Def->getLoc(), Def->getRecords());
+ Elts.insert(NewReg);
+
+ // Copy Proto super-classes.
+ for (unsigned i = 0, e = Proto->getSuperClasses().size(); i != e; ++i)
+ NewReg->addSuperClass(Proto->getSuperClasses()[i]);
+
+ // Copy Proto fields.
+ for (unsigned i = 0, e = Proto->getValues().size(); i != e; ++i) {
+ RecordVal RV = Proto->getValues()[i];
+
+ // Replace the sub-register list with Tuple.
+ if (RV.getName() == "SubRegs")
+ RV.setValue(new ListInit(Tuple, RegisterRecTy));
+
+ // Provide a blank AsmName. MC hacks are required anyway.
+ if (RV.getName() == "AsmName")
+ RV.setValue(BlankName);
+
+ // CostPerUse is aggregated from all Tuple members.
+ if (RV.getName() == "CostPerUse")
+ RV.setValue(new IntInit(CostPerUse));
+
+ // Copy fields from the RegisterTuples def.
+ if (RV.getName() == "SubRegIndices" ||
+ RV.getName() == "CompositeIndices") {
+ NewReg->addValue(*Def->getValue(RV.getName()));
+ continue;
+ }
+
+ // Some fields get their default uninitialized value.
+ if (RV.getName() == "DwarfNumbers" ||
+ RV.getName() == "DwarfAlias" ||
+ RV.getName() == "Aliases") {
+ if (const RecordVal *DefRV = RegisterCl->getValue(RV.getName()))
+ NewReg->addValue(*DefRV);
+ continue;
+ }
+
+ // Everything else is copied from Proto.
+ NewReg->addValue(RV);
+ }
+ }
+ }
+};
+}
+
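A minimal sketch of the zip step performed by the expander, with hypothetical single-precision register names; the real code additionally synthesizes a Record per tuple, copies Proto fields, blanks AsmName, and takes the maximum CostPerUse across each column:

#include <iostream>
#include <string>
#include <vector>

int main() {
  // Two sub-register lists of equal length, one per SubRegIndex.
  std::vector<std::vector<std::string> > Lists;
  Lists.push_back({"S0", "S2", "S4"}); // first sub-register of each tuple
  Lists.push_back({"S1", "S3", "S5"}); // second sub-register of each tuple
  // Zip column n across all lists into one tuple name, joined by '_'.
  for (size_t n = 0; n != Lists[0].size(); ++n) {
    std::string Name;
    for (size_t i = 0; i != Lists.size(); ++i) {
      if (i) Name += '_';
      Name += Lists[i][n];
    }
    std::cout << Name << '\n'; // prints S0_S1, S2_S3, S4_S5
  }
  return 0;
}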
//===----------------------------------------------------------------------===//
// CodeGenRegisterClass
//===----------------------------------------------------------------------===//
-CodeGenRegisterClass::CodeGenRegisterClass(Record *R) : TheDef(R) {
+CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
+ : TheDef(R) {
// Rename anonymous register classes.
if (R->getName().size() > 9 && R->getName()[9] == '.') {
static unsigned AnonCounter = 0;
@@ -149,13 +272,26 @@ CodeGenRegisterClass::CodeGenRegisterClass(Record *R) : TheDef(R) {
}
assert(!VTs.empty() && "RegisterClass must contain at least one ValueType!");
- std::vector<Record*> RegList = R->getValueAsListOfDefs("MemberList");
- for (unsigned i = 0, e = RegList.size(); i != e; ++i) {
- Record *Reg = RegList[i];
- if (!Reg->isSubClassOf("Register"))
- throw "Register Class member '" + Reg->getName() +
- "' does not derive from the Register class!";
- Elements.push_back(Reg);
+ // Default allocation order always contains all registers.
+ Elements = RegBank.getSets().expand(R);
+ for (unsigned i = 0, e = Elements->size(); i != e; ++i)
+ Members.insert(RegBank.getReg((*Elements)[i]));
+
+ // Alternative allocation orders may be subsets.
+ ListInit *Alts = R->getValueAsListInit("AltOrders");
+ AltOrders.resize(Alts->size());
+ SetTheory::RecSet Order;
+ for (unsigned i = 0, e = Alts->size(); i != e; ++i) {
+ RegBank.getSets().evaluate(Alts->getElement(i), Order);
+ AltOrders[i].append(Order.begin(), Order.end());
+ // Verify that all altorder members are regclass members.
+ while (!Order.empty()) {
+ CodeGenRegister *Reg = RegBank.getReg(Order.back());
+ Order.pop_back();
+ if (!contains(Reg))
+ throw TGError(R->getLoc(), " AltOrder register " + Reg->getName() +
+ " is not a class member");
+ }
}
// SubRegClasses is a list<dag> containing (RC, subregindex, ...) dags.
@@ -189,8 +325,28 @@ CodeGenRegisterClass::CodeGenRegisterClass(Record *R) : TheDef(R) {
SpillAlignment = R->getValueAsInt("Alignment");
CopyCost = R->getValueAsInt("CopyCost");
Allocatable = R->getValueAsBit("isAllocatable");
- MethodBodies = R->getValueAsCode("MethodBodies");
- MethodProtos = R->getValueAsCode("MethodProtos");
+ AltOrderSelect = R->getValueAsCode("AltOrderSelect");
+}
+
+bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const {
+ return Members.count(Reg);
+}
+
+// Returns true if RC is a strict subclass.
+// RC is a sub-class of this class if it is a valid replacement for any
+// instruction operand where a register of this class is required. It must
+// satisfy these conditions:
+//
+// 1. All RC registers are also in this.
+// 2. The RC spill size must not be smaller than our spill size.
+// 3. RC spill alignment must be compatible with ours.
+//
+bool CodeGenRegisterClass::hasSubClass(const CodeGenRegisterClass *RC) const {
+ return SpillAlignment && RC->SpillAlignment % SpillAlignment == 0 &&
+ SpillSize <= RC->SpillSize &&
+ std::includes(Members.begin(), Members.end(),
+ RC->Members.begin(), RC->Members.end(),
+ CodeGenRegister::Less());
}
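Note that std::includes requires both ranges to be sorted by the same comparator; that holds here because both Members sets are ordered by CodeGenRegister::Less (EnumValue order). A minimal illustration of the subset test with integer stand-ins for registers:

#include <algorithm>
#include <cassert>
#include <set>

int main() {
  // Stand-ins for two classes' member sets; std::set keeps them sorted
  // with the same comparator, as hasSubClass requires.
  std::set<int> This;
  This.insert(1); This.insert(2); This.insert(3); This.insert(4);
  std::set<int> RC;
  RC.insert(2); RC.insert(4);
  // RC's members are a subset of This, so This "hasSubClass" RC by the
  // membership rule (spill size/alignment checks omitted).
  assert(std::includes(This.begin(), This.end(), RC.begin(), RC.end()));
  return 0;
}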
const std::string &CodeGenRegisterClass::getName() const {
@@ -202,6 +358,10 @@ const std::string &CodeGenRegisterClass::getName() const {
//===----------------------------------------------------------------------===//
CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
+ // Configure register Sets to understand register classes and tuples.
+ Sets.addFieldExpander("RegisterClass", "MemberList");
+ Sets.addExpander("RegisterTuples", new TupleExpander());
+
// Read in the user-defined (named) sub-register indices.
// More indices will be synthesized later.
SubRegIndices = Records.getAllDerivedDefinitions("SubRegIndex");
@@ -214,18 +374,45 @@ CodeGenRegBank::CodeGenRegBank(RecordKeeper &Records) : Records(Records) {
Registers.reserve(Regs.size());
// Assign the enumeration values.
for (unsigned i = 0, e = Regs.size(); i != e; ++i)
- Registers.push_back(CodeGenRegister(Regs[i], i + 1));
+ getReg(Regs[i]);
+
+ // Expand tuples and number the new registers.
+ std::vector<Record*> Tups =
+ Records.getAllDerivedDefinitions("RegisterTuples");
+ for (unsigned i = 0, e = Tups.size(); i != e; ++i) {
+ const std::vector<Record*> *TupRegs = Sets.expand(Tups[i]);
+ for (unsigned j = 0, je = TupRegs->size(); j != je; ++j)
+ getReg((*TupRegs)[j]);
+ }
+
+ // Read in register class definitions.
+ std::vector<Record*> RCs = Records.getAllDerivedDefinitions("RegisterClass");
+ if (RCs.empty())
+ throw std::string("No 'RegisterClass' subclasses defined!");
+
+ RegClasses.reserve(RCs.size());
+ for (unsigned i = 0, e = RCs.size(); i != e; ++i)
+ RegClasses.push_back(CodeGenRegisterClass(*this, RCs[i]));
}
CodeGenRegister *CodeGenRegBank::getReg(Record *Def) {
- if (Def2Reg.empty())
- for (unsigned i = 0, e = Registers.size(); i != e; ++i)
- Def2Reg[Registers[i].TheDef] = &Registers[i];
-
- if (CodeGenRegister *Reg = Def2Reg[Def])
+ CodeGenRegister *&Reg = Def2Reg[Def];
+ if (Reg)
return Reg;
+ Reg = new CodeGenRegister(Def, Registers.size() + 1);
+ Registers.push_back(Reg);
+ return Reg;
+}
- throw TGError(Def->getLoc(), "Not a known Register!");
+CodeGenRegisterClass *CodeGenRegBank::getRegClass(Record *Def) {
+ if (Def2RC.empty())
+ for (unsigned i = 0, e = RegClasses.size(); i != e; ++i)
+ Def2RC[RegClasses[i].TheDef] = &RegClasses[i];
+
+ if (CodeGenRegisterClass *RC = Def2RC[Def])
+ return RC;
+
+ throw TGError(Def->getLoc(), "Not a known RegisterClass!");
}
Record *CodeGenRegBank::getCompositeSubRegIndex(Record *A, Record *B,
@@ -254,11 +441,11 @@ void CodeGenRegBank::computeComposites() {
// Precompute all sub-register maps. This will create Composite entries for
// all inferred sub-register indices.
for (unsigned i = 0, e = Registers.size(); i != e; ++i)
- Registers[i].getSubRegs(*this);
+ Registers[i]->getSubRegs(*this);
for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
- CodeGenRegister *Reg1 = &Registers[i];
- const CodeGenRegister::SubRegMap &SRM1 = Reg1->getSubRegs(*this);
+ CodeGenRegister *Reg1 = Registers[i];
+ const CodeGenRegister::SubRegMap &SRM1 = Reg1->getSubRegs();
for (CodeGenRegister::SubRegMap::const_iterator i1 = SRM1.begin(),
e1 = SRM1.end(); i1 != e1; ++i1) {
Record *Idx1 = i1->first;
@@ -266,7 +453,7 @@ void CodeGenRegBank::computeComposites() {
// Ignore identity compositions.
if (Reg1 == Reg2)
continue;
- const CodeGenRegister::SubRegMap &SRM2 = Reg2->getSubRegs(*this);
+ const CodeGenRegister::SubRegMap &SRM2 = Reg2->getSubRegs();
// Try composing Idx1 with another SubRegIndex.
for (CodeGenRegister::SubRegMap::const_iterator i2 = SRM2.begin(),
e2 = SRM2.end(); i2 != e2; ++i2) {
@@ -306,7 +493,127 @@ void CodeGenRegBank::computeComposites() {
}
}
+// Compute sets of overlapping registers.
+//
+// The standard set is all super-registers and all sub-registers, but the
+// target description can add arbitrary overlapping registers via the 'Aliases'
+// field. This complicates things, but we can compute overlapping sets using
+// the following rules:
+//
+// 1. The relation overlap(A, B) is reflexive and symmetric but not transitive.
+//
+// 2. overlap(A, B) implies overlap(A, S) for all S in supers(B).
+//
+// Alternatively:
+//
+// overlap(A, B) iff there exists:
+// A' in { A, subregs(A) } and B' in { B, subregs(B) } such that:
+// A' = B' or A' in aliases(B') or B' in aliases(A').
+//
+// Here subregs(A) is the full flattened sub-register set returned by
+// A.getSubRegs() while aliases(A) is simply the special 'Aliases' field in the
+// description of register A.
+//
+// This also implies that registers with a common sub-register are considered
+// overlapping. This can happen when forming register pairs:
+//
+// P0 = (R0, R1)
+// P1 = (R1, R2)
+// P2 = (R2, R3)
+//
+// In this case, we will infer an overlap between P0 and P1 because of the
+// shared sub-register R1. There is no overlap between P0 and P2.
+//
+void CodeGenRegBank::
+computeOverlaps(std::map<const CodeGenRegister*, CodeGenRegister::Set> &Map) {
+ assert(Map.empty());
+
+ // Collect overlaps that don't follow from rule 2.
+ for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+ CodeGenRegister *Reg = Registers[i];
+ CodeGenRegister::Set &Overlaps = Map[Reg];
+
+ // Reg overlaps itself.
+ Overlaps.insert(Reg);
+
+ // All super-registers overlap.
+ const CodeGenRegister::SuperRegList &Supers = Reg->getSuperRegs();
+ Overlaps.insert(Supers.begin(), Supers.end());
+
+ // Form symmetrical relations from the special Aliases[] lists.
+ std::vector<Record*> RegList = Reg->TheDef->getValueAsListOfDefs("Aliases");
+ for (unsigned i2 = 0, e2 = RegList.size(); i2 != e2; ++i2) {
+ CodeGenRegister *Reg2 = getReg(RegList[i2]);
+ CodeGenRegister::Set &Overlaps2 = Map[Reg2];
+ const CodeGenRegister::SuperRegList &Supers2 = Reg2->getSuperRegs();
+ // Reg overlaps Reg2 which implies it overlaps supers(Reg2).
+ Overlaps.insert(Reg2);
+ Overlaps.insert(Supers2.begin(), Supers2.end());
+ Overlaps2.insert(Reg);
+ Overlaps2.insert(Supers.begin(), Supers.end());
+ }
+ }
+
+ // Apply rule 2. and inherit all sub-register overlaps.
+ for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
+ CodeGenRegister *Reg = Registers[i];
+ CodeGenRegister::Set &Overlaps = Map[Reg];
+ const CodeGenRegister::SubRegMap &SRM = Reg->getSubRegs();
+ for (CodeGenRegister::SubRegMap::const_iterator i2 = SRM.begin(),
+ e2 = SRM.end(); i2 != e2; ++i2) {
+ CodeGenRegister::Set &Overlaps2 = Map[i2->second];
+ Overlaps.insert(Overlaps2.begin(), Overlaps2.end());
+ }
+ }
+}
+
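A toy re-enactment of the two passes on the P0/P1/P2 pairs from the comment above (no 'Aliases' lists here; strings are hypothetical stand-ins for CodeGenRegister*):

#include <cassert>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  std::map<std::string, std::vector<std::string> > Subs = {
    {"P0", {"R0", "R1"}}, {"P1", {"R1", "R2"}}, {"P2", {"R2", "R3"}},
    {"R0", {}}, {"R1", {}}, {"R2", {}}, {"R3", {}}};
  std::map<std::string, std::vector<std::string> > Supers = {
    {"R0", {"P0"}}, {"R1", {"P0", "P1"}}, {"R2", {"P1", "P2"}},
    {"R3", {"P2"}}, {"P0", {}}, {"P1", {}}, {"P2", {}}};

  std::map<std::string, std::set<std::string> > Map;
  // Pass 1: each register overlaps itself and all of its super-registers.
  for (const auto &KV : Subs) {
    Map[KV.first].insert(KV.first);
    for (const auto &S : Supers[KV.first])
      Map[KV.first].insert(S);
  }
  // Pass 2 (rule 2): inherit every sub-register's overlap set.
  for (const auto &KV : Subs)
    for (const auto &Sub : KV.second)
      Map[KV.first].insert(Map[Sub].begin(), Map[Sub].end());

  assert(Map["P0"].count("P1"));  // overlap via the shared R1
  assert(!Map["P0"].count("P2")); // no shared sub-register, no overlap
  return 0;
}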
void CodeGenRegBank::computeDerivedInfo() {
computeComposites();
}
+/// getRegisterClassForRegister - Find the register class that contains the
+/// specified physical register. If the register is not in a register class,
+/// return null. If the register is in multiple classes, and the classes have a
+/// superset-subset relationship and the same set of types, return the
+/// superclass. Otherwise return null.
+const CodeGenRegisterClass*
+CodeGenRegBank::getRegClassForRegister(Record *R) {
+ const CodeGenRegister *Reg = getReg(R);
+ const std::vector<CodeGenRegisterClass> &RCs = getRegClasses();
+ const CodeGenRegisterClass *FoundRC = 0;
+ for (unsigned i = 0, e = RCs.size(); i != e; ++i) {
+ const CodeGenRegisterClass &RC = RCs[i];
+ if (!RC.contains(Reg))
+ continue;
+
+ // If this is the first class that contains the register,
+ // make a note of it and go on to the next class.
+ if (!FoundRC) {
+ FoundRC = &RC;
+ continue;
+ }
+
+ // If a register's classes have different types, return null.
+ if (RC.getValueTypes() != FoundRC->getValueTypes())
+ return 0;
+
+ // Check to see if the previously found class that contains
+ // the register is a subclass of the current class. If so,
+ // prefer the superclass.
+ if (RC.hasSubClass(FoundRC)) {
+ FoundRC = &RC;
+ continue;
+ }
+
+ // Check to see if the previously found class that contains
+ // the register is a superclass of the current class. If so,
+ // prefer the superclass.
+ if (FoundRC->hasSubClass(&RC))
+ continue;
+
+ // Multiple classes, and neither is a superclass of the other.
+ // Return null.
+ return 0;
+ }
+ return FoundRC;
+}
diff --git a/utils/TableGen/CodeGenRegisters.h b/utils/TableGen/CodeGenRegisters.h
index 09341f0..5edbf47 100644
--- a/utils/TableGen/CodeGenRegisters.h
+++ b/utils/TableGen/CodeGenRegisters.h
@@ -16,8 +16,11 @@
#define CODEGEN_REGISTERS_H
#include "Record.h"
+#include "SetTheory.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include <cstdlib>
#include <map>
#include <string>
@@ -49,16 +52,44 @@ namespace llvm {
return SubRegs;
}
+ // Add sub-registers to OSet following a pre-order defined by the .td file.
+ void addSubRegsPreOrder(SetVector<CodeGenRegister*> &OSet) const;
+
+ // List of super-registers in topological order, small to large.
+ typedef std::vector<CodeGenRegister*> SuperRegList;
+
+ // Get the list of super-registers.
+ // This is only valid after computeDerivedInfo has visited all registers.
+ const SuperRegList &getSuperRegs() const {
+ assert(SubRegsComplete && "Must precompute sub-registers");
+ return SuperRegs;
+ }
+
+ // Order CodeGenRegister pointers by EnumValue.
+ struct Less {
+ bool operator()(const CodeGenRegister *A,
+ const CodeGenRegister *B) const {
+ return A->EnumValue < B->EnumValue;
+ }
+ };
+
+ // Canonically ordered set.
+ typedef std::set<const CodeGenRegister*, Less> Set;
+
private:
bool SubRegsComplete;
SubRegMap SubRegs;
+ SuperRegList SuperRegs;
};
- struct CodeGenRegisterClass {
+ class CodeGenRegisterClass {
+ CodeGenRegister::Set Members;
+ const std::vector<Record*> *Elements;
+ std::vector<SmallVector<Record*, 16> > AltOrders;
+ public:
Record *TheDef;
std::string Namespace;
- std::vector<Record*> Elements;
std::vector<MVT::SimpleValueType> VTs;
unsigned SpillSize;
unsigned SpillAlignment;
@@ -66,7 +97,7 @@ namespace llvm {
bool Allocatable;
// Map SubRegIndex -> RegisterClass
DenseMap<Record*,Record*> SubRegClasses;
- std::string MethodProtos, MethodBodies;
+ std::string AltOrderSelect;
const std::string &getName() const;
const std::vector<MVT::SimpleValueType> &getValueTypes() const {return VTs;}
@@ -79,13 +110,10 @@ namespace llvm {
abort();
}
- bool containsRegister(Record *R) const {
- for (unsigned i = 0, e = Elements.size(); i != e; ++i)
- if (Elements[i] == R) return true;
- return false;
- }
+ // Return true if this class contains the register.
+ bool contains(const CodeGenRegister*) const;
- // Returns true if RC is a strict subclass.
+ // Returns true if RC is a subclass.
// RC is a sub-class of this class if it is a valid replacement for any
// instruction operand where a register of this class is required. It must
// satisfy these conditions:
@@ -94,40 +122,38 @@ namespace llvm {
// 2. The RC spill size must not be smaller than our spill size.
// 3. RC spill alignment must be compatible with ours.
//
- bool hasSubClass(const CodeGenRegisterClass *RC) const {
-
- if (RC->Elements.size() > Elements.size() ||
- (SpillAlignment && RC->SpillAlignment % SpillAlignment) ||
- SpillSize > RC->SpillSize)
- return false;
-
- std::set<Record*> RegSet;
- for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
- Record *Reg = Elements[i];
- RegSet.insert(Reg);
- }
-
- for (unsigned i = 0, e = RC->Elements.size(); i != e; ++i) {
- Record *Reg = RC->Elements[i];
- if (!RegSet.count(Reg))
- return false;
- }
-
- return true;
+ bool hasSubClass(const CodeGenRegisterClass *RC) const;
+
+ // Returns an ordered list of class members.
+ // The order of registers is the same as in the .td file.
+ // No = 0 is the default allocation order; No = 1 is the first alternative.
+ ArrayRef<Record*> getOrder(unsigned No = 0) const {
+ if (No == 0)
+ return *Elements;
+ else
+ return AltOrders[No - 1];
}
- CodeGenRegisterClass(Record *R);
+ // Return the total number of allocation orders available.
+ unsigned getNumOrders() const { return 1 + AltOrders.size(); }
+
+ CodeGenRegisterClass(CodeGenRegBank&, Record *R);
};
// CodeGenRegBank - Represent a target's registers and the relations between
// them.
class CodeGenRegBank {
RecordKeeper &Records;
+ SetTheory Sets;
+
std::vector<Record*> SubRegIndices;
unsigned NumNamedIndices;
- std::vector<CodeGenRegister> Registers;
+ std::vector<CodeGenRegister*> Registers;
DenseMap<Record*, CodeGenRegister*> Def2Reg;
+ std::vector<CodeGenRegisterClass> RegClasses;
+ DenseMap<Record*, CodeGenRegisterClass*> Def2RC;
+
// Composite SubRegIndex instances.
// Map (SubRegIndex, SubRegIndex) -> SubRegIndex.
typedef DenseMap<std::pair<Record*, Record*>, Record*> CompositeMap;
@@ -139,6 +165,8 @@ namespace llvm {
public:
CodeGenRegBank(RecordKeeper&);
+ SetTheory &getSets() { return Sets; }
+
// Sub-register indices. The first NumNamedIndices are defined by the user
// in the .td files. The rest are synthesized such that all sub-registers
// have a unique name.
@@ -151,13 +179,36 @@ namespace llvm {
// Find or create a sub-register index representing the A+B composition.
Record *getCompositeSubRegIndex(Record *A, Record *B, bool create = false);
- const std::vector<CodeGenRegister> &getRegisters() { return Registers; }
+ const std::vector<CodeGenRegister*> &getRegisters() { return Registers; }
// Find a register from its Record def.
CodeGenRegister *getReg(Record*);
+ const std::vector<CodeGenRegisterClass> &getRegClasses() {
+ return RegClasses;
+ }
+
+ // Find a register class from its def.
+ CodeGenRegisterClass *getRegClass(Record*);
+
+ /// getRegisterClassForRegister - Find the register class that contains the
+ /// specified physical register. If the register is not in a register
+ /// class, return null. If the register is in multiple classes, and the
+ /// classes have a superset-subset relationship and the same set of types,
+ /// return the superclass. Otherwise return null.
+ const CodeGenRegisterClass* getRegClassForRegister(Record *R);
+
// Computed derived records such as missing sub-register indices.
void computeDerivedInfo();
+
+ // Compute full overlap sets for every register. These sets include the
+ // rarely used aliases that are neither sub nor super-registers.
+ //
+ // Map[R1].count(R2) is reflexive and symmetric, but not transitive.
+ //
+ // If R1 is a sub-register of R2, Map[R1] is a subset of Map[R2].
+ void computeOverlaps(std::map<const CodeGenRegister*,
+ CodeGenRegister::Set> &Map);
};
}
diff --git a/utils/TableGen/CodeGenTarget.cpp b/utils/TableGen/CodeGenTarget.cpp
index a0c64ff..929791c 100644
--- a/utils/TableGen/CodeGenTarget.cpp
+++ b/utils/TableGen/CodeGenTarget.cpp
@@ -90,6 +90,7 @@ std::string llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::Metadata: return "MVT::Metadata";
case MVT::iPTR: return "MVT::iPTR";
case MVT::iPTRAny: return "MVT::iPTRAny";
+ case MVT::untyped: return "MVT::untyped";
default: assert(0 && "ILLEGAL VALUE TYPE!"); return "";
}
}
@@ -163,40 +164,32 @@ CodeGenRegBank &CodeGenTarget::getRegBank() const {
return *RegBank;
}
-void CodeGenTarget::ReadRegisterClasses() const {
- std::vector<Record*> RegClasses =
- Records.getAllDerivedDefinitions("RegisterClass");
- if (RegClasses.empty())
- throw std::string("No 'RegisterClass' subclasses defined!");
-
- RegisterClasses.reserve(RegClasses.size());
- RegisterClasses.assign(RegClasses.begin(), RegClasses.end());
+void CodeGenTarget::ReadRegAltNameIndices() const {
+ RegAltNameIndices = Records.getAllDerivedDefinitions("RegAltNameIndex");
+ std::sort(RegAltNameIndices.begin(), RegAltNameIndices.end(), LessRecord());
}
/// getRegisterByName - If there is a register with the specific AsmName,
/// return it.
const CodeGenRegister *CodeGenTarget::getRegisterByName(StringRef Name) const {
- const std::vector<CodeGenRegister> &Regs = getRegBank().getRegisters();
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister &Reg = Regs[i];
- if (Reg.TheDef->getValueAsString("AsmName") == Name)
- return &Reg;
- }
+ const std::vector<CodeGenRegister*> &Regs = getRegBank().getRegisters();
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i)
+ if (Regs[i]->TheDef->getValueAsString("AsmName") == Name)
+ return Regs[i];
return 0;
}
std::vector<MVT::SimpleValueType> CodeGenTarget::
getRegisterVTs(Record *R) const {
+ const CodeGenRegister *Reg = getRegBank().getReg(R);
std::vector<MVT::SimpleValueType> Result;
const std::vector<CodeGenRegisterClass> &RCs = getRegisterClasses();
for (unsigned i = 0, e = RCs.size(); i != e; ++i) {
- const CodeGenRegisterClass &RC = RegisterClasses[i];
- for (unsigned ei = 0, ee = RC.Elements.size(); ei != ee; ++ei) {
- if (R == RC.Elements[ei]) {
- const std::vector<MVT::SimpleValueType> &InVTs = RC.getValueTypes();
- Result.insert(Result.end(), InVTs.begin(), InVTs.end());
- }
+ const CodeGenRegisterClass &RC = RCs[i];
+ if (RC.contains(Reg)) {
+ const std::vector<MVT::SimpleValueType> &InVTs = RC.getValueTypes();
+ Result.insert(Result.end(), InVTs.begin(), InVTs.end());
}
}
diff --git a/utils/TableGen/CodeGenTarget.h b/utils/TableGen/CodeGenTarget.h
index 1f1c34c..143daed 100644
--- a/utils/TableGen/CodeGenTarget.h
+++ b/utils/TableGen/CodeGenTarget.h
@@ -66,9 +66,9 @@ class CodeGenTarget {
mutable DenseMap<const Record*, CodeGenInstruction*> Instructions;
mutable CodeGenRegBank *RegBank;
- mutable std::vector<CodeGenRegisterClass> RegisterClasses;
+ mutable std::vector<Record*> RegAltNameIndices;
mutable std::vector<MVT::SimpleValueType> LegalValueTypes;
- void ReadRegisterClasses() const;
+ void ReadRegAltNameIndices() const;
void ReadInstructions() const;
void ReadLegalValueTypes() const;
@@ -98,80 +98,21 @@ public:
/// getRegBank - Return the register bank description.
CodeGenRegBank &getRegBank() const;
- const std::vector<CodeGenRegister> &getRegisters() const {
- return getRegBank().getRegisters();
- }
-
/// getRegisterByName - If there is a register with the specific AsmName,
/// return it.
const CodeGenRegister *getRegisterByName(StringRef Name) const;
- const std::vector<CodeGenRegisterClass> &getRegisterClasses() const {
- if (RegisterClasses.empty()) ReadRegisterClasses();
- return RegisterClasses;
+ const std::vector<Record*> &getRegAltNameIndices() const {
+ if (RegAltNameIndices.empty()) ReadRegAltNameIndices();
+ return RegAltNameIndices;
}
- const CodeGenRegisterClass &getRegisterClass(Record *R) const {
- const std::vector<CodeGenRegisterClass> &RC = getRegisterClasses();
- for (unsigned i = 0, e = RC.size(); i != e; ++i)
- if (RC[i].TheDef == R)
- return RC[i];
- assert(0 && "Didn't find the register class");
- abort();
+ const std::vector<CodeGenRegisterClass> &getRegisterClasses() const {
+ return getRegBank().getRegClasses();
}
- /// getRegisterClassForRegister - Find the register class that contains the
- /// specified physical register. If the register is not in a register
- /// class, return null. If the register is in multiple classes, and the
- /// classes have a superset-subset relationship and the same set of
- /// types, return the superclass. Otherwise return null.
- const CodeGenRegisterClass *getRegisterClassForRegister(Record *R) const {
- const std::vector<CodeGenRegisterClass> &RCs = getRegisterClasses();
- const CodeGenRegisterClass *FoundRC = 0;
- for (unsigned i = 0, e = RCs.size(); i != e; ++i) {
- const CodeGenRegisterClass &RC = RegisterClasses[i];
- for (unsigned ei = 0, ee = RC.Elements.size(); ei != ee; ++ei) {
- if (R != RC.Elements[ei])
- continue;
-
- // If a register's classes have different types, return null.
- if (FoundRC && RC.getValueTypes() != FoundRC->getValueTypes())
- return 0;
-
- // If this is the first class that contains the register,
- // make a note of it and go on to the next class.
- if (!FoundRC) {
- FoundRC = &RC;
- break;
- }
-
- std::vector<Record *> Elements(RC.Elements);
- std::vector<Record *> FoundElements(FoundRC->Elements);
- std::sort(Elements.begin(), Elements.end());
- std::sort(FoundElements.begin(), FoundElements.end());
-
- // Check to see if the previously found class that contains
- // the register is a subclass of the current class. If so,
- // prefer the superclass.
- if (std::includes(Elements.begin(), Elements.end(),
- FoundElements.begin(), FoundElements.end())) {
- FoundRC = &RC;
- break;
- }
-
- // Check to see if the previously found class that contains
- // the register is a superclass of the current class. If so,
- // prefer the superclass.
- if (std::includes(FoundElements.begin(), FoundElements.end(),
- Elements.begin(), Elements.end()))
- break;
-
- // Multiple classes, and neither is a superclass of the other.
- // Return null.
- return 0;
- }
- }
- return FoundRC;
+ const CodeGenRegisterClass &getRegisterClass(Record *R) const {
+ return *getRegBank().getRegClass(R);
}
/// getRegisterVTs - Find the union of all possible SimpleValueTypes for the
diff --git a/utils/TableGen/DAGISelMatcherGen.cpp b/utils/TableGen/DAGISelMatcherGen.cpp
index 402a239..c5897c7 100644
--- a/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/utils/TableGen/DAGISelMatcherGen.cpp
@@ -25,12 +25,12 @@ static MVT::SimpleValueType getRegisterValueType(Record *R,
const CodeGenTarget &T) {
bool FoundRC = false;
MVT::SimpleValueType VT = MVT::Other;
+ const CodeGenRegister *Reg = T.getRegBank().getReg(R);
const std::vector<CodeGenRegisterClass> &RCs = T.getRegisterClasses();
- std::vector<Record*>::const_iterator Element;
for (unsigned rc = 0, e = RCs.size(); rc != e; ++rc) {
const CodeGenRegisterClass &RC = RCs[rc];
- if (!std::count(RC.Elements.begin(), RC.Elements.end(), R))
+ if (!RC.contains(Reg))
continue;
if (!FoundRC) {
@@ -93,10 +93,6 @@ namespace {
/// CurPredicate - As we emit matcher nodes, this points to the latest check
/// which should have future checks stuck into its Next position.
Matcher *CurPredicate;
-
- /// RegisterDefMap - A map of register record definitions to the
- /// corresponding target CodeGenRegister entry.
- DenseMap<const Record *, const CodeGenRegister *> RegisterDefMap;
public:
MatcherGen(const PatternToMatch &pattern, const CodeGenDAGPatterns &cgp);
@@ -165,12 +161,6 @@ MatcherGen::MatcherGen(const PatternToMatch &pattern,
// If there are types that are manifestly known, infer them.
InferPossibleTypes();
-
- // Populate the map from records to CodeGenRegister entries.
- const CodeGenTarget &CGT = CGP.getTargetInfo();
- const std::vector<CodeGenRegister> &Registers = CGT.getRegisters();
- for (unsigned i = 0, e = Registers.size(); i != e; ++i)
- RegisterDefMap[Registers[i].TheDef] = &Registers[i];
}
/// InferPossibleTypes - As we emit the pattern, we end up generating type
@@ -234,6 +224,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
Record *LeafRec = DI->getDef();
if (// Handle register references. Nothing to do here, they always match.
LeafRec->isSubClassOf("RegisterClass") ||
+ LeafRec->isSubClassOf("RegisterOperand") ||
LeafRec->isSubClassOf("PointerLikeRegClass") ||
LeafRec->isSubClassOf("SubRegIndex") ||
// Placeholder for SRCVALUE nodes. Nothing to do here.
@@ -589,14 +580,16 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
// If this is an explicit register reference, handle it.
if (DefInit *DI = dynamic_cast<DefInit*>(N->getLeafValue())) {
- if (DI->getDef()->isSubClassOf("Register")) {
- AddMatcher(new EmitRegisterMatcher(RegisterDefMap[DI->getDef()],
- N->getType(0)));
+ Record *Def = DI->getDef();
+ if (Def->isSubClassOf("Register")) {
+ const CodeGenRegister *Reg =
+ CGP.getTargetInfo().getRegBank().getReg(Def);
+ AddMatcher(new EmitRegisterMatcher(Reg, N->getType(0)));
ResultOps.push_back(NextRecordedOperandNo++);
return;
}
- if (DI->getDef()->getName() == "zero_reg") {
+ if (Def->getName() == "zero_reg") {
AddMatcher(new EmitRegisterMatcher(0, N->getType(0)));
ResultOps.push_back(NextRecordedOperandNo++);
return;
@@ -604,16 +597,18 @@ void MatcherGen::EmitResultLeafAsOperand(const TreePatternNode *N,
// Handle a reference to a register class. This is used
// in COPY_TO_SUBREG instructions.
- if (DI->getDef()->isSubClassOf("RegisterClass")) {
- std::string Value = getQualifiedName(DI->getDef()) + "RegClassID";
+ if (Def->isSubClassOf("RegisterOperand"))
+ Def = Def->getValueAsDef("RegClass");
+ if (Def->isSubClassOf("RegisterClass")) {
+ std::string Value = getQualifiedName(Def) + "RegClassID";
AddMatcher(new EmitStringIntegerMatcher(Value, MVT::i32));
ResultOps.push_back(NextRecordedOperandNo++);
return;
}
// Handle a subregister index. This is used for INSERT_SUBREG etc.
- if (DI->getDef()->isSubClassOf("SubRegIndex")) {
- std::string Value = getQualifiedName(DI->getDef());
+ if (Def->isSubClassOf("SubRegIndex")) {
+ std::string Value = getQualifiedName(Def);
AddMatcher(new EmitStringIntegerMatcher(Value, MVT::i32));
ResultOps.push_back(NextRecordedOperandNo++);
return;
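[Editor's aside, not part of the patch: several hunks above and below repeat
the same normalization, so a sketch of the idiom may help. A RegisterOperand
record wraps its register class in a "RegClass" field; the helper name
stripRegisterOperand is hypothetical.]

static Record *stripRegisterOperand(Record *OpRec) {
  // Unwrap a RegisterOperand to the RegisterClass record it carries.
  if (OpRec->isSubClassOf("RegisterOperand"))
    OpRec = OpRec->getValueAsDef("RegClass");
  return OpRec; // now safe to test isSubClassOf("RegisterClass")
}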
diff --git a/utils/TableGen/DisassemblerEmitter.cpp b/utils/TableGen/DisassemblerEmitter.cpp
index d68d3b0..07313d1 100644
--- a/utils/TableGen/DisassemblerEmitter.cpp
+++ b/utils/TableGen/DisassemblerEmitter.cpp
@@ -9,6 +9,7 @@
#include "DisassemblerEmitter.h"
#include "CodeGenTarget.h"
+#include "Error.h"
#include "Record.h"
#include "X86DisassemblerTables.h"
#include "X86RecognizableInstr.h"
diff --git a/utils/TableGen/EDEmitter.cpp b/utils/TableGen/EDEmitter.cpp
index daf9617..2f9814a 100644
--- a/utils/TableGen/EDEmitter.cpp
+++ b/utils/TableGen/EDEmitter.cpp
@@ -588,7 +588,11 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
IMM("imm0_31");
IMM("imm0_31_m1");
IMM("nModImm");
+ IMM("imm0_7");
+ IMM("imm0_15");
+ IMM("imm0_255");
IMM("imm0_4095");
+ IMM("imm0_65535");
IMM("jt2block_operand");
IMM("t_imm_s4");
IMM("pclabel");
@@ -652,12 +656,12 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
MISC("t2am_imm8s4_offset", "kOperandTypeThumb2AddrModeImm8s4Offset");
// R, I
MISC("tb_addrmode", "kOperandTypeARMTBAddrMode"); // I
- MISC("t_addrmode_rrs1", "kOperandTypeThumbAddrModeRegS"); // R, R
- MISC("t_addrmode_rrs2", "kOperandTypeThumbAddrModeRegS"); // R, R
- MISC("t_addrmode_rrs4", "kOperandTypeThumbAddrModeRegS"); // R, R
- MISC("t_addrmode_is1", "kOperandTypeThumbAddrModeImmS"); // R, I
- MISC("t_addrmode_is2", "kOperandTypeThumbAddrModeImmS"); // R, I
- MISC("t_addrmode_is4", "kOperandTypeThumbAddrModeImmS"); // R, I
+ MISC("t_addrmode_rrs1", "kOperandTypeThumbAddrModeRegS1"); // R, R
+ MISC("t_addrmode_rrs2", "kOperandTypeThumbAddrModeRegS2"); // R, R
+ MISC("t_addrmode_rrs4", "kOperandTypeThumbAddrModeRegS4"); // R, R
+ MISC("t_addrmode_is1", "kOperandTypeThumbAddrModeImmS1"); // R, I
+ MISC("t_addrmode_is2", "kOperandTypeThumbAddrModeImmS2"); // R, I
+ MISC("t_addrmode_is4", "kOperandTypeThumbAddrModeImmS4"); // R, I
MISC("t_addrmode_rr", "kOperandTypeThumbAddrModeRR"); // R, R
MISC("t_addrmode_sp", "kOperandTypeThumbAddrModeSP"); // R, I
MISC("t_addrmode_pc", "kOperandTypeThumbAddrModePC"); // R, I
@@ -665,14 +669,9 @@ static int ARMFlagFromOpName(LiteralConstantEmitter *type,
return 1;
}
-#undef SOREG
-#undef SOIMM
-#undef PRED
#undef REG
#undef MEM
-#undef LEA
-#undef IMM
-#undef PCR
+#undef MISC
#undef SET
@@ -773,6 +772,11 @@ static void populateInstInfo(CompoundConstantEmitter &infoArray,
for (index = 0; index < numInstructions; ++index) {
const CodeGenInstruction& inst = *numberedInstructions[index];
+ // Pseudo-instructions never need disassembler info emitted for them;
+ // skip them here and only handle real instructions.
+ if (inst.isPseudo)
+ continue;
+
CompoundConstantEmitter *infoStruct = new CompoundConstantEmitter;
infoArray.addEntry(infoStruct);
@@ -868,8 +872,12 @@ static void emitCommonEnums(raw_ostream &o, unsigned int &i) {
operandTypes.addEntry("kOperandTypeARMSPRRegisterList");
operandTypes.addEntry("kOperandTypeARMTBAddrMode");
operandTypes.addEntry("kOperandTypeThumbITMask");
- operandTypes.addEntry("kOperandTypeThumbAddrModeRegS");
- operandTypes.addEntry("kOperandTypeThumbAddrModeImmS");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeImmS1");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeImmS2");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeImmS4");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeRegS1");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeRegS2");
+ operandTypes.addEntry("kOperandTypeThumbAddrModeRegS4");
operandTypes.addEntry("kOperandTypeThumbAddrModeRR");
operandTypes.addEntry("kOperandTypeThumbAddrModeSP");
operandTypes.addEntry("kOperandTypeThumbAddrModePC");
diff --git a/utils/TableGen/Error.cpp b/utils/TableGen/Error.cpp
new file mode 100644
index 0000000..3f6cda8
--- /dev/null
+++ b/utils/TableGen/Error.cpp
@@ -0,0 +1,39 @@
+//===- Error.cpp - tblgen error handling helper routines --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains error handling helper routines to pretty-print diagnostic
+// messages from tblgen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Error.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+SourceMgr SrcMgr;
+
+void PrintError(SMLoc ErrorLoc, const Twine &Msg) {
+ SrcMgr.PrintMessage(ErrorLoc, Msg, "error");
+}
+
+void PrintError(const char *Loc, const Twine &Msg) {
+ SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
+}
+
+void PrintError(const Twine &Msg) {
+ errs() << "error:" << Msg << "\n";
+}
+
+void PrintError(const TGError &Error) {
+ PrintError(Error.getLoc(), Error.getMessage());
+}
+
+} // end namespace llvm
diff --git a/utils/TableGen/Error.h b/utils/TableGen/Error.h
new file mode 100644
index 0000000..b3a0146
--- /dev/null
+++ b/utils/TableGen/Error.h
@@ -0,0 +1,43 @@
+//===- Error.h - tblgen error handling helper routines ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains error handling helper routines to pretty-print diagnostic
+// messages from tblgen.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef ERROR_H
+#define ERROR_H
+
+#include "llvm/Support/SourceMgr.h"
+
+namespace llvm {
+
+class TGError {
+ SMLoc Loc;
+ std::string Message;
+public:
+ TGError(SMLoc loc, const std::string &message) : Loc(loc), Message(message) {}
+
+ SMLoc getLoc() const { return Loc; }
+ const std::string &getMessage() const { return Message; }
+};
+
+void PrintError(SMLoc ErrorLoc, const Twine &Msg);
+void PrintError(const char *Loc, const Twine &Msg);
+void PrintError(const Twine &Msg);
+void PrintError(const TGError &Error);
+
+
+extern SourceMgr SrcMgr;
+
+
+} // end namespace "llvm"
+
+#endif
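[Editor's aside, not part of the patch: a minimal sketch of how a backend
uses the helpers above. The check and the field name "Inst" as used here are
hypothetical; TGError and PrintError are the declarations from Error.h.]

#include "Error.h"
#include "Record.h"

static void checkHasInst(llvm::Record *R) {
  // Backends throw TGError carrying the record's source location...
  if (R->getValue("Inst") == 0)
    throw llvm::TGError(R->getLoc(), "record has no 'Inst' field");
}

// ...and a driver catches it and pretty-prints through the shared SrcMgr:
//   try { checkHasInst(R); }
//   catch (const llvm::TGError &E) { llvm::PrintError(E); }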
diff --git a/utils/TableGen/FastISelEmitter.cpp b/utils/TableGen/FastISelEmitter.cpp
index 78ac556..f54e8df 100644
--- a/utils/TableGen/FastISelEmitter.cpp
+++ b/utils/TableGen/FastISelEmitter.cpp
@@ -18,6 +18,7 @@
//===----------------------------------------------------------------------===//
#include "FastISelEmitter.h"
+#include "Error.h"
#include "Record.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/VectorExtras.h"
@@ -247,10 +248,12 @@ struct OperandsSignature {
// For now, the only other thing we accept is register operands.
const CodeGenRegisterClass *RC = 0;
+ if (OpLeafRec->isSubClassOf("RegisterOperand"))
+ OpLeafRec = OpLeafRec->getValueAsDef("RegClass");
if (OpLeafRec->isSubClassOf("RegisterClass"))
RC = &Target.getRegisterClass(OpLeafRec);
else if (OpLeafRec->isSubClassOf("Register"))
- RC = Target.getRegisterClassForRegister(OpLeafRec);
+ RC = Target.getRegBank().getRegClassForRegister(OpLeafRec);
else
return false;
@@ -406,15 +409,7 @@ static std::string PhyRegForNode(TreePatternNode *Op,
PhysReg += static_cast<StringInit*>(OpLeafRec->getValue(
"Namespace")->getValue())->getValue();
PhysReg += "::";
-
- std::vector<CodeGenRegister> Regs = Target.getRegisters();
- for (unsigned i = 0; i < Regs.size(); ++i) {
- if (Regs[i].TheDef == OpLeafRec) {
- PhysReg += Regs[i].getName();
- break;
- }
- }
-
+ PhysReg += Target.getRegBank().getReg(OpLeafRec)->getName();
return PhysReg;
}
@@ -461,6 +456,8 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
std::string SubRegNo;
if (Op->getName() != "EXTRACT_SUBREG") {
Record *Op0Rec = II.Operands[0].Rec;
+ if (Op0Rec->isSubClassOf("RegisterOperand"))
+ Op0Rec = Op0Rec->getValueAsDef("RegClass");
if (!Op0Rec->isSubClassOf("RegisterClass"))
continue;
DstRC = &Target.getRegisterClass(Op0Rec);
diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp
index 9312fe8..c9dcb01 100644
--- a/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -1225,14 +1225,14 @@ bool FixedLenDecoderEmitter::populateInstruction(const CodeGenInstruction &CGI,
//
// This also removes pseudo instructions from considerations of disassembly,
// which is a better design and less fragile than name matching.
- BitsInit &Bits = getBitsField(Def, "Inst");
- if (Bits.allInComplete()) return false;
-
// Ignore "asm parser only" instructions.
if (Def.getValueAsBit("isAsmParserOnly") ||
Def.getValueAsBit("isCodeGenOnly"))
return false;
+ BitsInit &Bits = getBitsField(Def, "Inst");
+ if (Bits.allInComplete()) return false;
+
std::vector<OperandInfo> InsnOperands;
// If the instruction has specified a custom decoding hook, use that instead
@@ -1305,8 +1305,10 @@ bool FixedLenDecoderEmitter::populateInstruction(const CodeGenInstruction &CGI,
RecordRecTy *Type = dynamic_cast<RecordRecTy*>(TI->getType());
Record *TypeRecord = Type->getRecord();
bool isReg = false;
+ if (TypeRecord->isSubClassOf("RegisterOperand"))
+ TypeRecord = TypeRecord->getValueAsDef("RegClass");
if (TypeRecord->isSubClassOf("RegisterClass")) {
- Decoder = "Decode" + Type->getRecord()->getName() + "RegisterClass";
+ Decoder = "Decode" + TypeRecord->getName() + "RegisterClass";
isReg = true;
}
@@ -1352,7 +1354,8 @@ bool FixedLenDecoderEmitter::populateInstruction(const CodeGenInstruction &CGI,
void FixedLenDecoderEmitter::populateInstructions() {
for (unsigned i = 0, e = NumberedInstructions.size(); i < e; ++i) {
Record *R = NumberedInstructions[i]->TheDef;
- if (R->getValueAsString("Namespace") == "TargetOpcode")
+ if (R->getValueAsString("Namespace") == "TargetOpcode" ||
+ R->getValueAsBit("isPseudo"))
continue;
if (populateInstruction(*NumberedInstructions[i], i))
diff --git a/utils/TableGen/InstrInfoEmitter.cpp b/utils/TableGen/InstrInfoEmitter.cpp
index 67cce0e..5ebaf17 100644
--- a/utils/TableGen/InstrInfoEmitter.cpp
+++ b/utils/TableGen/InstrInfoEmitter.cpp
@@ -27,14 +27,6 @@ static void PrintDefList(const std::vector<Record*> &Uses,
OS << "0 };\n";
}
-static void PrintBarriers(std::vector<Record*> &Barriers,
- unsigned Num, raw_ostream &OS) {
- OS << "static const TargetRegisterClass* Barriers" << Num << "[] = { ";
- for (unsigned i = 0, e = Barriers.size(); i != e; ++i)
- OS << "&" << getQualifiedName(Barriers[i]) << "RegClass, ";
- OS << "NULL };\n";
-}
-
//===----------------------------------------------------------------------===//
// Instruction Itinerary Information.
//===----------------------------------------------------------------------===//
@@ -43,10 +35,10 @@ void InstrInfoEmitter::GatherItinClasses() {
std::vector<Record*> DefList =
Records.getAllDerivedDefinitions("InstrItinClass");
std::sort(DefList.begin(), DefList.end(), LessRecord());
-
+
for (unsigned i = 0, N = DefList.size(); i < N; i++)
ItinClassMap[DefList[i]->getName()] = i;
-}
+}
unsigned InstrInfoEmitter::getItinClassNumber(const Record *InstRec) {
return ItinClassMap[InstRec->getValueAsDef("Itinerary")->getName()];
@@ -59,7 +51,7 @@ unsigned InstrInfoEmitter::getItinClassNumber(const Record *InstRec) {
std::vector<std::string>
InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
std::vector<std::string> Result;
-
+
for (unsigned i = 0, e = Inst.Operands.size(); i != e; ++i) {
// Handle aggregate operands and normal operands the same way by expanding
// either case into a list of operands for this op.
@@ -70,7 +62,7 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
// operand, which has a single operand, but no declared class for the
// operand.
DagInit *MIOI = Inst.Operands[i].MIOperandInfo;
-
+
if (!MIOI || MIOI->getNumArgs() == 0) {
// Single, anonymous, operand.
OperandList.push_back(Inst.Operands[i]);
@@ -86,7 +78,9 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
for (unsigned j = 0, e = OperandList.size(); j != e; ++j) {
Record *OpR = OperandList[j].Rec;
std::string Res;
-
+
+ if (OpR->isSubClassOf("RegisterOperand"))
+ OpR = OpR->getValueAsDef("RegClass");
if (OpR->isSubClassOf("RegisterClass"))
Res += getQualifiedName(OpR) + "RegClassID, ";
else if (OpR->isSubClassOf("PointerLikeRegClass"))
@@ -94,39 +88,44 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
else
// -1 means the operand does not have a fixed register class.
Res += "-1, ";
-
+
// Fill in applicable flags.
Res += "0";
-
+
// Ptr value whose register class is resolved via callback.
if (OpR->isSubClassOf("PointerLikeRegClass"))
- Res += "|(1<<TOI::LookupPtrRegClass)";
+ Res += "|(1<<MCOI::LookupPtrRegClass)";
// Predicate operands. Check to see if the original unexpanded operand
// was of type PredicateOperand.
if (Inst.Operands[i].Rec->isSubClassOf("PredicateOperand"))
- Res += "|(1<<TOI::Predicate)";
-
+ Res += "|(1<<MCOI::Predicate)";
+
// Optional def operands. Check to see if the original unexpanded operand
// was of type OptionalDefOperand.
if (Inst.Operands[i].Rec->isSubClassOf("OptionalDefOperand"))
- Res += "|(1<<TOI::OptionalDef)";
+ Res += "|(1<<MCOI::OptionalDef)";
// Fill in constraint info.
Res += ", ";
-
+
const CGIOperandList::ConstraintInfo &Constraint =
Inst.Operands[i].Constraints[j];
if (Constraint.isNone())
Res += "0";
else if (Constraint.isEarlyClobber())
- Res += "(1 << TOI::EARLY_CLOBBER)";
+ Res += "(1 << MCOI::EARLY_CLOBBER)";
else {
assert(Constraint.isTied());
Res += "((" + utostr(Constraint.getTiedOperand()) +
- " << 16) | (1 << TOI::TIED_TO))";
+ " << 16) | (1 << MCOI::TIED_TO))";
}
-
+
+ // Fill in operand type.
+ Res += ", MCOI::";
+ assert(!Inst.Operands[i].OperandType.empty() && "Invalid operand type.");
+ Res += Inst.Operands[i].OperandType;
+
Result.push_back(Res);
}
}
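[Editor's aside, not part of the patch: a worked example of the tied-operand
packing emitted above. The flag bit position is whatever MCOI defines in
MCInstrDesc.h; the helper name encodeTiedTo is hypothetical.]

// For an operand tied to operand 2, the emitted expression is
//   ((2 << 16) | (1 << MCOI::TIED_TO))
// i.e. the tied operand index in the high half-word, the flag bit low.
static unsigned encodeTiedTo(unsigned TiedOpIdx, unsigned TiedToBit) {
  return (TiedOpIdx << 16) | (1u << TiedToBit);
}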
@@ -134,12 +133,12 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
return Result;
}
-void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
+void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
OperandInfoMapTy &OperandInfoIDs) {
// ID #0 is for no operand info.
unsigned OperandListNum = 0;
OperandInfoIDs[std::vector<std::string>()] = ++OperandListNum;
-
+
OS << "\n";
const CodeGenTarget &Target = CDP.getTargetInfo();
for (CodeGenTarget::inst_iterator II = Target.inst_begin(),
@@ -147,65 +146,40 @@ void InstrInfoEmitter::EmitOperandInfo(raw_ostream &OS,
std::vector<std::string> OperandInfo = GetOperandInfo(**II);
unsigned &N = OperandInfoIDs[OperandInfo];
if (N != 0) continue;
-
+
N = ++OperandListNum;
- OS << "static const TargetOperandInfo OperandInfo" << N << "[] = { ";
+ OS << "static const MCOperandInfo OperandInfo" << N << "[] = { ";
for (unsigned i = 0, e = OperandInfo.size(); i != e; ++i)
OS << "{ " << OperandInfo[i] << " }, ";
OS << "};\n";
}
}
-void InstrInfoEmitter::DetectRegisterClassBarriers(std::vector<Record*> &Defs,
- const std::vector<CodeGenRegisterClass> &RCs,
- std::vector<Record*> &Barriers) {
- std::set<Record*> DefSet;
- unsigned NumDefs = Defs.size();
- for (unsigned i = 0; i < NumDefs; ++i)
- DefSet.insert(Defs[i]);
-
- for (unsigned i = 0, e = RCs.size(); i != e; ++i) {
- const CodeGenRegisterClass &RC = RCs[i];
- unsigned NumRegs = RC.Elements.size();
- if (NumRegs > NumDefs)
- continue; // Can't possibly clobber this RC.
-
- bool Clobber = true;
- for (unsigned j = 0; j < NumRegs; ++j) {
- Record *Reg = RC.Elements[j];
- if (!DefSet.count(Reg)) {
- Clobber = false;
- break;
- }
- }
- if (Clobber)
- Barriers.push_back(RC.TheDef);
- }
-}
-
//===----------------------------------------------------------------------===//
// Main Output.
//===----------------------------------------------------------------------===//
// run - Emit the main instruction description records for the target...
void InstrInfoEmitter::run(raw_ostream &OS) {
+ emitEnums(OS);
+
GatherItinClasses();
EmitSourceFileHeader("Target Instruction Descriptors", OS);
+
+ OS << "\n#ifdef GET_INSTRINFO_MC_DESC\n";
+ OS << "#undef GET_INSTRINFO_MC_DESC\n";
+
OS << "namespace llvm {\n\n";
CodeGenTarget &Target = CDP.getTargetInfo();
const std::string &TargetName = Target.getName();
Record *InstrInfo = Target.getInstructionSet();
- const std::vector<CodeGenRegisterClass> &RCs = Target.getRegisterClasses();
// Keep track of all of the def lists we have emitted already.
std::map<std::vector<Record*>, unsigned> EmittedLists;
unsigned ListNumber = 0;
- std::map<std::vector<Record*>, unsigned> EmittedBarriers;
- unsigned BarrierNumber = 0;
- std::map<Record*, unsigned> BarriersMap;
-
+
// Emit all of the instructions' implicit uses and defs.
for (CodeGenTarget::inst_iterator II = Target.inst_begin(),
E = Target.inst_end(); II != E; ++II) {
@@ -217,42 +191,67 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
}
std::vector<Record*> Defs = Inst->getValueAsListOfDefs("Defs");
if (!Defs.empty()) {
- std::vector<Record*> RCBarriers;
- DetectRegisterClassBarriers(Defs, RCs, RCBarriers);
- if (!RCBarriers.empty()) {
- unsigned &IB = EmittedBarriers[RCBarriers];
- if (!IB) PrintBarriers(RCBarriers, IB = ++BarrierNumber, OS);
- BarriersMap.insert(std::make_pair(Inst, IB));
- }
-
unsigned &IL = EmittedLists[Defs];
if (!IL) PrintDefList(Defs, IL = ++ListNumber, OS);
}
}
OperandInfoMapTy OperandInfoIDs;
-
+
// Emit all of the operand info records.
EmitOperandInfo(OS, OperandInfoIDs);
-
- // Emit all of the TargetInstrDesc records in their ENUM ordering.
+
+ // Emit all of the MCInstrDesc records in their ENUM ordering.
//
- OS << "\nstatic const TargetInstrDesc " << TargetName
- << "Insts[] = {\n";
+ OS << "\nMCInstrDesc " << TargetName << "Insts[] = {\n";
const std::vector<const CodeGenInstruction*> &NumberedInstructions =
Target.getInstructionsByEnumValue();
for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i)
emitRecord(*NumberedInstructions[i], i, InstrInfo, EmittedLists,
- BarriersMap, OperandInfoIDs, OS);
- OS << "};\n";
+ OperandInfoIDs, OS);
+ OS << "};\n\n";
+
+ // MCInstrInfo initialization routine.
+ OS << "static inline void Init" << TargetName
+ << "MCInstrInfo(MCInstrInfo *II) {\n";
+ OS << " II->InitMCInstrInfo(" << TargetName << "Insts, "
+ << NumberedInstructions.size() << ");\n}\n\n";
+
+ OS << "} // End llvm namespace \n";
+
+ OS << "#endif // GET_INSTRINFO_MC_DESC\n\n";
+
+ // Create a TargetInstrInfo subclass to hide the MC layer initialization.
+ OS << "\n#ifdef GET_INSTRINFO_HEADER\n";
+ OS << "#undef GET_INSTRINFO_HEADER\n";
+
+ std::string ClassName = TargetName + "GenInstrInfo";
+ OS << "namespace llvm {\n";
+ OS << "struct " << ClassName << " : public TargetInstrInfoImpl {\n"
+ << " explicit " << ClassName << "(int SO = -1, int DO = -1);\n"
+ << "};\n";
+ OS << "} // End llvm namespace \n";
+
+ OS << "#endif // GET_INSTRINFO_HEADER\n\n";
+
+ OS << "\n#ifdef GET_INSTRINFO_CTOR\n";
+ OS << "#undef GET_INSTRINFO_CTOR\n";
+
+ OS << "namespace llvm {\n";
+ OS << "extern MCInstrDesc " << TargetName << "Insts[];\n";
+ OS << ClassName << "::" << ClassName << "(int SO, int DO)\n"
+ << " : TargetInstrInfoImpl(SO, DO) {\n"
+ << " InitMCInstrInfo(" << TargetName << "Insts, "
+ << NumberedInstructions.size() << ");\n}\n";
OS << "} // End llvm namespace \n";
+
+ OS << "#endif // GET_INSTRINFO_CTOR\n\n";
}
void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
Record *InstrInfo,
std::map<std::vector<Record*>, unsigned> &EmittedLists,
- std::map<Record*, unsigned> &BarriersMap,
const OperandInfoMapTy &OpInfo,
raw_ostream &OS) {
int MinOperands = 0;
@@ -263,35 +262,37 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
OS << " { ";
OS << Num << ",\t" << MinOperands << ",\t"
- << Inst.Operands.NumDefs << ",\t" << getItinClassNumber(Inst.TheDef)
- << ",\t\"" << Inst.TheDef->getName() << "\", 0";
+ << Inst.Operands.NumDefs << ",\t"
+ << getItinClassNumber(Inst.TheDef) << ",\t"
+ << Inst.TheDef->getValueAsInt("Size") << ",\t\""
+ << Inst.TheDef->getName() << "\", 0";
// Emit all of the target-independent flags...
- if (Inst.isReturn) OS << "|(1<<TID::Return)";
- if (Inst.isBranch) OS << "|(1<<TID::Branch)";
- if (Inst.isIndirectBranch) OS << "|(1<<TID::IndirectBranch)";
- if (Inst.isCompare) OS << "|(1<<TID::Compare)";
- if (Inst.isMoveImm) OS << "|(1<<TID::MoveImm)";
- if (Inst.isBitcast) OS << "|(1<<TID::Bitcast)";
- if (Inst.isBarrier) OS << "|(1<<TID::Barrier)";
- if (Inst.hasDelaySlot) OS << "|(1<<TID::DelaySlot)";
- if (Inst.isCall) OS << "|(1<<TID::Call)";
- if (Inst.canFoldAsLoad) OS << "|(1<<TID::FoldableAsLoad)";
- if (Inst.mayLoad) OS << "|(1<<TID::MayLoad)";
- if (Inst.mayStore) OS << "|(1<<TID::MayStore)";
- if (Inst.isPredicable) OS << "|(1<<TID::Predicable)";
- if (Inst.isConvertibleToThreeAddress) OS << "|(1<<TID::ConvertibleTo3Addr)";
- if (Inst.isCommutable) OS << "|(1<<TID::Commutable)";
- if (Inst.isTerminator) OS << "|(1<<TID::Terminator)";
- if (Inst.isReMaterializable) OS << "|(1<<TID::Rematerializable)";
- if (Inst.isNotDuplicable) OS << "|(1<<TID::NotDuplicable)";
- if (Inst.Operands.hasOptionalDef) OS << "|(1<<TID::HasOptionalDef)";
- if (Inst.usesCustomInserter) OS << "|(1<<TID::UsesCustomInserter)";
- if (Inst.Operands.isVariadic)OS << "|(1<<TID::Variadic)";
- if (Inst.hasSideEffects) OS << "|(1<<TID::UnmodeledSideEffects)";
- if (Inst.isAsCheapAsAMove) OS << "|(1<<TID::CheapAsAMove)";
- if (Inst.hasExtraSrcRegAllocReq) OS << "|(1<<TID::ExtraSrcRegAllocReq)";
- if (Inst.hasExtraDefRegAllocReq) OS << "|(1<<TID::ExtraDefRegAllocReq)";
+ if (Inst.isReturn) OS << "|(1<<MCID::Return)";
+ if (Inst.isBranch) OS << "|(1<<MCID::Branch)";
+ if (Inst.isIndirectBranch) OS << "|(1<<MCID::IndirectBranch)";
+ if (Inst.isCompare) OS << "|(1<<MCID::Compare)";
+ if (Inst.isMoveImm) OS << "|(1<<MCID::MoveImm)";
+ if (Inst.isBitcast) OS << "|(1<<MCID::Bitcast)";
+ if (Inst.isBarrier) OS << "|(1<<MCID::Barrier)";
+ if (Inst.hasDelaySlot) OS << "|(1<<MCID::DelaySlot)";
+ if (Inst.isCall) OS << "|(1<<MCID::Call)";
+ if (Inst.canFoldAsLoad) OS << "|(1<<MCID::FoldableAsLoad)";
+ if (Inst.mayLoad) OS << "|(1<<MCID::MayLoad)";
+ if (Inst.mayStore) OS << "|(1<<MCID::MayStore)";
+ if (Inst.isPredicable) OS << "|(1<<MCID::Predicable)";
+ if (Inst.isConvertibleToThreeAddress) OS << "|(1<<MCID::ConvertibleTo3Addr)";
+ if (Inst.isCommutable) OS << "|(1<<MCID::Commutable)";
+ if (Inst.isTerminator) OS << "|(1<<MCID::Terminator)";
+ if (Inst.isReMaterializable) OS << "|(1<<MCID::Rematerializable)";
+ if (Inst.isNotDuplicable) OS << "|(1<<MCID::NotDuplicable)";
+ if (Inst.Operands.hasOptionalDef) OS << "|(1<<MCID::HasOptionalDef)";
+ if (Inst.usesCustomInserter) OS << "|(1<<MCID::UsesCustomInserter)";
+ if (Inst.Operands.isVariadic)OS << "|(1<<MCID::Variadic)";
+ if (Inst.hasSideEffects) OS << "|(1<<MCID::UnmodeledSideEffects)";
+ if (Inst.isAsCheapAsAMove) OS << "|(1<<MCID::CheapAsAMove)";
+ if (Inst.hasExtraSrcRegAllocReq) OS << "|(1<<MCID::ExtraSrcRegAllocReq)";
+ if (Inst.hasExtraDefRegAllocReq) OS << "|(1<<MCID::ExtraDefRegAllocReq)";
// Emit all of the target-specific flags...
BitsInit *TSF = Inst.TheDef->getValueAsBitsInit("TSFlags");
@@ -320,12 +321,6 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
else
OS << "ImplicitList" << EmittedLists[DefList] << ", ";
- std::map<Record*, unsigned>::iterator BI = BarriersMap.find(Inst.TheDef);
- if (BI == BarriersMap.end())
- OS << "NULL, ";
- else
- OS << "Barriers" << BI->second << ", ";
-
// Emit the operand info.
std::vector<std::string> OperandInfo = GetOperandInfo(Inst);
if (OperandInfo.empty())
@@ -335,3 +330,38 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
OS << " }, // Inst #" << Num << " = " << Inst.TheDef->getName() << "\n";
}
+
+// emitEnums - Print out enum values for all of the instructions.
+void InstrInfoEmitter::emitEnums(raw_ostream &OS) {
+ EmitSourceFileHeader("Target Instruction Enum Values", OS);
+
+ OS << "\n#ifdef GET_INSTRINFO_ENUM\n";
+ OS << "#undef GET_INSTRINFO_ENUM\n";
+
+ OS << "namespace llvm {\n\n";
+
+ CodeGenTarget Target(Records);
+
+ // We must emit the PHI opcode first...
+ std::string Namespace = Target.getInstNamespace();
+
+ if (Namespace.empty()) {
+ fprintf(stderr, "No instructions defined!\n");
+ exit(1);
+ }
+
+ const std::vector<const CodeGenInstruction*> &NumberedInstructions =
+ Target.getInstructionsByEnumValue();
+
+ OS << "namespace " << Namespace << " {\n";
+ OS << " enum {\n";
+ for (unsigned i = 0, e = NumberedInstructions.size(); i != e; ++i) {
+ OS << " " << NumberedInstructions[i]->TheDef->getName()
+ << "\t= " << i << ",\n";
+ }
+ OS << " INSTRUCTION_LIST_END = " << NumberedInstructions.size() << "\n";
+ OS << " };\n}\n";
+ OS << "} // End llvm namespace \n";
+
+ OS << "#endif // GET_INSTRINFO_ENUM\n\n";
+}
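[Editor's aside, not part of the patch: how a target consumes the guarded
sections that run() and emitEnums() now emit. "XYZ" and the .inc file name
are hypothetical; the GET_INSTRINFO_* macros are the guards emitted above.]

// In the target's headers/sources, each section is pulled in on demand:
#define GET_INSTRINFO_ENUM
#include "XYZGenInstrInfo.inc"      // opcode enum only

#define GET_INSTRINFO_HEADER
#include "XYZGenInstrInfo.inc"      // XYZGenInstrInfo class declaration

#define GET_INSTRINFO_CTOR
#include "XYZGenInstrInfo.inc"      // constructor + MCInstrDesc table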
diff --git a/utils/TableGen/InstrInfoEmitter.h b/utils/TableGen/InstrInfoEmitter.h
index abb1c6b..165ce42 100644
--- a/utils/TableGen/InstrInfoEmitter.h
+++ b/utils/TableGen/InstrInfoEmitter.h
@@ -39,12 +39,12 @@ public:
void run(raw_ostream &OS);
private:
- typedef std::map<std::vector<std::string>, unsigned> OperandInfoMapTy;
-
+ void emitEnums(raw_ostream &OS);
+
+ typedef std::map<std::vector<std::string>, unsigned> OperandInfoMapTy;
void emitRecord(const CodeGenInstruction &Inst, unsigned Num,
Record *InstrInfo,
std::map<std::vector<Record*>, unsigned> &EL,
- std::map<Record*, unsigned> &BM,
const OperandInfoMapTy &OpInfo,
raw_ostream &OS);
@@ -55,10 +55,6 @@ private:
// Operand information.
void EmitOperandInfo(raw_ostream &OS, OperandInfoMapTy &OperandInfoIDs);
std::vector<std::string> GetOperandInfo(const CodeGenInstruction &Inst);
-
- void DetectRegisterClassBarriers(std::vector<Record*> &Defs,
- const std::vector<CodeGenRegisterClass> &RCs,
- std::vector<Record*> &Barriers);
};
} // End llvm namespace
diff --git a/utils/TableGen/IntrinsicEmitter.cpp b/utils/TableGen/IntrinsicEmitter.cpp
index 39eb3bd..e5e7cea 100644
--- a/utils/TableGen/IntrinsicEmitter.cpp
+++ b/utils/TableGen/IntrinsicEmitter.cpp
@@ -214,7 +214,7 @@ static void EmitTypeGenerate(raw_ostream &OS,
if (ArgTypes.size() == 1)
return EmitTypeGenerate(OS, ArgTypes.front(), ArgNo);
- OS << "StructType::get(Context, ";
+ OS << "StructType::get(";
for (std::vector<Record*>::const_iterator
I = ArgTypes.begin(), E = ArgTypes.end(); I != E; ++I) {
@@ -259,7 +259,7 @@ static void EmitTypeGenerate(raw_ostream &OS, const Record *ArgType,
} else if (VT == MVT::iPTRAny) {
// Make sure the user has passed us an argument type to overload. If not,
// treat it as an ordinary (not overloaded) intrinsic.
- OS << "(" << ArgNo << " < numTys) ? Tys[" << ArgNo
+ OS << "(" << ArgNo << " < Tys.size()) ? Tys[" << ArgNo
<< "] : PointerType::getUnqual(";
EmitTypeGenerate(OS, ArgType->getValueAsDef("ElTy"), ArgNo);
OS << ")";
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index 090faf5..cd0cbeb 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -2969,8 +2969,8 @@ void EmitHookDeclarations(const ToolDescriptions& ToolDescs,
for (HookInfoMap::const_iterator B = HookNames.begin(),
E = HookNames.end(); B != E; ++B) {
- const char* HookName = B->first();
- const HookInfo& Info = B->second;
+ StringRef HookName = B->first();
+ const HookInfo &Info = B->second;
O.indent(Indent1) << "std::string " << HookName << "(";
diff --git a/utils/TableGen/NeonEmitter.cpp b/utils/TableGen/NeonEmitter.cpp
index 23fdbde..ef977443 100644
--- a/utils/TableGen/NeonEmitter.cpp
+++ b/utils/TableGen/NeonEmitter.cpp
@@ -24,6 +24,7 @@
//===----------------------------------------------------------------------===//
#include "NeonEmitter.h"
+#include "Error.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
diff --git a/utils/TableGen/PseudoLoweringEmitter.cpp b/utils/TableGen/PseudoLoweringEmitter.cpp
new file mode 100644
index 0000000..db33c1f
--- /dev/null
+++ b/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -0,0 +1,243 @@
+//===- PseudoLoweringEmitter.cpp - PseudoLowering Generator -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "pseudo-lowering"
+#include "Error.h"
+#include "CodeGenInstruction.h"
+#include "PseudoLoweringEmitter.h"
+#include "Record.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
+#include <vector>
+using namespace llvm;
+
+// FIXME: This pass currently can only expand a pseudo to a single instruction.
+// The pseudo expansion really should take a list of dags, not just
+// a single dag, so we can do fancier things.
+
+unsigned PseudoLoweringEmitter::
+addDagOperandMapping(Record *Rec, DagInit *Dag, CodeGenInstruction &Insn,
+ IndexedMap<OpData> &OperandMap, unsigned BaseIdx) {
+ unsigned OpsAdded = 0;
+ for (unsigned i = 0, e = Dag->getNumArgs(); i != e; ++i) {
+ if (DefInit *DI = dynamic_cast<DefInit*>(Dag->getArg(i))) {
+ // Physical register reference. Explicit check for the special case
+ // "zero_reg" definition.
+ if (DI->getDef()->isSubClassOf("Register") ||
+ DI->getDef()->getName() == "zero_reg") {
+ OperandMap[BaseIdx + i].Kind = OpData::Reg;
+ OperandMap[BaseIdx + i].Data.Reg = DI->getDef();
+ ++OpsAdded;
+ continue;
+ }
+
+ // Normal operands should always have the same type, or we have a
+ // problem.
+ // FIXME: We probably shouldn't ever get a non-zero BaseIdx here.
+ assert(BaseIdx == 0 && "Named subargument in pseudo expansion?!");
+ if (DI->getDef() != Insn.Operands[BaseIdx + i].Rec)
+ throw TGError(Rec->getLoc(),
+ "Pseudo operand type '" + DI->getDef()->getName() +
+ "' does not match expansion operand type '" +
+ Insn.Operands[BaseIdx + i].Rec->getName() + "'");
+ // Source operand maps to destination operand. The Data element
+ // will be filled in later, just set the Kind for now. Do it
+ // for each corresponding MachineInstr operand, not just the first.
+ for (unsigned I = 0, E = Insn.Operands[i].MINumOperands; I != E; ++I)
+ OperandMap[BaseIdx + i + I].Kind = OpData::Operand;
+ OpsAdded += Insn.Operands[i].MINumOperands;
+ } else if (IntInit *II = dynamic_cast<IntInit*>(Dag->getArg(i))) {
+ OperandMap[BaseIdx + i].Kind = OpData::Imm;
+ OperandMap[BaseIdx + i].Data.Imm = II->getValue();
+ ++OpsAdded;
+ } else if (DagInit *SubDag = dynamic_cast<DagInit*>(Dag->getArg(i))) {
+ // Just add the operands recursively. This is almost certainly
+ // a constant value for a complex operand (> 1 MI operand).
+ unsigned NewOps =
+ addDagOperandMapping(Rec, SubDag, Insn, OperandMap, BaseIdx + i);
+ OpsAdded += NewOps;
+ // Since we added more than one, we also need to adjust the base.
+ BaseIdx += NewOps - 1;
+ } else
+ assert(0 && "Unhandled pseudo-expansion argument type!");
+ }
+ return OpsAdded;
+}
+
+void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
+ DEBUG(dbgs() << "Pseudo definition: " << Rec->getName() << "\n");
+
+ // Validate that the result pattern has the correct number and types
+ // of arguments for the instruction it references.
+ DagInit *Dag = Rec->getValueAsDag("ResultInst");
+ assert(Dag && "Missing result instruction in pseudo expansion!");
+ DEBUG(dbgs() << " Result: " << *Dag << "\n");
+
+ DefInit *OpDef = dynamic_cast<DefInit*>(Dag->getOperator());
+ if (!OpDef)
+ throw TGError(Rec->getLoc(), Rec->getName() +
+ " has unexpected operator type!");
+ Record *Operator = OpDef->getDef();
+ if (!Operator->isSubClassOf("Instruction"))
+ throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
+ "' is not an instruction!");
+
+ CodeGenInstruction Insn(Operator);
+
+ if (Insn.isCodeGenOnly || Insn.isPseudo)
+ throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
+ "' cannot be another pseudo instruction!");
+
+ if (Insn.Operands.size() != Dag->getNumArgs())
+ throw TGError(Rec->getLoc(), "Pseudo result '" + Operator->getName() +
+ "' operand count mismatch");
+
+ IndexedMap<OpData> OperandMap;
+ OperandMap.grow(Insn.Operands.size());
+
+ addDagOperandMapping(Rec, Dag, Insn, OperandMap, 0);
+
+ // If there are more operands that weren't in the DAG, they have to
+ // be operands that have default values, or we have an error. Currently,
+ // PredicateOperand and OptionalDefOperand both have default values.
+
+
+ // argument in the source instruction, in either the (outs) or (ins) list.
+ // Also check that the type of the arguments match.
+ //
+ // Record the mapping of the source to result arguments for use by
+ // the lowering emitter.
+ CodeGenInstruction SourceInsn(Rec);
+ StringMap<unsigned> SourceOperands;
+ for (unsigned i = 0, e = SourceInsn.Operands.size(); i != e; ++i)
+ SourceOperands[SourceInsn.Operands[i].Name] = i;
+
+ DEBUG(dbgs() << " Operand mapping:\n");
+ for (unsigned i = 0, e = Insn.Operands.size(); i != e; ++i) {
+ // We've already handled constant values. Just map instruction operands
+ // here.
+ if (OperandMap[Insn.Operands[i].MIOperandNo].Kind != OpData::Operand)
+ continue;
+ StringMap<unsigned>::iterator SourceOp =
+ SourceOperands.find(Dag->getArgName(i));
+ if (SourceOp == SourceOperands.end())
+ throw TGError(Rec->getLoc(),
+ "Pseudo output operand '" + Dag->getArgName(i) +
+ "' has no matching source operand.");
+ // Map the source operand to the destination operand index for each
+ // MachineInstr operand.
+ for (unsigned I = 0, E = Insn.Operands[i].MINumOperands; I != E; ++I)
+ OperandMap[Insn.Operands[i].MIOperandNo + I].Data.Operand =
+ SourceOp->getValue();
+
+ DEBUG(dbgs() << " " << SourceOp->getValue() << " ==> " << i << "\n");
+ }
+
+ Expansions.push_back(PseudoExpansion(SourceInsn, Insn, OperandMap));
+}
+
+void PseudoLoweringEmitter::emitLoweringEmitter(raw_ostream &o) {
+ // Emit file header.
+ EmitSourceFileHeader("Pseudo-instruction MC lowering Source Fragment", o);
+
+ o << "bool " << Target.getName() + "AsmPrinter" << "::\n"
+ << "emitPseudoExpansionLowering(MCStreamer &OutStreamer,\n"
+ << " const MachineInstr *MI) {\n"
+ << " switch (MI->getOpcode()) {\n"
+ << " default: return false;\n";
+ for (unsigned i = 0, e = Expansions.size(); i != e; ++i) {
+ PseudoExpansion &Expansion = Expansions[i];
+ CodeGenInstruction &Source = Expansion.Source;
+ CodeGenInstruction &Dest = Expansion.Dest;
+ o << " case " << Source.Namespace << "::"
+ << Source.TheDef->getName() << ": {\n"
+ << " MCInst TmpInst;\n"
+ << " MCOperand MCOp;\n"
+ << " TmpInst.setOpcode(" << Dest.Namespace << "::"
+ << Dest.TheDef->getName() << ");\n";
+
+ // Copy the operands from the source instruction.
+ // FIXME: Instruction operands with default values (predicates and cc_out
+ // in ARM, for example) shouldn't need explicit values in the
+ // expansion DAG.
+ unsigned MIOpNo = 0;
+ for (unsigned OpNo = 0, E = Dest.Operands.size(); OpNo != E;
+ ++OpNo) {
+ o << " // Operand: " << Dest.Operands[OpNo].Name << "\n";
+ for (unsigned i = 0, e = Dest.Operands[OpNo].MINumOperands;
+ i != e; ++i) {
+ switch (Expansion.OperandMap[MIOpNo + i].Kind) {
+ default:
+ llvm_unreachable("Unknown operand type?!");
+ case OpData::Operand:
+ o << " lowerOperand(MI->getOperand("
+ << Source.Operands[Expansion.OperandMap[MIOpNo].Data
+ .Operand].MIOperandNo + i
+ << "), MCOp);\n"
+ << " TmpInst.addOperand(MCOp);\n";
+ break;
+ case OpData::Imm:
+ o << " TmpInst.addOperand(MCOperand::CreateImm("
+ << Expansion.OperandMap[MIOpNo + i].Data.Imm << "));\n";
+ break;
+ case OpData::Reg: {
+ Record *Reg = Expansion.OperandMap[MIOpNo + i].Data.Reg;
+ o << " TmpInst.addOperand(MCOperand::CreateReg(";
+ // "zero_reg" is special.
+ if (Reg->getName() == "zero_reg")
+ o << "0";
+ else
+ o << Reg->getValueAsString("Namespace") << "::" << Reg->getName();
+ o << "));\n";
+ break;
+ }
+ }
+ }
+ MIOpNo += Dest.Operands[OpNo].MINumOperands;
+ }
+ if (Dest.Operands.isVariadic) {
+ o << " // variable_ops\n";
+ o << " for (unsigned i = " << MIOpNo
+ << ", e = MI->getNumOperands(); i != e; ++i)\n"
+ << " if (lowerOperand(MI->getOperand(i), MCOp))\n"
+ << " TmpInst.addOperand(MCOp);\n";
+ }
+ o << " OutStreamer.EmitInstruction(TmpInst);\n"
+ << " break;\n"
+ << " }\n";
+ }
+ o << " }\n return true;\n}\n\n";
+}
+
+void PseudoLoweringEmitter::run(raw_ostream &o) {
+ Record *ExpansionClass = Records.getClass("PseudoInstExpansion");
+ Record *InstructionClass = Records.getClass("Instruction");
+ assert(ExpansionClass && "PseudoInstExpansion class definition missing!");
+ assert(InstructionClass && "Instruction class definition missing!");
+
+ std::vector<Record*> Insts;
+ for (std::map<std::string, Record*>::const_iterator I =
+ Records.getDefs().begin(), E = Records.getDefs().end(); I != E; ++I) {
+ if (I->second->isSubClassOf(ExpansionClass) &&
+ I->second->isSubClassOf(InstructionClass))
+ Insts.push_back(I->second);
+ }
+
+ // Process the pseudo expansion definitions, validating them as we do so.
+ for (unsigned i = 0, e = Insts.size(); i != e; ++i)
+ evaluateExpansion(Insts[i]);
+
+ // Generate expansion code to lower the pseudo to an MCInst of the real
+ // instruction.
+ emitLoweringEmitter(o);
+}
+
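[Editor's aside, not part of the patch: the shape of the code this backend
generates for one expansion, reconstructed from the strings emitted in
emitLoweringEmitter(). The target "XYZ" and the opcodes are hypothetical.]

bool XYZAsmPrinter::emitPseudoExpansionLowering(MCStreamer &OutStreamer,
                                                const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return false;
  case XYZ::RET_pseudo: {
    MCInst TmpInst;
    MCOperand MCOp;
    TmpInst.setOpcode(XYZ::BX);
    // Fixed-register operand taken from the expansion DAG:
    TmpInst.addOperand(MCOperand::CreateReg(XYZ::LR));
    OutStreamer.EmitInstruction(TmpInst);
    break;
  }
  }
  return true;
}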
diff --git a/utils/TableGen/PseudoLoweringEmitter.h b/utils/TableGen/PseudoLoweringEmitter.h
new file mode 100644
index 0000000..2749280
--- /dev/null
+++ b/utils/TableGen/PseudoLoweringEmitter.h
@@ -0,0 +1,65 @@
+//===- PseudoLoweringEmitter.h - PseudoLowering Generator -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PSEUDOLOWERINGEMITTER_H
+#define PSEUDOLOWERINGEMITTER_H
+
+#include "CodeGenInstruction.h"
+#include "CodeGenTarget.h"
+#include "TableGenBackend.h"
+#include "llvm/ADT/IndexedMap.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class PseudoLoweringEmitter : public TableGenBackend {
+ struct OpData {
+ enum MapKind { Operand, Imm, Reg };
+ MapKind Kind;
+ union {
+ unsigned Operand; // Operand number mapped to.
+ uint64_t Imm; // Integer immediate value.
+ Record *Reg; // Physical register.
+ } Data;
+ };
+ struct PseudoExpansion {
+ CodeGenInstruction Source; // The source pseudo instruction definition.
+ CodeGenInstruction Dest; // The destination instruction to lower to.
+ IndexedMap<OpData> OperandMap;
+
+ PseudoExpansion(CodeGenInstruction &s, CodeGenInstruction &d,
+ IndexedMap<OpData> &m) :
+ Source(s), Dest(d), OperandMap(m) {}
+ };
+
+ RecordKeeper &Records;
+
+ // It's overkill to have an instance of the full CodeGenTarget object,
+ // but it loads everything on demand rather than in the constructor,
+ // so in practice it's cheap enough.
+ CodeGenTarget Target;
+
+ SmallVector<PseudoExpansion, 64> Expansions;
+
+ unsigned addDagOperandMapping(Record *Rec, DagInit *Dag,
+ CodeGenInstruction &Insn,
+ IndexedMap<OpData> &OperandMap,
+ unsigned BaseIdx);
+ void evaluateExpansion(Record *Pseudo);
+ void emitLoweringEmitter(raw_ostream &o);
+public:
+ PseudoLoweringEmitter(RecordKeeper &R) : Records(R), Target(R) {}
+
+ /// run - Output the pseudo-lowerings.
+ void run(raw_ostream &o);
+};
+
+} // end llvm namespace
+
+#endif
diff --git a/utils/TableGen/Record.cpp b/utils/TableGen/Record.cpp
index 8ac8cd9..730eca1 100644
--- a/utils/TableGen/Record.cpp
+++ b/utils/TableGen/Record.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "Record.h"
+#include "Error.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Format.h"
#include "llvm/ADT/StringExtras.h"
@@ -1442,6 +1443,25 @@ Record::getValueAsListOfInts(StringRef FieldName) const {
return Ints;
}
+/// getValueAsListOfStrings - This method looks up the specified field and
+/// returns its value as a vector of strings, throwing an exception if the
+/// field does not exist or if the value is not the right type.
+///
+std::vector<std::string>
+Record::getValueAsListOfStrings(StringRef FieldName) const {
+ ListInit *List = getValueAsListInit(FieldName);
+ std::vector<std::string> Strings;
+ for (unsigned i = 0; i < List->getSize(); i++) {
+ if (StringInit *II = dynamic_cast<StringInit*>(List->getElement(i))) {
+ Strings.push_back(II->getValue());
+ } else {
+ throw "Record `" + getName() + "', field `" + FieldName.str() +
+ "' does not have a list of strings initializer!";
+ }
+ }
+ return Strings;
+}
+
/// getValueAsDef - This method looks up the specified field and returns its
/// value as a Record, throwing an exception if the field does not exist or if
/// the value is not the right type.
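[Editor's aside, not part of the patch: a one-line usage sketch of the new
accessor. The field name "AltNames" is hypothetical.]

std::vector<std::string> Names = R->getValueAsListOfStrings("AltNames");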
diff --git a/utils/TableGen/Record.h b/utils/TableGen/Record.h
index 522b719..2f4080b 100644
--- a/utils/TableGen/Record.h
+++ b/utils/TableGen/Record.h
@@ -34,7 +34,7 @@ class DagRecTy;
class RecordRecTy;
// Init subclasses.
-struct Init;
+class Init;
class UnsetInit;
class BitInit;
class BitsInit;
@@ -453,7 +453,8 @@ RecTy *resolveTypes(RecTy *T1, RecTy *T2);
// Initializer Classes
//===----------------------------------------------------------------------===//
-struct Init {
+class Init {
+public:
virtual ~Init() {}
/// isComplete - This virtual method should be overridden by values that may
@@ -1368,6 +1369,12 @@ public:
///
std::vector<int64_t> getValueAsListOfInts(StringRef FieldName) const;
+ /// getValueAsListOfStrings - This method looks up the specified field and
+ /// returns its value as a vector of strings, throwing an exception if the
+ /// field does not exist or if the value is not the right type.
+ ///
+ std::vector<std::string> getValueAsListOfStrings(StringRef FieldName) const;
+
/// getValueAsDef - This method looks up the specified field and returns its
/// value as a Record, throwing an exception if the field does not exist or if
/// the value is not the right type.
@@ -1486,22 +1493,8 @@ struct LessRecordFieldName {
}
};
-
-class TGError {
- SMLoc Loc;
- std::string Message;
-public:
- TGError(SMLoc loc, const std::string &message) : Loc(loc), Message(message) {}
-
- SMLoc getLoc() const { return Loc; }
- const std::string &getMessage() const { return Message; }
-};
-
-
raw_ostream &operator<<(raw_ostream &OS, const RecordKeeper &RK);
-void PrintError(SMLoc ErrorLoc, const Twine &Msg);
-
} // End llvm namespace
#endif
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index 5a441e2..65d4a9b 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -19,19 +19,24 @@
#include "Record.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Format.h"
#include <algorithm>
#include <set>
using namespace llvm;
// runEnums - Print out enum values for all of the registers.
-void RegisterInfoEmitter::runEnums(raw_ostream &OS) {
- CodeGenTarget Target(Records);
- CodeGenRegBank &Bank = Target.getRegBank();
- const std::vector<CodeGenRegister> &Registers = Target.getRegisters();
+void
+RegisterInfoEmitter::runEnums(raw_ostream &OS,
+ CodeGenTarget &Target, CodeGenRegBank &Bank) {
+ const std::vector<CodeGenRegister*> &Registers = Bank.getRegisters();
- std::string Namespace = Registers[0].TheDef->getValueAsString("Namespace");
+ std::string Namespace = Registers[0]->TheDef->getValueAsString("Namespace");
EmitSourceFileHeader("Target Register Enum Values", OS);
+
+ OS << "\n#ifdef GET_REGINFO_ENUM\n";
+ OS << "#undef GET_REGINFO_ENUM\n";
+
OS << "namespace llvm {\n\n";
if (!Namespace.empty())
@@ -39,35 +44,166 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS) {
OS << "enum {\n NoRegister,\n";
for (unsigned i = 0, e = Registers.size(); i != e; ++i)
- OS << " " << Registers[i].getName() << " = " <<
- Registers[i].EnumValue << ",\n";
- assert(Registers.size() == Registers[Registers.size()-1].EnumValue &&
+ OS << " " << Registers[i]->getName() << " = " <<
+ Registers[i]->EnumValue << ",\n";
+ assert(Registers.size() == Registers[Registers.size()-1]->EnumValue &&
"Register enum value mismatch!");
OS << " NUM_TARGET_REGS \t// " << Registers.size()+1 << "\n";
OS << "};\n";
if (!Namespace.empty())
OS << "}\n";
- const std::vector<Record*> &SubRegIndices = Bank.getSubRegIndices();
- if (!SubRegIndices.empty()) {
- OS << "\n// Subregister indices\n";
- Namespace = SubRegIndices[0]->getValueAsString("Namespace");
+ const std::vector<CodeGenRegisterClass> &RegisterClasses =
+ Target.getRegisterClasses();
+ if (!RegisterClasses.empty()) {
+ OS << "\n// Register classes\n";
if (!Namespace.empty())
OS << "namespace " << Namespace << " {\n";
- OS << "enum {\n NoSubRegister,\n";
- for (unsigned i = 0, e = Bank.getNumNamedIndices(); i != e; ++i)
- OS << " " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n";
- OS << " NUM_TARGET_NAMED_SUBREGS = " << SubRegIndices.size()+1 << "\n";
+ OS << "enum {\n";
+ for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
+ if (i) OS << ",\n";
+ OS << " " << RegisterClasses[i].getName() << "RegClassID";
+ OS << " = " << i;
+ }
+ OS << "\n };\n";
+ if (!Namespace.empty())
+ OS << "}\n";
+ }
+
+ const std::vector<Record*> &RegAltNameIndices = Target.getRegAltNameIndices();
+ // If the only definition is the default NoRegAltName, we don't need to
+ // emit anything.
+ if (RegAltNameIndices.size() > 1) {
+ OS << "\n// Register alternate name indices\n";
+ if (!Namespace.empty())
+ OS << "namespace " << Namespace << " {\n";
+ OS << "enum {\n";
+ for (unsigned i = 0, e = RegAltNameIndices.size(); i != e; ++i)
+ OS << " " << RegAltNameIndices[i]->getName() << ",\t// " << i << "\n";
+ OS << " NUM_TARGET_REG_ALT_NAMES = " << RegAltNameIndices.size() << "\n";
OS << "};\n";
if (!Namespace.empty())
OS << "}\n";
}
+
OS << "} // End llvm namespace \n";
+ OS << "#endif // GET_REGINFO_ENUM\n\n";
}
-void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
+//
+// runMCDesc - Print out MC register descriptions.
+//
+void
+RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
+ CodeGenRegBank &RegBank) {
+ EmitSourceFileHeader("MC Register Information", OS);
+
+ OS << "\n#ifdef GET_REGINFO_MC_DESC\n";
+ OS << "#undef GET_REGINFO_MC_DESC\n";
+
+ std::map<const CodeGenRegister*, CodeGenRegister::Set> Overlaps;
+ RegBank.computeOverlaps(Overlaps);
+
+ OS << "namespace llvm {\n\n";
+
+ const std::string &TargetName = Target.getName();
+ std::string ClassName = TargetName + "GenMCRegisterInfo";
+ OS << "struct " << ClassName << " : public MCRegisterInfo {\n"
+ << " explicit " << ClassName << "(const MCRegisterDesc *D);\n";
+ OS << "};\n";
+
+ OS << "\nnamespace {\n";
+
+ const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters();
+
+ // Emit an overlap list for all registers.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ const CodeGenRegister *Reg = Regs[i];
+ const CodeGenRegister::Set &O = Overlaps[Reg];
+ // Move Reg to the front so TRI::getAliasSet can share the list.
+ OS << " const unsigned " << Reg->getName() << "_Overlaps[] = { "
+ << getQualifiedName(Reg->TheDef) << ", ";
+ for (CodeGenRegister::Set::const_iterator I = O.begin(), E = O.end();
+ I != E; ++I)
+ if (*I != Reg)
+ OS << getQualifiedName((*I)->TheDef) << ", ";
+ OS << "0 };\n";
+ }
+
+ // Emit the empty sub-registers list
+ OS << " const unsigned Empty_SubRegsSet[] = { 0 };\n";
+ // Loop over all of the registers which have sub-registers, emitting the
+ // sub-registers list to memory.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ const CodeGenRegister &Reg = *Regs[i];
+ if (Reg.getSubRegs().empty())
+ continue;
+ // getSubRegs() orders by SubRegIndex. We want a topological order.
+ SetVector<CodeGenRegister*> SR;
+ Reg.addSubRegsPreOrder(SR);
+ OS << " const unsigned " << Reg.getName() << "_SubRegsSet[] = { ";
+ for (unsigned j = 0, je = SR.size(); j != je; ++j)
+ OS << getQualifiedName(SR[j]->TheDef) << ", ";
+ OS << "0 };\n";
+ }
+
+ // Emit the empty super-registers list
+ OS << " const unsigned Empty_SuperRegsSet[] = { 0 };\n";
+ // Loop over all of the registers which have super-registers, emitting the
+ // super-registers list to memory.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ const CodeGenRegister &Reg = *Regs[i];
+ const CodeGenRegister::SuperRegList &SR = Reg.getSuperRegs();
+ if (SR.empty())
+ continue;
+ OS << " const unsigned " << Reg.getName() << "_SuperRegsSet[] = { ";
+ for (unsigned j = 0, je = SR.size(); j != je; ++j)
+ OS << getQualifiedName(SR[j]->TheDef) << ", ";
+ OS << "0 };\n";
+ }
+ OS << "}\n"; // End of anonymous namespace...
+
+ OS << "\nMCRegisterDesc " << TargetName
+ << "RegDesc[] = { // Descriptors\n";
+ OS << " { \"NOREG\",\t0,\t0,\t0 },\n";
+
+ // Now that the register alias and sub-register sets have been emitted,
+ // emit the register descriptors themselves.
+ for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
+ const CodeGenRegister &Reg = *Regs[i];
+ OS << " { \"";
+ OS << Reg.getName() << "\",\t" << Reg.getName() << "_Overlaps,\t";
+ if (!Reg.getSubRegs().empty())
+ OS << Reg.getName() << "_SubRegsSet,\t";
+ else
+ OS << "Empty_SubRegsSet,\t";
+ if (!Reg.getSuperRegs().empty())
+ OS << Reg.getName() << "_SuperRegsSet";
+ else
+ OS << "Empty_SuperRegsSet";
+ OS << " },\n";
+ }
+ OS << "};\n\n"; // End of register descriptors...
+
+ // MCRegisterInfo initialization routine.
+ OS << "static inline void Init" << TargetName
+ << "MCRegisterInfo(MCRegisterInfo *RI) {\n";
+ OS << " RI->InitMCRegisterInfo(" << TargetName << "RegDesc, "
+ << Regs.size()+1 << ");\n}\n\n";
+
+ OS << "} // End llvm namespace \n";
+ OS << "#endif // GET_REGINFO_MC_DESC\n\n";
+}
+
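[Editor's aside, not part of the patch: the register info is consumed the
same way as the instruction info above. "XYZ" is a hypothetical target.]

#define GET_REGINFO_ENUM
#include "XYZGenRegisterInfo.inc"   // register/class enums (runEnums)

#define GET_REGINFO_MC_DESC
#include "XYZGenRegisterInfo.inc"   // MCRegisterDesc table (runMCDesc)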
+void
+RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
+ CodeGenRegBank &RegBank) {
EmitSourceFileHeader("Register Information Header Fragment", OS);
- CodeGenTarget Target(Records);
+
+ OS << "\n#ifdef GET_REGINFO_HEADER\n";
+ OS << "#undef GET_REGINFO_HEADER\n";
+
const std::string &TargetName = Target.getName();
std::string ClassName = TargetName + "GenRegisterInfo";
@@ -77,8 +213,7 @@ void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
OS << "namespace llvm {\n\n";
OS << "struct " << ClassName << " : public TargetRegisterInfo {\n"
- << " explicit " << ClassName
- << "(int CallFrameSetupOpcode = -1, int CallFrameDestroyOpcode = -1);\n"
+ << " explicit " << ClassName << "();\n"
<< " virtual int getDwarfRegNumFull(unsigned RegNum, "
<< "unsigned Flavour) const;\n"
<< " virtual int getLLVMRegNumFull(unsigned DwarfRegNum, "
@@ -91,6 +226,21 @@ void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
<< " unsigned composeSubRegIndices(unsigned, unsigned) const;\n"
<< "};\n\n";
+ const std::vector<Record*> &SubRegIndices = RegBank.getSubRegIndices();
+ if (!SubRegIndices.empty()) {
+ OS << "\n// Subregister indices\n";
+ std::string Namespace = SubRegIndices[0]->getValueAsString("Namespace");
+ if (!Namespace.empty())
+ OS << "namespace " << Namespace << " {\n";
+ OS << "enum {\n NoSubRegister,\n";
+ for (unsigned i = 0, e = RegBank.getNumNamedIndices(); i != e; ++i)
+ OS << " " << SubRegIndices[i]->getName() << ",\t// " << i+1 << "\n";
+ OS << " NUM_TARGET_NAMED_SUBREGS = " << SubRegIndices.size()+1 << "\n";
+ OS << "};\n";
+ if (!Namespace.empty())
+ OS << "}\n";
+ }
+
const std::vector<CodeGenRegisterClass> &RegisterClasses =
Target.getRegisterClasses();
@@ -98,21 +248,17 @@ void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
OS << "namespace " << RegisterClasses[0].Namespace
<< " { // Register classes\n";
- OS << " enum {\n";
- for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
- if (i) OS << ",\n";
- OS << " " << RegisterClasses[i].getName() << "RegClassID";
- OS << " = " << i;
- }
- OS << "\n };\n\n";
-
for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
- const std::string &Name = RegisterClasses[i].getName();
+ const CodeGenRegisterClass &RC = RegisterClasses[i];
+ const std::string &Name = RC.getName();
// Output the register class definition.
OS << " struct " << Name << "Class : public TargetRegisterClass {\n"
- << " " << Name << "Class();\n"
- << RegisterClasses[i].MethodProtos << " };\n";
+ << " " << Name << "Class();\n";
+ if (!RC.AltOrderSelect.empty())
+ OS << " ArrayRef<unsigned> "
+ "getRawAllocationOrder(const MachineFunction&) const;\n";
+ OS << " };\n";
// Output the extern for the instance.
OS << " extern " << Name << "Class\t" << Name << "RegClass;\n";
@@ -123,72 +269,19 @@ void RegisterInfoEmitter::runHeader(raw_ostream &OS) {
OS << "} // end of namespace " << TargetName << "\n\n";
}
OS << "} // End llvm namespace \n";
+ OS << "#endif // GET_REGINFO_HEADER\n\n";
}
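
With the GET_REGINFO_* guards, everything now lands in one generated .inc file and each consumer extracts only the section it needs. A sketch of the expected include pattern, assuming a target named Foo (file names illustrative):

    // FooRegisterInfo.h
    #define GET_REGINFO_HEADER
    #include "FooGenRegisterInfo.inc"     // TargetRegisterInfo subclass + subreg enums

    // FooRegisterInfo.cpp
    #define GET_REGINFO_MC_DESC
    #include "FooGenRegisterInfo.inc"     // MCRegisterDesc tables
    #define GET_REGINFO_TARGET_DESC
    #include "FooGenRegisterInfo.inc"     // register classes, ctor, DWARF maps

Because each section #undefs its own guard, the same .inc can be included repeatedly with different sections enabled.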
-static void addSuperReg(Record *R, Record *S,
- std::map<Record*, std::set<Record*>, LessRecord> &SubRegs,
- std::map<Record*, std::set<Record*>, LessRecord> &SuperRegs,
- std::map<Record*, std::set<Record*>, LessRecord> &Aliases) {
- if (R == S) {
- errs() << "Error: recursive sub-register relationship between"
- << " register " << getQualifiedName(R)
- << " and its sub-registers?\n";
- abort();
- }
- if (!SuperRegs[R].insert(S).second)
- return;
- SubRegs[S].insert(R);
- Aliases[R].insert(S);
- Aliases[S].insert(R);
- if (SuperRegs.count(S))
- for (std::set<Record*>::iterator I = SuperRegs[S].begin(),
- E = SuperRegs[S].end(); I != E; ++I)
- addSuperReg(R, *I, SubRegs, SuperRegs, Aliases);
-}
-
-static void addSubSuperReg(Record *R, Record *S,
- std::map<Record*, std::set<Record*>, LessRecord> &SubRegs,
- std::map<Record*, std::set<Record*>, LessRecord> &SuperRegs,
- std::map<Record*, std::set<Record*>, LessRecord> &Aliases) {
- if (R == S) {
- errs() << "Error: recursive sub-register relationship between"
- << " register " << getQualifiedName(R)
- << " and its sub-registers?\n";
- abort();
- }
-
- if (!SubRegs[R].insert(S).second)
- return;
- addSuperReg(S, R, SubRegs, SuperRegs, Aliases);
- Aliases[R].insert(S);
- Aliases[S].insert(R);
- if (SubRegs.count(S))
- for (std::set<Record*>::iterator I = SubRegs[S].begin(),
- E = SubRegs[S].end(); I != E; ++I)
- addSubSuperReg(R, *I, SubRegs, SuperRegs, Aliases);
-}
-
-class RegisterSorter {
-private:
- std::map<Record*, std::set<Record*>, LessRecord> &RegisterSubRegs;
-
-public:
- RegisterSorter(std::map<Record*, std::set<Record*>, LessRecord> &RS)
- : RegisterSubRegs(RS) {}
-
- bool operator()(Record *RegA, Record *RegB) {
- // B is sub-register of A.
- return RegisterSubRegs.count(RegA) && RegisterSubRegs[RegA].count(RegB);
- }
-};
-
-// RegisterInfoEmitter::run - Main register file description emitter.
//
-void RegisterInfoEmitter::run(raw_ostream &OS) {
- CodeGenTarget Target(Records);
- CodeGenRegBank &RegBank = Target.getRegBank();
- RegBank.computeDerivedInfo();
- EmitSourceFileHeader("Register Information Source Fragment", OS);
+// runTargetDesc - Output the target register and register file descriptions.
+//
+void
+RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
+ CodeGenRegBank &RegBank) {
+ EmitSourceFileHeader("Target Register and Register Classes Information", OS);
+
+ OS << "\n#ifdef GET_REGINFO_TARGET_DESC\n";
+ OS << "#undef GET_REGINFO_TARGET_DESC\n";
OS << "namespace llvm {\n\n";
@@ -205,20 +298,21 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
// Emit the register enum value arrays for each RegisterClass
for (unsigned rc = 0, e = RegisterClasses.size(); rc != e; ++rc) {
const CodeGenRegisterClass &RC = RegisterClasses[rc];
+ ArrayRef<Record*> Order = RC.getOrder();
// Collect allocatable registers.
if (RC.Allocatable)
- AllocatableRegs.insert(RC.Elements.begin(), RC.Elements.end());
+ AllocatableRegs.insert(Order.begin(), Order.end());
// Give the register class a legal C name if it's anonymous.
- std::string Name = RC.TheDef->getName();
+ std::string Name = RC.getName();
// Emit the register list now.
OS << " // " << Name << " Register Class...\n"
<< " static const unsigned " << Name
<< "[] = {\n ";
- for (unsigned i = 0, e = RC.Elements.size(); i != e; ++i) {
- Record *Reg = RC.Elements[i];
+ for (unsigned i = 0, e = Order.size(); i != e; ++i) {
+ Record *Reg = Order[i];
OS << getQualifiedName(Reg) << ", ";
}
OS << "\n };\n\n";
@@ -229,7 +323,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
const CodeGenRegisterClass &RC = RegisterClasses[rc];
// Give the register class a legal C name if it's anonymous.
- std::string Name = RC.TheDef->getName() + "VTs";
+ std::string Name = RC.getName() + "VTs";
// Emit the register list now.
OS << " // " << Name
@@ -397,10 +491,9 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << "\n };\n\n";
}
-
+ // Emit methods.
for (unsigned i = 0, e = RegisterClasses.size(); i != e; ++i) {
const CodeGenRegisterClass &RC = RegisterClasses[i];
- OS << RC.MethodBodies << "\n";
OS << RC.getName() << "Class::" << RC.getName()
<< "Class() : TargetRegisterClass("
<< RC.getName() + "RegClassID" << ", "
@@ -416,8 +509,30 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
<< RC.SpillAlignment/8 << ", "
<< RC.CopyCost << ", "
<< RC.Allocatable << ", "
- << RC.getName() << ", " << RC.getName() << " + " << RC.Elements.size()
+ << RC.getName() << ", " << RC.getName() << " + "
+ << RC.getOrder().size()
<< ") {}\n";
+ if (!RC.AltOrderSelect.empty()) {
+ OS << "\nstatic inline unsigned " << RC.getName()
+ << "AltOrderSelect(const MachineFunction &MF) {"
+ << RC.AltOrderSelect << "}\n\nArrayRef<unsigned> "
+ << RC.getName() << "Class::"
+ << "getRawAllocationOrder(const MachineFunction &MF) const {\n";
+ for (unsigned oi = 1, oe = RC.getNumOrders(); oi != oe; ++oi) {
+ ArrayRef<Record*> Elems = RC.getOrder(oi);
+ OS << " static const unsigned AltOrder" << oi << "[] = {";
+ for (unsigned elem = 0; elem != Elems.size(); ++elem)
+ OS << (elem ? ", " : " ") << getQualifiedName(Elems[elem]);
+ OS << " };\n";
+ }
+ OS << " static const ArrayRef<unsigned> Order[] = {\n"
+ << " ArrayRef<unsigned>(" << RC.getName();
+ for (unsigned oi = 1, oe = RC.getNumOrders(); oi != oe; ++oi)
+ OS << "),\n ArrayRef<unsigned>(AltOrder" << oi;
+ OS << ")\n };\n const unsigned Select = " << RC.getName()
+ << "AltOrderSelect(MF);\n assert(Select < " << RC.getNumOrders()
+ << ");\n return Order[Select];\n}\n";
+ }
}
OS << "}\n";
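
For a register class GR with a single alternative order, the emission loop above produces code shaped like this (a sketch; the AltOrderSelect body is pasted verbatim from the .td record, register names are invented, and GR is the register-list array emitted earlier):

    static inline unsigned GRAltOrderSelect(const MachineFunction &MF) {
      /* body supplied by the record's AltOrderSelect field; returns 0 or 1 */
    }

    ArrayRef<unsigned> GRClass::getRawAllocationOrder(const MachineFunction &MF) const {
      static const unsigned AltOrder1[] = { Foo::R1, Foo::R0 };
      static const ArrayRef<unsigned> Order[] = {
        ArrayRef<unsigned>(GR),          // order 0: the default allocation order
        ArrayRef<unsigned>(AltOrder1)
      };
      const unsigned Select = GRAltOrderSelect(MF);
      assert(Select < 2);
      return Order[Select];
    }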
@@ -429,295 +544,33 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " &" << getQualifiedName(RegisterClasses[i].TheDef)
<< "RegClass,\n";
OS << " };\n";
+ OS << "}\n"; // End of anonymous namespace...
- // Emit register sub-registers / super-registers, aliases...
- std::map<Record*, std::set<Record*>, LessRecord> RegisterSubRegs;
- std::map<Record*, std::set<Record*>, LessRecord> RegisterSuperRegs;
- std::map<Record*, std::set<Record*>, LessRecord> RegisterAliases;
- typedef std::map<Record*, std::vector<int64_t>, LessRecord> DwarfRegNumsMapTy;
- DwarfRegNumsMapTy DwarfRegNums;
-
- const std::vector<CodeGenRegister> &Regs = Target.getRegisters();
-
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- Record *R = Regs[i].TheDef;
- std::vector<Record*> LI = Regs[i].TheDef->getValueAsListOfDefs("Aliases");
- // Add information that R aliases all of the elements in the list... and
- // that everything in the list aliases R.
- for (unsigned j = 0, e = LI.size(); j != e; ++j) {
- Record *Reg = LI[j];
- if (RegisterAliases[R].count(Reg))
- errs() << "Warning: register alias between " << getQualifiedName(R)
- << " and " << getQualifiedName(Reg)
- << " specified multiple times!\n";
- RegisterAliases[R].insert(Reg);
-
- if (RegisterAliases[Reg].count(R))
- errs() << "Warning: register alias between " << getQualifiedName(R)
- << " and " << getQualifiedName(Reg)
- << " specified multiple times!\n";
- RegisterAliases[Reg].insert(R);
- }
- }
-
- // Process sub-register sets.
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- Record *R = Regs[i].TheDef;
- std::vector<Record*> LI = Regs[i].TheDef->getValueAsListOfDefs("SubRegs");
- // Process sub-register set and add aliases information.
- for (unsigned j = 0, e = LI.size(); j != e; ++j) {
- Record *SubReg = LI[j];
- if (RegisterSubRegs[R].count(SubReg))
- errs() << "Warning: register " << getQualifiedName(SubReg)
- << " specified as a sub-register of " << getQualifiedName(R)
- << " multiple times!\n";
- addSubSuperReg(R, SubReg, RegisterSubRegs, RegisterSuperRegs,
- RegisterAliases);
- }
- }
-
- // Print the SubregHashTable, a simple quadratically probed
- // hash table for determining if a register is a subregister
- // of another register.
- unsigned NumSubRegs = 0;
- std::map<Record*, unsigned> RegNo;
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- RegNo[Regs[i].TheDef] = i;
- NumSubRegs += RegisterSubRegs[Regs[i].TheDef].size();
- }
-
- unsigned SubregHashTableSize = 2 * NextPowerOf2(2 * NumSubRegs);
- unsigned* SubregHashTable = new unsigned[2 * SubregHashTableSize];
- std::fill(SubregHashTable, SubregHashTable + 2 * SubregHashTableSize, ~0U);
-
- unsigned hashMisses = 0;
-
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- Record* R = Regs[i].TheDef;
- for (std::set<Record*>::iterator I = RegisterSubRegs[R].begin(),
- E = RegisterSubRegs[R].end(); I != E; ++I) {
- Record* RJ = *I;
- // We have to increase the indices of both registers by one when
- // computing the hash because, in the generated code, there
- // will be an extra empty slot at register 0.
- size_t index = ((i+1) + (RegNo[RJ]+1) * 37) & (SubregHashTableSize-1);
- unsigned ProbeAmt = 2;
- while (SubregHashTable[index*2] != ~0U &&
- SubregHashTable[index*2+1] != ~0U) {
- index = (index + ProbeAmt) & (SubregHashTableSize-1);
- ProbeAmt += 2;
-
- hashMisses++;
- }
-
- SubregHashTable[index*2] = i;
- SubregHashTable[index*2+1] = RegNo[RJ];
- }
- }
-
- OS << "\n\n // Number of hash collisions: " << hashMisses << "\n";
-
- if (SubregHashTableSize) {
- std::string Namespace = Regs[0].TheDef->getValueAsString("Namespace");
-
- OS << " const unsigned SubregHashTable[] = { ";
- for (unsigned i = 0; i < SubregHashTableSize - 1; ++i) {
- if (i != 0)
- // Insert spaces for nice formatting.
- OS << " ";
-
- if (SubregHashTable[2*i] != ~0U) {
- OS << getQualifiedName(Regs[SubregHashTable[2*i]].TheDef) << ", "
- << getQualifiedName(Regs[SubregHashTable[2*i+1]].TheDef) << ", \n";
- } else {
- OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister, \n";
- }
- }
-
- unsigned Idx = SubregHashTableSize*2-2;
- if (SubregHashTable[Idx] != ~0U) {
- OS << " "
- << getQualifiedName(Regs[SubregHashTable[Idx]].TheDef) << ", "
- << getQualifiedName(Regs[SubregHashTable[Idx+1]].TheDef) << " };\n";
- } else {
- OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister };\n";
- }
-
- OS << " const unsigned SubregHashTableSize = "
- << SubregHashTableSize << ";\n";
- } else {
- OS << " const unsigned SubregHashTable[] = { ~0U, ~0U };\n"
- << " const unsigned SubregHashTableSize = 1;\n";
- }
-
- delete [] SubregHashTable;
-
-
- // Print the AliasHashTable, a simple quadratically probed
- // hash table for determining if a register aliases another register.
- unsigned NumAliases = 0;
- RegNo.clear();
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- RegNo[Regs[i].TheDef] = i;
- NumAliases += RegisterAliases[Regs[i].TheDef].size();
- }
-
- unsigned AliasesHashTableSize = 2 * NextPowerOf2(2 * NumAliases);
- unsigned* AliasesHashTable = new unsigned[2 * AliasesHashTableSize];
- std::fill(AliasesHashTable, AliasesHashTable + 2 * AliasesHashTableSize, ~0U);
-
- hashMisses = 0;
-
- for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- Record* R = Regs[i].TheDef;
- for (std::set<Record*>::iterator I = RegisterAliases[R].begin(),
- E = RegisterAliases[R].end(); I != E; ++I) {
- Record* RJ = *I;
- // We have to increase the indices of both registers by one when
- // computing the hash because, in the generated code, there
- // will be an extra empty slot at register 0.
- size_t index = ((i+1) + (RegNo[RJ]+1) * 37) & (AliasesHashTableSize-1);
- unsigned ProbeAmt = 2;
- while (AliasesHashTable[index*2] != ~0U &&
- AliasesHashTable[index*2+1] != ~0U) {
- index = (index + ProbeAmt) & (AliasesHashTableSize-1);
- ProbeAmt += 2;
-
- hashMisses++;
- }
-
- AliasesHashTable[index*2] = i;
- AliasesHashTable[index*2+1] = RegNo[RJ];
- }
- }
-
- OS << "\n\n // Number of hash collisions: " << hashMisses << "\n";
-
- if (AliasesHashTableSize) {
- std::string Namespace = Regs[0].TheDef->getValueAsString("Namespace");
-
- OS << " const unsigned AliasesHashTable[] = { ";
- for (unsigned i = 0; i < AliasesHashTableSize - 1; ++i) {
- if (i != 0)
- // Insert spaces for nice formatting.
- OS << " ";
-
- if (AliasesHashTable[2*i] != ~0U) {
- OS << getQualifiedName(Regs[AliasesHashTable[2*i]].TheDef) << ", "
- << getQualifiedName(Regs[AliasesHashTable[2*i+1]].TheDef) << ", \n";
- } else {
- OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister, \n";
- }
- }
-
- unsigned Idx = AliasesHashTableSize*2-2;
- if (AliasesHashTable[Idx] != ~0U) {
- OS << " "
- << getQualifiedName(Regs[AliasesHashTable[Idx]].TheDef) << ", "
- << getQualifiedName(Regs[AliasesHashTable[Idx+1]].TheDef) << " };\n";
- } else {
- OS << Namespace << "::NoRegister, " << Namespace << "::NoRegister };\n";
- }
-
- OS << " const unsigned AliasesHashTableSize = "
- << AliasesHashTableSize << ";\n";
- } else {
- OS << " const unsigned AliasesHashTable[] = { ~0U, ~0U };\n"
- << " const unsigned AliasesHashTableSize = 1;\n";
- }
-
- delete [] AliasesHashTable;
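
Both deleted tables used the same open-addressing scheme: slot index ((A+1) + (B+1)*37) & (Size-1), empty slots marked ~0U, and a probe step that grows by 2 on each miss (quadratic probing). The run-time lookup that consumed them mirrored that sequence; roughly (a sketch, not the exact TargetRegisterInfo code):

    // Was (A, B) recorded in a generated pair table? Register numbers
    // here already carry the +1 bias for the empty NOREG slot.
    static bool containsPair(const unsigned *Table, unsigned Size,
                             unsigned A, unsigned B) {
      unsigned Index = (A + B * 37) & (Size - 1);
      unsigned ProbeAmt = 2;                 // quadratic probing: 2, 4, 6, ...
      while (Table[Index*2] != ~0U) {
        if (Table[Index*2] == A && Table[Index*2+1] == B)
          return true;
        Index = (Index + ProbeAmt) & (Size - 1);
        ProbeAmt += 2;
      }
      return false;
    }

Replacing these probed tables with the zero-terminated per-register lists emitted in runMCDesc is what makes this whole block removable.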
-
- if (!RegisterAliases.empty())
- OS << "\n\n // Register Overlap Lists...\n";
-
- // Emit an overlap list for all registers.
- for (std::map<Record*, std::set<Record*>, LessRecord >::iterator
- I = RegisterAliases.begin(), E = RegisterAliases.end(); I != E; ++I) {
- OS << " const unsigned " << I->first->getName() << "_Overlaps[] = { "
- << getQualifiedName(I->first) << ", ";
- for (std::set<Record*>::iterator ASI = I->second.begin(),
- E = I->second.end(); ASI != E; ++ASI)
- OS << getQualifiedName(*ASI) << ", ";
- OS << "0 };\n";
- }
-
- if (!RegisterSubRegs.empty())
- OS << "\n\n // Register Sub-registers Sets...\n";
-
- // Emit the empty sub-registers list
- OS << " const unsigned Empty_SubRegsSet[] = { 0 };\n";
- // Loop over all of the registers which have sub-registers, emitting the
- // sub-registers list to memory.
- for (std::map<Record*, std::set<Record*>, LessRecord>::iterator
- I = RegisterSubRegs.begin(), E = RegisterSubRegs.end(); I != E; ++I) {
- if (I->second.empty())
- continue;
- OS << " const unsigned " << I->first->getName() << "_SubRegsSet[] = { ";
- std::vector<Record*> SubRegsVector;
- for (std::set<Record*>::iterator ASI = I->second.begin(),
- E = I->second.end(); ASI != E; ++ASI)
- SubRegsVector.push_back(*ASI);
- RegisterSorter RS(RegisterSubRegs);
- std::stable_sort(SubRegsVector.begin(), SubRegsVector.end(), RS);
- for (unsigned i = 0, e = SubRegsVector.size(); i != e; ++i)
- OS << getQualifiedName(SubRegsVector[i]) << ", ";
- OS << "0 };\n";
- }
-
- if (!RegisterSuperRegs.empty())
- OS << "\n\n // Register Super-registers Sets...\n";
-
- // Emit the empty super-registers list
- OS << " const unsigned Empty_SuperRegsSet[] = { 0 };\n";
- // Loop over all of the registers which have super-registers, emitting the
- // super-registers list to memory.
- for (std::map<Record*, std::set<Record*>, LessRecord >::iterator
- I = RegisterSuperRegs.begin(), E = RegisterSuperRegs.end(); I != E; ++I) {
- if (I->second.empty())
- continue;
- OS << " const unsigned " << I->first->getName() << "_SuperRegsSet[] = { ";
-
- std::vector<Record*> SuperRegsVector;
- for (std::set<Record*>::iterator ASI = I->second.begin(),
- E = I->second.end(); ASI != E; ++ASI)
- SuperRegsVector.push_back(*ASI);
- RegisterSorter RS(RegisterSubRegs);
- std::stable_sort(SuperRegsVector.begin(), SuperRegsVector.end(), RS);
- for (unsigned i = 0, e = SuperRegsVector.size(); i != e; ++i)
- OS << getQualifiedName(SuperRegsVector[i]) << ", ";
- OS << "0 };\n";
- }
-
- OS<<"\n const TargetRegisterDesc RegisterDescriptors[] = { // Descriptors\n";
- OS << " { \"NOREG\",\t0,\t0,\t0,\t0,\t0 },\n";
+ // Emit extra information about registers.
+ const std::string &TargetName = Target.getName();
+ OS << "\n static const TargetRegisterInfoDesc "
+ << TargetName << "RegInfoDesc[] = "
+ << "{ // Extra Descriptors\n";
+ OS << " { 0, 0 },\n";
- // Now that register alias and sub-registers sets have been emitted, emit the
- // register descriptors now.
+ const std::vector<CodeGenRegister*> &Regs = RegBank.getRegisters();
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister &Reg = Regs[i];
- OS << " { \"";
- OS << Reg.getName() << "\",\t" << Reg.getName() << "_Overlaps,\t";
- if (!RegisterSubRegs[Reg.TheDef].empty())
- OS << Reg.getName() << "_SubRegsSet,\t";
- else
- OS << "Empty_SubRegsSet,\t";
- if (!RegisterSuperRegs[Reg.TheDef].empty())
- OS << Reg.getName() << "_SuperRegsSet,\t";
- else
- OS << "Empty_SuperRegsSet,\t";
- OS << Reg.CostPerUse << ",\t"
+ const CodeGenRegister &Reg = *Regs[i];
+ OS << " { ";
+ OS << Reg.CostPerUse << ", "
<< int(AllocatableRegs.count(Reg.TheDef)) << " },\n";
}
OS << " };\n"; // End of register descriptors...
+
// Calculate the mapping of subregister+index pairs to physical registers.
// This will also create further anonymous indexes.
unsigned NamedIndices = RegBank.getNumNamedIndices();
// Emit SubRegIndex names, skipping 0
const std::vector<Record*> &SubRegIndices = RegBank.getSubRegIndices();
- OS << "\n const char *const SubRegIndexTable[] = { \"";
+ OS << "\n static const char *const " << TargetName
+ << "SubRegIndexTable[] = { \"";
for (unsigned i = 0, e = SubRegIndices.size(); i != e; ++i) {
OS << SubRegIndices[i]->getName();
if (i+1 != e)
@@ -735,7 +588,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
}
OS << "\n };\n\n";
}
- OS << "}\n\n"; // End of anonymous namespace...
+ OS << "\n";
std::string ClassName = Target.getName() + "GenRegisterInfo";
@@ -746,10 +599,10 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
<< " switch (RegNo) {\n"
<< " default:\n return 0;\n";
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister::SubRegMap &SRM = Regs[i].getSubRegs();
+ const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
if (SRM.empty())
continue;
- OS << " case " << getQualifiedName(Regs[i].TheDef) << ":\n";
+ OS << " case " << getQualifiedName(Regs[i]->TheDef) << ":\n";
OS << " switch (Index) {\n";
OS << " default: return 0;\n";
for (CodeGenRegister::SubRegMap::const_iterator ii = SRM.begin(),
@@ -767,10 +620,10 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
<< " switch (RegNo) {\n"
<< " default:\n return 0;\n";
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- const CodeGenRegister::SubRegMap &SRM = Regs[i].getSubRegs();
+ const CodeGenRegister::SubRegMap &SRM = Regs[i]->getSubRegs();
if (SRM.empty())
continue;
- OS << " case " << getQualifiedName(Regs[i].TheDef) << ":\n";
+ OS << " case " << getQualifiedName(Regs[i]->TheDef) << ":\n";
for (CodeGenRegister::SubRegMap::const_iterator ii = SRM.begin(),
ie = SRM.end(); ii != ie; ++ii)
OS << " if (SubRegNo == " << getQualifiedName(ii->second->TheDef)
@@ -806,22 +659,25 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " }\n}\n\n";
// Emit the constructor of the class...
+ OS << "extern MCRegisterDesc " << TargetName << "RegDesc[];\n";
+
OS << ClassName << "::" << ClassName
- << "(int CallFrameSetupOpcode, int CallFrameDestroyOpcode)\n"
- << " : TargetRegisterInfo(RegisterDescriptors, " << Regs.size()+1
+ << "()\n"
+ << " : TargetRegisterInfo(" << TargetName << "RegInfoDesc"
<< ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() <<",\n"
- << " SubRegIndexTable,\n"
- << " CallFrameSetupOpcode, CallFrameDestroyOpcode,\n"
- << " SubregHashTable, SubregHashTableSize,\n"
- << " AliasesHashTable, AliasesHashTableSize) {\n"
+ << " " << TargetName << "SubRegIndexTable) {\n"
+ << " InitMCRegisterInfo(" << TargetName << "RegDesc, "
+ << Regs.size()+1 << ");\n"
<< "}\n\n";
// Collect all information about dwarf register numbers
+ typedef std::map<Record*, std::vector<int64_t>, LessRecord> DwarfRegNumsMapTy;
+ DwarfRegNumsMapTy DwarfRegNums;
// First, just pull all provided information to the map
unsigned maxLength = 0;
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- Record *Reg = Regs[i].TheDef;
+ Record *Reg = Regs[i]->TheDef;
std::vector<int64_t> RegNums = Reg->getValueAsListOfInts("DwarfNumbers");
maxLength = std::max((size_t)maxLength, RegNums.size());
if (DwarfRegNums.count(Reg))
@@ -864,7 +720,7 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " };\n}\n\n";
for (unsigned i = 0, e = Regs.size(); i != e; ++i) {
- Record *Reg = Regs[i].TheDef;
+ Record *Reg = Regs[i]->TheDef;
const RecordVal *V = Reg->getValue("DwarfAlias");
if (!V || !V->getValue())
continue;
@@ -904,4 +760,16 @@ void RegisterInfoEmitter::run(raw_ostream &OS) {
OS << " };\n}\n\n";
OS << "} // End llvm namespace \n";
+ OS << "#endif // GET_REGINFO_TARGET_DESC\n\n";
+}
+
+void RegisterInfoEmitter::run(raw_ostream &OS) {
+ CodeGenTarget Target(Records);
+ CodeGenRegBank &RegBank = Target.getRegBank();
+ RegBank.computeDerivedInfo();
+
+ runEnums(OS, Target, RegBank);
+ runMCDesc(OS, Target, RegBank);
+ runTargetHeader(OS, Target, RegBank);
+ runTargetDesc(OS, Target, RegBank);
}
diff --git a/utils/TableGen/RegisterInfoEmitter.h b/utils/TableGen/RegisterInfoEmitter.h
index 1456b4f..2c01b5c 100644
--- a/utils/TableGen/RegisterInfoEmitter.h
+++ b/utils/TableGen/RegisterInfoEmitter.h
@@ -20,19 +20,30 @@
namespace llvm {
+class CodeGenRegBank;
+class CodeGenTarget;
+
class RegisterInfoEmitter : public TableGenBackend {
RecordKeeper &Records;
public:
RegisterInfoEmitter(RecordKeeper &R) : Records(R) {}
- // run - Output the register file description, returning true on failure.
- void run(raw_ostream &o);
+ // runEnums - Print out enum values for all of the registers.
+ void runEnums(raw_ostream &o, CodeGenTarget &Target, CodeGenRegBank &Bank);
- // runHeader - Emit a header fragment for the register info emitter.
- void runHeader(raw_ostream &o);
+ // runMCDesc - Print out MC register descriptions.
+ void runMCDesc(raw_ostream &o, CodeGenTarget &Target, CodeGenRegBank &Bank);
- // runEnums - Print out enum values for all of the registers.
- void runEnums(raw_ostream &o);
+ // runTargetHeader - Emit a header fragment for the register info emitter.
+ void runTargetHeader(raw_ostream &o, CodeGenTarget &Target,
+ CodeGenRegBank &Bank);
+
+ // runTargetDesc - Output the target register and register file descriptions.
+ void runTargetDesc(raw_ostream &o, CodeGenTarget &Target,
+ CodeGenRegBank &Bank);
+
+ // run - Output the register file description.
+ void run(raw_ostream &o);
};
} // End llvm namespace
diff --git a/utils/TableGen/SetTheory.cpp b/utils/TableGen/SetTheory.cpp
index ade1825..21ac09c 100644
--- a/utils/TableGen/SetTheory.cpp
+++ b/utils/TableGen/SetTheory.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "SetTheory.h"
+#include "Error.h"
#include "Record.h"
#include "llvm/Support/Format.h"
@@ -155,10 +156,15 @@ struct SequenceOp : public SetTheory::Operator {
From = II->getValue();
else
throw "From must be an integer: " + Expr->getAsString();
+ if (From < 0 || From >= (1 << 30))
+ throw "From out of range";
+
if (IntInit *II = dynamic_cast<IntInit*>(Expr->arg_begin()[2]))
To = II->getValue();
else
throw "From must be an integer: " + Expr->getAsString();
+ if (To < 0 || To >= (1 << 30))
+ throw "To out of range";
RecordKeeper &Records =
dynamic_cast<DefInit&>(*Expr->getOperator()).getDef()->getRecords();
@@ -167,7 +173,7 @@ struct SequenceOp : public SetTheory::Operator {
for (To += Step; From != To; From += Step) {
std::string Name;
raw_string_ostream OS(Name);
- OS << format(Format.c_str(), From);
+ OS << format(Format.c_str(), unsigned(From));
Record *Rec = Records.getDef(OS.str());
if (!Rec)
throw "No def named '" + Name + "': " + Expr->getAsString();
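
The new cast matters because llvm::format forwards its argument through printf-style varargs: a %u conversion in the sequence's format string must be matched by an unsigned int, and passing the raw int64_t instead is undefined behavior on ABIs where the widths differ. The range checks added above guarantee 0 <= From < (1 << 30), so the narrowing is lossless. A minimal illustration of the fixed call (assuming a %u-style format, which is what the cast implies):

    #include "llvm/Support/Format.h"
    #include "llvm/Support/raw_ostream.h"

    void emitSeqName(llvm::raw_ostream &OS, int64_t From) {
      // OS << llvm::format("R%u", From);        // wrong: int64_t consumed by %u
      OS << llvm::format("R%u", unsigned(From)); // argument now matches %u
    }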
diff --git a/utils/TableGen/SetTheory.h b/utils/TableGen/SetTheory.h
index e37a76e..6e8313b 100644
--- a/utils/TableGen/SetTheory.h
+++ b/utils/TableGen/SetTheory.h
@@ -55,7 +55,7 @@
namespace llvm {
class DagInit;
-struct Init;
+class Init;
class Record;
class RecordKeeper;
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 928fa4b..978e91a 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -29,16 +29,20 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS,
std::vector<Record*> DefList = Records.getAllDerivedDefinitions(ClassName);
std::sort(DefList.begin(), DefList.end(), LessRecord());
- // Open enumeration
- OS << "enum {\n";
-
- // For each record
unsigned N = DefList.size();
+ if (N == 0)
+ return;
if (N > 64) {
errs() << "Too many (> 64) subtarget features!\n";
exit(1);
}
+ OS << "namespace " << Target << " {\n";
+
+ // Open enumeration
+ OS << "enum {\n";
+
+ // For each record
for (unsigned i = 0; i < N;) {
// Next record
Record *Def = DefList[i];
@@ -57,23 +61,30 @@ void SubtargetEmitter::Enumeration(raw_ostream &OS,
// Close enumeration
OS << "};\n";
+
+ OS << "}\n";
}
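
Each feature gets one bit (hence the 64-feature limit checked above), and after this change the enum is wrapped in the target's namespace instead of landing unqualified in llvm::. For a hypothetical target Foo the emitted fragment would look roughly like:

    namespace Foo {
    enum {
      FeatureA = 1ULL << 0,
      FeatureB = 1ULL << 1
    };
    }

which is why the table emitters below now qualify every feature reference as Target::Name.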
//
// FeatureKeyValues - Emit data of all the subtarget features. Used by the
// command line.
//
-void SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
+unsigned SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
// Gather and sort all the features
std::vector<Record*> FeatureList =
Records.getAllDerivedDefinitions("SubtargetFeature");
+
+ if (FeatureList.empty())
+ return 0;
+
std::sort(FeatureList.begin(), FeatureList.end(), LessRecordFieldName());
// Begin feature table
OS << "// Sorted (by key) array of values for CPU features.\n"
- << "static const llvm::SubtargetFeatureKV FeatureKV[] = {\n";
+ << "llvm::SubtargetFeatureKV " << Target << "FeatureKV[] = {\n";
// For each feature
+ unsigned NumFeatures = 0;
for (unsigned i = 0, N = FeatureList.size(); i < N; ++i) {
// Next feature
Record *Feature = FeatureList[i];
@@ -88,7 +99,7 @@ void SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
OS << " { "
<< "\"" << CommandLineName << "\", "
<< "\"" << Desc << "\", "
- << Name << ", ";
+ << Target << "::" << Name << ", ";
const std::vector<Record*> &ImpliesList =
Feature->getValueAsListOfDefs("Implies");
@@ -97,12 +108,13 @@ void SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
OS << "0ULL";
} else {
for (unsigned j = 0, M = ImpliesList.size(); j < M;) {
- OS << ImpliesList[j]->getName();
+ OS << Target << "::" << ImpliesList[j]->getName();
if (++j < M) OS << " | ";
}
}
OS << " }";
+ ++NumFeatures;
// Depending on 'if more in the list' emit comma
if ((i + 1) < N) OS << ",";
@@ -113,17 +125,14 @@ void SubtargetEmitter::FeatureKeyValues(raw_ostream &OS) {
// End feature table
OS << "};\n";
- // Emit size of table
- OS<<"\nenum {\n";
- OS<<" FeatureKVSize = sizeof(FeatureKV)/sizeof(llvm::SubtargetFeatureKV)\n";
- OS<<"};\n";
+ return NumFeatures;
}
//
// CPUKeyValues - Emit data of all the subtarget processors. Used by command
// line.
//
-void SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
+unsigned SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
// Gather and sort processor information
std::vector<Record*> ProcessorList =
Records.getAllDerivedDefinitions("Processor");
@@ -131,7 +140,7 @@ void SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
// Begin processor table
OS << "// Sorted (by key) array of values for CPU subtype.\n"
- << "static const llvm::SubtargetFeatureKV SubTypeKV[] = {\n";
+ << "llvm::SubtargetFeatureKV " << Target << "SubTypeKV[] = {\n";
// For each processor
for (unsigned i = 0, N = ProcessorList.size(); i < N;) {
@@ -151,7 +160,7 @@ void SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
OS << "0ULL";
} else {
for (unsigned j = 0, M = FeatureList.size(); j < M;) {
- OS << FeatureList[j]->getName();
+ OS << Target << "::" << FeatureList[j]->getName();
if (++j < M) OS << " | ";
}
}
@@ -168,10 +177,7 @@ void SubtargetEmitter::CPUKeyValues(raw_ostream &OS) {
// End processor table
OS << "};\n";
- // Emit size of table
- OS<<"\nenum {\n";
- OS<<" SubTypeKVSize = sizeof(SubTypeKV)/sizeof(llvm::SubtargetFeatureKV)\n";
- OS<<"};\n";
+ return ProcessorList.size();
}
//
@@ -192,11 +198,6 @@ CollectAllItinClasses(raw_ostream &OS,
ItinClassesMap[ItinClass->getName()] = i;
}
- // Emit size of table
- OS<<"\nenum {\n";
- OS<<" ItinClassesSize = " << N << "\n";
- OS<<"};\n";
-
// Return itinerary class count
return N;
}
@@ -326,9 +327,9 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
OS << "\n// Pipeline forwarding pathes for itineraries \"" << Name
<< "\"\n" << "namespace " << Name << "Bypass {\n";
- OS << " const unsigned NoBypass = 0;\n";
+ OS << " unsigned NoBypass = 0;\n";
for (unsigned j = 0, BPN = BPs.size(); j < BPN; ++j)
- OS << " const unsigned " << BPs[j]->getName()
+ OS << " unsigned " << BPs[j]->getName()
<< " = 1 << " << j << ";\n";
OS << "}\n";
@@ -336,15 +337,17 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
}
// Begin stages table
- std::string StageTable = "\nstatic const llvm::InstrStage Stages[] = {\n";
+ std::string StageTable = "\nllvm::InstrStage " + Target + "Stages[] = {\n";
StageTable += " { 0, 0, 0, llvm::InstrStage::Required }, // No itinerary\n";
// Begin operand cycle table
- std::string OperandCycleTable = "static const unsigned OperandCycles[] = {\n";
+ std::string OperandCycleTable = "unsigned " + Target +
+ "OperandCycles[] = {\n";
OperandCycleTable += " 0, // No itinerary\n";
// Begin pipeline bypass table
- std::string BypassTable = "static const unsigned ForwardingPathes[] = {\n";
+ std::string BypassTable = "unsigned " + Target +
+ "ForwardingPathes[] = {\n";
BypassTable += " 0, // No itinerary\n";
unsigned StageCount = 1, OperandCycleCount = 1;
@@ -457,12 +460,6 @@ void SubtargetEmitter::EmitStageAndOperandCycleData(raw_ostream &OS,
OS << StageTable;
OS << OperandCycleTable;
OS << BypassTable;
-
- // Emit size of tables
- OS<<"\nenum {\n";
- OS<<" StagesSize = sizeof(Stages)/sizeof(llvm::InstrStage),\n";
- OS<<" OperandCyclesSize = sizeof(OperandCycles)/sizeof(unsigned)\n";
- OS<<"};\n";
}
//
@@ -491,7 +488,7 @@ EmitProcessorData(raw_ostream &OS,
// Begin processor itinerary table
OS << "\n";
- OS << "static const llvm::InstrItinerary " << Name << "[] = {\n";
+ OS << "llvm::InstrItinerary " << Name << "[] = {\n";
// For each itinerary class
std::vector<InstrItinerary> &ItinList = *ProcListIter++;
@@ -533,7 +530,8 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
// Begin processor table
OS << "\n";
OS << "// Sorted (by key) array of itineraries for CPU subtype.\n"
- << "static const llvm::SubtargetInfoKV ProcItinKV[] = {\n";
+ << "llvm::SubtargetInfoKV "
+ << Target << "ProcItinKV[] = {\n";
// For each processor
for (unsigned i = 0, N = ProcessorList.size(); i < N;) {
@@ -559,12 +557,6 @@ void SubtargetEmitter::EmitProcessorLookup(raw_ostream &OS) {
// End processor table
OS << "};\n";
-
- // Emit size of table
- OS<<"\nenum {\n";
- OS<<" ProcItinKVSize = sizeof(ProcItinKV)/"
- "sizeof(llvm::SubtargetInfoKV)\n";
- OS<<"};\n";
}
//
@@ -599,23 +591,27 @@ void SubtargetEmitter::EmitData(raw_ostream &OS) {
// ParseFeaturesFunction - Produces a subtarget specific function for parsing
// the subtarget features string.
//
-void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
+void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS,
+ unsigned NumFeatures,
+ unsigned NumProcs) {
std::vector<Record*> Features =
Records.getAllDerivedDefinitions("SubtargetFeature");
std::sort(Features.begin(), Features.end(), LessRecord());
OS << "// ParseSubtargetFeatures - Parses features string setting specified\n"
<< "// subtarget options.\n"
- << "std::string llvm::";
+ << "void llvm::";
OS << Target;
- OS << "Subtarget::ParseSubtargetFeatures(const std::string &FS,\n"
- << " const std::string &CPU) {\n"
+ OS << "Subtarget::ParseSubtargetFeatures(StringRef CPU, StringRef FS) {\n"
<< " DEBUG(dbgs() << \"\\nFeatures:\" << FS);\n"
- << " DEBUG(dbgs() << \"\\nCPU:\" << CPU);\n"
- << " SubtargetFeatures Features(FS);\n"
- << " Features.setCPUIfNone(CPU);\n"
- << " uint64_t Bits = Features.getBits(SubTypeKV, SubTypeKVSize,\n"
- << " FeatureKV, FeatureKVSize);\n";
+ << " DEBUG(dbgs() << \"\\nCPU:\" << CPU);\n";
+
+ if (Features.empty()) {
+ OS << "}\n";
+ return;
+ }
+
+ OS << " uint64_t Bits = ReInitMCSubtargetInfo(CPU, FS);\n";
for (unsigned i = 0; i < Features.size(); i++) {
// Next record
@@ -625,23 +621,17 @@ void SubtargetEmitter::ParseFeaturesFunction(raw_ostream &OS) {
const std::string &Attribute = R->getValueAsString("Attribute");
if (Value=="true" || Value=="false")
- OS << " if ((Bits & " << Instance << ") != 0) "
+ OS << " if ((Bits & " << Target << "::"
+ << Instance << ") != 0) "
<< Attribute << " = " << Value << ";\n";
else
- OS << " if ((Bits & " << Instance << ") != 0 && " << Attribute <<
- " < " << Value << ") " << Attribute << " = " << Value << ";\n";
- }
-
- if (HasItineraries) {
- OS << "\n"
- << " InstrItinerary *Itinerary = (InstrItinerary *)"
- << "Features.getInfo(ProcItinKV, ProcItinKVSize);\n"
- << " InstrItins = InstrItineraryData(Stages, OperandCycles, "
- << "ForwardingPathes, Itinerary);\n";
+ OS << " if ((Bits & " << Target << "::"
+ << Instance << ") != 0 && "
+ << Attribute << " < " << Value << ") "
+ << Attribute << " = " << Value << ";\n";
}
- OS << " return Features.getCPU();\n"
- << "}\n";
+ OS << "}\n";
}
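
Under the new scheme the generated parser no longer constructs a SubtargetFeatures object or returns the CPU string; it fetches the resolved bit mask from the MC layer and only applies the attribute updates. For a single boolean feature on a hypothetical target Foo, the emitted function reads roughly:

    void llvm::FooSubtarget::ParseSubtargetFeatures(StringRef CPU, StringRef FS) {
      DEBUG(dbgs() << "\nFeatures:" << FS);
      DEBUG(dbgs() << "\nCPU:" << CPU);
      uint64_t Bits = ReInitMCSubtargetInfo(CPU, FS);
      if ((Bits & Foo::FeatureA) != 0) HasFeatureA = true;
    }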
//
@@ -652,22 +642,114 @@ void SubtargetEmitter::run(raw_ostream &OS) {
EmitSourceFileHeader("Subtarget Enumeration Source Fragment", OS);
- OS << "#include \"llvm/Support/Debug.h\"\n";
- OS << "#include \"llvm/Support/raw_ostream.h\"\n";
- OS << "#include \"llvm/Target/SubtargetFeature.h\"\n";
- OS << "#include \"llvm/Target/TargetInstrItineraries.h\"\n\n";
+ OS << "\n#ifdef GET_SUBTARGETINFO_ENUM\n";
+ OS << "#undef GET_SUBTARGETINFO_ENUM\n";
-// Enumeration(OS, "FuncUnit", true);
-// OS<<"\n";
-// Enumeration(OS, "InstrItinClass", false);
-// OS<<"\n";
+ OS << "namespace llvm {\n";
Enumeration(OS, "SubtargetFeature", true);
- OS<<"\n";
- FeatureKeyValues(OS);
- OS<<"\n";
- CPUKeyValues(OS);
- OS<<"\n";
+ OS << "} // End llvm namespace \n";
+ OS << "#endif // GET_SUBTARGETINFO_ENUM\n\n";
+
+ OS << "\n#ifdef GET_SUBTARGETINFO_MC_DESC\n";
+ OS << "#undef GET_SUBTARGETINFO_MC_DESC\n";
+
+ OS << "namespace llvm {\n";
+#if 0
+ OS << "namespace {\n";
+#endif
+ unsigned NumFeatures = FeatureKeyValues(OS);
+ OS << "\n";
+ unsigned NumProcs = CPUKeyValues(OS);
+ OS << "\n";
EmitData(OS);
- OS<<"\n";
- ParseFeaturesFunction(OS);
+ OS << "\n";
+#if 0
+ OS << "}\n";
+#endif
+
+ // MCInstrInfo initialization routine.
+ OS << "static inline void Init" << Target
+ << "MCSubtargetInfo(MCSubtargetInfo *II, "
+ << "StringRef TT, StringRef CPU, StringRef FS) {\n";
+ OS << " II->InitMCSubtargetInfo(TT, CPU, FS, ";
+ if (NumFeatures)
+ OS << Target << "FeatureKV, ";
+ else
+ OS << "0, ";
+ if (NumProcs)
+ OS << Target << "SubTypeKV, ";
+ else
+ OS << "0, ";
+ if (HasItineraries) {
+ OS << Target << "ProcItinKV, "
+ << Target << "Stages, "
+ << Target << "OperandCycles, "
+ << Target << "ForwardingPathes, ";
+ } else
+ OS << "0, 0, 0, 0, ";
+ OS << NumFeatures << ", " << NumProcs << ");\n}\n\n";
+
+ OS << "} // End llvm namespace \n";
+
+ OS << "#endif // GET_SUBTARGETINFO_MC_DESC\n\n";
+
+ OS << "\n#ifdef GET_SUBTARGETINFO_TARGET_DESC\n";
+ OS << "#undef GET_SUBTARGETINFO_TARGET_DESC\n";
+
+ OS << "#include \"llvm/Support/Debug.h\"\n";
+ OS << "#include \"llvm/Support/raw_ostream.h\"\n";
+ ParseFeaturesFunction(OS, NumFeatures, NumProcs);
+
+ OS << "#endif // GET_SUBTARGETINFO_TARGET_DESC\n\n";
+
+ // Create a TargetSubtargetInfo subclass to hide the MC layer initialization.
+ OS << "\n#ifdef GET_SUBTARGETINFO_HEADER\n";
+ OS << "#undef GET_SUBTARGETINFO_HEADER\n";
+
+ std::string ClassName = Target + "GenSubtargetInfo";
+ OS << "namespace llvm {\n";
+ OS << "struct " << ClassName << " : public TargetSubtargetInfo {\n"
+ << " explicit " << ClassName << "(StringRef TT, StringRef CPU, "
+ << "StringRef FS);\n"
+ << "};\n";
+ OS << "} // End llvm namespace \n";
+
+ OS << "#endif // GET_SUBTARGETINFO_HEADER\n\n";
+
+ OS << "\n#ifdef GET_SUBTARGETINFO_CTOR\n";
+ OS << "#undef GET_SUBTARGETINFO_CTOR\n";
+
+ OS << "namespace llvm {\n";
+ OS << "extern llvm::SubtargetFeatureKV " << Target << "FeatureKV[];\n";
+ OS << "extern llvm::SubtargetFeatureKV " << Target << "SubTypeKV[];\n";
+ if (HasItineraries) {
+ OS << "extern llvm::SubtargetInfoKV " << Target << "ProcItinKV[];\n";
+ OS << "extern llvm::InstrStage " << Target << "Stages[];\n";
+ OS << "extern unsigned " << Target << "OperandCycles[];\n";
+ OS << "extern unsigned " << Target << "ForwardingPathes[];\n";
+ }
+
+ OS << ClassName << "::" << ClassName << "(StringRef TT, StringRef CPU, "
+ << "StringRef FS)\n"
+ << " : TargetSubtargetInfo() {\n"
+ << " InitMCSubtargetInfo(TT, CPU, FS, ";
+ if (NumFeatures)
+ OS << Target << "FeatureKV, ";
+ else
+ OS << "0, ";
+ if (NumProcs)
+ OS << Target << "SubTypeKV, ";
+ else
+ OS << "0, ";
+ if (HasItineraries) {
+ OS << Target << "ProcItinKV, "
+ << Target << "Stages, "
+ << Target << "OperandCycles, "
+ << Target << "ForwardingPathes, ";
+ } else
+ OS << "0, 0, 0, 0, ";
+ OS << NumFeatures << ", " << NumProcs << ");\n}\n\n";
+ OS << "} // End llvm namespace \n";
+
+ OS << "#endif // GET_SUBTARGETINFO_CTOR\n\n";
}
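
These guarded sections follow the same on-demand include pattern as the register info fragments: the target's header defines GET_SUBTARGETINFO_HEADER (and GET_SUBTARGETINFO_ENUM) before including the generated .inc, while its .cpp pulls in ParseSubtargetFeatures and the constructor. A sketch for a hypothetical target Foo:

    // FooSubtarget.h
    #define GET_SUBTARGETINFO_HEADER
    #include "FooGenSubtargetInfo.inc"

    // FooSubtarget.cpp
    #define GET_SUBTARGETINFO_TARGET_DESC
    #define GET_SUBTARGETINFO_CTOR
    #include "FooGenSubtargetInfo.inc"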
diff --git a/utils/TableGen/SubtargetEmitter.h b/utils/TableGen/SubtargetEmitter.h
index 93055b7..b239f3d 100644
--- a/utils/TableGen/SubtargetEmitter.h
+++ b/utils/TableGen/SubtargetEmitter.h
@@ -15,7 +15,7 @@
#define SUBTARGET_EMITTER_H
#include "TableGenBackend.h"
-#include "llvm/Target/TargetInstrItineraries.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include <vector>
#include <map>
#include <string>
@@ -30,8 +30,8 @@ class SubtargetEmitter : public TableGenBackend {
bool HasItineraries;
void Enumeration(raw_ostream &OS, const char *ClassName, bool isBits);
- void FeatureKeyValues(raw_ostream &OS);
- void CPUKeyValues(raw_ostream &OS);
+ unsigned FeatureKeyValues(raw_ostream &OS);
+ unsigned CPUKeyValues(raw_ostream &OS);
unsigned CollectAllItinClasses(raw_ostream &OS,
std::map<std::string,unsigned> &ItinClassesMap,
std::vector<Record*> &ItinClassList);
@@ -52,7 +52,8 @@ class SubtargetEmitter : public TableGenBackend {
std::vector<std::vector<InstrItinerary> > &ProcList);
void EmitProcessorLookup(raw_ostream &OS);
void EmitData(raw_ostream &OS);
- void ParseFeaturesFunction(raw_ostream &OS);
+ void ParseFeaturesFunction(raw_ostream &OS, unsigned NumFeatures,
+ unsigned NumProcs);
public:
SubtargetEmitter(RecordKeeper &R) : Records(R), HasItineraries(false) {}
diff --git a/utils/TableGen/TGLexer.cpp b/utils/TableGen/TGLexer.cpp
index 572c36d..b4b90ff 100644
--- a/utils/TableGen/TGLexer.cpp
+++ b/utils/TableGen/TGLexer.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "TGLexer.h"
+#include "Error.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Config/config.h"
@@ -35,7 +36,6 @@ SMLoc TGLexer::getLoc() const {
return SMLoc::getFromPointer(TokStart);
}
-
/// ReturnError - Set the error to the specified string at the specified
/// location. This is defined to always return tgtok::Error.
tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
@@ -43,16 +43,6 @@ tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
return tgtok::Error;
}
-
-void TGLexer::PrintError(const char *Loc, const Twine &Msg) const {
- SrcMgr.PrintMessage(SMLoc::getFromPointer(Loc), Msg, "error");
-}
-
-void TGLexer::PrintError(SMLoc Loc, const Twine &Msg) const {
- SrcMgr.PrintMessage(Loc, Msg, "error");
-}
-
-
int TGLexer::getNextChar() {
char CurChar = *CurPtr++;
switch (CurChar) {
diff --git a/utils/TableGen/TGLexer.h b/utils/TableGen/TGLexer.h
index c2a6453..84d328b 100644
--- a/utils/TableGen/TGLexer.h
+++ b/utils/TableGen/TGLexer.h
@@ -101,9 +101,6 @@ public:
}
SMLoc getLoc() const;
-
- void PrintError(const char *Loc, const Twine &Msg) const;
- void PrintError(SMLoc Loc, const Twine &Msg) const;
private:
/// LexToken - Read the next token and return its code.
diff --git a/utils/TableGen/TGParser.h b/utils/TableGen/TGParser.h
index 419a99b..dce7e1d 100644
--- a/utils/TableGen/TGParser.h
+++ b/utils/TableGen/TGParser.h
@@ -15,6 +15,7 @@
#define TGPARSER_H
#include "TGLexer.h"
+#include "Error.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/SourceMgr.h"
#include <map>
@@ -24,7 +25,7 @@ namespace llvm {
class RecordVal;
class RecordKeeper;
struct RecTy;
- struct Init;
+ class Init;
struct MultiClass;
struct SubClassReference;
struct SubMultiClassReference;
@@ -60,7 +61,7 @@ public:
bool ParseFile();
bool Error(SMLoc L, const Twine &Msg) const {
- Lex.PrintError(L, Msg);
+ PrintError(L, Msg);
return true;
}
bool TokError(const Twine &Msg) const {
diff --git a/utils/TableGen/TableGen.cpp b/utils/TableGen/TableGen.cpp
index 4e4da36..e8eacb8 100644
--- a/utils/TableGen/TableGen.cpp
+++ b/utils/TableGen/TableGen.cpp
@@ -26,13 +26,14 @@
#include "DAGISelEmitter.h"
#include "DisassemblerEmitter.h"
#include "EDEmitter.h"
+#include "Error.h"
#include "FastISelEmitter.h"
-#include "InstrEnumEmitter.h"
#include "InstrInfoEmitter.h"
#include "IntrinsicEmitter.h"
#include "LLVMCConfigurationEmitter.h"
#include "NeonEmitter.h"
#include "OptParserEmitter.h"
+#include "PseudoLoweringEmitter.h"
#include "Record.h"
#include "RegisterInfoEmitter.h"
#include "ARMDecoderEmitter.h"
@@ -53,10 +54,13 @@ using namespace llvm;
enum ActionType {
PrintRecords,
GenEmitter,
- GenRegisterEnums, GenRegister, GenRegisterHeader,
- GenInstrEnums, GenInstrs, GenAsmWriter, GenAsmMatcher,
+ GenRegisterInfo,
+ GenInstrInfo,
+ GenAsmWriter,
+ GenAsmMatcher,
GenARMDecoder,
GenDisassembler,
+ GenPseudoLowering,
GenCallingConv,
GenClangAttrClasses,
GenClangAttrImpl,
@@ -92,15 +96,9 @@ namespace {
"Print all records to stdout (default)"),
clEnumValN(GenEmitter, "gen-emitter",
"Generate machine code emitter"),
- clEnumValN(GenRegisterEnums, "gen-register-enums",
- "Generate enum values for registers"),
- clEnumValN(GenRegister, "gen-register-desc",
- "Generate a register info description"),
- clEnumValN(GenRegisterHeader, "gen-register-desc-header",
- "Generate a register info description header"),
- clEnumValN(GenInstrEnums, "gen-instr-enums",
- "Generate enum values for instructions"),
- clEnumValN(GenInstrs, "gen-instr-desc",
+ clEnumValN(GenRegisterInfo, "gen-register-info",
+ "Generate registers and register classes info"),
+ clEnumValN(GenInstrInfo, "gen-instr-info",
"Generate instruction descriptions"),
clEnumValN(GenCallingConv, "gen-callingconv",
"Generate calling convention descriptions"),
@@ -110,6 +108,8 @@ namespace {
"Generate decoders for ARM/Thumb"),
clEnumValN(GenDisassembler, "gen-disassembler",
"Generate disassembler"),
+ clEnumValN(GenPseudoLowering, "gen-pseudo-lowering",
+ "Generate pseudo instruction lowering"),
clEnumValN(GenAsmMatcher, "gen-asm-matcher",
"Generate assembly instruction matcher"),
clEnumValN(GenDAGISel, "gen-dag-isel",
@@ -194,12 +194,6 @@ namespace {
}
-static SourceMgr SrcMgr;
-
-void llvm::PrintError(SMLoc ErrorLoc, const Twine &Msg) {
- SrcMgr.PrintMessage(ErrorLoc, Msg, "error");
-}
-
int main(int argc, char **argv) {
RecordKeeper Records;
@@ -266,20 +260,10 @@ int main(int argc, char **argv) {
case GenEmitter:
CodeEmitterGen(Records).run(Out.os());
break;
-
- case GenRegisterEnums:
- RegisterInfoEmitter(Records).runEnums(Out.os());
- break;
- case GenRegister:
+ case GenRegisterInfo:
RegisterInfoEmitter(Records).run(Out.os());
break;
- case GenRegisterHeader:
- RegisterInfoEmitter(Records).runHeader(Out.os());
- break;
- case GenInstrEnums:
- InstrEnumEmitter(Records).run(Out.os());
- break;
- case GenInstrs:
+ case GenInstrInfo:
InstrInfoEmitter(Records).run(Out.os());
break;
case GenCallingConv:
@@ -334,6 +318,9 @@ int main(int argc, char **argv) {
case GenDisassembler:
DisassemblerEmitter(Records).run(Out.os());
break;
+ case GenPseudoLowering:
+ PseudoLoweringEmitter(Records).run(Out.os());
+ break;
case GenOptParserDefs:
OptParserEmitter(Records, true).run(Out.os());
break;
@@ -403,13 +390,11 @@ int main(int argc, char **argv) {
return 0;
} catch (const TGError &Error) {
- errs() << argv[0] << ": error:\n";
- PrintError(Error.getLoc(), Error.getMessage());
-
+ PrintError(Error);
} catch (const std::string &Error) {
- errs() << argv[0] << ": " << Error << "\n";
+ PrintError(Error);
} catch (const char *Error) {
- errs() << argv[0] << ": " << Error << "\n";
+ PrintError(Error);
} catch (...) {
errs() << argv[0] << ": Unknown unexpected exception occurred.\n";
}
diff --git a/utils/TableGen/X86RecognizableInstr.cpp b/utils/TableGen/X86RecognizableInstr.cpp
index f7518a9..ea3bb70 100644
--- a/utils/TableGen/X86RecognizableInstr.cpp
+++ b/utils/TableGen/X86RecognizableInstr.cpp
@@ -229,6 +229,30 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
HasFROperands = hasFROperands();
HasVEX_LPrefix = has256BitOperands() || Rec->getValueAsBit("hasVEX_L");
+ // Check for 64-bit inst which does not require REX
+ Is64Bit = false;
+ // FIXME: Is there some better way to check for In64BitMode?
+ std::vector<Record*> Predicates = Rec->getValueAsListOfDefs("Predicates");
+ for (unsigned i = 0, e = Predicates.size(); i != e; ++i) {
+ if (Predicates[i]->getName().find("64Bit") != Name.npos) {
+ Is64Bit = true;
+ break;
+ }
+ }
+ // FIXME: These instructions aren't marked as 64-bit in any way
+ Is64Bit |= Rec->getName() == "JMP64pcrel32" ||
+ Rec->getName() == "MASKMOVDQU64" ||
+ Rec->getName() == "POPFS64" ||
+ Rec->getName() == "POPGS64" ||
+ Rec->getName() == "PUSHFS64" ||
+ Rec->getName() == "PUSHGS64" ||
+ Rec->getName() == "REX64_PREFIX" ||
+ Rec->getName().find("VMREAD64") != Name.npos ||
+ Rec->getName().find("VMWRITE64") != Name.npos ||
+ Rec->getName().find("MOV64") != Name.npos ||
+ Rec->getName().find("PUSH64") != Name.npos ||
+ Rec->getName().find("POP64") != Name.npos;
+
ShouldBeEmitted = true;
}
@@ -276,7 +300,7 @@ InstructionContext RecognizableInstr::insnContext() const {
insnContext = IC_VEX_XS;
else
insnContext = IC_VEX;
- } else if (Name.find("64") != Name.npos || HasREX_WPrefix) {
+ } else if (Is64Bit || HasREX_WPrefix) {
if (HasREX_WPrefix && HasOpSizePrefix)
insnContext = IC_64BIT_REXW_OPSIZE;
else if (HasOpSizePrefix)
diff --git a/utils/TableGen/X86RecognizableInstr.h b/utils/TableGen/X86RecognizableInstr.h
index c7ec18c..677d9f0 100644
--- a/utils/TableGen/X86RecognizableInstr.h
+++ b/utils/TableGen/X86RecognizableInstr.h
@@ -64,6 +64,8 @@ private:
bool HasLockPrefix;
/// The isCodeGenOnly filed from the record
bool IsCodeGenOnly;
+ /// Whether the instruction is valid only in 64-bit mode, judging by its "64Bit" predicates
+ bool Is64Bit;
/// The instruction name as listed in the tables
std::string Name;
diff --git a/utils/lit/lit/LitConfig.py b/utils/lit/lit/LitConfig.py
index 7ca1b9c..bda9174 100644
--- a/utils/lit/lit/LitConfig.py
+++ b/utils/lit/lit/LitConfig.py
@@ -96,7 +96,7 @@ class LitConfig:
# bash
self.bashPath = Util.which('bash', dir)
if self.bashPath is None:
- self.warning("Unable to find 'bash.exe'.")
+ self.note("Unable to find 'bash.exe'.")
self.bashPath = ''
return dir
diff --git a/utils/lit/lit/Util.py b/utils/lit/lit/Util.py
index 5635f50..226e453 100644
--- a/utils/lit/lit/Util.py
+++ b/utils/lit/lit/Util.py
@@ -12,7 +12,7 @@ def detectCPUs():
if isinstance(ncpus, int) and ncpus > 0:
return ncpus
else: # OSX:
- return int(os.popen2("sysctl -n hw.ncpu")[1].read())
+ return int(capture(['sysctl', '-n', 'hw.ncpu']))
# Windows:
if os.environ.has_key("NUMBER_OF_PROCESSORS"):
ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
diff --git a/utils/llvm.grm b/utils/llvm.grm
index 9d6bdf7..3f33702 100644
--- a/utils/llvm.grm
+++ b/utils/llvm.grm
@@ -172,6 +172,8 @@ FuncAttr ::= noreturn
| optsize
| ssp
| sspreq
+ | hotpatch
+ | nonlazybind
;
OptFuncAttrs ::= + _ | OptFuncAttrs FuncAttr ;
diff --git a/utils/llvmbuild b/utils/llvmbuild
index 5912c50..c7d8814 100755
--- a/utils/llvmbuild
+++ b/utils/llvmbuild
@@ -200,7 +200,8 @@ def check_options(parser, options, valid_builds):
# See if we can find source directories.
for src in options.src:
- for component in ["llvm", "llvm-gcc", "gcc", "dragonegg"]:
+ for component in components:
+ component = component.rstrip("2")
compsrc = src + "/" + component
if (not os.path.isdir(compsrc)):
parser.error("'" + compsrc + "' does not exist")
@@ -410,6 +411,8 @@ class Builder(threading.Thread):
configure_flags = dict(
llvm=dict(debug=["--prefix=" + self.install_prefix,
"--with-extra-options=-Werror",
+ "--enable-assertions",
+ "--disable-optimized",
"--with-cxx-include-root=" + cxxroot,
"--with-cxx-include-arch=" + cxxarch],
release=["--prefix=" + self.install_prefix,
@@ -419,7 +422,9 @@ class Builder(threading.Thread):
"--with-cxx-include-arch=" + cxxarch],
paranoid=["--prefix=" + self.install_prefix,
"--with-extra-options=-Werror",
+ "--enable-assertions",
"--enable-expensive-checks",
+ "--disable-optimized",
"--with-cxx-include-root=" + cxxroot,
"--with-cxx-include-arch=" + cxxarch]),
llvm_gcc=dict(debug=["--prefix=" + self.install_prefix,
@@ -444,6 +449,8 @@ class Builder(threading.Thread):
"--enable-languages=c,c++"]),
llvm2=dict(debug=["--prefix=" + self.install_prefix,
"--with-extra-options=-Werror",
+ "--enable-assertions",
+ "--disable-optimized",
"--with-llvmgccdir=" + self.install_prefix + "/bin",
"--with-cxx-include-root=" + cxxroot,
"--with-cxx-include-arch=" + cxxarch],
@@ -455,7 +462,9 @@ class Builder(threading.Thread):
"--with-cxx-include-arch=" + cxxarch],
paranoid=["--prefix=" + self.install_prefix,
"--with-extra-options=-Werror",
+ "--enable-assertions",
"--enable-expensive-checks",
+ "--disable-optimized",
"--with-llvmgccdir=" + self.install_prefix + "/bin",
"--with-cxx-include-root=" + cxxroot,
"--with-cxx-include-arch=" + cxxarch]),
@@ -611,7 +620,7 @@ class Builder(threading.Thread):
release=dict(),
paranoid=dict()))
- for component in ["llvm", "llvm-gcc", "llvm2", "gcc", "dragonegg"]:
+ for component in components:
comp = component[:]
srcdir = source + "/" + comp.rstrip("2")
@@ -625,7 +634,7 @@ class Builder(threading.Thread):
config_args = configure_flags[comp_key][build][:]
config_args.extend(getattr(self.options,
- "extra_" + comp_key
+ "extra_" + comp_key.rstrip("2")
+ "_config_flags").split())
self.logger.info("Configuring " + component + " in " + builddir)
@@ -703,6 +712,8 @@ class Builder(threading.Thread):
# Global constants
build_abbrev = dict(debug="dbg", release="opt", paranoid="par")
+#components = ["llvm", "llvm-gcc", "llvm2", "gcc", "dragonegg"]
+components = ["llvm", "llvm2", "gcc", "dragonegg"]
# Parse options
parser = optparse.OptionParser(version="%prog 1.0")
@@ -718,7 +729,10 @@ else:
format='%(name)-13s: %(message)s')
source_abbrev = get_path_abbrevs(set(options.src))
-branch_abbrev = get_path_abbrevs(set(options.branch))
+
+branch_abbrev = None
+if options.branch is not None:
+ branch_abbrev = get_path_abbrevs(set(options.branch))
work_queue = queue.Queue()