373 files changed, 9535 insertions, 3572 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d94506f..56f9355 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -236,6 +236,7 @@ add_subdirectory(lib/Transforms/Hello)
 add_subdirectory(lib/Linker)
 add_subdirectory(lib/Analysis)
 add_subdirectory(lib/Analysis/IPA)
+add_subdirectory(lib/MC)
 
  set(LLVM_ENUM_ASM_PRINTERS "")
  foreach(t ${LLVM_TARGETS_TO_BUILD})
diff --git a/Makefile.rules b/Makefile.rules
index 9325ca4..a1a1924 100644
--- a/Makefile.rules
+++ b/Makefile.rules
@@ -198,6 +198,7 @@ install-bytecode:: install-bytecode-local
 ifdef LLVMC_PLUGIN
 
 LIBRARYNAME := $(patsubst %,plugin_llvmc_%,$(LLVMC_PLUGIN))
+CPP.Flags += -DLLVMC_PLUGIN_NAME=$(LLVMC_PLUGIN)
 REQUIRES_EH := 1
 
 # Build a dynamic library if the user runs `make` directly from the plugin
@@ -213,6 +214,49 @@ endif
 
 endif # LLVMC_PLUGIN
 
+ifdef LLVMC_BASED_DRIVER
+# Tool name comes from $(LLVMC_BASED_DRIVER); LLVMLIBS lists CompilerDriver.a.
+TOOLNAME = $(LLVMC_BASED_DRIVER)
+LLVMLIBS = CompilerDriver.a
+LINK_COMPONENTS = support system
+REQUIRES_EH := 1
+
+# Preprocessor magic that generates references to static variables in built-in
+# plugins.
+ifneq ($(LLVMC_BUILTIN_PLUGINS),)
+# Add plugin_llvmc_<name>.a to USEDLIBS for every listed built-in plugin.
+USEDLIBS += $(patsubst %,plugin_llvmc_%.a,$(LLVMC_BUILTIN_PLUGINS))
+# Only the first five names are exported as -DLLVMC_BUILTIN_PLUGIN_<N>=<name>.
+LLVMC_BUILTIN_PLUGIN_1 = $(word 1, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_2 = $(word 2, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_3 = $(word 3, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_4 = $(word 4, $(LLVMC_BUILTIN_PLUGINS))
+LLVMC_BUILTIN_PLUGIN_5 = $(word 5, $(LLVMC_BUILTIN_PLUGINS))
+# $(word N, ...) is empty past the end of the list, so that -D flag is skipped.
+ifneq ($(LLVMC_BUILTIN_PLUGIN_1),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_1=$(LLVMC_BUILTIN_PLUGIN_1)
+endif
+
+ifneq ($(LLVMC_BUILTIN_PLUGIN_2),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_2=$(LLVMC_BUILTIN_PLUGIN_2)
+endif
+
+ifneq ($(LLVMC_BUILTIN_PLUGIN_3),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_3=$(LLVMC_BUILTIN_PLUGIN_3)
+endif
+
+ifneq ($(LLVMC_BUILTIN_PLUGIN_4),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_4=$(LLVMC_BUILTIN_PLUGIN_4)
+endif
+
+ifneq ($(LLVMC_BUILTIN_PLUGIN_5),)
+CPP.Flags += -DLLVMC_BUILTIN_PLUGIN_5=$(LLVMC_BUILTIN_PLUGIN_5)
+endif
+
+endif # LLVMC_BUILTIN_PLUGINS
+
+endif # LLVMC_BASED_DRIVER
+
 ###############################################################################
 # VARIABLES: Set up various variables based on configuration data
 ###############################################################################
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 0abe234..f71e648 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -829,6 +829,9 @@ if test "$ENABLE_THREADS" -eq 1 ; then
   AC_SEARCH_LIBS(pthread_rwlock_init,pthread,
                  AC_DEFINE([HAVE_PTHREAD_RWLOCK_INIT],[1],
                  [Have pthread_rwlock_init]))
+  AC_SEARCH_LIBS(pthread_getspecific,pthread,
+                 AC_DEFINE([HAVE_PTHREAD_GETSPECIFIC],[1],
+                 [Have pthread_getspecific]))
 fi
 
 dnl Allow extra x86-disassembler library
diff --git a/bindings/ocaml/analysis/Makefile b/bindings/ocaml/analysis/Makefile
index 0e95ecd..cbfcb24 100644
--- a/bindings/ocaml/analysis/Makefile
+++ b/bindings/ocaml/analysis/Makefile
@@ -13,7 +13,6 @@
 
 LEVEL := ../../..
 LIBRARYNAME := llvm_analysis
-DONT_BUILD_RELINKED := 1
 UsedComponents := analysis
 UsedOcamlInterfaces := llvm
 
diff --git a/bindings/ocaml/bitreader/Makefile b/bindings/ocaml/bitreader/Makefile
index 6a11cc6..a1c7de8 100644
--- a/bindings/ocaml/bitreader/Makefile
+++ b/bindings/ocaml/bitreader/Makefile
@@ -13,7 +13,6 @@
 
 LEVEL := ../../..
 LIBRARYNAME := llvm_bitreader
-DONT_BUILD_RELINKED := 1
 UsedComponents := bitreader
 UsedOcamlInterfaces := llvm
 
diff --git a/bindings/ocaml/bitwriter/Makefile b/bindings/ocaml/bitwriter/Makefile
index e57e3fe..cec0a59 100644
--- a/bindings/ocaml/bitwriter/Makefile
+++ b/bindings/ocaml/bitwriter/Makefile
@@ -13,7 +13,6 @@
 
 LEVEL := ../../..
 LIBRARYNAME := llvm_bitwriter
-DONT_BUILD_RELINKED := 1
 UsedComponents := bitwriter
 UsedOcamlInterfaces := llvm
 
diff --git a/bindings/ocaml/executionengine/Makefile b/bindings/ocaml/executionengine/Makefile
index 40fb98e..5fa3f22 100644
--- a/bindings/ocaml/executionengine/Makefile
+++ b/bindings/ocaml/executionengine/Makefile
@@ -13,7 +13,6 @@
 
 LEVEL := ../../..
 LIBRARYNAME := llvm_executionengine
-DONT_BUILD_RELINKED := 1
 UsedComponents := executionengine jit interpreter native
 UsedOcamlInterfaces := llvm llvm_target
 
diff --git a/bindings/ocaml/executionengine/executionengine_ocaml.c b/bindings/ocaml/executionengine/executionengine_ocaml.c
index ec40338..647759f 100644
--- a/bindings/ocaml/executionengine/executionengine_ocaml.c
+++ b/bindings/ocaml/executionengine/executionengine_ocaml.c
@@ -16,6 +16,7 @@
 \*===----------------------------------------------------------------------===*/
 
 #include "llvm-c/ExecutionEngine.h"
+#include "llvm-c/Target.h"
 #include "caml/alloc.h"
 #include "caml/custom.h"
 #include "caml/fail.h"
@@ -23,6 +24,12 @@
 #include <string.h>
 #include <assert.h>
 
+/* Force the LLVM interpreter, JIT, and native target to be linked in. */
+void llvm_initialize(void) {
+  LLVMLinkInInterpreter();      /* reference keeps the interpreter in the link */
+  LLVMLinkInJIT();              /* reference keeps the JIT in the link */
+  LLVMInitializeNativeTarget(); /* presumably from llvm-c/Target.h - confirm */
+}
 
 /* Can't use the recommended caml_named_value mechanism for backwards
    compatibility reasons. This is largely equivalent. */
diff --git a/bindings/ocaml/llvm/Makefile b/bindings/ocaml/llvm/Makefile
index 3a06fb6..cd974d4 100644
--- a/bindings/ocaml/llvm/Makefile
+++ b/bindings/ocaml/llvm/Makefile
@@ -13,7 +13,6 @@
 
 LEVEL := ../../..
 LIBRARYNAME := llvm
-DONT_BUILD_RELINKED := 1
 UsedComponents := core
 UsedOcamLibs := llvm
 
diff --git a/bindings/ocaml/target/Makefile b/bindings/ocaml/target/Makefile
index 5cd677b..3c48cd8 100644
--- a/bindings/ocaml/target/Makefile
+++ b/bindings/ocaml/target/Makefile
@@ -13,7 +13,6 @@
 
 LEVEL := ../../..
 LIBRARYNAME := llvm_target
-DONT_BUILD_RELINKED := 1
 UsedComponents := target
 UsedOcamlInterfaces := llvm
 
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index c460fa2..61161ed 100755
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -64,6 +64,8 @@ check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO)
 check_symbol_exists(malloc_zone_statistics malloc/malloc.h
                     HAVE_MALLOC_ZONE_STATISTICS)
 check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK)
+check_symbol_exists(pthread_rwlock_init pthread.h HAVE_PTHREAD_RWLOCK_INIT)
+check_symbol_exists(pthread_getspecific pthread.h HAVE_PTHREAD_GETSPECIFIC)
 check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL)
 
 check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC)
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index b196c63..660bd70 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -58,4 +58,7 @@ macro(add_llvm_target target_name)
   endif( TABLEGEN_OUTPUT )
   include_directories(BEFORE ${CMAKE_CURRENT_BINARY_DIR})
   add_llvm_library(LLVM${target_name} ${ARGN} ${TABLEGEN_OUTPUT})
+  if( TABLEGEN_OUTPUT )  # non-empty once tablegen() has recorded an output
+    add_dependencies(LLVM${target_name} ${target_name}Table_gen)
+  endif( TABLEGEN_OUTPUT )
 endmacro(add_llvm_target)
diff --git a/cmake/modules/TableGen.cmake b/cmake/modules/TableGen.cmake
index 16c732b..0a96b55 100644
--- a/cmake/modules/TableGen.cmake
+++ b/cmake/modules/TableGen.cmake
@@ -20,4 +20,6 @@ macro(tablegen ofn)
     COMMENT "Building ${ofn}..."
     )
   set(TABLEGEN_OUTPUT ${TABLEGEN_OUTPUT} ${CMAKE_CURRENT_BINARY_DIR}/${ofn})
+  set_source_files_properties(${CMAKE_CURRENT_BINARY_DIR}/${ofn}
+    PROPERTIES GENERATED TRUE)  # the file only exists after the build creates it
 endmacro(tablegen)
diff --git a/configure b/configure
index 4d0d90f..a237d67 100755
--- a/configure
+++ b/configure
@@ -28034,6 +28034,109 @@ _ACEOF
 
 fi
 
+  { echo "$as_me:$LINENO: checking for library containing pthread_getspecific" >&5
+echo $ECHO_N "checking for library containing pthread_getspecific... $ECHO_C" >&6; }
+if test "${ac_cv_search_pthread_getspecific+set}" = set; then
+  echo $ECHO_N "(cached) $ECHO_C" >&6
+else
+  ac_func_search_save_LIBS=$LIBS
+cat >conftest.$ac_ext <<_ACEOF
+/* confdefs.h.  */
+_ACEOF
+cat confdefs.h >>conftest.$ac_ext
+cat >>conftest.$ac_ext <<_ACEOF
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char pthread_getspecific ();
+int
+main ()
+{
+return pthread_getspecific ();
+  ;
+  return 0;
+}
+_ACEOF
+for ac_lib in '' pthread; do
+  if test -z "$ac_lib"; then
+    ac_res="none required"
+  else
+    ac_res=-l$ac_lib
+    LIBS="-l$ac_lib  $ac_func_search_save_LIBS"
+  fi
+  rm -f conftest.$ac_objext conftest$ac_exeext
+if { (ac_try="$ac_link"
+case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_link") 2>conftest.er1
+  ac_status=$?
+  grep -v '^ *+' conftest.er1 >conftest.err
+  rm -f conftest.er1
+  cat conftest.err >&5
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); } &&
+	 { ac_try='test -z "$ac_c_werror_flag" || test ! -s conftest.err'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; } &&
+	 { ac_try='test -s conftest$ac_exeext'
+  { (case "(($ac_try" in
+  *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
+  *) ac_try_echo=$ac_try;;
+esac
+eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
+  (eval "$ac_try") 2>&5
+  ac_status=$?
+  echo "$as_me:$LINENO: \$? = $ac_status" >&5
+  (exit $ac_status); }; }; then
+  ac_cv_search_pthread_getspecific=$ac_res
+else
+  echo "$as_me: failed program was:" >&5
+sed 's/^/| /' conftest.$ac_ext >&5
+
+
+fi
+
+rm -f core conftest.err conftest.$ac_objext \
+      conftest$ac_exeext
+  if test "${ac_cv_search_pthread_getspecific+set}" = set; then
+  break
+fi
+done
+if test "${ac_cv_search_pthread_getspecific+set}" = set; then
+  :
+else
+  ac_cv_search_pthread_getspecific=no
+fi
+rm conftest.$ac_ext
+LIBS=$ac_func_search_save_LIBS
+fi
+{ echo "$as_me:$LINENO: result: $ac_cv_search_pthread_getspecific" >&5
+echo "${ECHO_T}$ac_cv_search_pthread_getspecific" >&6; }
+ac_res=$ac_cv_search_pthread_getspecific
+if test "$ac_res" != no; then
+  test "$ac_res" = "none required" || LIBS="$ac_res $LIBS"
+
+cat >>confdefs.h <<\_ACEOF
+#define HAVE_PTHREAD_GETSPECIFIC 1
+_ACEOF
+
+fi
+
 fi
 
 
diff --git a/docs/CommandGuide/llvmc.pod b/docs/CommandGuide/llvmc.pod
index 18ffb9f..97445ed 100644
--- a/docs/CommandGuide/llvmc.pod
+++ b/docs/CommandGuide/llvmc.pod
@@ -65,8 +65,11 @@ name. Hidden option, useful for debugging LLVMC plugins.
 
 =item B<--save-temps>
 
-Write temporary files to the current directory and do not delete them
-on exit. Hidden option, useful for debugging.
+Write temporary files to the current directory and do not delete them on
+exit. This option can also take an argument: the I<--save-temps=obj> switch will
+write files into the directory specified with the I<-o> option. The
+I<--save-temps=cwd> and I<--save-temps> switches are both synonyms for the
+default behaviour.
 
 =item B<--help>
 
diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html
index 5b42148..e79ec5c 100644
--- a/docs/CompilerDriver.html
+++ b/docs/CompilerDriver.html
@@ -109,6 +109,11 @@ until the next -x option.</li>
 <li><tt class="docutils literal"><span class="pre">-load</span> <span class="pre">PLUGIN_NAME</span></tt> - Load the specified plugin DLL. Example:
 <tt class="docutils literal"><span class="pre">-load</span> <span class="pre">$LLVM_DIR/Release/lib/LLVMCSimple.so</span></tt>.</li>
 <li><tt class="docutils literal"><span class="pre">-v</span></tt> - Enable verbose mode, i.e. print out all executed commands.</li>
+<li><tt class="docutils literal"><span class="pre">--save-temps</span></tt> - Write temporary files to the current directory and do not
+delete them on exit. This option can also take an argument: the
+<tt class="docutils literal"><span class="pre">--save-temps=obj</span></tt> switch will write files into the directory specified with
+the <tt class="docutils literal"><span class="pre">-o</span></tt> option. The <tt class="docutils literal"><span class="pre">--save-temps=cwd</span></tt> and <tt class="docutils literal"><span class="pre">--save-temps</span></tt> switches are
+both synonyms for the default behaviour.</li>
 <li><tt class="docutils literal"><span class="pre">--check-graph</span></tt> - Check the compilation for common errors like mismatched
 output/input language names, multiple default edges and cycles. Because of
 plugins, these checks can't be performed at compile-time. Exit with code zero
@@ -122,8 +127,6 @@ directory with the compilation graph description in Graphviz format (identical
 to the file used by the <tt class="docutils literal"><span class="pre">--view-graph</span></tt> option). The <tt class="docutils literal"><span class="pre">-o</span></tt> option can be
 used to set the output file name. Hidden option, useful for debugging LLVMC
 plugins.</li>
-<li><tt class="docutils literal"><span class="pre">--save-temps</span></tt> - Write temporary files to the current directory
-and do not delete them on exit. Hidden option, useful for debugging.</li>
 <li><tt class="docutils literal"><span class="pre">--help</span></tt>, <tt class="docutils literal"><span class="pre">--help-hidden</span></tt>, <tt class="docutils literal"><span class="pre">--version</span></tt> - These options have
 their standard meaning.</li>
 </ul>
@@ -642,7 +645,7 @@ errors as its status code.</p>
 <a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a><br />
 <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br />
 
-Last modified: $Date: 2009-06-17 02:56:48 +0000 (Wed, 17 Jun 2009) $
+Last modified: $Date: 2009-06-25 20:21:10 +0200 (Thu, 25 Jun 2009) $
 </address></div>
 </div>
 </div>
diff --git a/docs/ReleaseNotes-2.6.html b/docs/ReleaseNotes-2.6.html
new file mode 100644
index 0000000..ddf3db4
--- /dev/null
+++ b/docs/ReleaseNotes-2.6.html
@@ -0,0 +1,777 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
+                      "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
+  <link rel="stylesheet" href="llvm.css" type="text/css">
+  <title>LLVM 2.6 Release Notes</title>
+</head>
+<body>
+
+<div class="doc_title">LLVM 2.6 Release Notes</div>
+
+<ol>
+  <li><a href="#intro">Introduction</a></li>
+  <li><a href="#subproj">Sub-project Status Update</a></li>
+  <li><a href="#externalproj">External Projects Using LLVM 2.6</a></li>
+  <li><a href="#whatsnew">What's New in LLVM 2.6?</a></li>
+  <li><a href="GettingStarted.html">Installation Instructions</a></li>
+  <li><a href="#portability">Portability and Supported Platforms</a></li>
+  <li><a href="#knownproblems">Known Problems</a></li>
+  <li><a href="#additionalinfo">Additional Information</a></li>
+</ol>
+
+<div class="doc_author">
+  <p>Written by the <a href="http://llvm.org">LLVM Team</a></p>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="intro">Introduction</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This document contains the release notes for the LLVM Compiler
+Infrastructure, release 2.6.  Here we describe the status of LLVM, including
+major improvements from the previous release and significant known problems.
+All LLVM releases may be downloaded from the <a
+href="http://llvm.org/releases/">LLVM releases web site</a>.</p>
+
+<p>For more information about LLVM, including information about the latest
+release, please check out the <a href="http://llvm.org/">main LLVM
+web site</a>.  If you have questions or comments, the <a
+href="http://mail.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVM Developer's Mailing
+List</a> is a good place to send them.</p>
+
+<p>Note that if you are reading this file from a Subversion checkout or the
+main LLVM web page, this document applies to the <i>next</i> release, not the
+current one.  To see the release notes for a specific release, please see the
+<a href="http://llvm.org/releases/">releases page</a>.</p>
+
+</div>
+
+<!-- Unfinished features in 2.5:
+  Machine LICM
+  Machine Sinking
+  target-specific intrinsics
+  gold lto plugin
+  pre-alloc splitter, strong phi elim
+  <tt>llc -enable-value-prop</tt>, propagation of value info
+       (sign/zero ext info) from one MBB to another
+  debug info for optimized code
+  interpreter + libffi
+  postalloc scheduler: anti dependence breaking, hazard recognizer?
+
+initial support for debug line numbers when optimization enabled, not useful in
+  2.5 but will be for 2.6.
+
+ -->
+
+ <!-- for announcement email:
+   -->
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="subproj">Sub-project Status Update</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+<p>
+The LLVM 2.6 distribution currently consists of code from the core LLVM
+repository &mdash; which roughly includes the LLVM optimizers, code generators
+and supporting tools &mdash; and the llvm-gcc repository.  In addition to this
+code, the LLVM Project includes other sub-projects that are in development.  The
+two which are the most actively developed are the <a href="#clang">Clang
+Project</a> and the <a href="#vmkit">VMKit Project</a>.
+</p>
+
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="clang">Clang: C/C++/Objective-C Frontend Toolkit</a>
+</div>
+
+<div class="doc_text">
+
+<p>The <a href="http://clang.llvm.org/">Clang project</a> is an effort to build
+a set of new 'LLVM native' front-end technologies for the LLVM optimizer and
+code generator.  While Clang is not included in the LLVM 2.6 release, it is
+continuing to make major strides forward in all areas.  Its C and Objective-C
+parsing and code generation support is now very solid.  For example, it is
+capable of successfully building many real-world applications for X86-32
+and X86-64,
+including the <a href="http://wiki.freebsd.org/BuildingFreeBSDWithClang">FreeBSD
+kernel</a> and <a href="http://gcc.gnu.org/gcc-4.2/">gcc 4.2</a>.  C++ is also
+making <a href="http://clang.llvm.org/cxx_status.html">incredible progress</a>,
+and work on templates has recently started.  If you are
+interested in fast compiles and good diagnostics, we encourage you to try it out
+by <a href="http://clang.llvm.org/get_started.html">building from mainline</a>
+and reporting any issues you hit to the <a
+href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-dev">Clang front-end mailing
+list</a>.</p>
+
+<p>In the LLVM 2.6 time-frame, the Clang team has made many improvements:</p>
+
+<ul>
+<li>Something wonderful!</li>
+<li>Many many bugs are fixed and many features have been added.</li>
+</ul>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="clangsa">Clang Static Analyzer</a>
+</div>
+
+<div class="doc_text">
+
+<p>Previously announced in the 2.4 LLVM release, the Clang project also
+includes an early stage static source code analysis tool for <a
+href="http://clang.llvm.org/StaticAnalysis.html">automatically finding bugs</a>
+in C and Objective-C programs. The tool performs a growing set of checks to find
+bugs that occur on a specific path within a program.</p>
+
+<p>In the LLVM 2.6 time-frame there have been many significant improvements to
+XYZ.</p>
+
+<p>The set of checks performed by the static analyzer continues to expand, and
+future plans for the tool include full source-level inter-procedural analysis
+and deeper checks such as buffer overrun detection. There are many opportunities
+to extend and enhance the static analyzer, and anyone interested in working on
+this project is encouraged to get involved!</p>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="vmkit">VMKit: JVM/CLI Virtual Machine Implementation</a>
+</div>
+
+<div class="doc_text">
+<p>
+The <a href="http://vmkit.llvm.org/">VMKit project</a> is an implementation of
+a JVM and a CLI Virtual Machine (Microsoft .NET is an
+implementation of the CLI) using the Just-In-Time compiler of LLVM.</p>
+
+<p>Following LLVM 2.6, VMKit has its XYZ release that you can find on its
+<a href="http://vmkit.llvm.org/releases/">webpage</a>. The release includes
+bug fixes, cleanup and new features. The major changes are:</p>
+
+<ul>
+
+<li>Something wonderful!</li>
+
+</ul>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="externalproj">External Projects Using LLVM 2.6</a>
+</div>
+<!-- *********************************************************************** -->
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="pure">Pure</a>
+</div>
+
+<div class="doc_text">
+<p>
+<a href="http://pure-lang.googlecode.com/">Pure</a>
+is an algebraic/functional programming language based on term rewriting.
+Programs are collections of equations which are used to evaluate expressions in
+a symbolic fashion. Pure offers dynamic typing, eager and lazy evaluation,
+lexical closures, a hygienic macro system (also based on term rewriting),
+built-in list and matrix support (including list and matrix comprehensions) and
+an easy-to-use C interface. The interpreter uses LLVM as a backend to
+ JIT-compile Pure programs to fast native code.</p>
+
+<p>In addition to the usual algebraic data structures, Pure also has
+MATLAB-style matrices in order to support numeric computations and signal
+processing in an efficient way. Pure is mainly aimed at mathematical
+applications right now, but it has been designed as a general purpose language.
+The dynamic interpreter environment and the C interface make it possible to use
+it as a kind of functional scripting language for many application areas.
+</p>
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="ldc">LLVM D Compiler</a>
+</div>
+
+<div class="doc_text">
+<p>
+<a href="http://www.dsource.org/projects/ldc">LDC</a> is an implementation of
+the D Programming Language using the LLVM optimizer and code generator.
+The LDC project works great with the LLVM 2.6 release.  General improvements in
+this
+cycle have included new inline asm constraint handling, better debug info
+support, general bugfixes, and better x86-64 support.  This has allowed
+some major improvements in LDC, getting us much closer to being as
+fully featured as the original DMD compiler from DigitalMars.
+</p>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="RoadsendPHP">Roadsend PHP</a>
+</div>
+
+<div class="doc_text">
+<p><a href="http://code.roadsend.com/rphp">Roadsend PHP</a> (rphp) is an open
+source implementation of the PHP programming 
+language that uses LLVM for its optimizer, JIT, and static compiler. This is a 
+reimplementation of an earlier project that is now based on LLVM.</p>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="UnladenSwallow">Unladen Swallow</a>
+</div>
+
+<div class="doc_text">
+<p><a href="http://code.google.com/p/unladen-swallow/">Unladen Swallow</a> is a
+branch of <a href="http://python.org/">Python</a> intended to be fully
+compatible and significantly faster.  It uses LLVM's optimization passes and JIT
+compiler.</p>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="Rubinius">Rubinius</a>
+</div>
+
+<div class="doc_text">
+<p><a href="http://github.com/evanphx/rubinius">Rubinius</a> is a new virtual
+machine for Ruby. It leverages LLVM to dynamically compile Ruby code down to
+machine code using LLVM's JIT.</p>
+</div>
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="whatsnew">What's New in LLVM 2.6?</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This release includes a huge number of bug fixes, performance tweaks, and
+minor improvements.  Some of the major improvements and new features are listed
+in this section.
+</p>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="majorfeatures">Major New Features</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM 2.6 includes several major new capabilities:</p>
+
+<ul>
+<li>Something wonderful!</li>
+</ul>
+
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="llvm-gcc">llvm-gcc 4.2 Improvements</a>
+</div>
+
+<div class="doc_text">
+
+<p>LLVM fully supports the llvm-gcc 4.2 front-end, which marries the GCC
+front-ends and driver with the LLVM optimizer and code generator.  It currently
+includes support for the C, C++, Objective-C, Ada, and Fortran front-ends.</p>
+
+<ul>
+<li>Something wonderful!</li>
+</ul>
+
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="coreimprovements">LLVM IR and Core Improvements</a>
+</div>
+
+<div class="doc_text">
+<p>LLVM IR has several new features that are used by our existing front-ends and
+can be useful if you are writing a front-end for LLVM:</p>
+
+<ul>
+<li>Something wonderful!</li>
+</ul>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="optimizer">Optimizer Improvements</a>
+</div>
+
+<div class="doc_text">
+
+<p>In addition to a large array of bug fixes and minor performance tweaks, this
+release includes a few major enhancements and additions to the optimizers:</p>
+
+<ul>
+
+<li>Something wonderful!</li>
+
+</ul>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="codegen">Target Independent Code Generator Improvements</a>
+</div>
+
+<div class="doc_text">
+
+<p>We have put a significant amount of work into the code generator
+infrastructure, which allows us to implement more aggressive algorithms and make
+it run faster:</p>
+
+<ul>
+
+<li>Something wonderful!</li>
+</ul>
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="x86">X86-32 and X86-64 Target Improvements</a>
+</div>
+
+<div class="doc_text">
+<p>New features of the X86 target include:
+</p>
+
+<ul>
+
+<li>Something wonderful!</li>
+</ul>
+
+</div>
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="pic16">PIC16 Target Improvements</a>
+</div>
+
+<div class="doc_text">
+<p>New features of the PIC16 target include:
+</p>
+
+<ul>
+<li>Something wonderful!</li>
+</ul>
+
+<p>Things not yet supported:</p>
+
+<ul>
+<li>Floating point.</li>
+<li>Passing/returning aggregate types to and from functions.</li>
+<li>Variable arguments.</li>
+<li>Indirect function calls.</li>
+<li>Interrupts/programs.</li>
+<li>Debug info.</li>
+</ul>
+
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="llvmc">Improvements in LLVMC</a>
+</div>
+
+<div class="doc_text">
+<p>New features include:</p>
+
+<ul>
+<li>Something wonderful!</li>
+</ul>
+
+</div>
+
+
+<!--=========================================================================-->
+<div class="doc_subsection">
+<a name="changes">Major Changes and Removed Features</a>
+</div>
+
+<div class="doc_text">
+
+<p>If you're already an LLVM user or developer with out-of-tree changes based
+on LLVM 2.5, this section lists some "gotchas" that you may run into upgrading
+from the previous release.</p>
+
+<ul>
+
+<li>Something horrible!</li>
+
+</ul>
+
+
+<p>In addition, many APIs have changed in this release.  Some of the major LLVM
+API changes are:</p>
+
+<ul>
+<li>The <tt>getABITypeSize</tt> methods are now called <tt>getAllocSize</tt>.</li>
+</ul>
+
+</div>
+
+
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="portability">Portability and Supported Platforms</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>LLVM is known to work on the following platforms:</p>
+
+<ul>
+<li>Intel and AMD machines (IA32, X86-64, AMD64, EM64T) running Red Hat
+Linux, Fedora Core and FreeBSD (and probably other unix-like systems).</li>
+<li>PowerPC and X86-based Mac OS X systems, running 10.3 and above in 32-bit
+and 64-bit modes.</li>
+<li>Intel and AMD machines running on Win32 using MinGW libraries (native).</li>
+<li>Intel and AMD machines running on Win32 with the Cygwin libraries (limited
+    support is available for native builds with Visual C++).</li>
+<li>Sun UltraSPARC workstations running Solaris 10.</li>
+<li>Alpha-based machines running Debian GNU/Linux.</li>
+<li>Itanium-based (IA64) machines running Linux and HP-UX.</li>
+</ul>
+
+<p>The core LLVM infrastructure uses GNU autoconf to adapt itself
+to the machine and operating system on which it is built.  However, minor
+porting may be required to get LLVM to work on new platforms.  We welcome your
+portability patches and reports of successful builds or error messages.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="knownproblems">Known Problems</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>This section contains significant known problems with the LLVM system,
+listed by component.  If you run into a problem, please check the <a
+href="http://llvm.org/bugs/">LLVM bug database</a> and submit a bug if
+there isn't already one.</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="experimental">Experimental features included with this release</a>
+</div>
+
+<div class="doc_text">
+
+<p>The following components of this LLVM release are either untested, known to
+be broken or unreliable, or are in early development.  These components should
+not be relied on, and bugs should not be filed against them, but they may be
+useful to some people.  In particular, if you would like to work on one of these
+components, please contact us on the <a
+href="http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev">LLVMdev list</a>.</p>
+
+<ul>
+<li>The MSIL, IA64, Alpha, SPU, MIPS, and PIC16 backends are experimental.</li>
+<li>The <tt>llc</tt> "<tt>-filetype=asm</tt>" (the default) is the only
+    supported value for this option.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="x86-be">Known problems with the X86 back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+  <li>The X86 backend does not yet support
+    all <a href="http://llvm.org/PR879">inline assembly that uses the X86
+    floating point stack</a>.  It supports the 'f' and 't' constraints, but not
+    'u'.</li>
+  <li>The X86 backend generates inefficient floating point code when configured
+    to generate code for systems that don't have SSE2.</li>
+  <li>Win64 code generation wasn't widely tested. Everything should work, but we
+    expect small issues to happen. Also, llvm-gcc cannot build the mingw64
+    runtime currently due
+    to <a href="http://llvm.org/PR2255">several</a>
+    <a href="http://llvm.org/PR2257">bugs</a> and due to lack of support for
+    the
+    'u' inline assembly constraint and for X87 floating point inline assembly.</li>
+  <li>The X86-64 backend does not yet support the LLVM IR instruction
+      <tt>va_arg</tt>. Currently, the llvm-gcc front-ends support variadic
+      argument constructs on X86-64 by lowering them manually.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="ppc-be">Known problems with the PowerPC back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li>The Linux PPC32/ABI support needs testing for the interpreter and static
+compilation, and lacks support for debug information.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="arm-be">Known problems with the ARM back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li>Thumb mode works only on ARMv6 or higher processors. On sub-ARMv6
+processors, thumb programs can crash or produce wrong
+results (<a href="http://llvm.org/PR1388">PR1388</a>).</li>
+<li>Compilation for ARM Linux OABI (old ABI) is supported but not fully tested.
+</li>
+<li>There is a bug in QEMU-ARM (&lt;= 0.9.0) which causes it to incorrectly
+ execute
+programs compiled with LLVM.  Please use more recent versions of QEMU.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="sparc-be">Known problems with the SPARC back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li>The SPARC backend only supports the 32-bit SPARC ABI (-m32); it does not
+    support the 64-bit SPARC ABI (-m64).</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="mips-be">Known problems with the MIPS back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li>The O32 ABI is not fully supported.</li>
+<li>64-bit MIPS targets are not supported yet.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="alpha-be">Known problems with the Alpha back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+
+<li>On 21164s, some rare FP arithmetic sequences which may trap do not have the
+appropriate nops inserted to ensure restartability.</li>
+
+</ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="ia64-be">Known problems with the IA64 back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li>The Itanium backend is highly experimental and has a number of known
+    issues.  We are looking for a maintainer for the Itanium backend.  If you
+    are interested, please contact the LLVMdev mailing list.</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="c-be">Known problems with the C back-end</a>
+</div>
+
+<div class="doc_text">
+
+<ul>
+<li><a href="http://llvm.org/PR802">The C backend has only basic support for
+    inline assembly code</a>.</li>
+<li><a href="http://llvm.org/PR1658">The C backend violates the ABI of common
+    C++ programs</a>, preventing intermixing between C++ compiled by the CBE and
+    C++ code compiled with <tt>llc</tt> or native compilers.</li>
+<li>The C backend does not support all exception handling constructs.</li>
+<li>The C backend does not support arbitrary precision integers.</li>
+</ul>
+
+</div>
+
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="c-fe">Known problems with the llvm-gcc C front-end</a>
+</div>
+
+<div class="doc_text">
+
+<p>llvm-gcc does not currently support <a href="http://llvm.org/PR869">Link-Time
+Optimization</a> on most platforms "out-of-the-box".  Please inquire on the
+LLVMdev mailing list if you are interested.</p>
+
+<p>The only major language feature of GCC not supported by llvm-gcc is
+    the <tt>__builtin_apply</tt> family of builtins.   However, some extensions
+    are only supported on some targets.  For example, trampolines are only
+    supported on some targets (these are used when you take the address of a
+    nested function).</p>
+
+<p>If you run into GCC extensions which are not supported, please let us know.
+</p>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="c++-fe">Known problems with the llvm-gcc C++ front-end</a>
+</div>
+
+<div class="doc_text">
+
+<p>The C++ front-end is considered to be fully
+tested and works for a number of non-trivial programs, including LLVM
+itself, Qt, Mozilla, etc.</p>
+
+<ul>
+<li>Exception handling works well on the X86 and PowerPC targets. Currently
+  only Linux and Darwin targets are supported (both 32 and 64 bit).</li>
+</ul>
+
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="fortran-fe">Known problems with the llvm-gcc Fortran front-end</a>
+</div>
+
+<div class="doc_text">
+<ul>
+<li>Fortran support generally works, but there are still several unresolved bugs
+    in Bugzilla.  Please see the tools/gfortran component for details.</li>
+</ul>
+</div>
+
+<!-- ======================================================================= -->
+<div class="doc_subsection">
+  <a name="ada-fe">Known problems with the llvm-gcc Ada front-end</a>
+</div>
+
+<div class="doc_text">
+<p>The llvm-gcc 4.2 Ada compiler works fairly well; however, this is not a mature
+technology, and problems should be expected.</p>
+<ul>
+<li>The Ada front-end currently only builds on X86-32.  This is mainly due
+to lack of trampoline support (pointers to nested functions) on other platforms.
+However, it <a href="http://llvm.org/PR2006">also fails to build on X86-64</a>
+which does support trampolines.</li>
+<li>The Ada front-end <a href="http://llvm.org/PR2007">fails to bootstrap</a>.
+This is due to lack of LLVM support for <tt>setjmp</tt>/<tt>longjmp</tt> style
+exception handling, which is used internally by the compiler.
+Workaround: configure with --disable-bootstrap.</li>
+<li>The c380004, <a href="http://llvm.org/PR2010">c393010</a>
+and <a href="http://llvm.org/PR2421">cxg2021</a> ACATS tests fail
+(c380004 also fails with gcc-4.2 mainline).
+If the compiler is built with checks disabled then <a href="http://llvm.org/PR2010">c393010</a>
+causes the compiler to go into an infinite loop, using up all system memory.</li>
+<li>Some GCC specific Ada tests continue to crash the compiler.</li>
+<li>The -E binder option (exception backtraces)
+<a href="http://llvm.org/PR1982">does not work</a> and will result in programs
+crashing if an exception is raised.  Workaround: do not use -E.</li>
+<li>Only discrete types <a href="http://llvm.org/PR1981">are allowed to start
+or finish at a non-byte offset</a> in a record.  Workaround: do not pack records
+or use representation clauses that result in a field of a non-discrete type
+starting or finishing in the middle of a byte.</li>
+<li>The <tt>lli</tt> interpreter <a href="http://llvm.org/PR2009">considers
+'main' as generated by the Ada binder to be invalid</a>.
+Workaround: hand edit the file to use pointers for <tt>argv</tt> and
+<tt>envp</tt> rather than integers.</li>
+<li>The <tt>-fstack-check</tt> option <a href="http://llvm.org/PR2008">is
+ignored</a>.</li>
+</ul>
+</div>
+
+<!-- *********************************************************************** -->
+<div class="doc_section">
+  <a name="additionalinfo">Additional Information</a>
+</div>
+<!-- *********************************************************************** -->
+
+<div class="doc_text">
+
+<p>A wide variety of additional information is available on the <a
+href="http://llvm.org">LLVM web page</a>, in particular in the <a
+href="http://llvm.org/docs/">documentation</a> section.  The web page also
+contains versions of the API documentation which are up-to-date with the
+Subversion version of the source code.
+You can access versions of these documents specific to this release by going
+into the "<tt>llvm/doc/</tt>" directory in the LLVM tree.</p>
+
+<p>If you have any questions or comments about LLVM, please feel free to contact
+us via the <a href="http://llvm.org/docs/#maillist"> mailing
+lists</a>.</p>
+
+</div>
+
+<!-- *********************************************************************** -->
+
+<hr>
+<address>
+  <a href="http://jigsaw.w3.org/css-validator/check/referer"><img
+  src="http://jigsaw.w3.org/css-validator/images/vcss-blue" alt="Valid CSS"></a>
+  <a href="http://validator.w3.org/check/referer"><img
+  src="http://www.w3.org/Icons/valid-html401-blue" alt="Valid HTML 4.01"></a>
+
+  <a href="http://llvm.org/">LLVM Compiler Infrastructure</a><br>
+  Last modified: $Date: 2009-06-24 23:26:42 +0200 (Wed, 24 Jun 2009) $
+</address>
+
+</body>
+</html>
diff --git a/docs/SourceLevelDebugging.html b/docs/SourceLevelDebugging.html
index 17851b7..e940e93 100644
--- a/docs/SourceLevelDebugging.html
+++ b/docs/SourceLevelDebugging.html
@@ -24,7 +24,6 @@
   <ol>
     <li><a href="#debug_info_descriptors">Debug information descriptors</a>
     <ul>
-      <li><a href="#format_anchors">Anchor descriptors</a></li>
       <li><a href="#format_compile_units">Compile unit descriptors</a></li>
       <li><a href="#format_global_variables">Global variable descriptors</a></li>
       <li><a href="#format_subprograms">Subprogram descriptors</a></li>
@@ -336,58 +335,6 @@ height="369">
 
 <!-- ======================================================================= -->
 <div class="doc_subsubsection">
-  <a name="format_anchors">Anchor descriptors</a>
-</div>
-
-<div class="doc_text">
-
-<div class="doc_code">
-<pre>
-%<a href="#format_anchors">llvm.dbg.anchor.type</a> = type {
-  i32,   ;; Tag = 0 + <a href="#LLVMDebugVersion">LLVMDebugVersion</a>
-  i32    ;; Tag of descriptors grouped by the anchor
-}
-</pre>
-</div>
-
-<p>One important aspect of the LLVM debug representation is that it allows the
-   LLVM debugger to efficiently index all of the global objects without having
-   to scan the program.  To do this, all of the global objects use "anchor"
-   descriptors with designated names.  All of the global objects of a particular
-   type (e.g., compile units) contain a pointer to the anchor.  This pointer
-   allows a debugger to use def-use chains to find all global objects of that
-   type.</p>
-
-<p>The following names are recognized as anchors by LLVM:</p>
-
-<div class="doc_code">
-<pre>
-%<a href="#format_compile_units">llvm.dbg.compile_units</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> {
-  i32 0,
-  i32 17
-} ;; DW_TAG_compile_unit
-%<a href="#format_global_variables">llvm.dbg.global_variables</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> {
-  i32 0,
-  i32 52
-} ;; DW_TAG_variable
-%<a href="#format_subprograms">llvm.dbg.subprograms</a> = linkonce constant %<a href="#format_anchors">llvm.dbg.anchor.type</a> {
-  i32 0,
-  i32 46
-} ;; DW_TAG_subprogram
-</pre>
-</div>
-
-<p>Using anchors in this way (where the compile unit descriptor points to the
-   anchors, as opposed to having a list of compile unit descriptors) allows for
-   the standard dead global elimination and merging passes to automatically
-   remove unused debugging information.  If the globals were kept track of
-   through lists, there would always be an object pointing to the descriptors,
-   thus would never be deleted.</p>
-
-</div>
-
-<!-- ======================================================================= -->
-<div class="doc_subsubsection">
   <a name="format_compile_units">Compile unit descriptors</a>
 </div>
 
@@ -1919,7 +1866,7 @@ enum Trees {
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-05-29 19:08:57 +0200 (Fri, 29 May 2009) $
+  Last modified: $Date: 2009-06-26 03:49:18 +0200 (Fri, 26 Jun 2009) $
 </address>
 
 </body>
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html
index 617ebfc..32b16ca 100644
--- a/docs/TestingGuide.html
+++ b/docs/TestingGuide.html
@@ -78,27 +78,8 @@ required to build LLVM, plus the following:</p>
 <dd>Expect is required by DejaGNU.</dd>
 <dt><a href="http://www.tcl.tk/software/tcltk/">tcl</a></dt>
 <dd>Tcl is required by DejaGNU. </dd>
-
-<ul>
-<li><tt>./configure --with-f2c=$DIR</tt><br>
-This will specify a new <tt>$DIR</tt> for the above-described search
-process.  This will only work if the binary, header, and library are in their
-respective subdirectories of <tt>$DIR</tt>.</li>
-
-<li><tt>./configure --with-f2c-bin=/binary/path --with-f2c-inc=/include/path
---with-f2c-lib=/lib/path</tt><br>
-This allows you to specify the F2C components separately.  Note: if you choose
-this route, you MUST specify all three components, and you need to only specify
-<em>directories</em> where the files are located; do NOT include the
-filenames themselves on the <tt>configure</tt> line.</li>
-</ul></dd>
 </dl>
 
-<p>Darwin (Mac OS X) developers can simplify the installation of Expect and tcl
-by using fink.  <tt>fink install expect</tt> will install both. Alternatively,
-Darwinports users can use <tt>sudo port install expect</tt> to install Expect
-and tcl.</p>
-
 </div>
 
 <!--=========================================================================-->
@@ -122,14 +103,17 @@ tests" and are in the <tt>llvm</tt> module in subversion under the
 
 <div class="doc_text">
 
-<p>Code fragments are small pieces of code that test a specific feature of LLVM
-or trigger a specific bug in LLVM.  They are usually written in LLVM assembly
-language, but can be written in other languages if the test targets a particular
-language front end. These tests are driven by the DejaGNU testing framework,
-which is hidden behind a few simple makefiles.</p>
+<p>Code fragments are small pieces of code that test a specific
+feature of LLVM or trigger a specific bug in LLVM.  They are usually
+written in LLVM assembly language, but can be written in other
+languages if the test targets a particular language front end (and the
+appropriate <tt>--with-llvmgcc</tt> options were used
+at <tt>configure</tt> time of the <tt>llvm</tt> module). These tests
+are driven by the DejaGNU testing framework, which is hidden behind a
+few simple makefiles.</p>
 
-<p>These code fragments are not complete programs. The code generated from them is
-never executed to determine correct behavior.</p> 
+<p>These code fragments are not complete programs. The code generated
+from them is never executed to determine correct behavior.</p>
 
 <p>These code fragment tests are located in the <tt>llvm/test</tt>
 directory.</p>
@@ -251,10 +235,18 @@ programs), first checkout and setup the <tt>test-suite</tt> module:</p>
 % cd ..
 % ./configure --with-llvmgccdir=$LLVM_GCC_DIR
 </pre>
-<p>where <tt>$LLVM_GCC_DIR</tt> is the directory where you <em>installed</em>
-llvm-gcc, not it's src or obj dir.</p>
 </div>
 
+<p>where <tt>$LLVM_GCC_DIR</tt> is the directory where
+you <em>installed</em> llvm-gcc, not its src or obj
+dir. The <tt>--with-llvmgccdir</tt> option assumes that
+the <tt>llvm-gcc-4.2</tt> module was configured with
+<tt>--program-prefix=llvm-</tt>, and therefore that the C and C++
+compiler drivers are called <tt>llvm-gcc</tt> and <tt>llvm-g++</tt>
+respectively.  If this is not the case,
+use <tt>--with-llvmgcc</tt>/<tt>--with-llvmgxx</tt> to specify each
+executable's location.</p>
+
 <p>Then, run the entire test suite by running make in the <tt>test-suite</tt>
 directory:</p>
 
@@ -448,6 +440,11 @@ that subdirectory.</p>
 </pre>
 </div>
 
+<p>If your system includes GNU <tt>grep</tt>, make sure
+that <tt>GREP_OPTIONS</tt> is not set in your environment. Otherwise,
+you may get invalid results (both false positives and false
+negatives).</p>
+
 </div>
 
 <!-- _______________________________________________________________________ -->
@@ -746,6 +743,8 @@ test suite creates temporary files during execution.</p>
 have the suite checked out and configured, you don't need to do it again (unless
 the test code or configure script changes).</p>
 
+</div>
+
 <!-- _______________________________________________________________________ -->
 <div class="doc_subsection">
 <a name="testsuiteexternal">Configuring External Tests</a></div>
@@ -975,7 +974,7 @@ know. Thanks!</p>
 
   John T. Criswell, Reid Spencer, and Tanya Lattner<br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-05-21 22:23:59 +0200 (Thu, 21 May 2009) $
+  Last modified: $Date: 2009-06-26 07:44:53 +0200 (Fri, 26 Jun 2009) $
 </address>
 </body>
 </html>
diff --git a/include/llvm-c/ExecutionEngine.h b/include/llvm-c/ExecutionEngine.h
index a31dc82..9877b8d 100644
--- a/include/llvm-c/ExecutionEngine.h
+++ b/include/llvm-c/ExecutionEngine.h
@@ -26,6 +26,9 @@
 extern "C" {
 #endif
 
+void LLVMLinkInJIT(void);
+void LLVMLinkInInterpreter(void);
+
 typedef struct LLVMOpaqueGenericValue *LLVMGenericValueRef;
 typedef struct LLVMOpaqueExecutionEngine *LLVMExecutionEngineRef;
 
diff --git a/include/llvm-c/Target.h b/include/llvm-c/Target.h
index 5de5bc7..bb423bb 100644
--- a/include/llvm-c/Target.h
+++ b/include/llvm-c/Target.h
@@ -20,6 +20,7 @@
 #define LLVM_C_TARGET_H
 
 #include "llvm-c/Core.h"
+#include "llvm/Config/config.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -31,6 +32,34 @@ typedef int LLVMByteOrdering;
 typedef struct LLVMOpaqueTargetData *LLVMTargetDataRef;
 typedef struct LLVMStructLayout *LLVMStructLayoutRef;
 
+/* Declare all available target-initialization functions (with C prototypes). */
+#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target(void);
+#include "llvm/Config/Targets.def"
+
+/** LLVMInitializeAllTargets - The main program should call this function if it
+    wants to link in all available targets that LLVM is configured to
+    support.  One call is emitted per LLVM_TARGET entry in Targets.def. */
+static inline void LLVMInitializeAllTargets(void) {
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target();
+#include "llvm/Config/Targets.def"
+}
+  
+/** LLVMInitializeNativeTarget - Initialize the native target corresponding to
+    the host so that it gets linked in (useful for JIT applications).  Returns
+    0 on success, or 1 if LLVM_NATIVE_ARCH is not defined. */
+static inline int LLVMInitializeNativeTarget(void) {
+  /* If we have a native target, initialize it to ensure it is linked in. */
+#ifdef LLVM_NATIVE_ARCH
+#define DoInit2(TARG)   LLVMInitialize ## TARG ()
+#define DoInit(T) DoInit2(T)
+  DoInit(LLVM_NATIVE_ARCH);
+  return 0;
+#undef DoInit
+#undef DoInit2
+#else
+  return 1;
+#endif
+}
 
 /*===-- Target Data -------------------------------------------------------===*/
 
diff --git a/include/llvm/ADT/Statistic.h b/include/llvm/ADT/Statistic.h
index 537f866..484ff5f 100644
--- a/include/llvm/ADT/Statistic.h
+++ b/include/llvm/ADT/Statistic.h
@@ -26,14 +26,16 @@
 #ifndef LLVM_ADT_STATISTIC_H
 #define LLVM_ADT_STATISTIC_H
 
+#include "llvm/System/Atomic.h"
+
 namespace llvm {
 
 class Statistic {
 public:
   const char *Name;
   const char *Desc;
-  unsigned Value : 31;
-  bool Initialized : 1;
+  unsigned Value;
+  bool Initialized;
 
   unsigned getValue() const { return Value; }
   const char *getName() const { return Name; }
@@ -47,19 +49,60 @@ public:
 
   // Allow use of this class as the value itself.
   operator unsigned() const { return Value; }
-  const Statistic &operator=(unsigned Val) { Value = Val; return init(); }
-  const Statistic &operator++() { ++Value; return init(); }
-  unsigned operator++(int) { init(); return Value++; }
-  const Statistic &operator--() { --Value; return init(); }
-  unsigned operator--(int) { init(); return Value--; }
-  const Statistic &operator+=(const unsigned &V) { Value += V; return init(); }
-  const Statistic &operator-=(const unsigned &V) { Value -= V; return init(); }
-  const Statistic &operator*=(const unsigned &V) { Value *= V; return init(); }
-  const Statistic &operator/=(const unsigned &V) { Value /= V; return init(); }
+  const Statistic &operator=(unsigned Val) {
+    Value = Val;
+    return init();
+  }
+  
+  const Statistic &operator++() {
+    sys::AtomicIncrement(&Value);
+    return init();
+  }
+  
+  unsigned operator++(int) {
+    init();
+    unsigned OldValue = Value;
+    sys::AtomicIncrement(&Value);
+    return OldValue;
+  }
+  
+  const Statistic &operator--() {
+    sys::AtomicDecrement(&Value);
+    return init();
+  }
+  
+  unsigned operator--(int) {
+    init();
+    unsigned OldValue = Value;
+    sys::AtomicDecrement(&Value);
+    return OldValue;
+  }
+  
+  const Statistic &operator+=(const unsigned &V) {
+    sys::AtomicAdd(&Value, V);
+    return init();
+  }
+  
+  const Statistic &operator-=(const unsigned &V) {
+    sys::AtomicAdd(&Value, -V);
+    return init();
+  }
+  
+  const Statistic &operator*=(const unsigned &V) {
+    sys::AtomicMul(&Value, V);
+    return init();
+  }
+  
+  const Statistic &operator/=(const unsigned &V) {
+    sys::AtomicDiv(&Value, V);
+    return init();
+  }
 
 protected:
   Statistic &init() {
-    if (!Initialized) RegisterStatistic();
+    bool tmp = Initialized;
+    sys::MemoryFence();
+    if (!tmp) RegisterStatistic();
     return *this;
   }
   void RegisterStatistic();
diff --git a/include/llvm/ADT/Trie.h b/include/llvm/ADT/Trie.h
index 70f3b41..ed94f9d 100644
--- a/include/llvm/ADT/Trie.h
+++ b/include/llvm/ADT/Trie.h
@@ -308,7 +308,8 @@ struct DOTGraphTraits<Trie<Payload> > : public DefaultDOTGraphTraits {
     return "Trie";
   }
 
-  static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T) {
+  static std::string getNodeLabel(NodeType* Node, const Trie<Payload>& T,
+                                  bool ShortNames) {
     if (T.getRoot() == Node)
       return "<Root>";
     else
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index 972bb07..20de3a4 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -20,6 +20,7 @@
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Dwarf.h"
 
 namespace llvm {
@@ -36,7 +37,7 @@ namespace llvm {
 
   class DIDescriptor {
   protected:    
-    GlobalVariable *GV;
+    GlobalVariable *DbgGV;
 
     /// DIDescriptor constructor.  If the specified GV is non-null, this checks
     /// to make sure that the tag in the descriptor matches 'RequiredTag'.  If
@@ -58,12 +59,12 @@ namespace llvm {
     GlobalVariable *getGlobalVariableField(unsigned Elt) const;
 
   public:
-    explicit DIDescriptor() : GV(0) {}
-    explicit DIDescriptor(GlobalVariable *gv) : GV(gv) {}
+    explicit DIDescriptor() : DbgGV(0) {}
+    explicit DIDescriptor(GlobalVariable *GV) : DbgGV(GV) {}
 
-    bool isNull() const { return GV == 0; }
+    bool isNull() const { return DbgGV == 0; }
 
-    GlobalVariable *getGV() const { return GV; }
+    GlobalVariable *getGV() const { return DbgGV; }
 
     unsigned getVersion() const {
       return getUnsignedField(0) & LLVMDebugVersionMask;
@@ -80,15 +81,6 @@ namespace llvm {
     void dump() const;
   };
 
-  /// DIAnchor - A wrapper for various anchor descriptors.
-  class DIAnchor : public DIDescriptor {
-  public:
-    explicit DIAnchor(GlobalVariable *GV = 0)
-      : DIDescriptor(GV, dwarf::DW_TAG_anchor) {}
-
-    unsigned getAnchorTag() const { return getUnsignedField(1); }
-  };
-
   /// DISubrange - This is used to represent ranges, for array bounds.
   class DISubrange : public DIDescriptor {
   public:
@@ -245,7 +237,7 @@ namespace llvm {
     explicit DIDerivedType(GlobalVariable *GV)
       : DIType(GV, true, true) {
       if (GV && !isDerivedType(getTag()))
-        GV = 0;
+        DbgGV = 0;
     }
 
     DIType getTypeDerivedFrom() const { return getFieldAs<DIType>(9); }
@@ -265,7 +257,7 @@ namespace llvm {
     explicit DICompositeType(GlobalVariable *GV)
       : DIDerivedType(GV, true, true) {
       if (GV && !isCompositeType(getTag()))
-        GV = 0;
+        DbgGV = 0;
     }
 
     DIArray getTypeArray() const { return getFieldAs<DIArray>(10); }
@@ -330,6 +322,19 @@ namespace llvm {
 
     DICompositeType getType() const { return getFieldAs<DICompositeType>(8); }
 
+    /// getReturnTypeName - Field 8 is read as a DICompositeType (element 0 of
+    /// its type array gives the return type) or, failing that, as a DIType.
+    const std::string &getReturnTypeName(std::string &F) const {
+      DICompositeType Composite(getFieldAs<DICompositeType>(8));
+      if (Composite.isNull()) {
+        DIType Plain(getFieldAs<DIType>(8));
+        return Plain.getName(F);
+      }
+      DIArray Elements = Composite.getTypeArray();
+      DIType RetTy(Elements.getElement(0).getGV());
+      return RetTy.getName(F);
+    }
+
     /// Verify - Verify that a subprogram descriptor is well formed.
     bool Verify() const;
 
@@ -360,10 +365,10 @@ namespace llvm {
   /// global etc).
   class DIVariable : public DIDescriptor {
   public:
-    explicit DIVariable(GlobalVariable *gv = 0)
-      : DIDescriptor(gv) {
-      if (gv && !isVariable(getTag()))
-        GV = 0;
+    explicit DIVariable(GlobalVariable *GV = 0)
+      : DIDescriptor(GV) {
+      if (GV && !isVariable(getTag()))
+        DbgGV = 0;
     }
 
     DIDescriptor getContext() const { return getDescriptorField(1); }
@@ -398,7 +403,6 @@ namespace llvm {
   class DIFactory {
     Module &M;
     // Cached values for uniquing and faster lookups.
-    DIAnchor CompileUnitAnchor, SubProgramAnchor, GlobalVariableAnchor;
     const Type *EmptyStructPtr; // "{}*".
     Function *StopPointFn;   // llvm.dbg.stoppoint
     Function *FuncStartFn;   // llvm.dbg.func.start
@@ -413,18 +417,6 @@ namespace llvm {
   public:
     explicit DIFactory(Module &m);
 
-    /// GetOrCreateCompileUnitAnchor - Return the anchor for compile units,
-    /// creating a new one if there isn't already one in the module.
-    DIAnchor GetOrCreateCompileUnitAnchor();
-
-    /// GetOrCreateSubprogramAnchor - Return the anchor for subprograms,
-    /// creating a new one if there isn't already one in the module.
-    DIAnchor GetOrCreateSubprogramAnchor();
-
-    /// GetOrCreateGlobalVariableAnchor - Return the anchor for globals,
-    /// creating a new one if there isn't already one in the module.
-    DIAnchor GetOrCreateGlobalVariableAnchor();
-
     /// GetOrCreateArray - Create an descriptor for an array of descriptors. 
     /// This implicitly uniques the arrays created.
     DIArray GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys);
@@ -527,7 +519,6 @@ namespace llvm {
   private:
     Constant *GetTagConstant(unsigned TAG);
     Constant *GetStringConstant(const std::string &String);
-    DIAnchor GetOrCreateAnchor(unsigned TAG, const char *Name);
 
     /// getCastToEmpty - Return the descriptor as a Constant* with type '{}*'.
     Constant *getCastToEmpty(DIDescriptor D);
@@ -550,6 +541,13 @@ namespace llvm {
 
   bool getLocationInfo(const Value *V, std::string &DisplayName, std::string &Type, 
                        unsigned &LineNo, std::string &File, std::string &Dir); 
+
+  /// CollectDebugInfoAnchors - Collect debugging information anchors.
+  void CollectDebugInfoAnchors(Module &M,
+                               SmallVector<GlobalVariable *, 2> &CompileUnits,
+                               SmallVector<GlobalVariable *, 4> &GlobalVars,
+                               SmallVector<GlobalVariable *, 4> &Subprograms);
+
 } // end namespace llvm
 
 #endif
diff --git a/include/llvm/Analysis/LoopDependenceAnalysis.h b/include/llvm/Analysis/LoopDependenceAnalysis.h
new file mode 100644
index 0000000..c69bc60
--- /dev/null
+++ b/include/llvm/Analysis/LoopDependenceAnalysis.h
@@ -0,0 +1,52 @@
+//===- llvm/Analysis/LoopDependenceAnalysis.h --------------- -*- C++ -*---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LoopDependenceAnalysis is an LLVM pass that analyses dependences in memory
+// accesses in loops.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// TODO: adapt as interface progresses
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H
+#define LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H
+
+#include "llvm/Analysis/LoopPass.h"
+
+namespace llvm {
+
+  class AnalysisUsage;
+  class LoopPass;
+  class ScalarEvolution;
+
+  class LoopDependenceAnalysis : public LoopPass {
+    Loop *L;             // NOTE(review): left unset by the constructor below
+    ScalarEvolution *SE; // NOTE(review): left unset by the constructor below
+
+  public:
+    static char ID; // Class identification, replacement for typeinfo
+    LoopDependenceAnalysis() : LoopPass(&ID) {}
+
+    bool runOnLoop(Loop*, LPPassManager&); // implements the LoopPass hook
+
+    virtual void getAnalysisUsage(AnalysisUsage&) const;
+  }; // class LoopDependenceAnalysis
+
+
+  // createLoopDependenceAnalysisPass - This creates an instance of the
+  // LoopDependenceAnalysis pass.
+  //
+  LoopPass *createLoopDependenceAnalysisPass();
+
+} // namespace llvm
+
+#endif /* LLVM_ANALYSIS_LOOP_DEPENDENCE_ANALYSIS_H */
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index fb0b584..9e5f57e 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -281,6 +281,16 @@ public:
     }
   }
 
+  /// getUniqueExitBlock - Return the sole block that getUniqueExitBlocks
+  /// produces, or null when it produces zero or more than one block.
+  BlockT *getUniqueExitBlock() const {
+    SmallVector<BlockT*, 8> Exits;
+    getUniqueExitBlocks(Exits);
+    if (Exits.size() != 1)
+      return 0;
+    return Exits[0];
+  }
+
   /// getLoopPreheader - If there is a preheader for this loop, return it.  A
   /// loop has a preheader if there is only one edge to the header of the loop
   /// from outside of the loop.  If this is the case, the block branching to the
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index ca41e51..7659b5b 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -34,9 +34,6 @@ public:
   // runOnLoop - This method should be implemented by the subclass to perform
   // whatever action is necessary for the specified Loop.
   virtual bool runOnLoop(Loop *L, LPPassManager &LPM) = 0;
-  virtual bool runOnFunctionBody(Function &F, LPPassManager &LPM) {
-    return false;
-  }
 
   // Initialization and finalization hooks.
   virtual bool doInitialization(Loop *L, LPPassManager &LPM) {
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index d9121a8..35bd821 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -18,6 +18,7 @@
 namespace llvm {
   class FunctionPass;
   class ImmutablePass;
+  class LoopPass;
   class ModulePass;
   class Pass;
   class LibCallInfo;
@@ -116,6 +117,13 @@ namespace llvm {
   // createLiveValuesPass - This creates an instance of the LiveValues pass.
   //
   FunctionPass *createLiveValuesPass();
+
+  //===--------------------------------------------------------------------===//
+  //
+  // createLoopDependenceAnalysisPass - This creates an instance of the
+  // LoopDependenceAnalysis pass.
+  //
+  LoopPass *createLoopDependenceAnalysisPass();
   
   // Minor pass prototypes, allowing us to expose them through bugpoint and
   // analyze.
diff --git a/include/llvm/Analysis/ProfileInfoLoader.h b/include/llvm/Analysis/ProfileInfoLoader.h
index 8a5141a..9076fbc 100644
--- a/include/llvm/Analysis/ProfileInfoLoader.h
+++ b/include/llvm/Analysis/ProfileInfoLoader.h
@@ -33,6 +33,7 @@ class ProfileInfoLoader {
   std::vector<unsigned>    BlockCounts;
   std::vector<unsigned>    EdgeCounts;
   std::vector<unsigned>    BBTrace;
+  bool Warned;
 public:
   // ProfileInfoLoader ctor - Read the specified profiling data file, exiting
   // the program if the file is invalid or broken.
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 1c1298a..d699775 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -44,8 +44,8 @@ namespace llvm {
   class SCEVUnknown;
 
   /// SCEV - This class represents an analyzed expression in the program.  These
-  /// are reference-counted opaque objects that the client is not allowed to
-  /// do much with directly.
+  /// are opaque objects that the client is not allowed to do much with
+  /// directly.
   ///
   class SCEV {
     const unsigned SCEVType;      // The SCEV baseclass this node corresponds to
@@ -82,6 +82,11 @@ namespace llvm {
     ///
     bool isOne() const;
 
+    /// isAllOnesValue - Return true if the expression is a constant
+    /// all-ones value.
+    ///
+    bool isAllOnesValue() const;
+
     /// replaceSymbolicValuesWithConcrete - If this SCEV internally references
     /// the symbolic value "Sym", construct and return a new SCEV that produces
     /// the same value, but which uses the concrete value Conc instead of the
@@ -300,8 +305,9 @@ namespace llvm {
     /// try to evaluate a few iterations of the loop until we get the exit
     /// condition gets a value of ExitWhen (true or false).  If we cannot
     /// evaluate the trip count of the loop, return CouldNotCompute.
-    const SCEV* ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond,
-                                                     bool ExitWhen);
+    const SCEV* ComputeBackedgeTakenCountExhaustively(const Loop *L,
+                                                      Value *Cond,
+                                                      bool ExitWhen);
 
     /// HowFarToZero - Return the number of times a backedge comparing the
     /// specified value to zero will execute.  If not computable, return
@@ -329,6 +335,12 @@ namespace llvm {
     /// found.
     BasicBlock* getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
 
+    /// isNecessaryCond - Test whether the given Cond value is a condition
+    /// which is at least as strict as the one described by Pred, LHS, and RHS.
+    bool isNecessaryCond(Value *Cond, ICmpInst::Predicate Pred,
+                         const SCEV *LHS, const SCEV *RHS,
+                         bool Inverse);
+
     /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
     /// in the header of its containing loop, we know the loop executes a
     /// constant number of times, and the PHI node is just a recurrence
@@ -457,7 +469,7 @@ namespace llvm {
     /// widening.
     const SCEV* getTruncateOrNoop(const SCEV* V, const Type *Ty);
 
-    /// getIntegerSCEV - Given an integer or FP type, create a constant for the
+    /// getIntegerSCEV - Given a SCEVable type, create a constant for the
     /// specified signed integer value and return a SCEV for the constant.
     const SCEV* getIntegerSCEV(int Val, const Type *Ty);
 
@@ -531,10 +543,11 @@ namespace llvm {
     /// is deleted.
     void forgetLoopBackedgeTakenCount(const Loop *L);
 
-    /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is
-    /// guaranteed to end in (at every loop iteration).  It is, at the same time,
-    /// the minimum number of times S is divisible by 2.  For example, given {4,+,8}
-    /// it returns 2.  If S is guaranteed to be 0, it returns the bitwidth of S.
+    /// GetMinTrailingZeros - Determine the minimum number of zero bits that S
+    /// is guaranteed to end in (at every loop iteration).  It is, at the same
+    /// time, the minimum number of times S is divisible by 2.  For example,
+    /// given {4,+,8} it returns 2.  If S is guaranteed to be 0, it returns the
+    /// bitwidth of S.
     uint32_t GetMinTrailingZeros(const SCEV* S);
 
     /// GetMinLeadingZeros - Determine the minimum number of zero bits that S is
diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
index 730c97f..90dba8b 100644
--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -28,7 +28,8 @@ namespace llvm {
   /// memory.
   struct SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
     ScalarEvolution &SE;
-    std::map<const SCEV*, AssertingVH<Value> > InsertedExpressions;
+    std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
+      InsertedExpressions;
     std::set<Value*> InsertedValues;
 
     BasicBlock::iterator InsertPt;
@@ -43,48 +44,18 @@ namespace llvm {
     /// different places within the same BasicBlock can do so.
     void clear() { InsertedExpressions.clear(); }
 
-    /// isInsertedInstruction - Return true if the specified instruction was
-    /// inserted by the code rewriter.  If so, the client should not modify the
-    /// instruction.
-    bool isInsertedInstruction(Instruction *I) const {
-      return InsertedValues.count(I);
-    }
-
-    /// isInsertedExpression - Return true if the the code rewriter has a
-    /// Value* recorded for the given expression.
-    bool isInsertedExpression(const SCEV *S) const {
-      return InsertedExpressions.count(S);
-    }
-
     /// getOrInsertCanonicalInductionVariable - This method returns the
     /// canonical induction variable of the specified type for the specified
     /// loop (inserting one if there is none).  A canonical induction variable
     /// starts at zero and steps by one on each iteration.
     Value *getOrInsertCanonicalInductionVariable(const Loop *L, const Type *Ty);
 
-    /// addInsertedValue - Remember the specified instruction as being the
-    /// canonical form for the specified SCEV.
-    void addInsertedValue(Value *V, const SCEV *S) {
-      InsertedExpressions[S] = V;
-      InsertedValues.insert(V);
-    }
-
-    void setInsertionPoint(BasicBlock::iterator NewIP) { InsertPt = NewIP; }
-
-    BasicBlock::iterator getInsertionPoint() const { return InsertPt; }
-
-    /// expandCodeFor - Insert code to directly compute the specified SCEV
-    /// expression into the program.  The inserted code is inserted into the
-    /// SCEVExpander's current insertion point. If a type is specified, the
-    /// result will be expanded to have that type, with a cast if necessary.
-    Value *expandCodeFor(const SCEV* SH, const Type *Ty = 0);
-
     /// expandCodeFor - Insert code to directly compute the specified SCEV
     /// expression into the program.  The inserted code is inserted into the
     /// specified block.
     Value *expandCodeFor(const SCEV* SH, const Type *Ty,
                          BasicBlock::iterator IP) {
-      setInsertionPoint(IP);
+      InsertPt = IP;
       return expandCodeFor(SH, Ty);
     }
 
@@ -111,6 +82,19 @@ namespace llvm {
 
     Value *expand(const SCEV *S);
 
+    /// expandCodeFor - Insert code to directly compute the specified SCEV
+    /// expression into the program.  The inserted code is inserted into the
+    /// SCEVExpander's current insertion point. If a type is specified, the
+    /// result will be expanded to have that type, with a cast if necessary.
+    Value *expandCodeFor(const SCEV* SH, const Type *Ty = 0);
+
+    /// isInsertedInstruction - Return true if the specified instruction was
+    /// inserted by the code rewriter.  If so, the client should not modify the
+    /// instruction.
+    bool isInsertedInstruction(Instruction *I) const {
+      return InsertedValues.count(I);
+    }
+
     Value *visitConstant(const SCEVConstant *S) {
       return S->getValue();
     }
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index a004632..f3ca306 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -33,6 +33,7 @@ namespace llvm {
   class GlobalVariable;
   class MachineConstantPoolEntry;
   class MachineConstantPoolValue;
+  class MachineModuleInfo;
   class DwarfWriter;
   class Mangler;
   class Section;
@@ -58,14 +59,12 @@ namespace llvm {
     gcp_map_type GCMetadataPrinters;
     
   protected:
-    /// DW -This is needed because printDeclare() has to insert
-    /// DbgVariable entries into the dwarf table. This is a short term hack
-    /// that ought be fixed soon.
+    /// MMI - If available, this is a pointer to the current MachineModuleInfo.
+    MachineModuleInfo *MMI;
+    
+    /// DW - If available, this is a pointer to the current dwarf writer.
     DwarfWriter *DW;
     
-    // Necessary for external weak linkage support
-    std::set<const GlobalValue*> ExtWeakSymbols;
-
     /// OptLevel - Generating code at a specific optimization level.
     CodeGenOpt::Level OptLevel;
   public:
@@ -110,6 +109,15 @@ namespace llvm {
     ///
     bool VerboseAsm;
 
+    /// Private state for PrintSpecial()
+    // Assign a unique ID to this machine instruction.
+    mutable const MachineInstr *LastMI;
+    mutable const Function *LastFn;
+    mutable unsigned Counter;
+    
+    // Private state for processDebugLoc()
+    mutable DebugLocTuple PrevDLT;
+
   protected:
     explicit AsmPrinter(raw_ostream &o, TargetMachine &TM,
                         const TargetAsmInfo *T, CodeGenOpt::Level OL, bool V);
diff --git a/include/llvm/CodeGen/IntrinsicLowering.h b/include/llvm/CodeGen/IntrinsicLowering.h
index 6628329..eefbc45 100644
--- a/include/llvm/CodeGen/IntrinsicLowering.h
+++ b/include/llvm/CodeGen/IntrinsicLowering.h
@@ -25,8 +25,12 @@ namespace llvm {
 
   class IntrinsicLowering {
     const TargetData& TD;
+
+    
+    bool Warned;
   public:
-    explicit IntrinsicLowering(const TargetData &td) : TD(td) {}
+    explicit IntrinsicLowering(const TargetData &td) :
+      TD(td), Warned(false) {}
 
     /// AddPrototypes - This method, if called, causes all of the prototypes
     /// that might be needed by an intrinsic lowering implementation to be
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index d3a0995..3f30de5 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -74,8 +74,9 @@ public:
     return *this;
   }
 
-  const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB) const {
-    MI->addOperand(MachineOperand::CreateMBB(MBB));
+  const MachineInstrBuilder &addMBB(MachineBasicBlock *MBB,
+                                    unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateMBB(MBB, TargetFlags));
     return *this;
   }
 
@@ -85,25 +86,29 @@ public:
   }
 
   const MachineInstrBuilder &addConstantPoolIndex(unsigned Idx,
-                                                  int Offset = 0) const {
-    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset));
+                                                  int Offset = 0,
+                                          unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, TargetFlags));
     return *this;
   }
 
-  const MachineInstrBuilder &addJumpTableIndex(unsigned Idx) const {
-    MI->addOperand(MachineOperand::CreateJTI(Idx));
+  const MachineInstrBuilder &addJumpTableIndex(unsigned Idx,
+                                          unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateJTI(Idx, TargetFlags));
     return *this;
   }
 
   const MachineInstrBuilder &addGlobalAddress(GlobalValue *GV,
-                                              int64_t Offset = 0) const {
-    MI->addOperand(MachineOperand::CreateGA(GV, Offset));
+                                              int64_t Offset = 0,
+                                          unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateGA(GV, Offset, TargetFlags));
     return *this;
   }
 
   const MachineInstrBuilder &addExternalSymbol(const char *FnName,
-                                               int64_t Offset = 0) const {
-    MI->addOperand(MachineOperand::CreateES(FnName, Offset));
+                                               int64_t Offset = 0,
+                                          unsigned char TargetFlags = 0) const {
+    MI->addOperand(MachineOperand::CreateES(FnName, Offset, TargetFlags));
     return *this;
   }
 
@@ -113,28 +118,7 @@ public:
   }
 
   const MachineInstrBuilder &addOperand(const MachineOperand &MO) const {
-    if (MO.isReg())
-      return addReg(MO.getReg(),
-                    (MO.isDef() ? RegState::Define : 0) |
-                    (MO.isImplicit() ? RegState::Implicit : 0) |
-                    (MO.isKill() ? RegState::Kill : 0) |
-                    (MO.isDead() ? RegState::Dead : 0) |
-                    (MO.isEarlyClobber() ? RegState::EarlyClobber : 0),
-                    MO.getSubReg());
-    if (MO.isImm())
-      return addImm(MO.getImm());
-    if (MO.isFI())
-      return addFrameIndex(MO.getIndex());
-    if (MO.isGlobal())
-      return addGlobalAddress(MO.getGlobal(), MO.getOffset());
-    if (MO.isCPI())
-      return addConstantPoolIndex(MO.getIndex(), MO.getOffset());
-    if (MO.isSymbol())
-      return addExternalSymbol(MO.getSymbolName());
-    if (MO.isJTI())
-      return addJumpTableIndex(MO.getIndex());
-
-    assert(0 && "Unknown operand for MachineInstrBuilder::AddOperand!");
+    MI->addOperand(MO);
     return *this;
   }
 };
diff --git a/include/llvm/CodeGen/MachineOperand.h b/include/llvm/CodeGen/MachineOperand.h
index ba538d7..5a7f76b 100644
--- a/include/llvm/CodeGen/MachineOperand.h
+++ b/include/llvm/CodeGen/MachineOperand.h
@@ -47,7 +47,14 @@ public:
 private:
   /// OpKind - Specify what kind of operand this is.  This discriminates the
   /// union.
-  MachineOperandType OpKind : 8;
+  unsigned char OpKind; // MachineOperandType
+  
+  /// SubReg - Subregister number, only valid for MO_Register.  A value of 0
+  /// indicates the MO_Register has no subReg.
+  unsigned char SubReg;
+  
+  /// TargetFlags - This is a set of target-specific operand flags.
+  unsigned char TargetFlags;
   
   /// IsDef/IsImp/IsKill/IsDead flags - These are only valid for MO_Register
   /// operands.
@@ -73,10 +80,6 @@ private:
   /// model the GCC inline asm '&' constraint modifier.
   bool IsEarlyClobber : 1;
 
-  /// SubReg - Subregister number, only valid for MO_Register.  A value of 0
-  /// indicates the MO_Register has no subReg.
-  unsigned char SubReg;
-  
   /// ParentMI - This is the instruction that this operand is embedded into. 
   /// This is valid for all operand types, when the operand is in an instr.
   MachineInstr *ParentMI;
@@ -105,7 +108,9 @@ private:
     } OffsetedInfo;
   } Contents;
   
-  explicit MachineOperand(MachineOperandType K) : OpKind(K), ParentMI(0) {}
+  // Private ctor used by the Create* factories; target flags start cleared.
+  explicit MachineOperand(MachineOperandType K)
+    : OpKind(K), TargetFlags(0), ParentMI(0) {}
 public:
   MachineOperand(const MachineOperand &M) {
     *this = M;
@@ -115,7 +120,12 @@ public:
   
   /// getType - Returns the MachineOperandType for this operand.
   ///
-  MachineOperandType getType() const { return OpKind; }
+  MachineOperandType getType() const { return (MachineOperandType)OpKind; }
+  
+  unsigned char getTargetFlags() const { return TargetFlags; }
+  void setTargetFlags(unsigned char F) { TargetFlags = F; }
+  void addTargetFlag(unsigned char F) { TargetFlags |= F; }
+  
 
   /// getParent - Return the instruction that this operand belongs to.
   ///
@@ -361,9 +371,11 @@ public:
     Op.SubReg = SubReg;
     return Op;
   }
-  static MachineOperand CreateMBB(MachineBasicBlock *MBB) {
+  static MachineOperand CreateMBB(MachineBasicBlock *MBB,
+                                  unsigned char TargetFlags = 0) {
     MachineOperand Op(MachineOperand::MO_MachineBasicBlock);
     Op.setMBB(MBB);
+    Op.setTargetFlags(TargetFlags);
     return Op;
   }
   static MachineOperand CreateFI(unsigned Idx) {
@@ -371,27 +383,35 @@ public:
     Op.setIndex(Idx);
     return Op;
   }
-  static MachineOperand CreateCPI(unsigned Idx, int Offset) {
+  static MachineOperand CreateCPI(unsigned Idx, int Offset,
+                                  unsigned char TargetFlags = 0) {
     MachineOperand Op(MachineOperand::MO_ConstantPoolIndex);
     Op.setIndex(Idx);
     Op.setOffset(Offset);
+    Op.setTargetFlags(TargetFlags);
     return Op;
   }
-  static MachineOperand CreateJTI(unsigned Idx) {
+  static MachineOperand CreateJTI(unsigned Idx,
+                                  unsigned char TargetFlags = 0) {
     MachineOperand Op(MachineOperand::MO_JumpTableIndex);
     Op.setIndex(Idx);
+    Op.setTargetFlags(TargetFlags);
     return Op;
   }
-  static MachineOperand CreateGA(GlobalValue *GV, int64_t Offset) {
+  static MachineOperand CreateGA(GlobalValue *GV, int64_t Offset,
+                                 unsigned char TargetFlags = 0) {
     MachineOperand Op(MachineOperand::MO_GlobalAddress);
     Op.Contents.OffsetedInfo.Val.GV = GV;
     Op.setOffset(Offset);
+    Op.setTargetFlags(TargetFlags);
     return Op;
   }
-  static MachineOperand CreateES(const char *SymName, int64_t Offset = 0) {
+  static MachineOperand CreateES(const char *SymName, int64_t Offset = 0,
+                                 unsigned char TargetFlags = 0) {
     MachineOperand Op(MachineOperand::MO_ExternalSymbol);
     Op.Contents.OffsetedInfo.Val.SymbolName = SymName;
     Op.setOffset(Offset);
+    Op.setTargetFlags(TargetFlags);
     return Op;
   }
   const MachineOperand &operator=(const MachineOperand &MO) {
@@ -404,6 +424,7 @@ public:
     SubReg   = MO.SubReg;
     ParentMI = MO.ParentMI;
     Contents = MO.Contents;
+    TargetFlags = MO.TargetFlags;
     return *this;
   }
 
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index ec2d1d7..bac3eec 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -278,31 +278,37 @@ public:
     return getConstantFP(Val, VT, true);
   }
   SDValue getGlobalAddress(const GlobalValue *GV, MVT VT,
-                           int64_t offset = 0, bool isTargetGA = false);
+                           int64_t offset = 0, bool isTargetGA = false,
+                           unsigned char TargetFlags = 0);
   SDValue getTargetGlobalAddress(const GlobalValue *GV, MVT VT,
-                                 int64_t offset = 0) {
-    return getGlobalAddress(GV, VT, offset, true);
+                                 int64_t offset = 0,
+                                 unsigned char TargetFlags = 0) {
+    return getGlobalAddress(GV, VT, offset, true, TargetFlags);
   }
   SDValue getFrameIndex(int FI, MVT VT, bool isTarget = false);
   SDValue getTargetFrameIndex(int FI, MVT VT) {
     return getFrameIndex(FI, VT, true);
   }
-  SDValue getJumpTable(int JTI, MVT VT, bool isTarget = false);
-  SDValue getTargetJumpTable(int JTI, MVT VT) {
-    return getJumpTable(JTI, VT, true);
+  SDValue getJumpTable(int JTI, MVT VT, bool isTarget = false,
+                       unsigned char TargetFlags = 0);
+  SDValue getTargetJumpTable(int JTI, MVT VT, unsigned char TargetFlags = 0) {
+    return getJumpTable(JTI, VT, true, TargetFlags);
   }
   SDValue getConstantPool(Constant *C, MVT VT,
-                            unsigned Align = 0, int Offs = 0, bool isT=false);
+                          unsigned Align = 0, int Offs = 0, bool isT=false,
+                          unsigned char TargetFlags = 0);
   SDValue getTargetConstantPool(Constant *C, MVT VT,
-                                  unsigned Align = 0, int Offset = 0) {
-    return getConstantPool(C, VT, Align, Offset, true);
+                                unsigned Align = 0, int Offset = 0,
+                                unsigned char TargetFlags = 0) {
+    return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
   }
   SDValue getConstantPool(MachineConstantPoolValue *C, MVT VT,
-                            unsigned Align = 0, int Offs = 0, bool isT=false);
+                          unsigned Align = 0, int Offs = 0, bool isT=false,
+                          unsigned char TargetFlags = 0);
   SDValue getTargetConstantPool(MachineConstantPoolValue *C,
                                   MVT VT, unsigned Align = 0,
-                                  int Offset = 0) {
-    return getConstantPool(C, VT, Align, Offset, true);
+                                  int Offset = 0, unsigned char TargetFlags=0) {
+    return getConstantPool(C, VT, Align, Offset, true, TargetFlags);
   }
   // When generating a branch to a BB, we don't in general know enough
   // to provide debug info for the BB at that time, so keep this one around.
@@ -310,8 +316,8 @@ public:
   SDValue getBasicBlock(MachineBasicBlock *MBB, DebugLoc dl);
   SDValue getExternalSymbol(const char *Sym, MVT VT);
   SDValue getExternalSymbol(const char *Sym, DebugLoc dl, MVT VT);
-  SDValue getTargetExternalSymbol(const char *Sym, MVT VT);
-  SDValue getTargetExternalSymbol(const char *Sym, DebugLoc dl, MVT VT);
+  SDValue getTargetExternalSymbol(const char *Sym, MVT VT,
+                                  unsigned char TargetFlags = 0);
   SDValue getArgFlags(ISD::ArgFlagsTy Flags);
   SDValue getValueType(MVT);
   SDValue getRegister(unsigned Reg, MVT VT);
@@ -862,7 +868,8 @@ private:
   std::vector<SDNode*> ValueTypeNodes;
   std::map<MVT, SDNode*, MVT::compareRawBits> ExtendedValueTypeNodes;
   StringMap<SDNode*> ExternalSymbols;
-  StringMap<SDNode*> TargetExternalSymbols;
+  
+  std::map<std::pair<std::string, unsigned char>,SDNode*> TargetExternalSymbols;
 };
 
 template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 1b6fecd..adf0478 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -1819,13 +1819,15 @@ public:
 class GlobalAddressSDNode : public SDNode {
   GlobalValue *TheGlobal;
   int64_t Offset;
+  unsigned char TargetFlags;
   friend class SelectionDAG;
-  GlobalAddressSDNode(bool isTarget, const GlobalValue *GA, MVT VT,
-                      int64_t o = 0);
+  GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, MVT VT,
+                      int64_t o, unsigned char TargetFlags);
 public:
 
   GlobalValue *getGlobal() const { return TheGlobal; }
   int64_t getOffset() const { return Offset; }
+  unsigned char getTargetFlags() const { return TargetFlags; }
   // Return the address space this GlobalAddress belongs to.
   unsigned getAddressSpace() const;
 
@@ -1858,14 +1860,16 @@ public:
 
 class JumpTableSDNode : public SDNode {
   int JTI;
+  unsigned char TargetFlags;
   friend class SelectionDAG;
-  JumpTableSDNode(int jti, MVT VT, bool isTarg)
+  JumpTableSDNode(int jti, MVT VT, bool isTarg, unsigned char TF)
     : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
-      DebugLoc::getUnknownLoc(), getSDVTList(VT)), JTI(jti) {
+      DebugLoc::getUnknownLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
   }
 public:
 
   int getIndex() const { return JTI; }
+  unsigned char getTargetFlags() const { return TargetFlags; }
 
   static bool classof(const JumpTableSDNode *) { return true; }
   static bool classof(const SDNode *N) {
@@ -1881,40 +1885,27 @@ class ConstantPoolSDNode : public SDNode {
   } Val;
   int Offset;  // It's a MachineConstantPoolValue if top bit is set.
   unsigned Alignment;  // Minimum alignment requirement of CP (not log2 value).
+  unsigned char TargetFlags;
   friend class SelectionDAG;
-  ConstantPoolSDNode(bool isTarget, Constant *c, MVT VT, int o=0)
+  ConstantPoolSDNode(bool isTarget, Constant *c, MVT VT, int o, unsigned Align,
+                     unsigned char TF)
     : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
              DebugLoc::getUnknownLoc(),
-             getSDVTList(VT)), Offset(o), Alignment(0) {
+             getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) {
     assert((int)Offset >= 0 && "Offset is too large");
     Val.ConstVal = c;
   }
-  ConstantPoolSDNode(bool isTarget, Constant *c, MVT VT, int o, unsigned Align)
-    : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
-             DebugLoc::getUnknownLoc(),
-             getSDVTList(VT)), Offset(o), Alignment(Align) {
-    assert((int)Offset >= 0 && "Offset is too large");
-    Val.ConstVal = c;
-  }
-  ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
-                     MVT VT, int o=0)
-    : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
-             DebugLoc::getUnknownLoc(),
-             getSDVTList(VT)), Offset(o), Alignment(0) {
-    assert((int)Offset >= 0 && "Offset is too large");
-    Val.MachineCPVal = v;
-    Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
-  }
   ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
-                     MVT VT, int o, unsigned Align)
+                     MVT VT, int o, unsigned Align, unsigned char TF)
     : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool,
              DebugLoc::getUnknownLoc(),
-             getSDVTList(VT)), Offset(o), Alignment(Align) {
+             getSDVTList(VT)), Offset(o), Alignment(Align), TargetFlags(TF) {
     assert((int)Offset >= 0 && "Offset is too large");
     Val.MachineCPVal = v;
     Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
   }
 public:
+  
 
   bool isMachineConstantPoolEntry() const {
     return (int)Offset < 0;
@@ -1937,6 +1928,7 @@ public:
   // Return the alignment of this constant pool object, which is either 0 (for
   // default alignment) or the desired value.
   unsigned getAlignment() const { return Alignment; }
+  unsigned char getTargetFlags() const { return TargetFlags; }
 
   const Type *getType() const;
 
@@ -2101,15 +2093,18 @@ public:
 
 class ExternalSymbolSDNode : public SDNode {
   const char *Symbol;
+  unsigned char TargetFlags;
+  
   friend class SelectionDAG;
-  ExternalSymbolSDNode(bool isTarget, const char *Sym, MVT VT)
+  ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, MVT VT)
     : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol,
              DebugLoc::getUnknownLoc(),
-             getSDVTList(VT)), Symbol(Sym) {
+             getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {
   }
 public:
 
   const char *getSymbol() const { return Symbol; }
+  unsigned char getTargetFlags() const { return TargetFlags; }
 
   static bool classof(const ExternalSymbolSDNode *) { return true; }
   static bool classof(const SDNode *N) {
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 95c3a11..bf8b19c 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -1,3 +1,4 @@
+
 //===- CodeGen/ValueTypes.h - Low-Level Target independ. types --*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
@@ -72,6 +73,12 @@ namespace llvm {
 
       LAST_VALUETYPE =  30,   // This always remains at the end of the list.
 
+      // This is the current maximum for LAST_VALUETYPE.
+      // Affects ValueTypeActions in TargetLowering.h.
+      // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
+      // This value must be a multiple of 32.
+      MAX_ALLOWED_VALUETYPE = 64,
+
       // iPTRAny - An int value the size of the pointer of the current
       // target to any address space. This must only be used internal to
       // tblgen. Other than for overloading, we treat iPTRAny the same as iPTR.
diff --git a/include/llvm/CompilerDriver/BuiltinOptions.h b/include/llvm/CompilerDriver/BuiltinOptions.h
new file mode 100644
index 0000000..492dffd
--- /dev/null
+++ b/include/llvm/CompilerDriver/BuiltinOptions.h
@@ -0,0 +1,33 @@
+//===--- BuiltinOptions.h - The LLVM Compiler Driver ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Declarations of all global command-line option variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_BUILTIN_OPTIONS_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_BUILTIN_OPTIONS_H
+
+#include "llvm/Support/CommandLine.h"
+
+#include <string>
+// Values taken by the SaveTemps option declared below.
+namespace SaveTempsEnum { enum Values { Cwd, Obj, Unset }; }
+// Declarations only (extern); the option objects are defined elsewhere.
+extern llvm::cl::list<std::string> InputFilenames;
+extern llvm::cl::opt<std::string> OutputFilename;
+extern llvm::cl::list<std::string> Languages;
+extern llvm::cl::opt<bool> DryRun;
+extern llvm::cl::opt<bool> VerboseMode;
+extern llvm::cl::opt<bool> CheckGraph;
+extern llvm::cl::opt<bool> WriteGraph;
+extern llvm::cl::opt<bool> ViewGraph;
+extern llvm::cl::opt<SaveTempsEnum::Values> SaveTemps;
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_BUILTIN_OPTIONS_H
diff --git a/include/llvm/CompilerDriver/ForceLinkage.h b/include/llvm/CompilerDriver/ForceLinkage.h
new file mode 100644
index 0000000..58ea167
--- /dev/null
+++ b/include/llvm/CompilerDriver/ForceLinkage.h
@@ -0,0 +1,82 @@
+//===--- ForceLinkage.h - The LLVM Compiler Driver --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  A bit of preprocessor magic to force references to static libraries. Needed
+//  because plugin initialization is done via static variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_H
+
+#include "llvm/CompilerDriver/ForceLinkageMacros.h"
+
+namespace llvmc {
+
+// Declare all ForceLinkage$(PluginName) functions.
+
+#ifdef LLVMC_BUILTIN_PLUGIN_1
+      LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_1);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_2
+      LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_2);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_3
+      LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_3);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_4
+      LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_4);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_5
+      LLVMC_FORCE_LINKAGE_DECL(LLVMC_BUILTIN_PLUGIN_5);
+#endif
+
+namespace force_linkage {
+
+  struct LinkageForcer {
+
+    LinkageForcer() {
+
+// Call all ForceLinkage$(PluginName) functions.
+#ifdef LLVMC_BUILTIN_PLUGIN_1
+      LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_1);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_2
+      LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_2);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_3
+      LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_3);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_4
+      LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_4);
+#endif
+
+#ifdef LLVMC_BUILTIN_PLUGIN_5
+      LLVMC_FORCE_LINKAGE_CALL(LLVMC_BUILTIN_PLUGIN_5);
+#endif
+
+    }
+  };
+} // End namespace force_linkage.
+
+// The only externally used bit; inline since it is defined in a header.
+inline void ForceLinkage() {
+  force_linkage::LinkageForcer dummy; // ctor calls every ForceLinkage*().
+}
+
+} // End namespace llvmc.
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_H
diff --git a/include/llvm/CompilerDriver/ForceLinkageMacros.h b/include/llvm/CompilerDriver/ForceLinkageMacros.h
new file mode 100644
index 0000000..8862b00
--- /dev/null
+++ b/include/llvm/CompilerDriver/ForceLinkageMacros.h
@@ -0,0 +1,29 @@
+//===--- ForceLinkageMacros.h - The LLVM Compiler Driver --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open
+// Source License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  Preprocessor magic that forces references to static libraries - common
+//  macros used by both driver and plugins.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_MACROS_H
+#define LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_MACROS_H
+
+#define LLVMC_FORCE_LINKAGE_PREFIX(PluginName) ForceLinkage ## PluginName
+
+#define LLVMC_FORCE_LINKAGE_FUN(PluginName) \
+  LLVMC_FORCE_LINKAGE_PREFIX(PluginName)
+
+#define LLVMC_FORCE_LINKAGE_DECL(PluginName) \
+  void LLVMC_FORCE_LINKAGE_FUN(PluginName) ()
+
+#define LLVMC_FORCE_LINKAGE_CALL(PluginName) \
+  LLVMC_FORCE_LINKAGE_FUN(PluginName) ()
+
+#endif // LLVM_INCLUDE_COMPILER_DRIVER_FORCE_LINKAGE_MACROS_H
diff --git a/include/llvm/CompilerDriver/Main.inc b/include/llvm/CompilerDriver/Main.inc
index 2d50c95..4a83d56 100644
--- a/include/llvm/CompilerDriver/Main.inc
+++ b/include/llvm/CompilerDriver/Main.inc
@@ -12,15 +12,15 @@
 //  supported please refer to the tools' manual page or run the tool
 //  with the --help option.
 //
-//  This
-//
 //===----------------------------------------------------------------------===//
 
 #ifndef LLVM_INCLUDE_COMPILER_DRIVER_MAIN_INC
 #define LLVM_INCLUDE_COMPILER_DRIVER_MAIN_INC
 
+#include "llvm/CompilerDriver/BuiltinOptions.h"
 #include "llvm/CompilerDriver/CompilationGraph.h"
 #include "llvm/CompilerDriver/Error.h"
+#include "llvm/CompilerDriver/ForceLinkage.h"
 #include "llvm/CompilerDriver/Plugin.h"
 
 #include "llvm/System/Path.h"
@@ -59,27 +59,56 @@ cl::opt<bool> WriteGraph("write-graph",
 cl::opt<bool> ViewGraph("view-graph",
                          cl::desc("Show compilation graph in GhostView"),
                          cl::Hidden);
-cl::opt<bool> SaveTemps("save-temps",
-                         cl::desc("Keep temporary files"),
-                         cl::Hidden);
+
+cl::opt<SaveTempsEnum::Values> SaveTemps
+("save-temps", cl::desc("Keep temporary files"),
+ cl::init(SaveTempsEnum::Unset),
+ cl::values(clEnumValN(SaveTempsEnum::Obj, "obj",
+                       "Save files in the directory specified with -o"),
+            clEnumValN(SaveTempsEnum::Cwd, "cwd",
+                       "Use current working directory"),
+            clEnumValN(SaveTempsEnum::Cwd, "", "Same as 'cwd'"), // no value
+            clEnumValEnd),
+ cl::ValueOptional);
 
 namespace {
+
+  sys::Path getTempDir() {
+    sys::Path tempDir;
+
+    // GCC 4.5-style -save-temps handling.
+    if (SaveTemps == SaveTempsEnum::Unset) {
+      tempDir = sys::Path::GetTemporaryDirectory();
+    }
+    else if (SaveTemps == SaveTempsEnum::Obj && !OutputFilename.empty()) {
+      tempDir = OutputFilename;
+
+      if (!tempDir.exists()) {
+        std::string ErrMsg;
+        if (tempDir.createDirectoryOnDisk(true, &ErrMsg))
+          throw std::runtime_error(ErrMsg);
+      }
+    }
+    // Cwd, or Obj with empty OutputFilename -> current dir (tempDir stays empty)
+
+    return tempDir;
+  }
+
   /// BuildTargets - A small wrapper for CompilationGraph::Build.
   int BuildTargets(CompilationGraph& graph, const LanguageMap& langMap) {
     int ret;
-    const sys::Path& tempDir = SaveTemps
-      ? sys::Path("")
-      : sys::Path(sys::Path::GetTemporaryDirectory());
+    const sys::Path& tempDir = getTempDir();
 
     try {
       ret = graph.Build(tempDir, langMap);
     }
     catch(...) {
-      tempDir.eraseFromDisk(true);
+      if (SaveTemps == SaveTempsEnum::Unset)
+        tempDir.eraseFromDisk(true);
       throw;
     }
 
-    if (!SaveTemps)
+    if (SaveTemps == SaveTempsEnum::Unset)
       tempDir.eraseFromDisk(true);
     return ret;
   }
@@ -87,6 +116,8 @@ namespace {
 
 int main(int argc, char** argv) {
   try {
+    ForceLinkage();
+
     LanguageMap langMap;
     CompilationGraph graph;
 
diff --git a/include/llvm/Config/Targets.def.in b/include/llvm/Config/Targets.def.in
index a388472..d589ece 100644
--- a/include/llvm/Config/Targets.def.in
+++ b/include/llvm/Config/Targets.def.in
@@ -1,23 +1,23 @@
-//===- llvm/Config/Targets.def - LLVM Target Architectures ------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file enumerates all of the target architectures supported by
-// this build of LLVM. Clients of this file should define the
-// LLVM_TARGET macro to be a function-like macro with a single
-// parameter (the name of the target); including this file will then
-// enumerate all of the targets. 
-//
-// The set of targets supported by LLVM is generated at configuration
-// time, at which point this header is generated. Do not modify this
-// header directly.
-//
-//===----------------------------------------------------------------------===//
+/*===- llvm/Config/Targets.def - LLVM Target Architectures ------*- C++ -*-===*\
+|*                                                                            *|
+|*                     The LLVM Compiler Infrastructure                       *|
+|*                                                                            *|
+|* This file is distributed under the University of Illinois Open Source      *|
+|* License. See LICENSE.TXT for details.                                      *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This file enumerates all of the target architectures supported by          *|
+|* this build of LLVM. Clients of this file should define the                 *|
+|* LLVM_TARGET macro to be a function-like macro with a single                *|
+|* parameter (the name of the target); including this file will then          *|
+|* enumerate all of the targets.                                              *|
+|*                                                                            *|
+|* The set of targets supported by LLVM is generated at configuration         *|
+|* time, at which point this header is generated. Do not modify this          *|
+|* header directly.                                                           *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
 
 #ifndef LLVM_TARGET
 #  error Please define the macro LLVM_TARGET(TargetName)
diff --git a/include/llvm/Config/config.h.cmake b/include/llvm/Config/config.h.cmake
index 4356e7d..27ef440 100644
--- a/include/llvm/Config/config.h.cmake
+++ b/include/llvm/Config/config.h.cmake
@@ -282,6 +282,12 @@
 /* Have pthread_mutex_lock */
 #cmakedefine HAVE_PTHREAD_MUTEX_LOCK ${HAVE_PTHREAD_MUTEX_LOCK}
 
+/* Have pthread_rwlock_init */
+#cmakedefine HAVE_PTHREAD_RWLOCK_INIT ${HAVE_PTHREAD_RWLOCK_INIT}
+
+/* Have pthread_getspecific */
+#cmakedefine HAVE_PTHREAD_GETSPECIFIC ${HAVE_PTHREAD_GETSPECIFIC}
+
 /* Define to 1 if srand48/lrand48/drand48 exist in <stdlib.h> */
 #undef HAVE_RAND48
 
diff --git a/include/llvm/Config/config.h.in b/include/llvm/Config/config.h.in
index ac60f4d..9452ce7 100644
--- a/include/llvm/Config/config.h.in
+++ b/include/llvm/Config/config.h.in
@@ -279,6 +279,9 @@
 /* Define to have the %a format string */
 #undef HAVE_PRINTF_A
 
+/* Have pthread_getspecific */
+#undef HAVE_PTHREAD_GETSPECIFIC
+
 /* Define to 1 if you have the <pthread.h> header file. */
 #undef HAVE_PTHREAD_H
 
diff --git a/include/llvm/Constants.h b/include/llvm/Constants.h
index 52fff20..da69d25 100644
--- a/include/llvm/Constants.h
+++ b/include/llvm/Constants.h
@@ -865,6 +865,7 @@ public:
   /// get() - Static factory methods - Return objects of the specified value.
   ///
   static MDString *get(const char *StrBegin, const char *StrEnd);
+  static MDString *get(const std::string &Str);
 
   /// size() - The length of this string.
   ///
diff --git a/include/llvm/ExecutionEngine/ExecutionEngine.h b/include/llvm/ExecutionEngine/ExecutionEngine.h
index 170e184..613adb5 100644
--- a/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -29,13 +29,14 @@ class Constant;
 class Function;
 class GlobalVariable;
 class GlobalValue;
+class JITEventListener;
+class JITMemoryManager;
+class MachineCodeInfo;
 class Module;
 class ModuleProvider;
+class MutexGuard;
 class TargetData;
 class Type;
-class MutexGuard;
-class JITMemoryManager;
-class MachineCodeInfo;
 
 class ExecutionEngineState {
 private:
@@ -276,7 +277,14 @@ public:
   virtual void *getOrEmitGlobalVariable(const GlobalVariable *GV) {
     return getPointerToGlobal((GlobalValue*)GV);
   }
-  
+
+  /// Registers a listener to be called back on various events within
+  /// the JIT.  See JITEventListener.h for more details.  Does not
+  /// take ownership of the argument.  The argument may be NULL, in
+  /// which case these functions do nothing.
+  virtual void RegisterJITEventListener(JITEventListener *L) {}
+  virtual void UnregisterJITEventListener(JITEventListener *L) {}
+
   /// DisableLazyCompilation - If called, the JIT will abort if lazy compilation
   /// is ever attempted.
   void DisableLazyCompilation(bool Disabled = true) {
diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h
index b2b0464..7425cdb 100644
--- a/include/llvm/ExecutionEngine/Interpreter.h
+++ b/include/llvm/ExecutionEngine/Interpreter.h
@@ -18,9 +18,7 @@
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include <cstdlib>
 
-namespace llvm {
-  extern void LinkInInterpreter();
-}
+extern "C" void LLVMLinkInInterpreter();
 
 namespace {
   struct ForceInterpreterLinking {
@@ -32,7 +30,7 @@ namespace {
       if (std::getenv("bar") != (char*) -1)
         return;
 
-      llvm::LinkInInterpreter();
+      LLVMLinkInInterpreter();
     }
   } ForceInterpreterLinking;
 }
diff --git a/include/llvm/ExecutionEngine/JIT.h b/include/llvm/ExecutionEngine/JIT.h
index d4d1e73..6013db4 100644
--- a/include/llvm/ExecutionEngine/JIT.h
+++ b/include/llvm/ExecutionEngine/JIT.h
@@ -18,9 +18,7 @@
 #include "llvm/ExecutionEngine/ExecutionEngine.h"
 #include <cstdlib>
 
-namespace llvm {
-  extern void LinkInJIT();
-}
+extern "C" void LLVMLinkInJIT();
 
 namespace {
   struct ForceJITLinking {
@@ -32,7 +30,7 @@ namespace {
       if (std::getenv("bar") != (char*) -1)
         return;
 
-      llvm::LinkInJIT();
+      LLVMLinkInJIT();
     }
   } ForceJITLinking;
 }
diff --git a/include/llvm/ExecutionEngine/JITEventListener.h b/include/llvm/ExecutionEngine/JITEventListener.h
new file mode 100644
index 0000000..dd76f26
--- /dev/null
+++ b/include/llvm/ExecutionEngine/JITEventListener.h
@@ -0,0 +1,59 @@
+//===- JITEventListener.h - Exposes events from JIT compilation -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the JITEventListener interface, which lets users get
+// callbacks when significant events happen during the JIT compilation process.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
+#define LLVM_EXECUTION_ENGINE_JIT_EVENTLISTENER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+class Function;
+
+/// Empty for now, but this object will contain all details about the
+/// generated machine code that a Listener might care about.
+struct JITEvent_EmittedFunctionDetails {
+};
+
+/// JITEventListener - This interface is used by the JIT to notify clients about
+/// significant events during compilation.  For example, we could have
+/// implementations for profilers and debuggers that need to know where
+/// functions have been emitted.
+///
+/// Each method defaults to doing nothing, so you only need to override the ones
+/// you care about.
+class JITEventListener {
+public:
+  JITEventListener() {}
+  virtual ~JITEventListener();  // Defined in JIT.cpp.
+
+  typedef JITEvent_EmittedFunctionDetails EmittedFunctionDetails;
+  /// NotifyFunctionEmitted - Called after a function has been successfully
+  /// emitted to memory.  The function still has its MachineFunction attached,
+  /// if you should happen to need that.
+  virtual void NotifyFunctionEmitted(const Function &F,
+                                     void *Code, size_t Size,
+                                     const EmittedFunctionDetails &Details) {}
+
+  /// NotifyFreeingMachineCode - This is called inside of
+  /// freeMachineCodeForFunction(), after the global mapping is removed, but
+  /// before the machine code is returned to the allocator.  OldPtr is the
+  /// address of the machine code.
+  virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr) {}
+};
+
+JITEventListener *createMacOSJITEventListener();
+
+} // end namespace llvm.
+
+#endif
diff --git a/include/llvm/IntrinsicsARM.td b/include/llvm/IntrinsicsARM.td
index a73dc45..4723ffb 100644
--- a/include/llvm/IntrinsicsARM.td
+++ b/include/llvm/IntrinsicsARM.td
@@ -302,15 +302,24 @@ let TargetPrefix = "arm" in {
 
   // Interleaving vector stores from N-element structures.
   def int_arm_neon_vst3i : Intrinsic<[llvm_void_ty],
-                                     [llvm_anyint_ty, llvm_ptr_ty],
+                                     [llvm_ptr_ty, llvm_anyint_ty],
                                      [IntrWriteArgMem]>;
   def int_arm_neon_vst3f : Intrinsic<[llvm_void_ty],
-                                     [llvm_anyfloat_ty, llvm_ptr_ty],
+                                     [llvm_ptr_ty, llvm_anyfloat_ty],
                                      [IntrWriteArgMem]>;
   def int_arm_neon_vst4i : Intrinsic<[llvm_void_ty],
-                                     [llvm_anyint_ty, llvm_ptr_ty],
+                                     [llvm_ptr_ty, llvm_anyint_ty],
                                      [IntrWriteArgMem]>;
   def int_arm_neon_vst4f : Intrinsic<[llvm_void_ty],
-                                     [llvm_anyfloat_ty, llvm_ptr_ty],
+                                     [llvm_ptr_ty, llvm_anyfloat_ty],
                                      [IntrWriteArgMem]>;
+
+  // Vector Table Lookup
+  def int_arm_neon_vtbl : Intrinsic<[llvm_v8i8_ty],
+                                    [llvm_anyint_ty, llvm_v8i8_ty],
+                                    [IntrNoMem]>;
+  // Vector Table Extension
+  def int_arm_neon_vtbx : Intrinsic<[llvm_v8i8_ty],
+                                    [llvm_v8i8_ty, llvm_anyint_ty,
+                                     llvm_v8i8_ty], [IntrNoMem]>;
 }
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 0fb837d..22d6aeb 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -77,6 +77,7 @@ namespace {
       (void) llvm::createLCSSAPass();
       (void) llvm::createLICMPass();
       (void) llvm::createLiveValuesPass();
+      (void) llvm::createLoopDependenceAnalysisPass();
       (void) llvm::createLoopExtractorPass();
       (void) llvm::createLoopSimplifyPass();
       (void) llvm::createLoopStrengthReducePass();
diff --git a/include/llvm/MC/MCContext.h b/include/llvm/MC/MCContext.h
new file mode 100644
index 0000000..13180e8
--- /dev/null
+++ b/include/llvm/MC/MCContext.h
@@ -0,0 +1,166 @@
+//===- MCContext.h - Machine Code Context -----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCCONTEXT_H
+#define LLVM_MC_MCCONTEXT_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+  class MCValue;
+  class MCSection;
+  class MCSymbol;
+
+  /// MCContext - Context object for machine code objects.
+  class MCContext {
+    MCContext(const MCContext&); // DO NOT IMPLEMENT
+    MCContext &operator=(const MCContext&); // DO NOT IMPLEMENT
+
+    /// Sections - Bindings of names to allocated sections.
+    StringMap<MCSection*> Sections;
+
+    /// Symbols - Bindings of names to symbols.
+    StringMap<MCSymbol*> Symbols;
+
+    /// SymbolValues - Bindings of symbols to values.
+    DenseMap<MCSymbol*, MCValue> SymbolValues;
+
+    /// Allocator - Allocator object used for creating machine code objects.
+    ///
+    /// We use a bump pointer allocator to avoid the need to track all allocated
+    /// objects.
+    BumpPtrAllocator Allocator;
+
+  public:
+    MCContext();
+    ~MCContext();
+
+    /// GetSection - Get or create a new section with the given @param Name.
+    MCSection *GetSection(const char *Name);
+    
+    /// CreateSymbol - Create a new symbol with the specified @param Name.
+    ///
+    /// @param Name - The symbol name, which must be unique across all symbols.
+    MCSymbol *CreateSymbol(const char *Name);
+
+    /// GetOrCreateSymbol - Lookup the symbol inside with the specified
+    /// @param Name.  If it exists, return it.  If not, create a forward
+    /// reference and return it.
+    ///
+    /// @param Name - The symbol name, which must be unique across all symbols.
+    MCSymbol *GetOrCreateSymbol(const char *Name);
+    
+    /// CreateTemporarySymbol - Create a new temporary symbol with the specified
+    /// @param Name.
+    ///
+    /// @param Name - The symbol name, for debugging purposes only, temporary
+    /// symbols do not survive assembly. If non-empty the name must be unique
+    /// across all symbols.
+    MCSymbol *CreateTemporarySymbol(const char *Name = "");
+
+    /// LookupSymbol - Get the symbol for @param Name, or null.
+    MCSymbol *LookupSymbol(const char *Name) const;
+
+    /// ClearSymbolValue - Erase a value binding for @param Symbol, if one
+    /// exists.
+    void ClearSymbolValue(MCSymbol *Symbol);
+
+    /// SetSymbolValue - Set the value binding for @param Symbol to @param
+    /// Value.
+    void SetSymbolValue(MCSymbol *Symbol, const MCValue &Value);
+
+    /// GetSymbolValue - Return the current value for @param Symbol, or null if
+    /// none exists.
+    const MCValue *GetSymbolValue(MCSymbol *Symbol) const;
+
+    void *Allocate(unsigned Size, unsigned Align = 8) {
+      return Allocator.Allocate(Size, Align);
+    }
+    void Deallocate(void *Ptr) { 
+    }
+  };
+
+} // end namespace llvm
+
+// operator new and delete aren't allowed inside namespaces.
+// The throw specifications are mandated by the standard.
+/// @brief Placement new for using the MCContext's allocator.
+///
+/// This placement form of operator new uses the MCContext's allocator for
+/// obtaining memory. It is a non-throwing new, which means that it returns
+/// null on error. (If that is what the allocator does. The current does, so if
+/// this ever changes, this operator will have to be changed, too.)
+/// Usage looks like this (assuming there's an MCContext 'Context' in scope):
+/// @code
+/// // Default alignment (16)
+/// IntegerLiteral *Ex = new (Context) IntegerLiteral(arguments);
+/// // Specific alignment
+/// IntegerLiteral *Ex2 = new (Context, 8) IntegerLiteral(arguments);
+/// @endcode
+/// Please note that you cannot use delete on the pointer; it must be
+/// deallocated using an explicit destructor call followed by
+/// @c Context.Deallocate(Ptr).
+///
+/// @param Bytes The number of bytes to allocate. Calculated by the compiler.
+/// @param C The MCContext that provides the allocator.
+/// @param Alignment The alignment of the allocated memory (if the underlying
+///                  allocator supports it).
+/// @return The allocated memory. Could be NULL.
+inline void *operator new(size_t Bytes, llvm::MCContext &C,
+                          size_t Alignment = 16) throw () {
+  return C.Allocate(Bytes, Alignment);
+}
+/// @brief Placement delete companion to the new above.
+///
+/// This operator is just a companion to the new above. There is no way of
+/// invoking it directly; see the new operator for more details. This operator
+/// is called implicitly by the compiler if a placement new expression using
+/// the MCContext throws in the object constructor.
+inline void operator delete(void *Ptr, llvm::MCContext &C, size_t)
+              throw () {
+  C.Deallocate(Ptr);
+}
+
+/// This placement form of operator new[] uses the MCContext's allocator for
+/// obtaining memory. It is a non-throwing new[], which means that it returns
+/// null on error.
+/// Usage looks like this (assuming there's an MCContext 'Context' in scope):
+/// @code
+/// // Default alignment (16)
+/// char *data = new (Context) char[10];
+/// // Specific alignment
+/// char *data = new (Context, 8) char[10];
+/// @endcode
+/// Please note that you cannot use delete on the pointer; it must be
+/// deallocated using an explicit destructor call followed by
+/// @c Context.Deallocate(Ptr).
+///
+/// @param Bytes The number of bytes to allocate. Calculated by the compiler.
+/// @param C The MCContext that provides the allocator.
+/// @param Alignment The alignment of the allocated memory (if the underlying
+///                  allocator supports it).
+/// @return The allocated memory. Could be NULL.
+inline void *operator new[](size_t Bytes, llvm::MCContext& C,
+                            size_t Alignment = 16) throw () {
+  return C.Allocate(Bytes, Alignment);
+}
+
+/// @brief Placement delete[] companion to the new[] above.
+///
+/// There is no way of invoking this operator directly; the compiler calls it
+/// implicitly if a placement new[] expression using the MCContext throws in
+/// the object constructor. The unnamed size_t mirrors new[]'s Alignment
+/// parameter, which is what makes this the matching deallocation function.
+inline void operator delete[](void *Ptr, llvm::MCContext &C, size_t) throw () {
+  C.Deallocate(Ptr);
+}
+
+#endif
diff --git a/include/llvm/MC/MCInst.h b/include/llvm/MC/MCInst.h
index 457c2ae..3108985 100644
--- a/include/llvm/MC/MCInst.h
+++ b/include/llvm/MC/MCInst.h
@@ -16,7 +16,7 @@
 #ifndef LLVM_MC_MCINST_H
 #define LLVM_MC_MCINST_H
 
-#include "llvm/MC/MCImm.h"
+#include "llvm/MC/MCValue.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/DataTypes.h"
 #include "llvm/Support/DebugLoc.h"
@@ -31,14 +31,14 @@ class MCOperand {
     kRegister,                ///< Register operand.
     kImmediate,               ///< Immediate operand.
     kMBBLabel,                ///< Basic block label.
-    kMCImm
+    kMCValue
   };
   unsigned char Kind;
   
   union {
     unsigned RegVal;
     int64_t ImmVal;
-    MCImm MCImmVal;
+    MCValue MCValueVal;
     struct {
       unsigned FunctionNo;
       unsigned BlockNo;
diff --git a/include/llvm/MC/MCSection.h b/include/llvm/MC/MCSection.h
new file mode 100644
index 0000000..341f7f0
--- /dev/null
+++ b/include/llvm/MC/MCSection.h
@@ -0,0 +1,28 @@
+//===- MCSection.h - Machine Code Sections ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSECTION_H
+#define LLVM_MC_MCSECTION_H
+
+#include <string>
+
+namespace llvm {
+
+  class MCSection {
+    std::string Name;
+
+  public:
+    MCSection(const char *_Name) : Name(_Name) {}
+
+    const std::string &getName() const { return Name; }
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
new file mode 100644
index 0000000..bb85d2d
--- /dev/null
+++ b/include/llvm/MC/MCStreamer.h
@@ -0,0 +1,191 @@
+//===- MCStreamer.h - High-level Streaming Machine Code Output --*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSTREAMER_H
+#define LLVM_MC_MCSTREAMER_H
+
+#include "llvm/Support/DataTypes.h"
+
+namespace llvm {
+  class MCContext;
+  class MCValue;
+  class MCInst;
+  class MCSection;
+  class MCSymbol;
+  class raw_ostream;
+
+  /// MCStreamer - Streaming machine code generation interface.
+  class MCStreamer {
+  public:
+    enum SymbolAttr {
+      Global,         ///< .globl
+      Hidden,         ///< .hidden (ELF)
+      IndirectSymbol, ///< .indirect_symbol (Apple)
+      Internal,       ///< .internal (ELF)
+      LazyReference,  ///< .lazy_reference (Apple)
+      NoDeadStrip,    ///< .no_dead_strip (Apple)
+      PrivateExtern,  ///< .private_extern (Apple)
+      Protected,      ///< .protected (ELF)
+      Reference,      ///< .reference (Apple)
+      Weak,           ///< .weak
+      WeakDefinition, ///< .weak_definition (Apple)
+      WeakReference,  ///< .weak_reference (Apple)
+
+      SymbolAttrFirst = Global,
+      SymbolAttrLast = WeakReference
+    };
+
+  private:
+    MCContext &Context;
+
+    MCStreamer(const MCStreamer&); // DO NOT IMPLEMENT
+    MCStreamer &operator=(const MCStreamer&); // DO NOT IMPLEMENT
+
+  protected:
+    MCStreamer(MCContext &Ctx);
+
+  public:
+    virtual ~MCStreamer();
+
+    MCContext &getContext() const { return Context; }
+
+    /// @name Symbol & Section Management
+    /// @{
+
+    /// SwitchSection - Set the current section where code is being emitted to
+    /// @param Section.
+    ///
+    /// This corresponds to assembler directives like .section, .text, etc.
+    virtual void SwitchSection(MCSection *Section) = 0;
+
+    /// EmitLabel - Emit a label for @param Symbol into the current section.
+    ///
+    /// This corresponds to an assembler statement such as:
+    ///   foo:
+    ///
+    /// @param Symbol - The symbol to emit. A given symbol should only be
+    /// emitted as a label once, and symbols emitted as a label should never be
+    /// used in an assignment.
+    //
+    // FIXME: What to do about the current section? Should we get rid of the
+    // symbol section in the constructor and initialize it here?
+    virtual void EmitLabel(MCSymbol *Symbol) = 0;
+
+    /// EmitAssignment - Emit an assignment of @param Value to @param Symbol.
+    ///
+    /// This corresponds to an assembler statement such as:
+    ///  symbol = value
+    ///
+    /// The assignment generates no code, but has the side effect of binding the
+    /// value in the current context. For the assembly streamer, this prints the
+    /// binding into the .s file.
+    ///
+    /// @param Symbol - The symbol being assigned to.
+    /// @param Value - The value for the symbol.
+    /// @param MakeAbsolute - If true, then the symbol should be given the
+    /// absolute value of @param Value, even if @param Value would be
+    /// relocatable expression. This corresponds to the ".set" directive.
+    virtual void EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
+                                bool MakeAbsolute = false) = 0;
+
+    /// EmitSymbolAttribute - Add the given @param Attribute to @param Symbol.
+    //
+    // FIXME: This doesn't make much sense, could we just have attributes be on
+    // the symbol and make the printer smart enough to add the right symbols?
+    // This should work as long as the order of attributes in the file doesn't
+    // matter.
+    virtual void EmitSymbolAttribute(MCSymbol *Symbol,
+                                     SymbolAttr Attribute) = 0;
+
+    /// @}
+    /// @name Generating Data
+    /// @{
+
+    /// EmitBytes - Emit @param Length bytes starting at @param Data into the
+    /// output.
+    ///
+    /// This is used to implement assembler directives such as .byte, .ascii,
+    /// etc.
+    virtual void EmitBytes(const char *Data, unsigned Length) = 0;
+
+    /// EmitValue - Emit the expression @param Value into the output as a native
+    /// integer of the given @param Size bytes.
+    ///
+    /// This is used to implement assembler directives such as .word, .quad,
+    /// etc.
+    ///
+    /// @param Value - The value to emit.
+    /// @param Size - The size of the integer (in bytes) to emit. This must
+    /// match a native machine width.
+    virtual void EmitValue(const MCValue &Value, unsigned Size) = 0;
+
+    /// EmitValueToAlignment - Emit some number of copies of @param Value until
+    /// the byte alignment @param ByteAlignment is reached.
+    ///
+    /// If the number of bytes to emit for the alignment is not a multiple
+    /// of @param ValueSize, then the contents of the emitted fill bytes is
+    /// undefined.
+    ///
+    /// This is used to implement the .align assembler directive.
+    ///
+    /// @param ByteAlignment - The alignment to reach. This must be a power of
+    /// two.
+    /// @param Value - The value to use when filling bytes.
+    /// @param ValueSize - The size of the integer (in bytes) to emit for @param
+    /// Value. This must match a native machine width.
+    /// @param MaxBytesToEmit - The maximum numbers of bytes to emit, or 0. If
+    /// the alignment cannot be reached in this many bytes, no bytes are
+    /// emitted.
+    virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                      unsigned ValueSize = 1,
+                                      unsigned MaxBytesToEmit = 0) = 0;
+
+    /// EmitValueToOffset - Emit some number of copies of @param Value until the
+    /// byte offset @param Offset is reached.
+    ///
+    /// This is used to implement assembler directives such as .org.
+    ///
+    /// @param Offset - The offset to reach. This may be an expression, but the
+    /// expression must be associated with the current section.
+    /// @param Value - The value to use when filling bytes.
+    // 
+    // FIXME: How are we going to signal failures out of this?
+    virtual void EmitValueToOffset(const MCValue &Offset, 
+                                   unsigned char Value = 0) = 0;
+    
+    /// @}
+
+    /// EmitInstruction - Emit the given @param Instruction into the current
+    /// section.
+    virtual void EmitInstruction(const MCInst &Inst) = 0;
+
+    /// Finish - Finish emission of machine code and flush any output.
+    virtual void Finish() = 0;
+  };
+
+  /// createAsmStreamer - Create a machine code streamer which will print out
+  /// assembly for the native target, suitable for compiling with a native
+  /// assembler.
+  MCStreamer *createAsmStreamer(MCContext &Ctx, raw_ostream &OS);
+
+  // FIXME: These two may end up getting rolled into a single
+  // createObjectStreamer interface, which implements the assembler backend, and
+  // is parameterized on an output object file writer.
+
+  /// createMachOStreamer - Create a machine code streamer which will generate
+  /// Mach-O format object files.
+  MCStreamer *createMachOStreamer(MCContext &Ctx, raw_ostream &OS);
+
+  /// createELFStreamer - Create a machine code streamer which will generate
+  /// ELF format object files.
+  MCStreamer *createELFStreamer(MCContext &Ctx, raw_ostream &OS);
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCSymbol.h b/include/llvm/MC/MCSymbol.h
new file mode 100644
index 0000000..06f50ae
--- /dev/null
+++ b/include/llvm/MC/MCSymbol.h
@@ -0,0 +1,44 @@
+//===- MCSymbol.h - Machine Code Symbols ------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_MC_MCSYMBOL_H
+#define LLVM_MC_MCSYMBOL_H
+
+#include <string>
+
+namespace llvm {
+  class MCSection;
+
+  /// MCSymbol - A named symbol that can be associated with an MCSection.
+  class MCSymbol {
+    /// Section - The associated section; null unless set via setSection().
+    MCSection *Section;
+    /// Name - The symbol's name, copied from the constructor argument.
+    std::string Name;
+    /// IsTemporary - The temporary flag passed to the constructor.
+    unsigned IsTemporary : 1;
+
+  public:
+    /// Create a symbol with the given name and temporary flag and no section.
+    /// (The parameter names avoid the reserved "_Uppercase" identifier form.)
+    MCSymbol(const char *SymName, bool Temporary)
+      : Section(0), Name(SymName), IsTemporary(Temporary) {}
+
+    /// getSection - Return the associated section, or null if none was set.
+    MCSection *getSection() const { return Section; }
+    /// setSection - Associate this symbol with @param Value.
+    void setSection(MCSection *Value) { Section = Value; }
+
+    /// getName - Return the symbol's name.
+    const std::string &getName() const { return Name; }
+  };
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/MC/MCImm.h b/include/llvm/MC/MCValue.h
index 5b1efd8..7df12da 100644
--- a/include/llvm/MC/MCImm.h
+++ b/include/llvm/MC/MCValue.h
@@ -1,4 +1,4 @@
-//===-- llvm/MC/MCImm.h - MCImm class ---------------------------*- C++ -*-===//
+//===-- llvm/MC/MCValue.h - MCValue class -----------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,22 +7,25 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file contains the declaration of the MCInst and MCOperand classes, which
-// is the basic representation used to represent low-level machine code
-// instructions.
+// This file contains the declaration of the MCValue class.
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_MC_MCIMM_H
-#define LLVM_MC_MCIMM_H
+#ifndef LLVM_MC_MCVALUE_H
+#define LLVM_MC_MCVALUE_H
+
+#include "llvm/Support/DataTypes.h"
 
 namespace llvm {
 class MCSymbol;
 
-/// MCImm - This represents an "assembler immediate".  In its most general form,
-/// this can hold "SymbolA - SymbolB + imm64".  Not all targets supports
+/// MCValue - This represents an "assembler immediate".  In its most general
+/// form, this can hold "SymbolA - SymbolB + imm64".  Not all targets supports
 /// relocations of this general form, but we need to represent this anyway.
-class MCImm {
+///
+/// Note that this class must remain a simple POD value class, because we need
+/// it to live in unions etc.
+class MCValue {
   MCSymbol *SymA, *SymB;
   int64_t Cst;
 public:
@@ -32,16 +35,16 @@ public:
   MCSymbol *getSymB() const { return SymB; }
   
   
-  static MCImm get(MCSymbol *SymA, MCSymbol *SymB = 0, int64_t Val = 0) {
-    MCImm R;
+  static MCValue get(MCSymbol *SymA, MCSymbol *SymB = 0, int64_t Val = 0) {
+    MCValue R;
     R.Cst = Val;
     R.SymA = SymA;
     R.SymB = SymB;
     return R;
   }
   
-  static MCImm get(int64_t Val) {
-    MCImm R;
+  static MCValue get(int64_t Val) {
+    MCValue R;
     R.Cst = Val;
     R.SymA = 0;
     R.SymB = 0;
diff --git a/include/llvm/Support/DOTGraphTraits.h b/include/llvm/Support/DOTGraphTraits.h
index 7a61b2b..080297f 100644
--- a/include/llvm/Support/DOTGraphTraits.h
+++ b/include/llvm/Support/DOTGraphTraits.h
@@ -51,7 +51,8 @@ struct DefaultDOTGraphTraits {
   /// getNodeLabel - Given a node and a pointer to the top level graph, return
   /// the label to print in the node.
   template<typename GraphType>
-  static std::string getNodeLabel(const void *Node, const GraphType& Graph) {
+  static std::string getNodeLabel(const void *Node,
+                                  const GraphType& Graph, bool ShortNames) {
     return "";
   }
 
diff --git a/include/llvm/Support/GraphWriter.h b/include/llvm/Support/GraphWriter.h
index ca28aafa..01b44d0 100644
--- a/include/llvm/Support/GraphWriter.h
+++ b/include/llvm/Support/GraphWriter.h
@@ -72,6 +72,7 @@ template<typename GraphType>
 class GraphWriter {
   std::ostream &O;
   const GraphType &G;
+  bool ShortNames;
 
   typedef DOTGraphTraits<GraphType>           DOTTraits;
   typedef GraphTraits<GraphType>              GTraits;
@@ -79,7 +80,8 @@ class GraphWriter {
   typedef typename GTraits::nodes_iterator    node_iterator;
   typedef typename GTraits::ChildIteratorType child_iterator;
 public:
-  GraphWriter(std::ostream &o, const GraphType &g) : O(o), G(g) {}
+  GraphWriter(std::ostream &o, const GraphType &g, bool SN) :
+    O(o), G(g), ShortNames(SN) {}
 
   void writeHeader(const std::string &Name) {
     std::string GraphName = DOTTraits::getGraphName(G);
@@ -130,7 +132,7 @@ public:
     O << "label=\"{";
 
     if (!DOTTraits::renderGraphFromBottomUp()) {
-      O << DOT::EscapeString(DOTTraits::getNodeLabel(Node, G));
+      O << DOT::EscapeString(DOTTraits::getNodeLabel(Node, G, ShortNames));
 
       // If we should include the address of the node in the label, do so now.
       if (DOTTraits::hasNodeAddressLabel(Node, G))
@@ -156,7 +158,7 @@ public:
     }
 
     if (DOTTraits::renderGraphFromBottomUp()) {
-      O << DOT::EscapeString(DOTTraits::getNodeLabel(Node, G));
+      O << DOT::EscapeString(DOTTraits::getNodeLabel(Node, G, ShortNames));
 
       // If we should include the address of the node in the label, do so now.
       if (DOTTraits::hasNodeAddressLabel(Node, G))
@@ -250,10 +252,11 @@ public:
 
 template<typename GraphType>
 std::ostream &WriteGraph(std::ostream &O, const GraphType &G,
+                         bool ShortNames = false,
                          const std::string &Name = "",
                          const std::string &Title = "") {
   // Start the graph emission process...
-  GraphWriter<GraphType> W(O, G);
+  GraphWriter<GraphType> W(O, G, ShortNames);
 
   // Output the header for the graph...
   W.writeHeader(Title);
@@ -272,6 +275,7 @@ std::ostream &WriteGraph(std::ostream &O, const GraphType &G,
 template<typename GraphType>
 sys::Path WriteGraph(const GraphType &G,
                      const std::string& Name,
+                     bool ShortNames = false,
                      const std::string& Title = "") {
   std::string ErrMsg;
   sys::Path Filename = sys::Path::GetTemporaryDirectory(&ErrMsg);
@@ -290,7 +294,7 @@ sys::Path WriteGraph(const GraphType &G,
   std::ofstream O(Filename.c_str());
 
   if (O.good()) {
-    WriteGraph(O, G, Name, Title);
+    WriteGraph(O, G, ShortNames, Name, Title);
     cerr << " done. \n";
 
     O.close();
@@ -308,8 +312,9 @@ sys::Path WriteGraph(const GraphType &G,
 template<typename GraphType>
 void ViewGraph(const GraphType& G,
                const std::string& Name,
+               bool ShortNames = false,
                const std::string& Title = "") {
-  sys::Path Filename =  WriteGraph(G, Name, Title);
+  sys::Path Filename =  WriteGraph(G, Name, ShortNames, Title);
 
   if (Filename.isEmpty()) {
     return;
diff --git a/include/llvm/Support/Timer.h b/include/llvm/Support/Timer.h
index f34fc95..71b7ee5 100644
--- a/include/llvm/Support/Timer.h
+++ b/include/llvm/Support/Timer.h
@@ -16,6 +16,7 @@
 #define LLVM_SUPPORT_TIMER_H
 
 #include "llvm/Support/DataTypes.h"
+#include "llvm/System/Mutex.h"
 #include <string>
 #include <vector>
 #include <iosfwd>
@@ -34,28 +35,37 @@ class TimerGroup;
 /// if they are never started.
 ///
 class Timer {
-  int64_t Elapsed;        // Wall clock time elapsed in seconds
-  int64_t UserTime;       // User time elapsed
-  int64_t SystemTime;     // System time elapsed
-  int64_t MemUsed;       // Memory allocated (in bytes)
-  int64_t PeakMem;        // Peak memory used
-  int64_t PeakMemBase;    // Temporary for peak calculation...
+  double Elapsed;        // Wall clock time elapsed in seconds
+  double UserTime;       // User time elapsed
+  double SystemTime;     // System time elapsed
+  ssize_t MemUsed;       // Memory allocated (in bytes)
+  size_t PeakMem;        // Peak memory used
+  size_t PeakMemBase;    // Temporary for peak calculation...
   std::string Name;      // The name of this time variable
   bool Started;          // Has this time variable ever been started?
   TimerGroup *TG;        // The TimerGroup this Timer is in.
+  mutable sys::SmartMutex<true> Lock; // Mutex for the contents of this Timer.
 public:
   explicit Timer(const std::string &N);
   Timer(const std::string &N, TimerGroup &tg);
   Timer(const Timer &T);
   ~Timer();
 
-  int64_t getProcessTime() const { return UserTime+SystemTime; }
-  int64_t getWallTime() const { return Elapsed; }
-  int64_t getMemUsed() const { return MemUsed; }
-  int64_t getPeakMem() const { return PeakMem; }
+  double getProcessTime() const { return UserTime+SystemTime; }
+  double getWallTime() const { return Elapsed; }
+  ssize_t getMemUsed() const { return MemUsed; }
+  size_t getPeakMem() const { return PeakMem; }
   std::string getName() const { return Name; }
 
   const Timer &operator=(const Timer &T) {
+    if (&T < this) {
+      T.Lock.acquire();
+      Lock.acquire();
+    } else {
+      Lock.acquire();
+      T.Lock.acquire();
+    }
+    
     Elapsed = T.Elapsed;
     UserTime = T.UserTime;
     SystemTime = T.SystemTime;
@@ -65,6 +75,15 @@ public:
     Name = T.Name;
     Started = T.Started;
     assert(TG == T.TG && "Can only assign timers in the same TimerGroup!");
+    
+    if (&T < this) {
+      T.Lock.release();
+      Lock.release();
+    } else {
+      Lock.release();
+      T.Lock.release();
+    }
+    
     return *this;
   }
 
@@ -160,11 +179,9 @@ public:
 
 private:
   friend class Timer;
-  void addTimer() { ++NumTimers; }
+  void addTimer();
   void removeTimer();
-  void addTimerToPrint(const Timer &T) {
-    TimersToPrint.push_back(Timer(true, T));
-  }
+  void addTimerToPrint(const Timer &T);
 };
 
 } // End llvm namespace
diff --git a/include/llvm/System/Atomic.h b/include/llvm/System/Atomic.h
index c0612f9..4ec117b 100644
--- a/include/llvm/System/Atomic.h
+++ b/include/llvm/System/Atomic.h
@@ -20,14 +20,15 @@ namespace llvm {
   namespace sys {
     void MemoryFence();
 
-    uint32_t CompareAndSwap32(volatile uint32_t* ptr,
-                            uint32_t new_value,
-                            uint32_t old_value);
-    int32_t AtomicIncrement32(volatile int32_t* ptr);
-    int32_t AtomicDecrement32(volatile int32_t* ptr);
-    int32_t AtomicAdd32(volatile int32_t* ptr, int32_t val);
-    
-    int64_t AtomicAdd64(volatile int64_t* ptr, int64_t val);
+    typedef uint32_t cas_flag;
+    cas_flag CompareAndSwap(volatile cas_flag* ptr,
+                            cas_flag new_value,
+                            cas_flag old_value);
+    cas_flag AtomicIncrement(volatile cas_flag* ptr);
+    cas_flag AtomicDecrement(volatile cas_flag* ptr);
+    cas_flag AtomicAdd(volatile cas_flag* ptr, cas_flag val);
+    cas_flag AtomicMul(volatile cas_flag* ptr, cas_flag val);
+    cas_flag AtomicDiv(volatile cas_flag* ptr, cas_flag val);
   }
 }
 
diff --git a/include/llvm/System/ThreadLocal.h b/include/llvm/System/ThreadLocal.h
new file mode 100644
index 0000000..39b1e64
--- /dev/null
+++ b/include/llvm/System/ThreadLocal.h
@@ -0,0 +1,51 @@
+//===- llvm/System/ThreadLocal.h - Thread Local Data ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SYSTEM_THREAD_LOCAL_H
+#define LLVM_SYSTEM_THREAD_LOCAL_H
+
+#include "llvm/System/Threading.h"
+#include <cassert>
+
+namespace llvm {
+  namespace sys {
+    /// ThreadLocalImpl - Untyped base class of ThreadLocal.  Its member
+    /// functions are only declared here; they are defined outside this header.
+    class ThreadLocalImpl {
+      void* data;
+    public:
+      ThreadLocalImpl();
+      virtual ~ThreadLocalImpl();
+      void setInstance(const void* d);
+      const void* getInstance();
+    };
+
+    /// ThreadLocal - Typed wrapper around ThreadLocalImpl: it forwards to
+    /// setInstance()/getInstance() and converts between T* and void*.
+    template<class T>
+    class ThreadLocal : public ThreadLocalImpl {
+    public:
+      ThreadLocal() : ThreadLocalImpl() { }
+      /// get - Return the pointer from getInstance() as a T*.  getInstance()
+      /// yields a const void*, and static_cast cannot drop const, so the
+      /// qualifier is removed explicitly to keep this valid for non-const T.
+      T* get() {
+        return static_cast<T*>(const_cast<void*>(getInstance()));
+      }
+      /// set - Pass @param d on to setInstance().
+      void set(T* d) { setInstance(d); }
+    };
+  }
+}
+
+#endif
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index 40b0e7b..02451c2 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -173,14 +173,18 @@ public:
     /// ValueTypeActions - This is a bitvector that contains two bits for each
     /// value type, where the two bits correspond to the LegalizeAction enum.
     /// This can be queried with "getTypeAction(VT)".
-    uint32_t ValueTypeActions[2];
+    /// The array is dimensioned as (MVT::MAX_ALLOWED_VALUETYPE/32) * 2 words.
+    uint32_t ValueTypeActions[(MVT::MAX_ALLOWED_VALUETYPE/32)*2];
   public:
     ValueTypeActionImpl() {
       ValueTypeActions[0] = ValueTypeActions[1] = 0;
+      ValueTypeActions[2] = ValueTypeActions[3] = 0;
     }
     ValueTypeActionImpl(const ValueTypeActionImpl &RHS) {
       ValueTypeActions[0] = RHS.ValueTypeActions[0];
       ValueTypeActions[1] = RHS.ValueTypeActions[1];
+      ValueTypeActions[2] = RHS.ValueTypeActions[2];
+      ValueTypeActions[3] = RHS.ValueTypeActions[3];
     }
     
     LegalizeAction getTypeAction(MVT VT) const {
@@ -349,10 +353,13 @@ public:
   /// for it.
   LegalizeAction getOperationAction(unsigned Op, MVT VT) const {
     if (VT.isExtended()) return Expand;
-    assert(Op < array_lengthof(OpActions) &&
-           (unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*8 &&
+    assert(Op < array_lengthof(OpActions[0]) &&
+           (unsigned)VT.getSimpleVT() < sizeof(OpActions[0][0])*8 &&
            "Table isn't big enough!");
-    return (LegalizeAction)((OpActions[Op] >> (2*VT.getSimpleVT())) & 3);
+    unsigned I = (unsigned) VT.getSimpleVT();
+    unsigned J = I & 31;
+    I = I >> 5;
+    return (LegalizeAction)((OpActions[I][Op] >> (J*2) ) & 3);
   }
 
   /// isOperationLegalOrCustom - Return true if the specified operation is
@@ -940,10 +947,13 @@ protected:
   /// with the specified type and indicate what to do about it.
   void setOperationAction(unsigned Op, MVT VT,
                           LegalizeAction Action) {
-    assert((unsigned)VT.getSimpleVT() < sizeof(OpActions[0])*8 &&
-           Op < array_lengthof(OpActions) && "Table isn't big enough!");
-    OpActions[Op] &= ~(uint64_t(3UL) << VT.getSimpleVT()*2);
-    OpActions[Op] |= (uint64_t)Action << VT.getSimpleVT()*2;
+    assert((unsigned)VT.getSimpleVT() < sizeof(OpActions[0][0])*8 &&
+           Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
+    unsigned I = (unsigned) VT.getSimpleVT();
+    unsigned J = I & 31;
+    I = I >> 5;
+    OpActions[I][Op] &= ~(uint64_t(3UL) << (J*2));
+    OpActions[I][Op] |= (uint64_t)Action << (J*2);
   }
   
   /// setLoadExtAction - Indicate that the specified load with extension does
@@ -1566,7 +1576,9 @@ private:
   /// Most operations are Legal (aka, supported natively by the target), but
   /// operations that are not should be described.  Note that operations on
   /// non-legal value types are not described here.
-  uint64_t OpActions[ISD::BUILTIN_OP_END];
+  /// The table is indexed by VT.getSimpleVT() with two bits per value type, so
+  /// each operation needs MVT::MAX_ALLOWED_VALUETYPE * 2 bits in total.
+  uint64_t OpActions[MVT::MAX_ALLOWED_VALUETYPE/(sizeof(uint64_t)*4)][ISD::BUILTIN_OP_END];
   
   /// LoadExtActions - For each load of load extension type and each value type,
   /// keep a LegalizeAction that indicates how instruction selection should deal
diff --git a/include/llvm/Target/TargetSelect.h b/include/llvm/Target/TargetSelect.h
index 8aa314a..002d5fc 100644
--- a/include/llvm/Target/TargetSelect.h
+++ b/include/llvm/Target/TargetSelect.h
@@ -18,20 +18,21 @@
 
 #include "llvm/Config/config.h"
 
-namespace llvm {
+extern "C" {
   // Declare all of the target-initialization functions that are available.
-#define LLVM_TARGET(TargetName) void Initialize##TargetName##Target();
+#define LLVM_TARGET(TargetName) void LLVMInitialize##TargetName##Target();
 #include "llvm/Config/Targets.def"
   
   // Declare all of the available asm-printer initialization functions.
-  // Declare all of the target-initialization functions.
-#define LLVM_ASM_PRINTER(TargetName) void Initialize##TargetName##AsmPrinter();
+#define LLVM_ASM_PRINTER(TargetName) void LLVMInitialize##TargetName##AsmPrinter();
 #include "llvm/Config/AsmPrinters.def"
-  
+}
+
+namespace llvm {
   /// InitializeAllTargets - The main program should call this function if it
   /// wants to link in all available targets that LLVM is configured to support.
   inline void InitializeAllTargets() {
-#define LLVM_TARGET(TargetName) llvm::Initialize##TargetName##Target();
+#define LLVM_TARGET(TargetName) LLVMInitialize##TargetName##Target();
 #include "llvm/Config/Targets.def"
   }
   
@@ -39,18 +40,17 @@ namespace llvm {
   /// it wants all asm printers that LLVM is configured to support.  This will
   /// cause them to be linked into its executable.
   inline void InitializeAllAsmPrinters() {
-#define LLVM_ASM_PRINTER(TargetName) Initialize##TargetName##AsmPrinter();
+#define LLVM_ASM_PRINTER(TargetName) LLVMInitialize##TargetName##AsmPrinter();
 #include "llvm/Config/AsmPrinters.def"
   }
   
-  
   /// InitializeNativeTarget - The main program should call this function to
   /// initialize the native target corresponding to the host.  This is useful 
   /// for JIT applications to ensure that the target gets linked in correctly.
   inline bool InitializeNativeTarget() {
   // If we have a native target, initialize it to ensure it is linked in.
 #ifdef LLVM_NATIVE_ARCH
-#define DoInit2(TARG)   llvm::Initialize ## TARG ()
+#define DoInit2(TARG)   LLVMInitialize ## TARG ()
 #define DoInit(T) DoInit2(T)
     DoInit(LLVM_NATIVE_ARCH);
     return false;
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 7ab8721..98a68f6 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -114,13 +114,6 @@ AllocaInst *DemotePHIToStack(PHINode *P, Instruction *AllocaPoint = 0);
 bool OnlyUsedByDbgInfoIntrinsics(Instruction *I, 
                            SmallVectorImpl<DbgInfoIntrinsic *> *DbgInUses = 0);
 
-/// UserIsDebugInfo - Return true if U is a constant expr used by 
-/// llvm.dbg.variable or llvm.dbg.global_variable
-bool UserIsDebugInfo(User *U);
-
-/// RemoveDbgInfoUser - Remove an User which is representing debug info.
-void RemoveDbgInfoUser(User *U);
-
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Type.h b/include/llvm/Type.h
index 97d5043..d439233 100644
--- a/include/llvm/Type.h
+++ b/include/llvm/Type.h
@@ -103,7 +103,7 @@ private:
   /// has no AbstractTypeUsers, the type is deleted.  This is only sensical for
   /// derived types.
   ///
-  mutable int32_t RefCount;
+  mutable sys::cas_flag RefCount;
 
   const Type *getForwardedTypeInternal() const;
 
@@ -338,7 +338,7 @@ public:
 
   void addRef() const {
     assert(isAbstract() && "Cannot add a reference to a non-abstract type!");
-    sys::AtomicIncrement32(&RefCount);
+    sys::AtomicIncrement(&RefCount);
   }
 
   void dropRef() const {
@@ -347,8 +347,8 @@ public:
 
     // If this is the last PATypeHolder using this object, and there are no
     // PATypeHandles using it, the type is dead, delete it now.
-    int32_t Count = sys::AtomicDecrement32(&RefCount);
-    if (Count == 0 && AbstractTypeUsers.empty())
+    sys::cas_flag OldCount = sys::AtomicDecrement(&RefCount);
+    if (OldCount == 0 && AbstractTypeUsers.empty())
       this->destroy();
   }
   
diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp
index 143220c..8ada5a3 100644
--- a/lib/Analysis/CFGPrinter.cpp
+++ b/lib/Analysis/CFGPrinter.cpp
@@ -31,12 +31,6 @@
 #include <fstream>
 using namespace llvm;
 
-/// CFGOnly flag - This is used to control whether or not the CFG graph printer
-/// prints out the contents of basic blocks or not.  This is acceptable because
-/// this code is only really used for debugging purposes.
-///
-static bool CFGOnly = false;
-
 namespace llvm {
 template<>
 struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
@@ -45,12 +39,13 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
   }
 
   static std::string getNodeLabel(const BasicBlock *Node,
-                                  const Function *Graph) {
-    if (CFGOnly && !Node->getName().empty())
+                                  const Function *Graph,
+                                  bool ShortNames) {
+    if (ShortNames && !Node->getName().empty())
       return Node->getName() + ":";
 
     std::ostringstream Out;
-    if (CFGOnly) {
+    if (ShortNames) {
       WriteAsOperand(Out, Node, false);
       return Out.str();
     }
@@ -117,9 +112,7 @@ namespace {
     CFGOnlyViewer() : FunctionPass(&ID) {}
 
     virtual bool runOnFunction(Function &F) {
-      CFGOnly = true;
       F.viewCFG();
-      CFGOnly = false;
       return false;
     }
 
@@ -168,14 +161,20 @@ static RegisterPass<CFGPrinter>
 P1("dot-cfg", "Print CFG of function to 'dot' file", false, true);
 
 namespace {
-  struct VISIBILITY_HIDDEN CFGOnlyPrinter : public CFGPrinter {
+  struct VISIBILITY_HIDDEN CFGOnlyPrinter : public FunctionPass {
     static char ID; // Pass identification, replacement for typeid
-    CFGOnlyPrinter() : CFGPrinter(&ID) {}
+    CFGOnlyPrinter() : FunctionPass(&ID) {}
+    explicit CFGOnlyPrinter(void *pid) : FunctionPass(pid) {}
     virtual bool runOnFunction(Function &F) {
-      bool OldCFGOnly = CFGOnly;
-      CFGOnly = true;
-      CFGPrinter::runOnFunction(F);
-      CFGOnly = OldCFGOnly;
+      std::string Filename = "cfg." + F.getName() + ".dot";
+      cerr << "Writing '" << Filename << "'...";
+      std::ofstream File(Filename.c_str());
+
+      if (File.good())
+        WriteGraph(File, (const Function*)&F, true);
+      else
+        cerr << "  error opening file for writing!";
+      cerr << "\n";
       return false;
     }
     void print(std::ostream &OS, const Module* = 0) const {}
@@ -206,9 +205,7 @@ void Function::viewCFG() const {
 /// his can make the graph smaller.
 ///
 void Function::viewCFGOnly() const {
-  CFGOnly = true;
-  viewCFG();
-  CFGOnly = false;
+  ViewGraph(this, "cfg" + getName(), true);
 }
 
 FunctionPass *llvm::createCFGPrinterPass () {
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 093aa69..6f2a06c 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -18,6 +18,7 @@ add_llvm_library(LLVMAnalysis
   LibCallAliasAnalysis.cpp
   LibCallSemantics.cpp
   LiveValues.cpp
+  LoopDependenceAnalysis.cpp
   LoopInfo.cpp
   LoopPass.cpp
   LoopVR.cpp
@@ -32,3 +33,5 @@ add_llvm_library(LLVMAnalysis
   Trace.cpp
   ValueTracking.cpp
   )
+
+target_link_libraries (LLVMAnalysis LLVMSupport)
diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp
index d80d581..6c549e63 100644
--- a/lib/Analysis/DbgInfoPrinter.cpp
+++ b/lib/Analysis/DbgInfoPrinter.cpp
@@ -93,7 +93,7 @@ void PrintDbgInfo::printFuncStart(const DbgFuncStartInst *FS) {
   DISubprogram Subprogram(cast<GlobalVariable>(FS->getSubprogram()));
   std::string Res1, Res2;
   Out << "; fully qualified function name: " << Subprogram.getDisplayName(Res1)
-      << " return type: " << Subprogram.getType().getName(Res2)
+      << " return type: " << Subprogram.getReturnTypeName(Res2)
       << " at line " << Subprogram.getLineNumber()
       << "\n\n";
 }
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index adda5ee..6b27cf4 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -73,22 +73,22 @@ bool DIDescriptor::ValidDebugInfo(Value *V, CodeGenOpt::Level OptLevel) {
   return true;
 }
 
-DIDescriptor::DIDescriptor(GlobalVariable *gv, unsigned RequiredTag) {
-  GV = gv;
+DIDescriptor::DIDescriptor(GlobalVariable *GV, unsigned RequiredTag) {
+  DbgGV = GV;
   
   // If this is non-null, check to see if the Tag matches. If not, set to null.
   if (GV && getTag() != RequiredTag)
-    GV = 0;
+    DbgGV = 0;
 }
 
 const std::string &
 DIDescriptor::getStringField(unsigned Elt, std::string &Result) const {
-  if (GV == 0) {
+  if (DbgGV == 0) {
     Result.clear();
     return Result;
   }
 
-  Constant *C = GV->getInitializer();
+  Constant *C = DbgGV->getInitializer();
   if (C == 0 || Elt >= C->getNumOperands()) {
     Result.clear();
     return Result;
@@ -102,9 +102,9 @@ DIDescriptor::getStringField(unsigned Elt, std::string &Result) const {
 }
 
 uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
-  if (GV == 0) return 0;
+  if (DbgGV == 0) return 0;
 
-  Constant *C = GV->getInitializer();
+  Constant *C = DbgGV->getInitializer();
   if (C == 0 || Elt >= C->getNumOperands())
     return 0;
 
@@ -114,9 +114,9 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const {
 }
 
 DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
-  if (GV == 0) return DIDescriptor();
+  if (DbgGV == 0) return DIDescriptor();
 
-  Constant *C = GV->getInitializer();
+  Constant *C = DbgGV->getInitializer();
   if (C == 0 || Elt >= C->getNumOperands())
     return DIDescriptor();
 
@@ -125,9 +125,9 @@ DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const {
 }
 
 GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
-  if (GV == 0) return 0;
+  if (DbgGV == 0) return 0;
 
-  Constant *C = GV->getInitializer();
+  Constant *C = DbgGV->getInitializer();
   if (C == 0 || Elt >= C->getNumOperands())
     return 0;
 
@@ -140,12 +140,12 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const {
 //===----------------------------------------------------------------------===//
 
 // Needed by DIVariable::getType().
-DIType::DIType(GlobalVariable *gv) : DIDescriptor(gv) {
-  if (!gv) return;
+DIType::DIType(GlobalVariable *GV) : DIDescriptor(GV) {
+  if (!GV) return;
   unsigned tag = getTag();
   if (tag != dwarf::DW_TAG_base_type && !DIDerivedType::isDerivedType(tag) &&
       !DICompositeType::isCompositeType(tag))
-    GV = 0;
+    DbgGV = 0;
 }
 
 /// isDerivedType - Return true if the specified tag is legal for
@@ -198,8 +198,8 @@ bool DIVariable::isVariable(unsigned Tag) {
 }
 
 unsigned DIArray::getNumElements() const {
-  assert (GV && "Invalid DIArray");
-  Constant *C = GV->getInitializer();
+  assert (DbgGV && "Invalid DIArray");
+  Constant *C = DbgGV->getInitializer();
   assert (C && "Invalid DIArray initializer");
   return C->getNumOperands();
 }
@@ -367,71 +367,10 @@ Constant *DIFactory::GetStringConstant(const std::string &String) {
   return Slot = ConstantExpr::getBitCast(StrGV, DestTy);
 }
 
-/// GetOrCreateAnchor - Look up an anchor for the specified tag and name.  If it
-/// already exists, return it.  If not, create a new one and return it.
-DIAnchor DIFactory::GetOrCreateAnchor(unsigned TAG, const char *Name) {
-  const Type *EltTy = StructType::get(Type::Int32Ty, Type::Int32Ty, NULL);
-  
-  // Otherwise, create the global or return it if already in the module.
-  Constant *C = M.getOrInsertGlobal(Name, EltTy);
-  assert(isa<GlobalVariable>(C) && "Incorrectly typed anchor?");
-  GlobalVariable *GV = cast<GlobalVariable>(C);
-  
-  // If it has an initializer, it is already in the module.
-  if (GV->hasInitializer()) 
-    return SubProgramAnchor = DIAnchor(GV);
-  
-  GV->setLinkage(GlobalValue::LinkOnceAnyLinkage);
-  GV->setSection("llvm.metadata");
-  GV->setConstant(true);
-  M.addTypeName("llvm.dbg.anchor.type", EltTy);
-  
-  // Otherwise, set the initializer.
-  Constant *Elts[] = {
-    GetTagConstant(dwarf::DW_TAG_anchor),
-    ConstantInt::get(Type::Int32Ty, TAG)
-  };
-  
-  GV->setInitializer(ConstantStruct::get(Elts, 2));
-  return DIAnchor(GV);
-}
-
-
-
 //===----------------------------------------------------------------------===//
 // DIFactory: Primary Constructors
 //===----------------------------------------------------------------------===//
 
-/// GetOrCreateCompileUnitAnchor - Return the anchor for compile units,
-/// creating a new one if there isn't already one in the module.
-DIAnchor DIFactory::GetOrCreateCompileUnitAnchor() {
-  // If we already created one, just return it.
-  if (!CompileUnitAnchor.isNull())
-    return CompileUnitAnchor;
-  return CompileUnitAnchor = GetOrCreateAnchor(dwarf::DW_TAG_compile_unit,
-                                               "llvm.dbg.compile_units");
-}
-
-/// GetOrCreateSubprogramAnchor - Return the anchor for subprograms,
-/// creating a new one if there isn't already one in the module.
-DIAnchor DIFactory::GetOrCreateSubprogramAnchor() {
-  // If we already created one, just return it.
-  if (!SubProgramAnchor.isNull())
-    return SubProgramAnchor;
-  return SubProgramAnchor = GetOrCreateAnchor(dwarf::DW_TAG_subprogram,
-                                              "llvm.dbg.subprograms");
-}
-
-/// GetOrCreateGlobalVariableAnchor - Return the anchor for globals,
-/// creating a new one if there isn't already one in the module.
-DIAnchor DIFactory::GetOrCreateGlobalVariableAnchor() {
-  // If we already created one, just return it.
-  if (!GlobalVariableAnchor.isNull())
-    return GlobalVariableAnchor;
-  return GlobalVariableAnchor = GetOrCreateAnchor(dwarf::DW_TAG_variable,
-                                                  "llvm.dbg.global_variables");
-}
-
 /// GetOrCreateArray - Create an descriptor for an array of descriptors. 
 /// This implicitly uniques the arrays created.
 DIArray DIFactory::GetOrCreateArray(DIDescriptor *Tys, unsigned NumTys) {
@@ -494,7 +433,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
                                            unsigned RunTimeVer) {
   Constant *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_compile_unit),
-    getCastToEmpty(GetOrCreateCompileUnitAnchor()),
+    Constant::getNullValue(EmptyStructPtr),
     ConstantInt::get(Type::Int32Ty, LangID),
     GetStringConstant(Filename),
     GetStringConstant(Directory),
@@ -509,7 +448,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
   
   M.addTypeName("llvm.dbg.compile_unit.type", Init->getType());
   GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
-                                          GlobalValue::InternalLinkage,
+                                          GlobalValue::LinkOnceAnyLinkage,
                                           Init, "llvm.dbg.compile_unit", &M);
   GV->setSection("llvm.metadata");
   return DICompileUnit(GV);
@@ -655,7 +594,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
 
   Constant *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_subprogram),
-    getCastToEmpty(GetOrCreateSubprogramAnchor()),
+    Constant::getNullValue(EmptyStructPtr),
     getCastToEmpty(Context),
     GetStringConstant(Name),
     GetStringConstant(DisplayName),
@@ -671,7 +610,7 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
   
   M.addTypeName("llvm.dbg.subprogram.type", Init->getType());
   GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
-                                          GlobalValue::InternalLinkage,
+                                          GlobalValue::LinkOnceAnyLinkage,
                                           Init, "llvm.dbg.subprogram", &M);
   GV->setSection("llvm.metadata");
   return DISubprogram(GV);
@@ -687,7 +626,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name,
                                 bool isDefinition, llvm::GlobalVariable *Val) {
   Constant *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_variable),
-    getCastToEmpty(GetOrCreateGlobalVariableAnchor()),
+    Constant::getNullValue(EmptyStructPtr),
     getCastToEmpty(Context),
     GetStringConstant(Name),
     GetStringConstant(DisplayName),
@@ -704,7 +643,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, const std::string &Name,
   
   M.addTypeName("llvm.dbg.global_variable.type", Init->getType());
   GlobalVariable *GV = new GlobalVariable(Init->getType(), true,
-                                          GlobalValue::InternalLinkage,
+                                          GlobalValue::LinkOnceAnyLinkage,
                                           Init, "llvm.dbg.global_variable", &M);
   GV->setSection("llvm.metadata");
   return DIGlobalVariable(GV);
@@ -954,12 +893,42 @@ namespace llvm {
     Unit.getDirectory(Dir);
     return true;
   }
+
+  /// CollectDebugInfoAnchors - Collect CU, global and subprogram descriptors.
+  void CollectDebugInfoAnchors(Module &M,
+                               SmallVector<GlobalVariable *, 2> &CUs,
+                               SmallVector<GlobalVariable *, 4> &GVs,
+                               SmallVector<GlobalVariable *, 4> &SPs) {
+
+    for (Module::global_iterator GVI = M.global_begin(), E = M.global_end();
+       GVI != E; GVI++) {
+      GlobalVariable *GV = GVI;
+      if (GV->hasName() && strncmp(GV->getNameStart(), "llvm.dbg", 8) == 0
+          && GV->isConstant() && GV->hasInitializer()) {
+        DICompileUnit C(GV);
+        if (C.isNull() == false) {
+          CUs.push_back(GV);
+          continue;
+        }
+        DIGlobalVariable G(GV);
+        if (G.isNull() == false) {
+          GVs.push_back(GV);
+          continue;
+        }
+        DISubprogram S(GV);
+        if (S.isNull() == false) {
+          SPs.push_back(GV);
+          continue;
+        }
+      }
+    }
+  }
 }
 
 /// dump - Print descriptor.
 void DIDescriptor::dump() const {
   cerr << "[" << dwarf::TagString(getTag()) << "] ";
-  cerr << std::hex << "[GV:" << GV << "]" << std::dec;
+  cerr << std::hex << "[GV:" << DbgGV << "]" << std::dec;
 }
 
 /// dump - Print compile unit.
@@ -1000,11 +969,11 @@ void DIType::dump() const {
     cerr << " [fwd] ";
 
   if (isBasicType(Tag))
-    DIBasicType(GV).dump();
+    DIBasicType(DbgGV).dump();
   else if (isDerivedType(Tag))
-    DIDerivedType(GV).dump();
+    DIDerivedType(DbgGV).dump();
   else if (isCompositeType(Tag))
-    DICompositeType(GV).dump();
+    DICompositeType(DbgGV).dump();
   else {
     cerr << "Invalid DIType\n";
     return;
@@ -1051,7 +1020,7 @@ void DIGlobal::dump() const {
     cerr << " [def] ";
 
   if (isGlobalVariable(Tag))
-    DIGlobalVariable(GV).dump();
+    DIGlobalVariable(DbgGV).dump();
 
   cerr << "\n";
 }
@@ -1077,3 +1046,4 @@ void DIVariable::dump() const {
   getType().dump();
   cerr << "\n";
 }
+
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 8584d06..4ace049 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -65,6 +65,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/System/Atomic.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/SparseBitVector.h"
 #include "llvm/ADT/DenseSet.h"
@@ -284,7 +285,8 @@ namespace {
 
       // Timestamp a node (used for work list prioritization)
       void Stamp() {
-        Timestamp = Counter++;
+        Timestamp = sys::AtomicIncrement(&Counter);
+        --Timestamp;
       }
 
       bool isRep() const {
diff --git a/lib/Analysis/LoopDependenceAnalysis.cpp b/lib/Analysis/LoopDependenceAnalysis.cpp
new file mode 100644
index 0000000..172a2be
--- /dev/null
+++ b/lib/Analysis/LoopDependenceAnalysis.cpp
@@ -0,0 +1,47 @@
+//===- LoopDependenceAnalysis.cpp - LDA Implementation ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is the beginning of an implementation of a loop dependence analysis
+// framework, which is used to detect dependences in memory accesses in loops.
+//
+// Please note that this is work in progress and the interface is subject to
+// change.
+//
+// TODO: adapt as implementation progresses.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lda"
+#include "llvm/Analysis/LoopDependenceAnalysis.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+using namespace llvm;
+
+LoopPass *llvm::createLoopDependenceAnalysisPass() {
+  return new LoopDependenceAnalysis();
+}
+
+static RegisterPass<LoopDependenceAnalysis>
+R("lda", "Loop Dependence Analysis", false, true);
+char LoopDependenceAnalysis::ID = 0;
+
+//===----------------------------------------------------------------------===//
+//                   LoopDependenceAnalysis Implementation
+//===----------------------------------------------------------------------===//
+
+bool LoopDependenceAnalysis::runOnLoop(Loop *L, LPPassManager &) {
+  this->L = L;
+  SE = &getAnalysis<ScalarEvolution>();
+  return false;
+}
+
+void LoopDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<ScalarEvolution>();
+}
diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp
index 3a0a740..adb2bdc 100644
--- a/lib/Analysis/ProfileInfoLoader.cpp
+++ b/lib/Analysis/ProfileInfoLoader.cpp
@@ -73,7 +73,8 @@ static void ReadProfilingBlock(const char *ToolName, FILE *F,
 //
 ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
                                      const std::string &Filename,
-                                     Module &TheModule) : M(TheModule) {
+                                     Module &TheModule) : 
+                              M(TheModule), Warned(false) {
   FILE *F = fopen(Filename.c_str(), "r");
   if (F == 0) {
     cerr << ToolName << ": Error opening '" << Filename << "': ";
@@ -200,7 +201,6 @@ void ProfileInfoLoader::getBlockCounts(std::vector<std::pair<BasicBlock*,
         Counts.back().second += EdgeCounts[i].second;
         unsigned SuccNum = EdgeCounts[i].first.second;
         if (SuccNum >= TI->getNumSuccessors()) {
-          static bool Warned = false;
           if (!Warned) {
             cerr << "WARNING: profile info doesn't seem to match"
                  << " the program!\n";
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 5cbb5fa..dcb179af 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -95,7 +95,8 @@ STATISTIC(NumBruteForceTripCountsComputed,
 static cl::opt<unsigned>
 MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden,
                         cl::desc("Maximum number of iterations SCEV will "
-                                 "symbolically execute a constant derived loop"),
+                                 "symbolically execute a constant "
+                                 "derived loop"),
                         cl::init(100));
 
 static RegisterPass<ScalarEvolution>
@@ -132,6 +133,12 @@ bool SCEV::isOne() const {
   return false;
 }
 
+bool SCEV::isAllOnesValue() const {
+  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this))
+    return SC->getValue()->isAllOnesValue();
+  return false;
+}
+
 SCEVCouldNotCompute::SCEVCouldNotCompute() :
   SCEV(scCouldNotCompute) {}
 
@@ -150,10 +157,11 @@ bool SCEVCouldNotCompute::hasComputableLoopEvolution(const Loop *L) const {
   return false;
 }
 
-const SCEV* SCEVCouldNotCompute::
-replaceSymbolicValuesWithConcrete(const SCEV* Sym,
-                                  const SCEV* Conc,
-                                  ScalarEvolution &SE) const {
+const SCEV *
+SCEVCouldNotCompute::replaceSymbolicValuesWithConcrete(
+                                                    const SCEV *Sym,
+                                                    const SCEV *Conc,
+                                                    ScalarEvolution &SE) const {
   return this;
 }
 
@@ -165,11 +173,6 @@ bool SCEVCouldNotCompute::classof(const SCEV *S) {
   return S->getSCEVType() == scCouldNotCompute;
 }
 
-
-// SCEVConstants - Only allow the creation of one SCEVConstant for any
-// particular value.  Don't use a const SCEV* here, or else the object will
-// never be deleted!
-
 const SCEV* ScalarEvolution::getConstant(ConstantInt *V) {
   SCEVConstant *&R = SCEVConstants[V];
   if (R == 0) R = new SCEVConstant(V);
@@ -199,10 +202,6 @@ bool SCEVCastExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
   return Op->dominates(BB, DT);
 }
 
-// SCEVTruncates - Only allow the creation of one SCEVTruncateExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will
-// never be deleted!
-
 SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty)
   : SCEVCastExpr(scTruncate, op, ty) {
   assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
@@ -210,15 +209,10 @@ SCEVTruncateExpr::SCEVTruncateExpr(const SCEV* op, const Type *ty)
          "Cannot truncate non-integer value!");
 }
 
-
 void SCEVTruncateExpr::print(raw_ostream &OS) const {
   OS << "(trunc " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
 }
 
-// SCEVZeroExtends - Only allow the creation of one SCEVZeroExtendExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will never
-// be deleted!
-
 SCEVZeroExtendExpr::SCEVZeroExtendExpr(const SCEV* op, const Type *ty)
   : SCEVCastExpr(scZeroExtend, op, ty) {
   assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
@@ -230,10 +224,6 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
   OS << "(zext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
 }
 
-// SCEVSignExtends - Only allow the creation of one SCEVSignExtendExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will never
-// be deleted!
-
 SCEVSignExtendExpr::SCEVSignExtendExpr(const SCEV* op, const Type *ty)
   : SCEVCastExpr(scSignExtend, op, ty) {
   assert((Op->getType()->isInteger() || isa<PointerType>(Op->getType())) &&
@@ -245,10 +235,6 @@ void SCEVSignExtendExpr::print(raw_ostream &OS) const {
   OS << "(sext " << *Op->getType() << " " << *Op << " to " << *Ty << ")";
 }
 
-// SCEVCommExprs - Only allow the creation of one SCEVCommutativeExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will never
-// be deleted!
-
 void SCEVCommutativeExpr::print(raw_ostream &OS) const {
   assert(Operands.size() > 1 && "This plus expr shouldn't exist!");
   const char *OpStr = getOperationStr();
@@ -258,10 +244,11 @@ void SCEVCommutativeExpr::print(raw_ostream &OS) const {
   OS << ")";
 }
 
-const SCEV* SCEVCommutativeExpr::
-replaceSymbolicValuesWithConcrete(const SCEV* Sym,
-                                  const SCEV* Conc,
-                                  ScalarEvolution &SE) const {
+const SCEV *
+SCEVCommutativeExpr::replaceSymbolicValuesWithConcrete(
+                                                    const SCEV *Sym,
+                                                    const SCEV *Conc,
+                                                    ScalarEvolution &SE) const {
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     const SCEV* H =
       getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
@@ -298,11 +285,6 @@ bool SCEVNAryExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
   return true;
 }
 
-
-// SCEVUDivs - Only allow the creation of one SCEVUDivExpr for any particular
-// input.  Don't use a const SCEV* here, or else the object will never be
-// deleted!
-
 bool SCEVUDivExpr::dominates(BasicBlock *BB, DominatorTree *DT) const {
   return LHS->dominates(BB, DT) && RHS->dominates(BB, DT);
 }
@@ -320,14 +302,10 @@ const Type *SCEVUDivExpr::getType() const {
   return RHS->getType();
 }
 
-// SCEVAddRecExprs - Only allow the creation of one SCEVAddRecExpr for any
-// particular input.  Don't use a const SCEV* here, or else the object will never
-// be deleted!
-
-const SCEV* SCEVAddRecExpr::
-replaceSymbolicValuesWithConcrete(const SCEV* Sym,
-                                  const SCEV* Conc,
-                                  ScalarEvolution &SE) const {
+const SCEV *
+SCEVAddRecExpr::replaceSymbolicValuesWithConcrete(const SCEV *Sym,
+                                                  const SCEV *Conc,
+                                                  ScalarEvolution &SE) const {
   for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
     const SCEV* H =
       getOperand(i)->replaceSymbolicValuesWithConcrete(Sym, Conc, SE);
@@ -349,12 +327,22 @@ replaceSymbolicValuesWithConcrete(const SCEV* Sym,
 
 
 bool SCEVAddRecExpr::isLoopInvariant(const Loop *QueryLoop) const {
-  // This recurrence is invariant w.r.t to QueryLoop iff QueryLoop doesn't
-  // contain L and if the start is invariant.
   // Add recurrences are never invariant in the function-body (null loop).
-  return QueryLoop &&
-         !QueryLoop->contains(L->getHeader()) &&
-         getOperand(0)->isLoopInvariant(QueryLoop);
+  if (!QueryLoop)
+    return false;
+
+  // This recurrence is variant w.r.t. QueryLoop if QueryLoop contains L.
+  if (QueryLoop->contains(L->getHeader()))
+    return false;
+
+  // This recurrence is variant w.r.t. QueryLoop if any of its operands
+  // are variant.
+  for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
+    if (!getOperand(i)->isLoopInvariant(QueryLoop))
+      return false;
+
+  // Otherwise it's loop-invariant.
+  return true;
 }
 
 
@@ -365,10 +353,6 @@ void SCEVAddRecExpr::print(raw_ostream &OS) const {
   OS << "}<" << L->getHeader()->getName() + ">";
 }
 
-// SCEVUnknowns - Only allow the creation of one SCEVUnknown for any particular
-// value.  Don't use a const SCEV* here, or else the object will never be
-// deleted!
-
 bool SCEVUnknown::isLoopInvariant(const Loop *L) const {
   // All non-instruction values are loop invariant.  All instructions are loop
   // invariant if they are not contained in the specified loop.
@@ -583,7 +567,7 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
   // safe in modular arithmetic.
   //
   // However, this code doesn't use exactly that formula; the formula it uses
-  // is something like the following, where T is the number of factors of 2 in 
+  // is something like the following, where T is the number of factors of 2 in
   // K! (i.e. trailing zeros in the binary representation of K!), and ^ is
   // exponentiation:
   //
@@ -595,7 +579,7 @@ static const SCEV* BinomialCoefficient(const SCEV* It, unsigned K,
   // arithmetic.  To do exact division in modular arithmetic, all we have
   // to do is multiply by the inverse.  Therefore, this step can be done at
   // width W.
-  // 
+  //
   // The next issue is how to safely do the division by 2^T.  The way this
   // is done is by doing the multiplication step at a width of at least W + T
   // bits.  This way, the bottom W+T bits of the product are accurate. Then,
@@ -713,8 +697,8 @@ const SCEV* ScalarEvolution::getTruncateExpr(const SCEV* Op,
   Ty = getEffectiveSCEVType(Ty);
 
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op))
-    return getUnknown(
-        ConstantExpr::getTrunc(SC->getValue(), Ty));
+    return getConstant(
+      cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty)));
 
   // trunc(trunc(x)) --> trunc(x)
   if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op))
@@ -753,7 +737,7 @@ const SCEV* ScalarEvolution::getZeroExtendExpr(const SCEV* Op,
     const Type *IntTy = getEffectiveSCEVType(Ty);
     Constant *C = ConstantExpr::getZExt(SC->getValue(), IntTy);
     if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
-    return getUnknown(C);
+    return getConstant(cast<ConstantInt>(C));
   }
 
   // zext(zext(x)) --> zext(x)
@@ -841,7 +825,7 @@ const SCEV* ScalarEvolution::getSignExtendExpr(const SCEV* Op,
     const Type *IntTy = getEffectiveSCEVType(Ty);
     Constant *C = ConstantExpr::getSExt(SC->getValue(), IntTy);
     if (IntTy != Ty) C = ConstantExpr::getIntToPtr(C, Ty);
-    return getUnknown(C);
+    return getConstant(cast<ConstantInt>(C));
   }
 
   // sext(sext(x)) --> sext(x)
@@ -1199,10 +1183,11 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
       Ops.clear();
       if (AccumulatedConstant != 0)
         Ops.push_back(getConstant(AccumulatedConstant));
-      for (std::map<APInt, SmallVector<const SCEV*, 4>, APIntCompare>::iterator I =
-           MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
+      for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator
+           I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I)
         if (I->first != 0)
-          Ops.push_back(getMulExpr(getConstant(I->first), getAddExpr(I->second)));
+          Ops.push_back(getMulExpr(getConstant(I->first),
+                                   getAddExpr(I->second)));
       if (Ops.empty())
         return getIntegerSCEV(0, Ty);
       if (Ops.size() == 1)
@@ -1257,14 +1242,15 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
             // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E))
             const SCEV* InnerMul1 = Mul->getOperand(MulOp == 0);
             if (Mul->getNumOperands() != 2) {
-              SmallVector<const SCEV*, 4> MulOps(Mul->op_begin(), Mul->op_end());
+              SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(),
+                                                  Mul->op_end());
               MulOps.erase(MulOps.begin()+MulOp);
               InnerMul1 = getMulExpr(MulOps);
             }
             const SCEV* InnerMul2 = OtherMul->getOperand(OMulOp == 0);
             if (OtherMul->getNumOperands() != 2) {
-              SmallVector<const SCEV*, 4> MulOps(OtherMul->op_begin(),
-                                             OtherMul->op_end());
+              SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(),
+                                                  OtherMul->op_end());
               MulOps.erase(MulOps.begin()+OMulOp);
               InnerMul2 = getMulExpr(MulOps);
             }
@@ -1330,7 +1316,8 @@ const SCEV* ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV*> &Ops) {
         const SCEVAddRecExpr *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]);
         if (AddRec->getLoop() == OtherAddRec->getLoop()) {
           // Other + {A,+,B} + {C,+,D}  -->  Other + {A+C,+,B+D}
-          SmallVector<const SCEV*, 4> NewOps(AddRec->op_begin(), AddRec->op_end());
+          SmallVector<const SCEV *, 4> NewOps(AddRec->op_begin(),
+                                              AddRec->op_end());
           for (unsigned i = 0, e = OtherAddRec->getNumOperands(); i != e; ++i) {
             if (i >= NewOps.size()) {
               NewOps.insert(NewOps.end(), OtherAddRec->op_begin()+i,
@@ -1394,7 +1381,7 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
     ++Idx;
     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
       // We found two constants, fold them together!
-      ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() * 
+      ConstantInt *Fold = ConstantInt::get(LHSC->getValue()->getValue() *
                                            RHSC->getValue()->getValue());
       Ops[0] = getConstant(Fold);
       Ops.erase(Ops.begin()+1);  // Erase the folded element
@@ -1531,8 +1518,8 @@ const SCEV* ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV*> &Ops) {
 
 /// getUDivExpr - Get a canonical multiply expression, or something simpler if
 /// possible.
-const SCEV* ScalarEvolution::getUDivExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   assert(getEffectiveSCEVType(LHS->getType()) ==
          getEffectiveSCEVType(RHS->getType()) &&
          "SCEVUDivExpr operand types don't match!");
@@ -1611,7 +1598,8 @@ const SCEV* ScalarEvolution::getUDivExpr(const SCEV* LHS,
     if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) {
       Constant *LHSCV = LHSC->getValue();
       Constant *RHSCV = RHSC->getValue();
-      return getUnknown(ConstantExpr::getUDiv(LHSCV, RHSCV));
+      return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV,
+                                                                 RHSCV)));
     }
   }
 
@@ -1640,8 +1628,9 @@ const SCEV* ScalarEvolution::getAddRecExpr(const SCEV* Start,
 
 /// getAddRecExpr - Get an add recurrence expression for the specified loop.
 /// Simplify the expression as much as possible.
-const SCEV* ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
-                                          const Loop *L) {
+const SCEV *
+ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operands,
+                               const Loop *L) {
   if (Operands.size() == 1) return Operands[0];
 #ifndef NDEBUG
   for (unsigned i = 1, e = Operands.size(); i != e; ++i)
@@ -1662,8 +1651,29 @@ const SCEV* ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operand
       SmallVector<const SCEV*, 4> NestedOperands(NestedAR->op_begin(),
                                                 NestedAR->op_end());
       Operands[0] = NestedAR->getStart();
-      NestedOperands[0] = getAddRecExpr(Operands, L);
-      return getAddRecExpr(NestedOperands, NestedLoop);
+      // AddRecs require their operands be loop-invariant with respect to their
+      // loops. Don't perform this transformation if it would break this
+      // requirement.
+      bool AllInvariant = true;
+      for (unsigned i = 0, e = Operands.size(); i != e; ++i)
+        if (!Operands[i]->isLoopInvariant(L)) {
+          AllInvariant = false;
+          break;
+        }
+      if (AllInvariant) {
+        NestedOperands[0] = getAddRecExpr(Operands, L);
+        AllInvariant = true;
+        for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i)
+          if (!NestedOperands[i]->isLoopInvariant(NestedLoop)) {
+            AllInvariant = false;
+            break;
+          }
+        if (AllInvariant)
+          // Ok, both add recurrences are valid after the transformation.
+          return getAddRecExpr(NestedOperands, NestedLoop);
+      }
+      // Reset Operands to its original state.
+      Operands[0] = NestedAR;
     }
   }
 
@@ -1673,8 +1683,8 @@ const SCEV* ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV*> &Operand
   return Result;
 }
 
-const SCEV* ScalarEvolution::getSMaxExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   SmallVector<const SCEV*, 2> Ops;
   Ops.push_back(LHS);
   Ops.push_back(RHS);
@@ -1711,10 +1721,14 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
       LHSC = cast<SCEVConstant>(Ops[0]);
     }
 
-    // If we are left with a constant -inf, strip it off.
+    // If we are left with a constant minimum-int, strip it off.
     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
       Ops.erase(Ops.begin());
       --Idx;
+    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
+      // If we have an smax with a constant maximum-int, it will always be
+      // maximum-int.
+      return Ops[0];
     }
   }
 
@@ -1760,8 +1774,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
   return Result;
 }
 
-const SCEV* ScalarEvolution::getUMaxExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   SmallVector<const SCEV*, 2> Ops;
   Ops.push_back(LHS);
   Ops.push_back(RHS);
@@ -1798,10 +1812,14 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
       LHSC = cast<SCEVConstant>(Ops[0]);
     }
 
-    // If we are left with a constant zero, strip it off.
+    // If we are left with a constant minimum-int, strip it off.
     if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
       Ops.erase(Ops.begin());
       --Idx;
+    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
+      // If we have a umax with a constant maximum-int, it will always be
+      // maximum-int.
+      return Ops[0];
     }
   }
 
@@ -1847,23 +1865,24 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV*> &Ops) {
   return Result;
 }
 
-const SCEV* ScalarEvolution::getSMinExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   // ~smax(~x, ~y) == smin(x, y).
   return getNotSCEV(getSMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
 }
 
-const SCEV* ScalarEvolution::getUMinExpr(const SCEV* LHS,
-                                        const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+                                         const SCEV *RHS) {
   // ~umax(~x, ~y) == umin(x, y)
   return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS)));
 }
 
 const SCEV* ScalarEvolution::getUnknown(Value *V) {
-  if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
-    return getConstant(CI);
-  if (isa<ConstantPointerNull>(V))
-    return getIntegerSCEV(0, V->getType());
+  // Don't attempt to do anything other than create a SCEVUnknown object
+  // here.  createSCEV only calls getUnknown after checking for all other
+  // interesting possibilities, and any other code that calls getUnknown
+  // is doing so in order to hide a value from SCEV canonicalization.
+
   SCEVUnknown *&Result = SCEVUnknowns[V];
   if (Result == 0) Result = new SCEVUnknown(V);
   return Result;
@@ -1941,26 +1960,18 @@ const SCEV* ScalarEvolution::getSCEV(Value *V) {
   return S;
 }
 
-/// getIntegerSCEV - Given an integer or FP type, create a constant for the
+/// getIntegerSCEV - Given a SCEVable type, create a constant for the
 /// specified signed integer value and return a SCEV for the constant.
 const SCEV* ScalarEvolution::getIntegerSCEV(int Val, const Type *Ty) {
-  Ty = getEffectiveSCEVType(Ty);
-  Constant *C;
-  if (Val == 0)
-    C = Constant::getNullValue(Ty);
-  else if (Ty->isFloatingPoint())
-    C = ConstantFP::get(APFloat(Ty==Type::FloatTy ? APFloat::IEEEsingle :
-                                APFloat::IEEEdouble, Val));
-  else
-    C = ConstantInt::get(Ty, Val);
-  return getUnknown(C);
+  const IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty));
+  return getConstant(ConstantInt::get(ITy, Val));
 }
 
 /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V
 ///
 const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) {
   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
-    return getUnknown(ConstantExpr::getNeg(VC->getValue()));
+    return getConstant(cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));
 
   const Type *Ty = V->getType();
   Ty = getEffectiveSCEVType(Ty);
@@ -1970,7 +1981,7 @@ const SCEV* ScalarEvolution::getNegativeSCEV(const SCEV* V) {
 /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V
 const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) {
   if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))
-    return getUnknown(ConstantExpr::getNot(VC->getValue()));
+    return getConstant(cast<ConstantInt>(ConstantExpr::getNot(VC->getValue())));
 
   const Type *Ty = V->getType();
   Ty = getEffectiveSCEVType(Ty);
@@ -1980,8 +1991,8 @@ const SCEV* ScalarEvolution::getNotSCEV(const SCEV* V) {
 
 /// getMinusSCEV - Return a SCEV corresponding to LHS - RHS.
 ///
-const SCEV* ScalarEvolution::getMinusSCEV(const SCEV* LHS,
-                                         const SCEV* RHS) {
+const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS,
+                                          const SCEV *RHS) {
   // X - Y --> X + -Y
   return getAddExpr(LHS, getNegativeSCEV(RHS));
 }
@@ -2087,8 +2098,8 @@ ScalarEvolution::getTruncateOrNoop(const SCEV* V, const Type *Ty) {
 /// getUMaxFromMismatchedTypes - Promote the operands to the wider of
 /// the types using zero-extension, and then perform a umax operation
 /// with them.
-const SCEV* ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV* LHS,
-                                                       const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS,
+                                                        const SCEV *RHS) {
   const SCEV* PromotedLHS = LHS;
   const SCEV* PromotedRHS = RHS;
 
@@ -2103,8 +2114,8 @@ const SCEV* ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV* LHS,
 /// getUMinFromMismatchedTypes - Promote the operands to the wider of
 /// the types using zero-extension, and then perform a umin operation
 /// with them.
-const SCEV* ScalarEvolution::getUMinFromMismatchedTypes(const SCEV* LHS,
-                                                       const SCEV* RHS) {
+const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
+                                                        const SCEV *RHS) {
   const SCEV* PromotedLHS = LHS;
   const SCEV* PromotedRHS = RHS;
 
@@ -2119,9 +2130,10 @@ const SCEV* ScalarEvolution::getUMinFromMismatchedTypes(const SCEV* LHS,
 /// ReplaceSymbolicValueWithConcrete - This looks up the computed SCEV value for
 /// the specified instruction and replaces any references to the symbolic value
 /// SymName with the specified value.  This is used during PHI resolution.
-void ScalarEvolution::
-ReplaceSymbolicValueWithConcrete(Instruction *I, const SCEV* SymName,
-                                 const SCEV* NewVal) {
+void
+ScalarEvolution::ReplaceSymbolicValueWithConcrete(Instruction *I,
+                                                  const SCEV *SymName,
+                                                  const SCEV *NewVal) {
   std::map<SCEVCallbackVH, const SCEV*>::iterator SI =
     Scalars.find(SCEVCallbackVH(I, this));
   if (SI == Scalars.end()) return;
@@ -2190,8 +2202,10 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
             if (Accum->isLoopInvariant(L) ||
                 (isa<SCEVAddRecExpr>(Accum) &&
                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
-              const SCEV* StartVal = getSCEV(PN->getIncomingValue(IncomingEdge));
-              const SCEV* PHISCEV  = getAddRecExpr(StartVal, Accum, L);
+              const SCEV *StartVal =
+                getSCEV(PN->getIncomingValue(IncomingEdge));
+              const SCEV *PHISCEV =
+                getAddRecExpr(StartVal, Accum, L);
 
               // Okay, for the entire analysis of this edge we assumed the PHI
               // to be symbolic.  We now need to go back and update all of the
@@ -2216,7 +2230,7 @@ const SCEV* ScalarEvolution::createNodeForPHI(PHINode *PN) {
             // initial step of the addrec evolution.
             if (StartVal == getMinusSCEV(AddRec->getOperand(0),
                                             AddRec->getOperand(1))) {
-              const SCEV* PHISCEV = 
+              const SCEV* PHISCEV =
                  getAddRecExpr(StartVal, AddRec->getOperand(1), L);
 
               // Okay, for the entire analysis of this edge we assumed the PHI
@@ -2402,6 +2416,38 @@ ScalarEvolution::GetMinSignBits(const SCEV* S) {
             getTypeSizeInBits(C->getOperand()->getType()));
   }
 
+  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
+    unsigned BitWidth = getTypeSizeInBits(A->getType());
+
+    // Special case decrementing a value (ADD X, -1):
+    if (const SCEVConstant *CRHS = dyn_cast<SCEVConstant>(A->getOperand(0)))
+      if (CRHS->isAllOnesValue()) {
+        SmallVector<const SCEV *, 4> OtherOps(A->op_begin() + 1, A->op_end());
+        const SCEV *OtherOpsAdd = getAddExpr(OtherOps);
+        unsigned LZ = GetMinLeadingZeros(OtherOpsAdd);
+
+        // If the input is known to be 0 or 1, the output is 0/-1, which is all
+        // sign bits set.
+        if (LZ == BitWidth - 1)
+          return BitWidth;
+
+        // If we are subtracting one from a positive number, there is no carry
+        // out of the result.
+        if (LZ > 0)
+          return GetMinSignBits(OtherOpsAdd);
+      }
+
+    // Add can have at most one carry bit.  Thus we know that the output
+    // is, at worst, one more bit than the inputs.
+    unsigned Min = BitWidth;
+    for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) {
+      unsigned N = GetMinSignBits(A->getOperand(i));
+      Min = std::min(Min, N) - 1;
+      if (Min == 0) return 1;
+    }
+    return 1;
+  }
+
   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
     // For a SCEVUnknown, ask ValueTracking.
     return ComputeNumSignBits(U->getValue(), TD);
@@ -2422,6 +2468,12 @@ const SCEV* ScalarEvolution::createSCEV(Value *V) {
     Opcode = I->getOpcode();
   else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
     Opcode = CE->getOpcode();
+  else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
+    return getConstant(CI);
+  else if (isa<ConstantPointerNull>(V))
+    return getIntegerSCEV(0, V->getType());
+  else if (isa<UndefValue>(V))
+    return getIntegerSCEV(0, V->getType());
   else
     return getUnknown(V);
 
@@ -2750,7 +2802,8 @@ void ScalarEvolution::forgetLoopPHIs(const Loop *L) {
   SmallVector<Instruction *, 16> Worklist;
   for (BasicBlock::iterator I = Header->begin();
        PHINode *PN = dyn_cast<PHINode>(I); ++I) {
-    std::map<SCEVCallbackVH, const SCEV*>::iterator It = Scalars.find((Value*)I);
+    std::map<SCEVCallbackVH, const SCEV*>::iterator It =
+      Scalars.find((Value*)I);
     if (It != Scalars.end() && !isa<SCEVUnknown>(It->second))
       Worklist.push_back(PN);
   }
@@ -2775,7 +2828,6 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
   const SCEV* BECount = CouldNotCompute;
   const SCEV* MaxBECount = CouldNotCompute;
   bool CouldNotComputeBECount = false;
-  bool CouldNotComputeMaxBECount = false;
   for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
     BackedgeTakenInfo NewBTI =
       ComputeBackedgeTakenCountFromExit(L, ExitingBlocks[i]);
@@ -2788,25 +2840,13 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {
     } else if (!CouldNotComputeBECount) {
       if (BECount == CouldNotCompute)
         BECount = NewBTI.Exact;
-      else {
-        // TODO: More analysis could be done here. For example, a
-        // loop with a short-circuiting && operator has an exact count
-        // of the min of both sides.
-        CouldNotComputeBECount = true;
-        BECount = CouldNotCompute;
-      }
-    }
-    if (NewBTI.Max == CouldNotCompute) {
-      // We couldn't compute an maximum value for this exit, so
-      // we won't be able to compute an maximum value for the loop.
-      CouldNotComputeMaxBECount = true;
-      MaxBECount = CouldNotCompute;
-    } else if (!CouldNotComputeMaxBECount) {
-      if (MaxBECount == CouldNotCompute)
-        MaxBECount = NewBTI.Max;
       else
-        MaxBECount = getUMaxFromMismatchedTypes(MaxBECount, NewBTI.Max);
+        BECount = getUMinFromMismatchedTypes(BECount, NewBTI.Exact);
     }
+    if (MaxBECount == CouldNotCompute)
+      MaxBECount = NewBTI.Max;
+    else if (NewBTI.Max != CouldNotCompute)
+      MaxBECount = getUMinFromMismatchedTypes(MaxBECount, NewBTI.Max);
   }
 
   return BackedgeTakenInfo(BECount, MaxBECount);
@@ -2825,7 +2865,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
   BranchInst *ExitBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
   if (ExitBr == 0) return CouldNotCompute;
   assert(ExitBr->isConditional() && "If unconditional, it can't be in loop!");
-  
+
   // At this point, we know we have a conditional branch that determines whether
   // the loop is exited.  However, we don't know if the branch is executed each
   // time through the loop.  If not, then the execution count of the branch will
@@ -2887,9 +2927,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
                                                        Value *ExitCond,
                                                        BasicBlock *TBB,
                                                        BasicBlock *FBB) {
-  // Check if the controlling expression for this loop is an and or or. In
-  // such cases, an exact backedge-taken count may be infeasible, but a
-  // maximum count may still be feasible.
+  // Check if the controlling expression for this loop is an And or Or.
   if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
     if (BO->getOpcode() == Instruction::And) {
       // Recurse on the operands of the and.
@@ -3002,7 +3040,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
   LHS = getSCEVAtScope(LHS, L);
   RHS = getSCEVAtScope(RHS, L);
 
-  // At this point, we would like to compute how many iterations of the 
+  // At this point, we would like to compute how many iterations of the
   // loop the predicate will return true for these inputs.
   if (LHS->isLoopInvariant(L) && !RHS->isLoopInvariant(L)) {
     // If there is a loop-invariant, force it into the RHS.
@@ -3064,7 +3102,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
     if (ExitCond->getOperand(0)->getType()->isUnsigned())
       errs() << "[unsigned] ";
     errs() << *LHS << "   "
-         << Instruction::getOpcodeName(Instruction::ICmp) 
+         << Instruction::getOpcodeName(Instruction::ICmp)
          << "   " << *RHS << "\n";
 #endif
     break;
@@ -3120,10 +3158,12 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
 /// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
 /// 'icmp op load X, cst', try to see if we can compute the backedge
 /// execution count.
-const SCEV* ScalarEvolution::
-ComputeLoadConstantCompareBackedgeTakenCount(LoadInst *LI, Constant *RHS,
-                                             const Loop *L,
-                                             ICmpInst::Predicate predicate) {
+const SCEV *
+ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
+                                                LoadInst *LI,
+                                                Constant *RHS,
+                                                const Loop *L,
+                                                ICmpInst::Predicate predicate) {
   if (LI->isVolatile()) return CouldNotCompute;
 
   // Check to see if the loaded pointer is a getelementptr of a global.
@@ -3279,8 +3319,10 @@ static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
 /// in the header of its containing loop, we know the loop executes a
 /// constant number of times, and the PHI node is just a recurrence
 /// involving constants, fold it.
-Constant *ScalarEvolution::
-getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L){
+Constant *
+ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
+                                                   const APInt& BEs,
+                                                   const Loop *L) {
   std::map<PHINode*, Constant*>::iterator I =
     ConstantEvolutionLoopExitValue.find(PN);
   if (I != ConstantEvolutionLoopExitValue.end())
@@ -3330,8 +3372,10 @@ getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs, const Loop *L){
 /// try to evaluate a few iterations of the loop until we get the exit
 /// condition gets a value of ExitWhen (true or false).  If we cannot
 /// evaluate the trip count of the loop, return CouldNotCompute.
-const SCEV* ScalarEvolution::
-ComputeBackedgeTakenCountExhaustively(const Loop *L, Value *Cond, bool ExitWhen) {
+const SCEV *
+ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
+                                                       Value *Cond,
+                                                       bool ExitWhen) {
   PHINode *PN = getConstantEvolvingPHI(Cond, L);
   if (PN == 0) return CouldNotCompute;
 
@@ -3467,7 +3511,7 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
             }
           }
         }
-        
+
         Constant *C;
         if (const CmpInst *CI = dyn_cast<CmpInst>(I))
           C = ConstantFoldCompareInstOperands(CI->getPredicate(),
@@ -3492,7 +3536,8 @@ const SCEV* ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
       if (OpAtScope != Comm->getOperand(i)) {
         // Okay, at least one of these operands is loop variant but might be
         // foldable.  Build a new instance of the folded commutative expression.
-        SmallVector<const SCEV*, 8> NewOps(Comm->op_begin(), Comm->op_begin()+i);
+        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
+                                            Comm->op_begin()+i);
         NewOps.push_back(OpAtScope);
 
         for (++i; i != e; ++i) {
@@ -3640,7 +3685,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
   APInt Two(BitWidth, 2);
   APInt Four(BitWidth, 4);
 
-  { 
+  {
     using namespace APIntOps;
     const APInt& C = L;
     // Convert from chrec coefficients to polynomial coefficients AX^2+BX+C
@@ -3660,7 +3705,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
     // integer value or else APInt::sqrt() will assert.
     APInt SqrtVal(SqrtTerm.sqrt());
 
-    // Compute the two solutions for the quadratic formula. 
+    // Compute the two solutions for the quadratic formula.
     // The divisions must be performed as signed divisions.
     APInt NegB(-B);
     APInt TwoA( A << 1 );
@@ -3672,7 +3717,7 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
     ConstantInt *Solution1 = ConstantInt::get((NegB + SqrtVal).sdiv(TwoA));
     ConstantInt *Solution2 = ConstantInt::get((NegB - SqrtVal).sdiv(TwoA));
 
-    return std::make_pair(SE.getConstant(Solution1), 
+    return std::make_pair(SE.getConstant(Solution1),
                           SE.getConstant(Solution2));
     } // end APIntOps namespace
 }
@@ -3704,8 +3749,10 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
     // where BW is the common bit width of Start and Step.
 
     // Get the initial value for the loop.
-    const SCEV* Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
-    const SCEV* Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());
+    const SCEV *Start = getSCEVAtScope(AddRec->getStart(),
+                                       L->getParentLoop());
+    const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1),
+                                      L->getParentLoop());
 
     if (const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step)) {
       // For now we handle only constant steps.
@@ -3736,7 +3783,7 @@ const SCEV* ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
 #endif
       // Pick the smallest positive root value.
       if (ConstantInt *CB =
-          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, 
+          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                    R1->getValue(), R2->getValue()))) {
         if (CB->getZExtValue() == false)
           std::swap(R1, R2);   // R1 is the minimum root now.
@@ -3861,88 +3908,111 @@ bool ScalarEvolution::isLoopGuardedByCond(const Loop *L,
         LoopEntryPredicate->isUnconditional())
       continue;
 
-    ICmpInst *ICI = dyn_cast<ICmpInst>(LoopEntryPredicate->getCondition());
-    if (!ICI) continue;
+    if (isNecessaryCond(LoopEntryPredicate->getCondition(), Pred, LHS, RHS,
+                        LoopEntryPredicate->getSuccessor(0) != PredecessorDest))
+      return true;
+  }
 
-    // Now that we found a conditional branch that dominates the loop, check to
-    // see if it is the comparison we are looking for.
-    Value *PreCondLHS = ICI->getOperand(0);
-    Value *PreCondRHS = ICI->getOperand(1);
-    ICmpInst::Predicate Cond;
-    if (LoopEntryPredicate->getSuccessor(0) == PredecessorDest)
-      Cond = ICI->getPredicate();
-    else
-      Cond = ICI->getInversePredicate();
+  return false;
+}
 
-    if (Cond == Pred)
-      ; // An exact match.
-    else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
-      ; // The actual condition is beyond sufficient.
-    else
-      // Check a few special cases.
-      switch (Cond) {
-      case ICmpInst::ICMP_UGT:
-        if (Pred == ICmpInst::ICMP_ULT) {
-          std::swap(PreCondLHS, PreCondRHS);
-          Cond = ICmpInst::ICMP_ULT;
-          break;
-        }
-        continue;
-      case ICmpInst::ICMP_SGT:
-        if (Pred == ICmpInst::ICMP_SLT) {
-          std::swap(PreCondLHS, PreCondRHS);
-          Cond = ICmpInst::ICMP_SLT;
+/// isNecessaryCond - Test whether CondValue (negated if Inverse is set) is a
+/// condition at least as strict as the one described by Pred, LHS, and RHS.
+bool ScalarEvolution::isNecessaryCond(Value *CondValue,
+                                      ICmpInst::Predicate Pred,
+                                      const SCEV *LHS, const SCEV *RHS,
+                                      bool Inverse) {
+  // Recursively handle And and Or conditions.
+  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
+    if (BO->getOpcode() == Instruction::And) {
+      if (!Inverse)
+        return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+               isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+    } else if (BO->getOpcode() == Instruction::Or) {
+      if (Inverse)
+        return isNecessaryCond(BO->getOperand(0), Pred, LHS, RHS, Inverse) ||
+               isNecessaryCond(BO->getOperand(1), Pred, LHS, RHS, Inverse);
+    }
+  }
+
+  ICmpInst *ICI = dyn_cast<ICmpInst>(CondValue);
+  if (!ICI) return false;
+
+  // Now that we found a conditional branch that dominates the loop, check to
+  // see if it is the comparison we are looking for.
+  Value *PreCondLHS = ICI->getOperand(0);
+  Value *PreCondRHS = ICI->getOperand(1);
+  ICmpInst::Predicate Cond;
+  if (Inverse)
+    Cond = ICI->getInversePredicate();
+  else
+    Cond = ICI->getPredicate();
+
+  if (Cond == Pred)
+    ; // An exact match.
+  else if (!ICmpInst::isTrueWhenEqual(Cond) && Pred == ICmpInst::ICMP_NE)
+    ; // The actual condition is beyond sufficient.
+  else
+    // Check a few special cases.
+    switch (Cond) {
+    case ICmpInst::ICMP_UGT:
+      if (Pred == ICmpInst::ICMP_ULT) {
+        std::swap(PreCondLHS, PreCondRHS);
+        Cond = ICmpInst::ICMP_ULT;
+        break;
+      }
+      return false;
+    case ICmpInst::ICMP_SGT:
+      if (Pred == ICmpInst::ICMP_SLT) {
+        std::swap(PreCondLHS, PreCondRHS);
+        Cond = ICmpInst::ICMP_SLT;
+        break;
+      }
+      return false;
+    case ICmpInst::ICMP_NE:
+      // Expressions like (x >u 0) are often canonicalized to (x != 0),
+      // so check for this case by checking if the NE is comparing against
+      // a minimum or maximum constant.
+      if (!ICmpInst::isTrueWhenEqual(Pred))
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
+          const APInt &A = CI->getValue();
+          switch (Pred) {
+          case ICmpInst::ICMP_SLT:
+            if (A.isMaxSignedValue()) break;
+            return false;
+          case ICmpInst::ICMP_SGT:
+            if (A.isMinSignedValue()) break;
+            return false;
+          case ICmpInst::ICMP_ULT:
+            if (A.isMaxValue()) break;
+            return false;
+          case ICmpInst::ICMP_UGT:
+            if (A.isMinValue()) break;
+            return false;
+          default:
+            return false;
+          }
+          Cond = ICmpInst::ICMP_NE;
+          // NE is symmetric but the original comparison may not be. Swap
+          // the operands if necessary so that they match below.
+          if (isa<SCEVConstant>(LHS))
+            std::swap(PreCondLHS, PreCondRHS);
           break;
         }
-        continue;
-      case ICmpInst::ICMP_NE:
-        // Expressions like (x >u 0) are often canonicalized to (x != 0),
-        // so check for this case by checking if the NE is comparing against
-        // a minimum or maximum constant.
-        if (!ICmpInst::isTrueWhenEqual(Pred))
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(PreCondRHS)) {
-            const APInt &A = CI->getValue();
-            switch (Pred) {
-            case ICmpInst::ICMP_SLT:
-              if (A.isMaxSignedValue()) break;
-              continue;
-            case ICmpInst::ICMP_SGT:
-              if (A.isMinSignedValue()) break;
-              continue;
-            case ICmpInst::ICMP_ULT:
-              if (A.isMaxValue()) break;
-              continue;
-            case ICmpInst::ICMP_UGT:
-              if (A.isMinValue()) break;
-              continue;
-            default:
-              continue;
-            }
-            Cond = ICmpInst::ICMP_NE;
-            // NE is symmetric but the original comparison may not be. Swap
-            // the operands if necessary so that they match below.
-            if (isa<SCEVConstant>(LHS))
-              std::swap(PreCondLHS, PreCondRHS);
-            break;
-          }
-        continue;
-      default:
-        // We weren't able to reconcile the condition.
-        continue;
-      }
+      return false;
+    default:
+      // We weren't able to reconcile the condition.
+      return false;
+    }
 
-    if (!PreCondLHS->getType()->isInteger()) continue;
+  if (!PreCondLHS->getType()->isInteger()) return false;
 
-    const SCEV* PreCondLHSSCEV = getSCEV(PreCondLHS);
-    const SCEV* PreCondRHSSCEV = getSCEV(PreCondRHS);
-    if ((HasSameValue(LHS, PreCondLHSSCEV) &&
-         HasSameValue(RHS, PreCondRHSSCEV)) ||
-        (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
-         HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV))))
-      return true;
-  }
-
-  return false;
+  const SCEV *PreCondLHSSCEV = getSCEV(PreCondLHS);
+  const SCEV *PreCondRHSSCEV = getSCEV(PreCondRHS);
+  return (HasSameValue(LHS, PreCondLHSSCEV) &&
+          HasSameValue(RHS, PreCondRHSSCEV)) ||
+         (HasSameValue(LHS, getNotSCEV(PreCondRHSSCEV)) &&
+          HasSameValue(RHS, getNotSCEV(PreCondLHSSCEV)));
 }
 
 /// getBECount - Subtract the end and start values and divide by the step,
@@ -3975,9 +4045,9 @@ const SCEV* ScalarEvolution::getBECount(const SCEV* Start,
 /// HowManyLessThans - Return the number of times a backedge containing the
 /// specified less-than comparison will execute.  If not computable, return
 /// CouldNotCompute.
-ScalarEvolution::BackedgeTakenInfo ScalarEvolution::
-HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
-                 const Loop *L, bool isSigned) {
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
+                                  const Loop *L, bool isSigned) {
   // Only handle:  "ADDREC < LoopInvariant".
   if (!RHS->isLoopInvariant(L)) return CouldNotCompute;
 
@@ -4027,7 +4097,7 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
     const SCEV* Start = AddRec->getOperand(0);
 
     // Determine the minimum constant start value.
-    const SCEV* MinStart = isa<SCEVConstant>(Start) ? Start :
+    const SCEV *MinStart = isa<SCEVConstant>(Start) ? Start :
       getConstant(isSigned ? APInt::getSignedMinValue(BitWidth) :
                              APInt::getMinValue(BitWidth));
 
@@ -4070,7 +4140,7 @@ HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
 /// the condition, thus computing the exit count. If the iteration count can't
 /// be computed, an instance of SCEVCouldNotCompute is returned.
 const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
-                                                   ScalarEvolution &SE) const {
+                                                    ScalarEvolution &SE) const {
   if (Range.isFullSet())  // Infinite loop.
     return SE.getCouldNotCompute();
 
@@ -4129,7 +4199,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
 
     // Ensure that the previous value is in the range.  This is a sanity check.
     assert(Range.contains(
-           EvaluateConstantChrecAtConstant(this, 
+           EvaluateConstantChrecAtConstant(this,
            ConstantInt::get(ExitVal - One), SE)->getValue()) &&
            "Linear scev computation is off in a bad way!");
     return SE.getConstant(ExitValue);
@@ -4150,7 +4220,7 @@ const SCEV* SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,
     if (R1) {
       // Pick the smallest positive root value.
       if (ConstantInt *CB =
-          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, 
+          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT,
                                    R1->getValue(), R2->getValue()))) {
         if (CB->getZExtValue() == false)
           std::swap(R1, R2);   // R1 is the minimum root now.
@@ -4264,7 +4334,7 @@ void ScalarEvolution::releaseMemory() {
   BackedgeTakenCounts.clear();
   ConstantEvolutionLoopExitValue.clear();
   ValuesAtScopes.clear();
-  
+
   for (std::map<ConstantInt*, SCEVConstant*>::iterator
        I = SCEVConstants.begin(), E = SCEVConstants.end(); I != E; ++I)
     delete I->second;
@@ -4294,7 +4364,7 @@ void ScalarEvolution::releaseMemory() {
   for (std::map<Value*, SCEVUnknown*>::iterator I = SCEVUnknowns.begin(),
        E = SCEVUnknowns.end(); I != E; ++I)
     delete I->second;
-  
+
   SCEVConstants.clear();
   SCEVTruncates.clear();
   SCEVZeroExtends.clear();
@@ -4334,6 +4404,15 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
   }
 
   OS << "\n";
+  OS << "Loop " << L->getHeader()->getName() << ": ";
+
+  if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
+    OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+  } else {
+    OS << "Unpredictable max backedge-taken count. ";
+  }
+
+  OS << "\n";
 }
 
 void ScalarEvolution::print(raw_ostream &OS, const Module* ) const {
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c5591d7..4cc5ebc 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -51,21 +51,26 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
   if (Argument *A = dyn_cast<Argument>(V)) {
     // Check to see if there is already a cast!
     for (Value::use_iterator UI = A->use_begin(), E = A->use_end();
-         UI != E; ++UI) {
+         UI != E; ++UI)
       if ((*UI)->getType() == Ty)
         if (CastInst *CI = dyn_cast<CastInst>(cast<Instruction>(*UI)))
           if (CI->getOpcode() == opcode) {
             // If the cast isn't the first instruction of the function, move it.
-            if (BasicBlock::iterator(CI) != 
+            if (BasicBlock::iterator(CI) !=
                 A->getParent()->getEntryBlock().begin()) {
-              // If the CastInst is the insert point, change the insert point.
-              if (CI == InsertPt) ++InsertPt;
-              // Splice the cast at the beginning of the entry block.
-              CI->moveBefore(A->getParent()->getEntryBlock().begin());
+              // Recreate the cast at the beginning of the entry block.
+              // The old cast is left in place in case it is being used
+              // as an insert point.
+              Instruction *NewCI =
+                CastInst::Create(opcode, V, Ty, "",
+                                 A->getParent()->getEntryBlock().begin());
+              NewCI->takeName(CI);
+              CI->replaceAllUsesWith(NewCI);
+              return NewCI;
             }
             return CI;
           }
-    }
+
     Instruction *I = CastInst::Create(opcode, V, Ty, V->getName(),
                                       A->getParent()->getEntryBlock().begin());
     InsertedValues.insert(I);
@@ -85,10 +90,13 @@ Value *SCEVExpander::InsertCastOfTo(Instruction::CastOps opcode, Value *V,
             It = cast<InvokeInst>(I)->getNormalDest()->begin();
           while (isa<PHINode>(It)) ++It;
           if (It != BasicBlock::iterator(CI)) {
-            // If the CastInst is the insert point, change the insert point.
-            if (CI == InsertPt) ++InsertPt;
-            // Splice the cast immediately after the operand in question.
-            CI->moveBefore(It);
+            // Recreate the cast immediately after the operand in question.
+            // The old cast is left in place in case it is being used
+            // as an insert point.
+            Instruction *NewCI = CastInst::Create(opcode, V, Ty, "", It);
+            NewCI->takeName(CI);
+            CI->replaceAllUsesWith(NewCI);
+            return NewCI;
           }
           return CI;
         }
@@ -460,13 +468,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     const SCEV* Step = SE.getAnyExtendExpr(S->getStepRecurrence(SE),
                                           CanonicalIV->getType());
     Value *V = expand(SE.getAddRecExpr(Start, Step, S->getLoop()));
-    BasicBlock::iterator SaveInsertPt = getInsertionPoint();
+    BasicBlock::iterator SaveInsertPt = InsertPt;
     BasicBlock::iterator NewInsertPt =
       next(BasicBlock::iterator(cast<Instruction>(V)));
     while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
     V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
                       NewInsertPt);
-    setInsertionPoint(SaveInsertPt);
+    InsertPt = SaveInsertPt;
     return V;
   }
 
@@ -497,8 +505,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
       }
     }
 
-    Value *RestV = expand(Rest);
-    return expand(SE.getAddExpr(S->getStart(), SE.getUnknown(RestV)));
+    // Just do a normal add. Pre-expand the operands to suppress folding.
+    return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
+                                SE.getUnknown(expand(Rest))));
   }
 
   // {0,+,1} --> Insert a canonical induction variable into the loop!
@@ -546,36 +555,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
              getOrInsertCanonicalInductionVariable(L, Ty);
 
   // If this is a simple linear addrec, emit it now as a special case.
-  if (S->isAffine()) {   // {0,+,F} --> i*F
-    Value *F = expandCodeFor(S->getOperand(1), Ty);
-
-    // If the insert point is directly inside of the loop, emit the multiply at
-    // the insert point.  Otherwise, L is a loop that is a parent of the insert
-    // point loop.  If we can, move the multiply to the outer most loop that it
-    // is safe to be in.
-    BasicBlock::iterator MulInsertPt = getInsertionPoint();
-    Loop *InsertPtLoop = SE.LI->getLoopFor(MulInsertPt->getParent());
-    if (InsertPtLoop != L && InsertPtLoop &&
-        L->contains(InsertPtLoop->getHeader())) {
-      do {
-        // If we cannot hoist the multiply out of this loop, don't.
-        if (!InsertPtLoop->isLoopInvariant(F)) break;
-
-        BasicBlock *InsertPtLoopPH = InsertPtLoop->getLoopPreheader();
-
-        // If this loop hasn't got a preheader, we aren't able to hoist the
-        // multiply.
-        if (!InsertPtLoopPH)
-          break;
-
-        // Otherwise, move the insert point to the preheader.
-        MulInsertPt = InsertPtLoopPH->getTerminator();
-        InsertPtLoop = InsertPtLoop->getParentLoop();
-      } while (InsertPtLoop != L);
-    }
-    
-    return InsertBinop(Instruction::Mul, I, F, MulInsertPt);
-  }
+  if (S->isAffine())    // {0,+,F} --> i*F
+    return
+      expand(SE.getTruncateOrNoop(
+        SE.getMulExpr(SE.getUnknown(I),
+                      SE.getNoopOrAnyExtend(S->getOperand(1),
+                                            I->getType())),
+        Ty));
 
   // If this is a chain of recurrences, turn it into a closed form, using the
   // folders, then expandCodeFor the closed form.  This allows the folders to
@@ -666,14 +652,42 @@ Value *SCEVExpander::expandCodeFor(const SCEV* SH, const Type *Ty) {
 }
 
 Value *SCEVExpander::expand(const SCEV *S) {
-  // Check to see if we already expanded this.
-  std::map<const SCEV*, AssertingVH<Value> >::iterator I =
-    InsertedExpressions.find(S);
-  if (I != InsertedExpressions.end())
+  BasicBlock::iterator SaveInsertPt = InsertPt;
+
+  // Compute an insertion point for this SCEV object. Hoist the instructions
+  // as far out in the loop nest as possible.
+  for (Loop *L = SE.LI->getLoopFor(InsertPt->getParent()); ;
+       L = L->getParentLoop())
+    if (S->isLoopInvariant(L)) {
+      if (!L) break;
+      if (BasicBlock *Preheader = L->getLoopPreheader())
+        InsertPt = Preheader->getTerminator();
+    } else {
+      // If the SCEV is computable at this level, insert it into the header
+      // after the PHIs (and after any other instructions that we've inserted
+      // there) so that it is guaranteed to dominate any user inside the loop.
+      if (L && S->hasComputableLoopEvolution(L))
+        InsertPt = L->getHeader()->getFirstNonPHI();
+      while (isInsertedInstruction(InsertPt)) ++InsertPt;
+      break;
+    }
+
+  // Check to see if we already expanded this here.
+  std::map<std::pair<const SCEV *, Instruction *>,
+           AssertingVH<Value> >::iterator I =
+    InsertedExpressions.find(std::make_pair(S, InsertPt));
+  if (I != InsertedExpressions.end()) {
+    InsertPt = SaveInsertPt;
     return I->second;
-  
+  }
+
+  // Expand the expression into instructions.
   Value *V = visit(S);
-  InsertedExpressions[S] = V;
+
+  // Remember the expanded value for this SCEV at this location.
+  InsertedExpressions[std::make_pair(S, InsertPt)] = V;
+
+  InsertPt = SaveInsertPt;
   return V;
 }
 
@@ -686,6 +700,9 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                                     const Type *Ty) {
   assert(Ty->isInteger() && "Can only insert integer induction variables!");
   const SCEV* H = SE.getAddRecExpr(SE.getIntegerSCEV(0, Ty),
-                                  SE.getIntegerSCEV(1, Ty), L);
-  return expand(H);
+                                   SE.getIntegerSCEV(1, Ty), L);
+  BasicBlock::iterator SaveInsertPt = InsertPt;
+  Value *V = expandCodeFor(H, 0, L->getHeader()->begin());
+  InsertPt = SaveInsertPt;
+  return V;
 }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index e931904..bc3af9a 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -45,8 +45,9 @@ AsmPrinter::AsmPrinter(raw_ostream &o, TargetMachine &tm,
                        const TargetAsmInfo *T, CodeGenOpt::Level OL, bool VDef)
   : MachineFunctionPass(&ID), FunctionNumber(0), OptLevel(OL), O(o),
     TM(tm), TAI(T), TRI(tm.getRegisterInfo()),
-    IsInTextSection(false)
-{
+    IsInTextSection(false), LastMI(0), LastFn(0), Counter(~0U),
+    PrevDLT(0, ~0U, ~0U) {
+  DW = 0; MMI = 0;
   switch (AsmVerbose) {
   case cl::BOU_UNSET: VerboseAsm = VDef;  break;
   case cl::BOU_TRUE:  VerboseAsm = true;  break;
@@ -177,28 +178,44 @@ bool AsmPrinter::doInitialization(Module &M) {
 
   SwitchToDataSection("");   // Reset back to no section.
   
-  if (TAI->doesSupportDebugInformation() 
-      || TAI->doesSupportExceptionHandling()) {
-    MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-    if (MMI) {
+  if (TAI->doesSupportDebugInformation() ||
+      TAI->doesSupportExceptionHandling()) {
+    MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+    if (MMI)
       MMI->AnalyzeModule(M);
-      DW = getAnalysisIfAvailable<DwarfWriter>();
-      if (DW)
-        DW->BeginModule(&M, MMI, O, this, TAI);
-    }
+    DW = getAnalysisIfAvailable<DwarfWriter>();
+    if (DW)
+      DW->BeginModule(&M, MMI, O, this, TAI);
   }
 
   return false;
 }
 
 bool AsmPrinter::doFinalization(Module &M) {
+  // Emit final debug information.
+  if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
+    DW->EndModule();
+  
+  // If the target wants to know about weak references, print them all.
   if (TAI->getWeakRefDirective()) {
-    if (!ExtWeakSymbols.empty())
-      SwitchToDataSection("");
-
-    for (std::set<const GlobalValue*>::iterator i = ExtWeakSymbols.begin(),
-         e = ExtWeakSymbols.end(); i != e; ++i)
-      O << TAI->getWeakRefDirective() << Mang->getValueName(*i) << '\n';
+    // FIXME: This is not lazy, it would be nice to only print weak references
+    // to stuff that is actually used.  Note that doing so would require targets
+    // to notice uses in operands (due to constant exprs etc).  This should
+    // happen with the MC stuff eventually.
+    SwitchToDataSection("");
+
+    // Print out module-level global variables here.
+    for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
+         I != E; ++I) {
+      if (I->hasExternalWeakLinkage())
+        O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n';
+    }
+    
+    for (Module::const_iterator I = M.begin(), E = M.end();
+         I != E; ++I) {
+      if (I->hasExternalWeakLinkage())
+        O << TAI->getWeakRefDirective() << Mang->getValueName(I) << '\n';
+    }
   }
 
   if (TAI->getSetDirective()) {
@@ -207,7 +224,7 @@ bool AsmPrinter::doFinalization(Module &M) {
 
     O << '\n';
     for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();
-         I!=E; ++I) {
+         I != E; ++I) {
       std::string Name = Mang->getValueName(I);
       std::string Target;
 
@@ -235,12 +252,13 @@ bool AsmPrinter::doFinalization(Module &M) {
 
   // If we don't have any trampolines, then we don't require stack memory
   // to be executable. Some targets have a directive to declare this.
-  Function* InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
+  Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline");
   if (!InitTrampolineIntrinsic || InitTrampolineIntrinsic->use_empty())
     if (TAI->getNonexecutableStackDirective())
       O << TAI->getNonexecutableStackDirective() << '\n';
 
   delete Mang; Mang = 0;
+  DW = 0; MMI = 0;
   return false;
 }
 
@@ -1298,20 +1316,15 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
     if (VerboseAsm)
       O << TAI->getCommentString();
   } else if (!strcmp(Code, "uid")) {
-    // Assign a unique ID to this machine instruction.
-    static const MachineInstr *LastMI = 0;
-    static const Function *F = 0;
-    static unsigned Counter = 0U-1;
-
     // Comparing the address of MI isn't sufficient, because machineinstrs may
     // be allocated to the same address across functions.
     const Function *ThisF = MI->getParent()->getParent()->getFunction();
     
-    // If this is a new machine instruction, bump the counter.
-    if (LastMI != MI || F != ThisF) {
+    // If this is a new machine instruction, bump the counter.
+    if (LastMI != MI || LastFn != ThisF) {
       ++Counter;
       LastMI = MI;
-      F = ThisF;
+      LastFn = ThisF;
     }
     O << Counter;
   } else {
@@ -1326,7 +1339,6 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
 void AsmPrinter::processDebugLoc(DebugLoc DL) {
   if (TAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
     if (!DL.isUnknown()) {
-      static DebugLocTuple PrevDLT(0, ~0U, ~0U);
       DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
 
       if (CurDLT.CompileUnit != 0 && PrevDLT != CurDLT)
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index dc149cf..01c431c 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -126,7 +126,6 @@ void DIE::Profile(FoldingSetNodeID &ID) {
 
 #ifndef NDEBUG
 void DIE::print(std::ostream &O, unsigned IncIndent) {
-  static unsigned IndentCount = 0;
   IndentCount += IncIndent;
   const std::string Indent(IndentCount, ' ');
   bool isBlock = Abbrev.getTag() == 0;
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index b14d91c..5b60327 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -141,9 +141,13 @@ namespace llvm {
 
     /// Abstract compile unit.
     CompileUnit *AbstractCU;
+    
+    // Private data for print()
+    mutable unsigned IndentCount;
   public:
     explicit DIE(unsigned Tag)
-      : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0), Size(0) {}
+      : Abbrev(Tag, dwarf::DW_CHILDREN_no), Offset(0),
+        Size(0), IndentCount(0) {}
     virtual ~DIE();
 
     // Accessors.
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 9d340e3..cbe542b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -141,9 +141,12 @@ class VISIBILITY_HIDDEN DbgScope {
   SmallVector<DbgScope *, 4> Scopes;  // Scopes defined in scope.
   SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
   SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs.
+  
+  // Private state for dump()
+  mutable unsigned IndentLevel;
 public:
   DbgScope(DbgScope *P, DIDescriptor D)
-    : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0) {}
+    : Parent(P), Desc(D), StartLabelID(0), EndLabelID(0), IndentLevel(0) {}
   virtual ~DbgScope();
 
   // Accessors.
@@ -176,7 +179,6 @@ public:
 
 #ifndef NDEBUG
 void DbgScope::dump() const {
-  static unsigned IndentLevel = 0;
   std::string Indent(IndentLevel, ' ');
 
   cerr << Indent; Desc.dump();
@@ -1240,27 +1242,7 @@ void DwarfDebug::ConstructCompileUnit(GlobalVariable *GV) {
   CompileUnits.push_back(Unit);
 }
 
-/// ConstructCompileUnits - Create a compile unit DIEs.
-void DwarfDebug::ConstructCompileUnits() {
-  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.compile_units");
-  if (!Root)
-    return;
-  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
-         "Malformed compile unit descriptor anchor type");
-  Constant *RootC = cast<Constant>(*Root->use_begin());
-  assert(RootC->hasNUsesOrMore(1) &&
-         "Malformed compile unit descriptor anchor type");
-
-  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
-       UI != UE; ++UI)
-    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
-         UUI != UUE; ++UUI) {
-      GlobalVariable *GV = cast<GlobalVariable>(*UUI);
-      ConstructCompileUnit(GV);
-    }
-}
-
-bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
+void DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
   DIGlobalVariable DI_GV(GV);
   CompileUnit *DW_Unit = MainCU;
   if (!DW_Unit)
@@ -1269,7 +1251,7 @@ bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
   // Check for pre-existence.
   DIE *&Slot = DW_Unit->getDieMapSlotFor(DI_GV.getGV());
   if (Slot)
-    return false;
+    return;
 
   DIE *VariableDie = CreateGlobalVariableDIE(DW_Unit, DI_GV);
 
@@ -1290,33 +1272,10 @@ bool DwarfDebug::ConstructGlobalVariableDIE(GlobalVariable *GV) {
   // Expose as global. FIXME - need to check external flag.
   std::string Name;
   DW_Unit->AddGlobal(DI_GV.getName(Name), VariableDie);
-  return true;
+  return;
 }
 
-/// ConstructGlobalVariableDIEs - Create DIEs for each of the externally visible
-/// global variables. Return true if at least one global DIE is created.
-bool DwarfDebug::ConstructGlobalVariableDIEs() {
-  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.global_variables");
-  if (!Root)
-    return false;
-
-  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
-         "Malformed global variable descriptor anchor type");
-  Constant *RootC = cast<Constant>(*Root->use_begin());
-  assert(RootC->hasNUsesOrMore(1) &&
-         "Malformed global variable descriptor anchor type");
-
-  bool Result = false;
-  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
-       UI != UE; ++UI)
-    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
-         UUI != UUE; ++UUI)
-      Result |= ConstructGlobalVariableDIE(cast<GlobalVariable>(*UUI));
-
-  return Result;
-}
-
-bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
+void DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
   DISubprogram SP(GV);
   CompileUnit *Unit = MainCU;
   if (!Unit)
@@ -1325,12 +1284,12 @@ bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
   // Check for pre-existence.
   DIE *&Slot = Unit->getDieMapSlotFor(GV);
   if (Slot)
-    return false;
+    return;
 
   if (!SP.isDefinition())
     // This is a method declaration which will be handled while constructing
     // class type.
-    return false;
+    return;
 
   DIE *SubprogramDie = CreateSubprogramDIE(Unit, SP);
 
@@ -1343,40 +1302,27 @@ bool DwarfDebug::ConstructSubprogram(GlobalVariable *GV) {
   // Expose as global.
   std::string Name;
   Unit->AddGlobal(SP.getName(Name), SubprogramDie);
-  return true;
+  return;
 }
 
-/// ConstructSubprograms - Create DIEs for each of the externally visible
-/// subprograms. Return true if at least one subprogram DIE is created.
-bool DwarfDebug::ConstructSubprograms() {
-  GlobalVariable *Root = M->getGlobalVariable("llvm.dbg.subprograms");
-  if (!Root)
-    return false;
-
-  assert(Root->hasLinkOnceLinkage() && Root->hasOneUse() &&
-         "Malformed subprogram descriptor anchor type");
-  Constant *RootC = cast<Constant>(*Root->use_begin());
-  assert(RootC->hasNUsesOrMore(1) &&
-         "Malformed subprogram descriptor anchor type");
+/// BeginModule - Emit all Dwarf sections that should come prior to the
+/// content. Create global DIEs and emit initial debug info sections.
+/// This is invoked by the target AsmPrinter.
+void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
+  this->M = M;
 
-  bool Result = false;
-  for (Value::use_iterator UI = RootC->use_begin(), UE = Root->use_end();
-       UI != UE; ++UI)
-    for (Value::use_iterator UUI = UI->use_begin(), UUE = UI->use_end();
-         UUI != UUE; ++UUI)
-      Result |= ConstructSubprogram(cast<GlobalVariable>(*UUI));
-
-  return Result;
-}
-
-/// SetDebugInfo - Create global DIEs and emit initial debug info sections.
-/// This is inovked by the target AsmPrinter.
-void DwarfDebug::SetDebugInfo(MachineModuleInfo *mmi) {
   if (TimePassesIsEnabled)
     DebugTimer->startTimer();
 
+  SmallVector<GlobalVariable *, 2> CUs;
+  SmallVector<GlobalVariable *, 4> GVs;
+  SmallVector<GlobalVariable *, 4> SPs;
+  CollectDebugInfoAnchors(*M, CUs, GVs, SPs);
+
   // Create all the compile unit DIEs.
-  ConstructCompileUnits();
+  for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(),
+         E = CUs.end(); I != E; ++I) 
+    ConstructCompileUnit(*I);
 
   if (CompileUnits.empty()) {
     if (TimePassesIsEnabled)
@@ -1385,21 +1331,25 @@ void DwarfDebug::SetDebugInfo(MachineModuleInfo *mmi) {
     return;
   }
 
-  // Create DIEs for each of the externally visible global variables.
-  bool globalDIEs = ConstructGlobalVariableDIEs();
-
-  // Create DIEs for each of the externally visible subprograms.
-  bool subprogramDIEs = ConstructSubprograms();
-
   // If there is not any debug info available for any global variables and any
   // subprograms then there is not any debug info to emit.
-  if (!globalDIEs && !subprogramDIEs) {
+  if (GVs.empty() && SPs.empty()) {
     if (TimePassesIsEnabled)
       DebugTimer->stopTimer();
 
     return;
   }
 
+  // Create DIEs for each of the externally visible global variables.
+  for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(),
+         E = GVs.end(); I != E; ++I) 
+    ConstructGlobalVariableDIE(*I);
+
+  // Create DIEs for each of the externally visible subprograms.
+  for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(),
+         E = SPs.end(); I != E; ++I) 
+    ConstructSubprogram(*I);
+
   MMI = mmi;
   shouldEmit = true;
   MMI->setDebugInfoAvailability(true);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 9824566..111ec33 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -460,21 +460,10 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
 
   void ConstructCompileUnit(GlobalVariable *GV);
 
-  /// ConstructCompileUnits - Create a compile unit DIEs.
-  void ConstructCompileUnits();
+  void ConstructGlobalVariableDIE(GlobalVariable *GV);
 
-  bool ConstructGlobalVariableDIE(GlobalVariable *GV);
+  void ConstructSubprogram(GlobalVariable *GV);
 
-  /// ConstructGlobalVariableDIEs - Create DIEs for each of the externally 
-  /// visible global variables. Return true if at least one global DIE is
-  /// created.
-  bool ConstructGlobalVariableDIEs();
-
-  bool ConstructSubprogram(GlobalVariable *GV);
-
-  /// ConstructSubprograms - Create DIEs for each of the externally visible
-  /// subprograms. Return true if at least one subprogram DIE is created.
-  bool ConstructSubprograms();
 public:
   //===--------------------------------------------------------------------===//
   // Main entry points.
@@ -486,15 +475,9 @@ public:
   /// be emitted.
   bool ShouldEmitDwarfDebug() const { return shouldEmit; }
 
-  /// SetDebugInfo - Create global DIEs and emit initial debug info sections.
-  /// This is inovked by the target AsmPrinter.
-  void SetDebugInfo(MachineModuleInfo *mmi);
-
   /// BeginModule - Emit all Dwarf sections that should come prior to the
   /// content.
-  void BeginModule(Module *M) {
-    this->M = M;
-  }
+  void BeginModule(Module *M, MachineModuleInfo *MMI);
 
   /// EndModule - Emit all Dwarf sections that should come after the content.
   ///
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index 4479af2..f1c3e56 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -149,16 +149,11 @@ public:
   DwarfException(raw_ostream &OS, AsmPrinter *A, const TargetAsmInfo *T);
   virtual ~DwarfException();
 
-  /// SetModuleInfo - Set machine module information when it's known that pass
-  /// manager has created it.  Set by the target AsmPrinter.
-  void SetModuleInfo(MachineModuleInfo *mmi) {
-    MMI = mmi;
-  }
-
   /// BeginModule - Emit all exception information that should come prior to the
   /// content.
-  void BeginModule(Module *M) {
-    this->M = M;
+  void BeginModule(Module *m, MachineModuleInfo *mmi) {
+    this->M = m;
+    this->MMI = mmi;
   }
 
   /// EndModule - Emit all exception information that should come after the
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 483ee559..89084989 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -42,10 +42,8 @@ void DwarfWriter::BeginModule(Module *M,
                               const TargetAsmInfo *T) {
   DE = new DwarfException(OS, A, T);
   DD = new DwarfDebug(OS, A, T);
-  DE->BeginModule(M);
-  DD->BeginModule(M);
-  DD->SetDebugInfo(MMI);
-  DE->SetModuleInfo(MMI);
+  DE->BeginModule(M, MMI);
+  DD->BeginModule(M, MMI);
 }
 
 /// EndModule - Emit all Dwarf sections that should come after the content.
diff --git a/lib/CodeGen/AsmPrinter/Makefile b/lib/CodeGen/AsmPrinter/Makefile
index cb5b3f6..8f65d8d 100644
--- a/lib/CodeGen/AsmPrinter/Makefile
+++ b/lib/CodeGen/AsmPrinter/Makefile
@@ -9,7 +9,5 @@
 LEVEL = ../../..
 LIBRARYNAME = LLVMAsmPrinter
 PARALLEL_DIRS =
-BUILD_ARCHIVE = 1
-DONT_BUILD_RELINKED = 1
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 5ba8b3c..eeefe31 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -63,3 +63,5 @@ add_llvm_library(LLVMCodeGen
   VirtRegMap.cpp
   VirtRegRewriter.cpp
   )
+
+target_link_libraries (LLVMCodeGen LLVMCore)
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index 28b6be8..8d92373 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -144,6 +144,9 @@ namespace llvm {
     uint8_t Other;
     unsigned short SectionIdx;
 
+    // Symbol index into the Symbol table
+    unsigned SymTabIdx;
+
     enum { 
       STB_LOCAL = 0,
       STB_GLOBAL = 1,
@@ -168,7 +171,8 @@ namespace llvm {
     ELFSym(const GlobalValue *gv) : GV(gv), IsCommon(false), IsBss(false),
                                     IsConstant(false), NameIdx(0), Value(0),
                                     Size(0), Info(0), Other(STV_DEFAULT),
-                                    SectionIdx(ELFSection::SHN_UNDEF) {
+                                    SectionIdx(ELFSection::SHN_UNDEF),
+                                    SymTabIdx(0) {
       if (!GV)
         return;
 
@@ -191,6 +195,10 @@ namespace llvm {
       return (Info >> 4) & 0xf;
     }
 
+    unsigned getType() {
+      return Info & 0xf;
+    }
+
     void setBind(unsigned X) {
       assert(X == (X & 0xF) && "Bind value out of range!");
       Info = (Info & 0x0F) | (X << 4);
diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp
index 8cb7c94..168fed5 100644
--- a/lib/CodeGen/ELFCodeEmitter.cpp
+++ b/lib/CodeGen/ELFCodeEmitter.cpp
@@ -16,6 +16,7 @@
 #include "llvm/CodeGen/BinaryObject.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/Debug.h"
 
@@ -103,21 +104,28 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
     break;
   }
 
+  // Emit constant pool to appropriate section(s)
+  emitConstantPool(MF.getConstantPool());
+
   // Relocations
   // -----------
-  // If we have emitted any relocations to function-specific objects such as 
+  // If we have emitted any relocations to function-specific objects such as
   // basic blocks, constant pools entries, or jump tables, record their
   // addresses now so that we can rewrite them with the correct addresses
   // later.
   for (unsigned i = 0, e = Relocations.size(); i != e; ++i) {
     MachineRelocation &MR = Relocations[i];
     intptr_t Addr;
-    if (MR.isBasicBlock()) {
+    if (MR.isGlobalValue()) {
+      EW.PendingGlobals.insert(MR.getGlobalValue());
+    } else if (MR.isBasicBlock()) {
       Addr = getMachineBasicBlockAddress(MR.getBasicBlock());
       MR.setConstantVal(ES->SectionIdx);
       MR.setResultPointer((void*)Addr);
-    } else if (MR.isGlobalValue()) {
-      EW.PendingGlobals.insert(MR.getGlobalValue());
+    } else if (MR.isConstantPoolIndex()) {
+      Addr = getConstantPoolEntryAddress(MR.getConstantPoolIndex());
+      MR.setConstantVal(CPSections[MR.getConstantPoolIndex()]);
+      MR.setResultPointer((void*)Addr);
     } else {
       assert(0 && "Unhandled relocation type");
     }
@@ -128,4 +136,36 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) {
   return false;
 }
 
+/// emitConstantPool - For each entry of MCP, select its ELF section, record
+/// the entry's offset and section index (by pool index), and emit its value.
+void ELFCodeEmitter::emitConstantPool(MachineConstantPool *MCP) {
+  const std::vector<MachineConstantPoolEntry> &CP = MCP->getConstants();
+  if (CP.empty()) return;
+
+  // TODO: handle PIC codegen
+  assert(TM.getRelocationModel() != Reloc::PIC_ &&
+         "PIC codegen not yet handled for elf constant pools!");
+
+  const TargetAsmInfo *TAI = TM.getTargetAsmInfo();
+  for (unsigned i = 0, e = CP.size(); i != e; ++i) {
+    MachineConstantPoolEntry CPE = CP[i];
+
+    // Get the right ELF Section for this constant pool entry
+    std::string CstPoolName =
+      TAI->SelectSectionForMachineConst(CPE.getType())->getName();
+    ELFSection &CstPoolSection =
+      EW.getConstantPoolSection(CstPoolName, CPE.getAlignment());
+
+    // Record the entry's offset (current section size) and its section index
+    CPLocations.push_back(CstPoolSection.size());
+    CPSections.push_back(CstPoolSection.SectionIdx);
+    // Only plain Constant entries are handled; the emission reads Val.ConstVal
+    if (CPE.isMachineConstantPoolEntry())
+      assert(0 && "CPE.isMachineConstantPoolEntry not supported yet");
+
+    // Emit the constant to constant pool section
+    EW.EmitGlobalConstant(CPE.Val.ConstVal, CstPoolSection);
+  }
+}
+
 } // end namespace llvm
diff --git a/lib/CodeGen/ELFCodeEmitter.h b/lib/CodeGen/ELFCodeEmitter.h
index c0289da..c309ef7 100644
--- a/lib/CodeGen/ELFCodeEmitter.h
+++ b/lib/CodeGen/ELFCodeEmitter.h
@@ -31,6 +31,14 @@ namespace llvm {
     /// emitted.
     std::vector<MachineRelocation> Relocations;
 
+    /// CPLocations - This is a map of constant pool indices to offsets from the
+    /// start of the section for that constant pool index.
+    std::vector<uintptr_t> CPLocations;
+
+    /// CPSections - This is a map of constant pool indices to the index of the
+    /// ELF section containing the constant pool entry for that index.
+    std::vector<unsigned> CPSections;
+
     /// MBBLocations - This vector is a mapping from MBB ID's to their address.
     /// It is filled in by the StartMachineBasicBlock callback and queried by
     /// the getMachineBasicBlockAddress callback.
@@ -62,9 +70,10 @@ namespace llvm {
     }
 
     virtual uintptr_t getConstantPoolEntryAddress(unsigned Index) const {
-      assert(0 && "CP not implementated yet!");
-      return 0;
+      assert(CPLocations.size() > Index && "CP not emitted!");
+      return CPLocations[Index];
     }
+
     virtual uintptr_t getJumpTableEntryAddress(unsigned Index) const {
       assert(0 && "JT not implementated yet!");
       return 0;
@@ -86,6 +95,10 @@ namespace llvm {
       abort();
     }
 
+    /// emitConstantPool - For each constant pool entry, figure out which section
+    /// the constant should live in and emit the constant.
+    void emitConstantPool(MachineConstantPool *MCP);
+
     virtual void setModuleInfo(llvm::MachineModuleInfo* MMI) { }
 
     /// JIT SPECIFIC FUNCTIONS - DO NOT IMPLEMENT THESE HERE!
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index 03db656..041defa 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -389,6 +389,24 @@ bool ELFWriter::doFinalization(Module &M) {
   if (TAI->getNonexecutableStackDirective())
     getNonExecStackSection();
 
+  // Emit a symbol for each section created until now
+  for (std::map<std::string, ELFSection*>::iterator I = SectionLookup.begin(),
+       E = SectionLookup.end(); I != E; ++I) {
+    ELFSection *ES = I->second;
+
+    // Skip null section
+    if (ES->SectionIdx == 0) continue;
+
+    ELFSym SectionSym(0);
+    SectionSym.SectionIdx = ES->SectionIdx;
+    SectionSym.Size = 0;
+    SectionSym.setBind(ELFSym::STB_LOCAL);
+    SectionSym.setType(ELFSym::STT_SECTION);
+
+    // Local symbols go at the front of the list
+    SymbolList.push_front(SectionSym);
+  }
+
   // Emit string table
   EmitStringTable();
 
@@ -451,15 +469,25 @@ void ELFWriter::EmitRelocations() {
 
       // Constant addend used to compute the value to be stored 
       // into the relocatable field
-      int64_t Addend = TEW->getAddendForRelTy(RelType);
+      int64_t Addend = 0;
 
       // There are several machine relocations types, and each one of
       // them needs a different approach to retrieve the symbol table index.
       if (MR.isGlobalValue()) {
         const GlobalValue *G = MR.getGlobalValue();
         SymIdx = GblSymLookup[G];
+        Addend = TEW->getAddendForRelTy(RelType);
       } else {
-        assert(0 && "dunno how to handle other relocation types");
+        unsigned SectionIdx = MR.getConstantVal();
+        // TODO: use a map for this.
+        for (std::list<ELFSym>::iterator I = SymbolList.begin(),
+             E = SymbolList.end(); I != E; ++I)
+          if ((SectionIdx == I->SectionIdx) &&
+              (I->getType() == ELFSym::STT_SECTION)) {
+            SymIdx = I->SymTabIdx;
+            break;
+          }
+        Addend = (uint64_t)MR.getResultPointer();
       }
 
       // Get the relocation entry and emit to the relocation section
@@ -540,7 +568,8 @@ void ELFWriter::EmitStringTable() {
        E = SymbolList.end(); I != E; ++I) {
 
     // Use the name mangler to uniquify the LLVM symbol.
-    std::string Name = Mang->getValueName(I->GV);
+    std::string Name;
+    if (I->GV) Name.append(Mang->getValueName(I->GV));
 
     if (Name.empty()) {
       I->NameIdx = 0;
@@ -589,7 +618,11 @@ void ELFWriter::EmitSymbolTable() {
     EmitSymbol(SymTab, *I);
 
     // Record the symbol table index for each global value
-    GblSymLookup[I->GV] = Index;
+    if (I->GV)
+      GblSymLookup[I->GV] = Index;
+
+    // Keep track of the symbol's index in the symbol table
+    I->SymTabIdx = Index;
   }
 
   SymTab.Info = FirstNonLocalSymbol;
diff --git a/lib/CodeGen/ELFWriter.h b/lib/CodeGen/ELFWriter.h
index 39577d9..e0e71d0 100644
--- a/lib/CodeGen/ELFWriter.h
+++ b/lib/CodeGen/ELFWriter.h
@@ -147,6 +147,12 @@ namespace llvm {
                         ELFSection::SHF_EXECINSTR | ELFSection::SHF_ALLOC);
     }
 
+    /// Get a constant pool section based on the section name returned by TAI
+    ELFSection &getConstantPoolSection(std::string SName, unsigned Align) {
+      return getSection(SName, ELFSection::SHT_PROGBITS,
+                        ELFSection::SHF_MERGE | ELFSection::SHF_ALLOC, Align);
+    }
+
     /// Return the relocation section of section 'S'. 'RelA' is true
     /// if the relocation section contains entries with addends.
     ELFSection &getRelocSection(std::string SName, bool RelA) {
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 4d5c3c2..d5e7ea5 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -144,9 +144,10 @@ namespace {
     const TargetLowering *TLI;
     const TargetInstrInfo *TII;
     bool MadeChange;
+    int FnNum;
   public:
     static char ID;
-    IfConverter() : MachineFunctionPass(&ID) {}
+    IfConverter() : MachineFunctionPass(&ID), FnNum(-1) {}
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
     virtual const char *getPassName() const { return "If Converter"; }
@@ -225,7 +226,6 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getTarget().getInstrInfo();
   if (!TII) return false;
 
-  static int FnNum = -1;
   DOUT << "\nIfcvt: function (" << ++FnNum <<  ") \'"
        << MF.getFunction()->getName() << "\'";
 
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index e6912b8..052334a 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -61,18 +61,16 @@ static void EnsureFPIntrinsicsExist(Module &M, Function *Fn,
 template <class ArgIt>
 static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
                                  ArgIt ArgBegin, ArgIt ArgEnd,
-                                 const Type *RetTy, Constant *&FCache) {
-  if (!FCache) {
-    // If we haven't already looked up this function, check to see if the
-    // program already contains a function with this name.
-    Module *M = CI->getParent()->getParent()->getParent();
-    // Get or insert the definition now.
-    std::vector<const Type *> ParamTys;
-    for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
-      ParamTys.push_back((*I)->getType());
-    FCache = M->getOrInsertFunction(NewFn,
-                                    FunctionType::get(RetTy, ParamTys, false));
-  }
+                                 const Type *RetTy) {
+  // Find the module containing the call; the replacement function is looked up
+  // (or declared) there on every call, since results are no longer cached.
+  Module *M = CI->getParent()->getParent()->getParent();
+  // Get or insert the definition now.
+  std::vector<const Type *> ParamTys;
+  for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
+    ParamTys.push_back((*I)->getType());
+  Constant* FCache = M->getOrInsertFunction(NewFn,
+                                  FunctionType::get(RetTy, ParamTys, false));
 
   IRBuilder<> Builder(CI->getParent(), CI);
   SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
@@ -624,25 +622,24 @@ static Instruction *LowerPartSet(CallInst *CI) {
   return NewCI;
 }
 
-static void ReplaceFPIntrinsicWithCall(CallInst *CI, Constant *FCache,
-                                       Constant *DCache, Constant *LDCache,
-                                       const char *Fname, const char *Dname,
+static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname,
+                                       const char *Dname,
                                        const char *LDname) {
   switch (CI->getOperand(1)->getType()->getTypeID()) {
   default: assert(0 && "Invalid type in intrinsic"); abort();
   case Type::FloatTyID:
     ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(),
-                  Type::FloatTy, FCache);
+                  Type::FloatTy);
     break;
   case Type::DoubleTyID:
     ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(),
-                  Type::DoubleTy, DCache);
+                  Type::DoubleTy);
     break;
   case Type::X86_FP80TyID:
   case Type::FP128TyID:
   case Type::PPC_FP128TyID:
     ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(),
-                  CI->getOperand(1)->getType(), LDCache);
+                  CI->getOperand(1)->getType());
     break;
   }
 }
@@ -668,9 +665,8 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     // by the lowerinvoke pass.  In both cases, the right thing to do is to
     // convert the call to an explicit setjmp or longjmp call.
   case Intrinsic::setjmp: {
-    static Constant *SetjmpFCache = 0;
     Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(),
-                               Type::Int32Ty, SetjmpFCache);
+                               Type::Int32Ty);
     if (CI->getType() != Type::VoidTy)
       CI->replaceAllUsesWith(V);
     break;
@@ -681,17 +677,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
      break;
 
   case Intrinsic::longjmp: {
-    static Constant *LongjmpFCache = 0;
     ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(),
-                    Type::VoidTy, LongjmpFCache);
+                    Type::VoidTy);
     break;
   }
 
   case Intrinsic::siglongjmp: {
     // Insert the call to abort
-    static Constant *AbortFCache = 0;
     ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), 
-                    Type::VoidTy, AbortFCache);
+                    Type::VoidTy);
     break;
   }
   case Intrinsic::ctpop:
@@ -728,7 +722,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
 
   case Intrinsic::stacksave:
   case Intrinsic::stackrestore: {
-    static bool Warned = false;
     if (!Warned)
       cerr << "WARNING: this target does not support the llvm.stack"
            << (Callee->getIntrinsicID() == Intrinsic::stacksave ?
@@ -783,7 +776,6 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     break;   // Strip out annotate intrinsic
     
   case Intrinsic::memcpy: {
-    static Constant *MemcpyFCache = 0;
     const IntegerType *IntPtr = TD.getIntPtrType();
     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
                                         /* isSigned */ false);
@@ -791,12 +783,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     Ops[0] = CI->getOperand(1);
     Ops[1] = CI->getOperand(2);
     Ops[2] = Size;
-    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
-                    MemcpyFCache);
+    ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType());
     break;
   }
   case Intrinsic::memmove: {
-    static Constant *MemmoveFCache = 0;
     const IntegerType *IntPtr = TD.getIntPtrType();
     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
                                         /* isSigned */ false);
@@ -804,12 +794,10 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     Ops[0] = CI->getOperand(1);
     Ops[1] = CI->getOperand(2);
     Ops[2] = Size;
-    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
-                    MemmoveFCache);
+    ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType());
     break;
   }
   case Intrinsic::memset: {
-    static Constant *MemsetFCache = 0;
     const IntegerType *IntPtr = TD.getIntPtrType();
     Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr,
                                         /* isSigned */ false);
@@ -819,64 +807,35 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
     Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::Int32Ty,
                                    /* isSigned */ false);
     Ops[2] = Size;
-    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType(),
-                    MemsetFCache);
+    ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType());
     break;
   }
   case Intrinsic::sqrt: {
-    static Constant *sqrtFCache = 0;
-    static Constant *sqrtDCache = 0;
-    static Constant *sqrtLDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, sqrtFCache, sqrtDCache, sqrtLDCache,
-                               "sqrtf", "sqrt", "sqrtl");
+    ReplaceFPIntrinsicWithCall(CI, "sqrtf", "sqrt", "sqrtl");
     break;
   }
   case Intrinsic::log: {
-    static Constant *logFCache = 0;
-    static Constant *logDCache = 0;
-    static Constant *logLDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, logFCache, logDCache, logLDCache,
-                               "logf", "log", "logl");
+    ReplaceFPIntrinsicWithCall(CI, "logf", "log", "logl");
     break;
   }
   case Intrinsic::log2: {
-    static Constant *log2FCache = 0;
-    static Constant *log2DCache = 0;
-    static Constant *log2LDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, log2FCache, log2DCache, log2LDCache,
-                               "log2f", "log2", "log2l");
+    ReplaceFPIntrinsicWithCall(CI, "log2f", "log2", "log2l");
     break;
   }
   case Intrinsic::log10: {
-    static Constant *log10FCache = 0;
-    static Constant *log10DCache = 0;
-    static Constant *log10LDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, log10FCache, log10DCache, log10LDCache,
-                               "log10f", "log10", "log10l");
+    ReplaceFPIntrinsicWithCall(CI, "log10f", "log10", "log10l");
     break;
   }
   case Intrinsic::exp: {
-    static Constant *expFCache = 0;
-    static Constant *expDCache = 0;
-    static Constant *expLDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, expFCache, expDCache, expLDCache,
-                               "expf", "exp", "expl");
+    ReplaceFPIntrinsicWithCall(CI, "expf", "exp", "expl");
     break;
   }
   case Intrinsic::exp2: {
-    static Constant *exp2FCache = 0;
-    static Constant *exp2DCache = 0;
-    static Constant *exp2LDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, exp2FCache, exp2DCache, exp2LDCache,
-                               "exp2f", "exp2", "exp2l");
+    ReplaceFPIntrinsicWithCall(CI, "exp2f", "exp2", "exp2l");
     break;
   }
   case Intrinsic::pow: {
-    static Constant *powFCache = 0;
-    static Constant *powDCache = 0;
-    static Constant *powLDCache = 0;
-    ReplaceFPIntrinsicWithCall(CI, powFCache, powDCache, powLDCache,
-                               "powf", "pow", "powl");
+    ReplaceFPIntrinsicWithCall(CI, "powf", "pow", "powl");
     break;
   }
   case Intrinsic::flt_rounds:
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index cac9253..26722a3 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -373,7 +373,8 @@ void LiveInterval::scaleNumbering(unsigned factor) {
   for (vni_iterator VNI = vni_begin(), VNIE = vni_end(); VNI != VNIE; ++VNI) {
     VNInfo *vni = *VNI;
 
-    vni->def = InstrSlots::scale(vni->def, factor);
+    if (vni->isDefAccurate())
+      vni->def = InstrSlots::scale(vni->def, factor);
 
     for (unsigned i = 0; i < vni->kills.size(); ++i) {
       if (vni->kills[i] != 0)
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index d6931df..21bb5dc 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -947,6 +947,10 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
     unsigned Reg = MO.getReg();
     if (Reg == 0 || Reg == li.reg)
       continue;
+    
+    if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+        !allocatableRegs_[Reg])
+      continue;
     // FIXME: For now, only remat MI with at most one register operand.
     assert(!RegOp &&
            "Can't rematerialize instruction with multiple register operand!");
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index cacfed1..2d2b59e 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -124,25 +124,28 @@ MachineFunction::MachineFunction(const Function *F,
                   MachineFrameInfo(*TM.getFrameInfo());
   ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
                      MachineConstantPool(TM.getTargetData());
-  
+
   // Set up jump table.
   const TargetData &TD = *TM.getTargetData();
   bool IsPic = TM.getRelocationModel() == Reloc::PIC_;
   unsigned EntrySize = IsPic ? 4 : TD.getPointerSize();
-  unsigned Alignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
-                             : TD.getPointerABIAlignment();
+  unsigned TyAlignment = IsPic ? TD.getABITypeAlignment(Type::Int32Ty)
+                               : TD.getPointerABIAlignment();
   JumpTableInfo = new (Allocator.Allocate<MachineJumpTableInfo>())
-                      MachineJumpTableInfo(EntrySize, Alignment);
+                      MachineJumpTableInfo(EntrySize, TyAlignment);
 }
 
 MachineFunction::~MachineFunction() {
   BasicBlocks.clear();
   InstructionRecycler.clear(Allocator);
   BasicBlockRecycler.clear(Allocator);
-  if (RegInfo)
-    RegInfo->~MachineRegisterInfo();        Allocator.Deallocate(RegInfo);
+  if (RegInfo) {
+    RegInfo->~MachineRegisterInfo();
+    Allocator.Deallocate(RegInfo);
+  }
   if (MFInfo) {
-    MFInfo->~MachineFunctionInfo();       Allocator.Deallocate(MFInfo);
+    MFInfo->~MachineFunctionInfo();
+    Allocator.Deallocate(MFInfo);
   }
   FrameInfo->~MachineFrameInfo();         Allocator.Deallocate(FrameInfo);
   ConstantPool->~MachineConstantPool();   Allocator.Deallocate(ConstantPool);
@@ -295,12 +298,6 @@ void MachineFunction::print(std::ostream &OS) const {
   OS << "\n# End machine code for " << Fn->getName () << "().\n\n";
 }
 
-/// CFGOnly flag - This is used to control whether or not the CFG graph printer
-/// prints out the contents of basic blocks or not.  This is acceptable because
-/// this code is only really used for debugging purposes.
-///
-static bool CFGOnly = false;
-
 namespace llvm {
   template<>
   struct DOTGraphTraits<const MachineFunction*> : public DefaultDOTGraphTraits {
@@ -309,13 +306,14 @@ namespace llvm {
     }
 
     static std::string getNodeLabel(const MachineBasicBlock *Node,
-                                    const MachineFunction *Graph) {
-      if (CFGOnly && Node->getBasicBlock() &&
+                                    const MachineFunction *Graph,
+                                    bool ShortNames) {
+      if (ShortNames && Node->getBasicBlock() &&
           !Node->getBasicBlock()->getName().empty())
         return Node->getBasicBlock()->getName() + ":";
 
       std::ostringstream Out;
-      if (CFGOnly) {
+      if (ShortNames) {
         Out << Node->getNumber() << ':';
         return Out.str();
       }
@@ -348,9 +346,12 @@ void MachineFunction::viewCFG() const
 
 void MachineFunction::viewCFGOnly() const
 {
-  CFGOnly = true;
-  viewCFG();
-  CFGOnly = false;
+#ifndef NDEBUG
+  ViewGraph(this, "mf" + getFunction()->getName(), true); // short node labels
+#else
+  cerr << "MachineFunction::viewCFGOnly is only available in debug builds on "
+       << "systems with Graphviz or gv!\n";
+#endif // NDEBUG
 }
 
 // The next two methods are used to construct and to retrieve
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index c351593..c977508 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -150,7 +150,9 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
 /// isIdenticalTo - Return true if this operand is identical to the specified
 /// operand.
 bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
-  if (getType() != Other.getType()) return false;
+  if (getType() != Other.getType() ||
+      getTargetFlags() != Other.getTargetFlags())
+    return false;
   
   switch (getType()) {
   default: assert(0 && "Unrecognized operand type");
@@ -205,70 +207,72 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
     }
 
     if (getSubReg() != 0) {
-      OS << ":" << getSubReg();
+      OS << ':' << getSubReg();
     }
 
     if (isDef() || isKill() || isDead() || isImplicit() || isEarlyClobber()) {
-      OS << "<";
+      OS << '<';
       bool NeedComma = false;
       if (isImplicit()) {
-        if (NeedComma) OS << ",";
+        if (NeedComma) OS << ',';
         OS << (isDef() ? "imp-def" : "imp-use");
         NeedComma = true;
       } else if (isDef()) {
-        if (NeedComma) OS << ",";
+        if (NeedComma) OS << ',';
         if (isEarlyClobber())
           OS << "earlyclobber,";
         OS << "def";
         NeedComma = true;
       }
       if (isKill() || isDead()) {
-        if (NeedComma) OS << ",";
+        if (NeedComma) OS << ',';
         if (isKill())  OS << "kill";
         if (isDead())  OS << "dead";
       }
-      OS << ">";
+      OS << '>';
     }
     break;
   case MachineOperand::MO_Immediate:
     OS << getImm();
     break;
   case MachineOperand::MO_FPImmediate:
-    if (getFPImm()->getType() == Type::FloatTy) {
+    if (getFPImm()->getType() == Type::FloatTy)
       OS << getFPImm()->getValueAPF().convertToFloat();
-    } else {
+    else
       OS << getFPImm()->getValueAPF().convertToDouble();
-    }
     break;
   case MachineOperand::MO_MachineBasicBlock:
     OS << "mbb<"
        << ((Value*)getMBB()->getBasicBlock())->getName()
-       << "," << (void*)getMBB() << ">";
+       << "," << (void*)getMBB() << '>';
     break;
   case MachineOperand::MO_FrameIndex:
-    OS << "<fi#" << getIndex() << ">";
+    OS << "<fi#" << getIndex() << '>';
     break;
   case MachineOperand::MO_ConstantPoolIndex:
     OS << "<cp#" << getIndex();
     if (getOffset()) OS << "+" << getOffset();
-    OS << ">";
+    OS << '>';
     break;
   case MachineOperand::MO_JumpTableIndex:
-    OS << "<jt#" << getIndex() << ">";
+    OS << "<jt#" << getIndex() << '>';
     break;
   case MachineOperand::MO_GlobalAddress:
     OS << "<ga:" << ((Value*)getGlobal())->getName();
     if (getOffset()) OS << "+" << getOffset();
-    OS << ">";
+    OS << '>';
     break;
   case MachineOperand::MO_ExternalSymbol:
     OS << "<es:" << getSymbolName();
     if (getOffset()) OS << "+" << getOffset();
-    OS << ">";
+    OS << '>';
     break;
   default:
     assert(0 && "Unrecognized operand type");
   }
+  
+  if (unsigned TF = getTargetFlags())
+    OS << "[TF=" << TF << ']';
 }
 
 //===----------------------------------------------------------------------===//
@@ -716,31 +720,37 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
     const MachineOperand &MO = getOperand(DefOpIdx);
     if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
       return false;
-    // Determine the actual operand no corresponding to this index.
+    // Determine the actual operand index that corresponds to this index.
     unsigned DefNo = 0;
+    unsigned DefPart = 0;
     for (unsigned i = 1, e = getNumOperands(); i < e; ) {
       const MachineOperand &FMO = getOperand(i);
       assert(FMO.isImm());
       // Skip over this def.
-      i += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
-      if (i > DefOpIdx)
+      unsigned NumOps = InlineAsm::getNumOperandRegisters(FMO.getImm());
+      unsigned PrevDef = i + 1;
+      i = PrevDef + NumOps;
+      if (i > DefOpIdx) {
+        DefPart = DefOpIdx - PrevDef;
         break;
+      }
       ++DefNo;
     }
-    for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+    for (unsigned i = 1, e = getNumOperands(); i != e; ++i) {
       const MachineOperand &FMO = getOperand(i);
       if (!FMO.isImm())
         continue;
       if (i+1 >= e || !getOperand(i+1).isReg() || !getOperand(i+1).isUse())
         continue;
       unsigned Idx;
-      if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) && 
+      if (InlineAsm::isUseOperandTiedToDef(FMO.getImm(), Idx) &&
           Idx == DefNo) {
         if (UseOpIdx)
-          *UseOpIdx = (unsigned)i + 1;
+          *UseOpIdx = (unsigned)i + 1 + DefPart;
         return true;
       }
     }
+    return false;
   }
 
   assert(getOperand(DefOpIdx).isDef() && "DefOpIdx is not a def!");
@@ -766,10 +776,16 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
     const MachineOperand &MO = getOperand(UseOpIdx);
     if (!MO.isReg() || !MO.isUse() || MO.getReg() == 0)
       return false;
-    assert(UseOpIdx > 0);
-    const MachineOperand &UFMO = getOperand(UseOpIdx-1);
-    if (!UFMO.isImm())
-      return false;  // Must be physreg uses.
+    int FlagIdx = UseOpIdx - 1;
+    if (FlagIdx < 1)
+      return false;
+    while (!getOperand(FlagIdx).isImm()) {
+      if (--FlagIdx == 0)
+        return false;
+    }
+    const MachineOperand &UFMO = getOperand(FlagIdx);
+    if (FlagIdx + InlineAsm::getNumOperandRegisters(UFMO.getImm()) < UseOpIdx)
+      return false;
     unsigned DefNo;
     if (InlineAsm::isUseOperandTiedToDef(UFMO.getImm(), DefNo)) {
       if (!DefOpIdx)
@@ -785,7 +801,7 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
         DefIdx += InlineAsm::getNumOperandRegisters(FMO.getImm()) + 1;
         --DefNo;
       }
-      *DefOpIdx = DefIdx+1;
+      *DefOpIdx = DefIdx + UseOpIdx - FlagIdx;
       return true;
     }
     return false;
@@ -1092,13 +1108,13 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg,
 
   // If not found, this means an alias of one of the operands is dead. Add a
   // new implicit operand if required.
-  if (!Found && AddIfNotFound) {
-    addOperand(MachineOperand::CreateReg(IncomingReg,
-                                         true  /*IsDef*/,
-                                         true  /*IsImp*/,
-                                         false /*IsKill*/,
-                                         true  /*IsDead*/));
-    return true;
-  }
-  return Found;
+  if (Found || !AddIfNotFound)
+    return Found;
+    
+  addOperand(MachineOperand::CreateReg(IncomingReg,
+                                       true  /*IsDef*/,
+                                       true  /*IsImp*/,
+                                       false /*IsKill*/,
+                                       true  /*IsDead*/));
+  return true;
 }
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index 594c24d..5efd274 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -59,7 +59,8 @@ namespace llvm {
     
 
     static std::string getNodeLabel(const SUnit *Node,
-                                    const ScheduleDAG *Graph);
+                                    const ScheduleDAG *Graph,
+                                    bool ShortNames);
     static std::string getNodeAttributes(const SUnit *N,
                                          const ScheduleDAG *Graph) {
       return "shape=Mrecord";
@@ -73,7 +74,8 @@ namespace llvm {
 }
 
 std::string DOTGraphTraits<ScheduleDAG*>::getNodeLabel(const SUnit *SU,
-                                                       const ScheduleDAG *G) {
+                                                       const ScheduleDAG *G,
+                                                       bool ShortNames) {
   return G->getGraphNodeLabel(SU);
 }
 
@@ -84,11 +86,11 @@ void ScheduleDAG::viewGraph() {
 // This code is only for debugging!
 #ifndef NDEBUG
   if (BB->getBasicBlock())
-    ViewGraph(this, "dag." + MF.getFunction()->getName(),
+    ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
               "Scheduling-Units Graph for " + MF.getFunction()->getName() + ':' +
               BB->getBasicBlock()->getName());
   else
-    ViewGraph(this, "dag." + MF.getFunction()->getName(),
+    ViewGraph(this, "dag." + MF.getFunction()->getName(), false,
               "Scheduling-Units Graph for " + MF.getFunction()->getName());
 #else
   cerr << "ScheduleDAG::viewGraph is only available in debug builds on "
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 1bb8090..ef365e6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -211,7 +211,7 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
                                            CodeGenOpt::Level ol)
   : TLI(dag.getTargetLoweringInfo()), DAG(dag), OptLevel(ol),
     ValueTypeActions(TLI.getValueTypeActions()) {
-  assert(MVT::LAST_VALUETYPE <= 32 &&
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
          "Too many value types for ValueTypeActions to hold!");
 }
 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 75c8924..02b0732 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -159,7 +159,7 @@ public:
   explicit DAGTypeLegalizer(SelectionDAG &dag)
     : TLI(dag.getTargetLoweringInfo()), DAG(dag),
     ValueTypeActions(TLI.getValueTypeActions()) {
-    assert(MVT::LAST_VALUETYPE <= 32 &&
+    assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
            "Too many value types for ValueTypeActions to hold!");
   }
 
diff --git a/lib/CodeGen/SelectionDAG/Makefile b/lib/CodeGen/SelectionDAG/Makefile
index 185222a..73f0b5d 100644
--- a/lib/CodeGen/SelectionDAG/Makefile
+++ b/lib/CodeGen/SelectionDAG/Makefile
@@ -9,7 +9,5 @@
 LEVEL = ../../..
 LIBRARYNAME = LLVMSelectionDAG
 PARALLEL_DIRS =
-BUILD_ARCHIVE = 1
-DONT_BUILD_RELINKED = 1
 
 include $(LEVEL)/Makefile.common
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
index fb5e207..e372b5b 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
@@ -30,10 +30,9 @@ using namespace llvm;
 
 /// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
 /// implicit physical register output.
-void ScheduleDAGSDNodes::EmitCopyFromReg(SDNode *Node, unsigned ResNo,
-                                         bool IsClone, bool IsCloned,
-                                         unsigned SrcReg,
-                                         DenseMap<SDValue, unsigned> &VRBaseMap) {
+void ScheduleDAGSDNodes::
+EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
+                unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
   unsigned VRBase = 0;
   if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
     // Just use the input register directly!
@@ -281,13 +280,15 @@ void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
   } else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateReg(R->getReg(), false));
   } else if (GlobalAddressSDNode *TGA = dyn_cast<GlobalAddressSDNode>(Op)) {
-    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(),TGA->getOffset()));
+    MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(),
+                                            TGA->getTargetFlags()));
   } else if (BasicBlockSDNode *BBNode = dyn_cast<BasicBlockSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock()));
   } else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op)) {
     MI->addOperand(MachineOperand::CreateFI(FI->getIndex()));
   } else if (JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op)) {
-    MI->addOperand(MachineOperand::CreateJTI(JT->getIndex()));
+    MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(),
+                                             JT->getTargetFlags()));
   } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op)) {
     int Offset = CP->getOffset();
     unsigned Align = CP->getAlignment();
@@ -306,9 +307,11 @@ void ScheduleDAGSDNodes::AddOperand(MachineInstr *MI, SDValue Op,
       Idx = ConstPool->getConstantPoolIndex(CP->getMachineCPVal(), Align);
     else
       Idx = ConstPool->getConstantPoolIndex(CP->getConstVal(), Align);
-    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset));
+    MI->addOperand(MachineOperand::CreateCPI(Idx, Offset,
+                                             CP->getTargetFlags()));
   } else if (ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op)) {
-    MI->addOperand(MachineOperand::CreateES(ES->getSymbol()));
+    MI->addOperand(MachineOperand::CreateES(ES->getSymbol(), 0,
+                                            ES->getTargetFlags()));
   } else {
     assert(Op.getValueType() != MVT::Other &&
            Op.getValueType() != MVT::Flag &&
@@ -335,7 +338,7 @@ getSuperRegisterRegClass(const TargetRegisterClass *TRC,
 /// EmitSubregNode - Generate machine code for subreg nodes.
 ///
 void ScheduleDAGSDNodes::EmitSubregNode(SDNode *Node, 
-                                        DenseMap<SDValue, unsigned> &VRBaseMap) {
+                                        DenseMap<SDValue, unsigned> &VRBaseMap){
   unsigned VRBase = 0;
   unsigned Opc = Node->getMachineOpcode();
   
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ce01d53..0342f67 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,8 +31,10 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Mutex.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
@@ -359,6 +361,9 @@ static void AddNodeIDNode(FoldingSetNodeID &ID,
 /// the NodeID data.
 static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   switch (N->getOpcode()) {
+  case ISD::TargetExternalSymbol:
+  case ISD::ExternalSymbol:
+    assert(0 && "Should only be used on nodes with operands");
   default: break;  // Normal nodes don't need extra info.
   case ISD::ARG_FLAGS:
     ID.AddInteger(cast<ARG_FLAGSSDNode>(N)->getArgFlags().getRawBits());
@@ -379,6 +384,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
     const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
     ID.AddPointer(GA->getGlobal());
     ID.AddInteger(GA->getOffset());
+    ID.AddInteger(GA->getTargetFlags());
     break;
   }
   case ISD::BasicBlock:
@@ -409,6 +415,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
   case ISD::JumpTable:
   case ISD::TargetJumpTable:
     ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
+    ID.AddInteger(cast<JumpTableSDNode>(N)->getTargetFlags());
     break;
   case ISD::ConstantPool:
   case ISD::TargetConstantPool: {
@@ -419,6 +426,7 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
       CP->getMachineCPVal()->AddSelectionDAGCSEId(ID);
     else
       ID.AddPointer(CP->getConstVal());
+    ID.AddInteger(CP->getTargetFlags());
     break;
   }
   case ISD::CALL: {
@@ -630,10 +638,13 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
   case ISD::ExternalSymbol:
     Erased = ExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
     break;
-  case ISD::TargetExternalSymbol:
-    Erased =
-      TargetExternalSymbols.erase(cast<ExternalSymbolSDNode>(N)->getSymbol());
+  case ISD::TargetExternalSymbol: {
+    ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
+    Erased = TargetExternalSymbols.erase(
+               std::pair<std::string,unsigned char>(ESN->getSymbol(),
+                                                    ESN->getTargetFlags()));
     break;
+  }
   case ISD::VALUETYPE: {
     MVT VT = cast<VTSDNode>(N)->getVT();
     if (VT.isExtended()) {
@@ -953,9 +964,11 @@ SDValue SelectionDAG::getConstantFP(double Val, MVT VT, bool isTarget) {
 
 SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
                                        MVT VT, int64_t Offset,
-                                       bool isTargetGA) {
-  unsigned Opc;
-
+                                       bool isTargetGA,
+                                       unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTargetGA) &&
+         "Cannot set target flags on target-independent globals");
+  
   // Truncate (with sign-extension) the offset value to the pointer size.
   unsigned BitWidth = TLI.getPointerTy().getSizeInBits();
   if (BitWidth < 64)
@@ -968,6 +981,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
       GVar = dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
   }
 
+  unsigned Opc;
   if (GVar && GVar->isThreadLocal())
     Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress;
   else
@@ -977,11 +991,12 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
   AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
   ID.AddPointer(GV);
   ID.AddInteger(Offset);
+  ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
-  new (N) GlobalAddressSDNode(isTargetGA, GV, VT, Offset);
+  new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1002,16 +1017,20 @@ SDValue SelectionDAG::getFrameIndex(int FI, MVT VT, bool isTarget) {
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){
+SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget,
+                                   unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent jump tables");
   unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
   ID.AddInteger(JTI);
+  ID.AddInteger(TargetFlags);  // flags are part of the CSE key
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
-  new (N) JumpTableSDNode(JTI, VT, isTarget);
+  new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1019,7 +1038,10 @@ SDValue SelectionDAG::getJumpTable(int JTI, MVT VT, bool isTarget){
 
 SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
                                       unsigned Alignment, int Offset,
-                                      bool isTarget) {
+                                      bool isTarget,
+                                      unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent constant pools");
   if (Alignment == 0)
     Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1028,11 +1050,12 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
   ID.AddInteger(Alignment);
   ID.AddInteger(Offset);
   ID.AddPointer(C);
+  ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
-  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1041,7 +1064,10 @@ SDValue SelectionDAG::getConstantPool(Constant *C, MVT VT,
 
 SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
                                       unsigned Alignment, int Offset,
-                                      bool isTarget) {
+                                      bool isTarget,
+                                      unsigned char TargetFlags) {
+  assert((TargetFlags == 0 || isTarget) &&
+         "Cannot set target flags on target-independent constant pools");
   if (Alignment == 0)
     Alignment = TLI.getTargetData()->getPrefTypeAlignment(C->getType());
   unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1050,11 +1076,12 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, MVT VT,
   ID.AddInteger(Alignment);
   ID.AddInteger(Offset);
   C->AddSelectionDAGCSEId(ID);
+  ID.AddInteger(TargetFlags);
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
     return SDValue(E, 0);
   SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
-  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment);
+  new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
   return SDValue(N, 0);
@@ -1106,16 +1133,19 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, MVT VT) {
   SDNode *&N = ExternalSymbols[Sym];
   if (N) return SDValue(N, 0);
   N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
-  new (N) ExternalSymbolSDNode(false, Sym, VT);
+  new (N) ExternalSymbolSDNode(false, Sym, 0, VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
 
-SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT) {
-  SDNode *&N = TargetExternalSymbols[Sym];
+SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, MVT VT,
+                                              unsigned char TargetFlags) {
+  SDNode *&N =  // one node per (symbol, flags) pair; reused if present
+    TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
+                                                               TargetFlags)];
   if (N) return SDValue(N, 0);
   N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
-  new (N) ExternalSymbolSDNode(true, Sym, VT);
+  new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
   AllNodes.push_back(N);
   return SDValue(N, 0);
 }
@@ -3181,27 +3211,17 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
     } else {
       // The type might not be legal for the target.  This should only happen
       // if the type is smaller than a legal type, as on PPC, so the right
-      // thing to do is generate a LoadExt/StoreTrunc pair.
+      // thing to do is generate a LoadExt/StoreTrunc pair.  These simplify
+      // to Load/Store if NVT==VT.
       // FIXME does the case above also need this?
-      if (TLI.isTypeLegal(VT)) {
-        Value = DAG.getLoad(VT, dl, Chain,
-                            getMemBasePlusOffset(Src, SrcOff, DAG),
-                            SrcSV, SrcSVOff + SrcOff, false, Align);
-        Store = DAG.getStore(Chain, dl, Value,
+      MVT NVT = TLI.getTypeToTransformTo(VT);
+      assert(NVT.bitsGE(VT));
+      Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
+                             getMemBasePlusOffset(Src, SrcOff, DAG),
+                             SrcSV, SrcSVOff + SrcOff, VT, false, Align);
+      Store = DAG.getTruncStore(Chain, dl, Value,
                              getMemBasePlusOffset(Dst, DstOff, DAG),
-                             DstSV, DstSVOff + DstOff, false, DstAlign);
-      } else {
-        MVT NVT = VT;
-        while (!TLI.isTypeLegal(NVT)) {
-          NVT = (MVT::SimpleValueType(NVT.getSimpleVT() + 1));
-        }
-        Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
-                               getMemBasePlusOffset(Src, SrcOff, DAG),
-                               SrcSV, SrcSVOff + SrcOff, VT, false, Align);
-        Store = DAG.getTruncStore(Chain, dl, Value,
-                               getMemBasePlusOffset(Dst, DstOff, DAG),
-                               DstSV, DstSVOff + DstOff, VT, false, DstAlign);
-      }
+                             DstSV, DstSVOff + DstOff, VT, false, DstAlign);
     }
     OutChains.push_back(Store);
     SrcOff += VTSize;
@@ -4915,15 +4935,10 @@ HandleSDNode::~HandleSDNode() {
   DropOperands();
 }
 
-GlobalAddressSDNode::GlobalAddressSDNode(bool isTarget, const GlobalValue *GA,
-                                         MVT VT, int64_t o)
-  : SDNode(isa<GlobalVariable>(GA) &&
-           cast<GlobalVariable>(GA)->isThreadLocal() ?
-           // Thread Local
-           (isTarget ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress) :
-           // Non Thread Local
-           (isTarget ? ISD::TargetGlobalAddress : ISD::GlobalAddress),
-           DebugLoc::getUnknownLoc(), getSDVTList(VT)), Offset(o) {
+GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
+                                         MVT VT, int64_t o, unsigned char TF)
+  : SDNode(Opc, DebugLoc::getUnknownLoc(), getSDVTList(VT)),
+    Offset(o), TargetFlags(TF) {
   TheGlobal = const_cast<GlobalValue*>(GA);
 }
 
@@ -4987,14 +5002,17 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
   AddNodeIDNode(ID, this);
 }
 
+static ManagedStatic<std::set<MVT, MVT::compareRawBits> > EVTs;
+static MVT VTs[MVT::LAST_VALUETYPE];
+static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+
 /// getValueTypeList - Return a pointer to the specified value type.
 ///
 const MVT *SDNode::getValueTypeList(MVT VT) {
+  sys::SmartScopedLock<true> Lock(&*VTMutex);
   if (VT.isExtended()) {
-    static std::set<MVT, MVT::compareRawBits> EVTs;
-    return &(*EVTs.insert(VT).first);
+    return &(*EVTs->insert(VT).first);
   } else {
-    static MVT VTs[MVT::LAST_VALUETYPE];
     VTs[VT.getSimpleVT()] = VT;
     return &VTs[VT.getSimpleVT()];
   }
@@ -5486,10 +5504,14 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
       OS << " + " << offset;
     else
       OS << " " << offset;
+    if (unsigned TF = GADN->getTargetFlags())
+      OS << " [TF=" << TF << ']';
   } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(this)) {
     OS << "<" << FIDN->getIndex() << ">";
   } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(this)) {
     OS << "<" << JTDN->getIndex() << ">";
+    if (unsigned TF = JTDN->getTargetFlags())
+      OS << " [TF=" << TF << ']';
   } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(this)){
     int offset = CP->getOffset();
     if (CP->isMachineConstantPoolEntry())
@@ -5500,6 +5522,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
       OS << " + " << offset;
     else
       OS << " " << offset;
+    if (unsigned TF = CP->getTargetFlags())
+      OS << " [TF=" << TF << ']';
   } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(this)) {
     OS << "<";
     const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
@@ -5516,6 +5540,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
   } else if (const ExternalSymbolSDNode *ES =
              dyn_cast<ExternalSymbolSDNode>(this)) {
     OS << "'" << ES->getSymbol() << "'";
+    if (unsigned TF = ES->getTargetFlags())
+      OS << " [TF=" << TF << ']';
   } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(this)) {
     if (M->getValue())
       OS << "<" << M->getValue() << ">";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3eec684..6fd5df2 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -94,7 +94,8 @@ namespace llvm {
     
 
     static std::string getNodeLabel(const SDNode *Node,
-                                    const SelectionDAG *Graph);
+                                    const SelectionDAG *Graph,
+                                    bool ShortNames);
     static std::string getNodeAttributes(const SDNode *N,
                                          const SelectionDAG *Graph) {
 #ifndef NDEBUG
@@ -120,139 +121,14 @@ namespace llvm {
 }
 
 std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
-                                                        const SelectionDAG *G) {
-  std::string Op = Node->getOperationName(G);
-
-  if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Node)) {
-    Op += ": " + utostr(CSDN->getZExtValue());
-  } else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(Node)) {
-    Op += ": " + ftostr(CSDN->getValueAPF());
-  } else if (const GlobalAddressSDNode *GADN =
-             dyn_cast<GlobalAddressSDNode>(Node)) {
-    Op += ": " + GADN->getGlobal()->getName();
-    if (int64_t Offset = GADN->getOffset()) {
-      if (Offset > 0)
-        Op += "+" + itostr(Offset);
-      else
-        Op += itostr(Offset);
-    }
-  } else if (const FrameIndexSDNode *FIDN = dyn_cast<FrameIndexSDNode>(Node)) {
-    Op += " " + itostr(FIDN->getIndex());
-  } else if (const JumpTableSDNode *JTDN = dyn_cast<JumpTableSDNode>(Node)) {
-    Op += " " + itostr(JTDN->getIndex());
-  } else if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Node)){
-    if (CP->isMachineConstantPoolEntry()) {
-      Op += '<';
-      {
-        raw_string_ostream OSS(Op);
-        OSS << *CP->getMachineCPVal();
-      }
-      Op += '>';
-    } else {
-      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
-        Op += "<" + ftostr(CFP->getValueAPF()) + ">";
-      else if (ConstantInt *CI = dyn_cast<ConstantInt>(CP->getConstVal()))
-        Op += "<" + utostr(CI->getZExtValue()) + ">";
-      else {
-        Op += '<';
-        {
-          raw_string_ostream OSS(Op);
-          WriteAsOperand(OSS, CP->getConstVal(), false);
-        }
-        Op += '>';
-      }
-    }
-    Op += " A=" + itostr(CP->getAlignment());
-  } else if (const BasicBlockSDNode *BBDN = dyn_cast<BasicBlockSDNode>(Node)) {
-    Op = "BB: ";
-    const Value *LBB = (const Value*)BBDN->getBasicBlock()->getBasicBlock();
-    if (LBB)
-      Op += LBB->getName();
-    //Op += " " + (const void*)BBDN->getBasicBlock();
-  } else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node)) {
-    if (G && R->getReg() != 0 &&
-        TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
-      Op = Op + " " +
-        G->getTarget().getRegisterInfo()->getName(R->getReg());
-    } else {
-      Op += " #" + utostr(R->getReg());
-    }
-  } else if (const DbgStopPointSDNode *D = dyn_cast<DbgStopPointSDNode>(Node)) {
-    DICompileUnit CU(cast<GlobalVariable>(D->getCompileUnit()));
-    std::string FN;
-    Op += ": " + CU.getFilename(FN);
-    Op += ":" + utostr(D->getLine());
-    if (D->getColumn() != 0)
-      Op += ":" + utostr(D->getColumn());
-  } else if (const LabelSDNode *L = dyn_cast<LabelSDNode>(Node)) {
-    Op += ": LabelID=" + utostr(L->getLabelID());
-  } else if (const CallSDNode *C = dyn_cast<CallSDNode>(Node)) {
-    Op += ": CallingConv=" + utostr(C->getCallingConv());
-    if (C->isVarArg())
-      Op += ", isVarArg";
-    if (C->isTailCall())
-      Op += ", isTailCall";
-  } else if (const ExternalSymbolSDNode *ES =
-             dyn_cast<ExternalSymbolSDNode>(Node)) {
-    Op += "'" + std::string(ES->getSymbol()) + "'";
-  } else if (const SrcValueSDNode *M = dyn_cast<SrcValueSDNode>(Node)) {
-    if (M->getValue())
-      Op += "<" + M->getValue()->getName() + ">";
-    else
-      Op += "<null>";
-  } else if (const MemOperandSDNode *M = dyn_cast<MemOperandSDNode>(Node)) {
-    const Value *V = M->MO.getValue();
-    Op += '<';
-    if (!V) {
-      Op += "(unknown)";
-    } else if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
-      // PseudoSourceValues don't have names, so use their print method.
-      raw_string_ostream OSS(Op);
-      PSV->print(OSS);
-    } else {
-      Op += V->getName();
-    }
-    Op += '+' + itostr(M->MO.getOffset()) + '>';
-  } else if (const ARG_FLAGSSDNode *N = dyn_cast<ARG_FLAGSSDNode>(Node)) {
-    Op = Op + " AF=" + N->getArgFlags().getArgFlagsString();
-  } else if (const VTSDNode *N = dyn_cast<VTSDNode>(Node)) {
-    Op = Op + " VT=" + N->getVT().getMVTString();
-  } else if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(Node)) {
-    bool doExt = true;
-    switch (LD->getExtensionType()) {
-    default: doExt = false; break;
-    case ISD::EXTLOAD:
-      Op = Op + "<anyext ";
-      break;
-    case ISD::SEXTLOAD:
-      Op = Op + " <sext ";
-      break;
-    case ISD::ZEXTLOAD:
-      Op = Op + " <zext ";
-      break;
-    }
-    if (doExt)
-      Op += LD->getMemoryVT().getMVTString() + ">";
-    if (LD->isVolatile())
-      Op += "<V>";
-    Op += LD->getIndexedModeName(LD->getAddressingMode());
-    if (LD->getAlignment() > 1)
-      Op += " A=" + utostr(LD->getAlignment());
-  } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(Node)) {
-    if (ST->isTruncatingStore())
-      Op += "<trunc " + ST->getMemoryVT().getMVTString() + ">";
-    if (ST->isVolatile())
-      Op += "<V>";
-    Op += ST->getIndexedModeName(ST->getAddressingMode());
-    if (ST->getAlignment() > 1)
-      Op += " A=" + utostr(ST->getAlignment());
+                                                        const SelectionDAG *G,
+                                                        bool ShortNames) {
+  std::string Result = Node->getOperationName(G);
+  {
+    raw_string_ostream OS(Result);
+    Node->print_details(OS, G);
   }
-
-#if 0
-  Op += " Id=" + itostr(Node->getNodeId());
-#endif
-  
-  return Op;
+  return Result;
 }
 
 
@@ -262,7 +138,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
 void SelectionDAG::viewGraph(const std::string &Title) {
 // This code is only for debugging!
 #ifndef NDEBUG
-  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(),
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getName(), false,
             Title);
 #else
   cerr << "SelectionDAG::viewGraph is only available in debug builds on "
@@ -393,7 +269,8 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
     for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
       FlaggedNodes.push_back(N);
     while (!FlaggedNodes.empty()) {
-      O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(), DAG);
+      O << DOTGraphTraits<SelectionDAG*>::getNodeLabel(FlaggedNodes.back(),
+                                                       DAG, false);
       FlaggedNodes.pop_back();
       if (!FlaggedNodes.empty())
         O << "\n    ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a771d46..83357e0 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -527,7 +527,7 @@ TargetLowering::~TargetLowering() {}
 /// computeRegisterProperties - Once all of the register classes are added,
 /// this allows us to compute derived properties we expose.
 void TargetLowering::computeRegisterProperties() {
-  assert(MVT::LAST_VALUETYPE <= 32 &&
+  assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE &&
          "Too many value types for ValueTypeActions to hold!");
 
   // Everything defaults to needing one register.
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 919a0ce..405cd80 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -78,24 +78,21 @@ protected:
     return miIdx;
   }  
 
-
   /// Insert a store of the given vreg to the given stack slot immediately
   /// after the given instruction. Returns the base index of the inserted
   /// instruction. The caller is responsible for adding an appropriate
   /// LiveInterval to the LiveIntervals analysis.
-  unsigned insertStoreFor(MachineInstr *mi, unsigned ss,
+  unsigned insertStoreAfter(MachineInstr *mi, unsigned ss,
                           unsigned vreg,
                           const TargetRegisterClass *trc) {
 
-    MachineBasicBlock::iterator nextInstItr(mi); 
-    ++nextInstItr;
+    MachineBasicBlock::iterator nextInstItr(next(mi)); 
 
     unsigned miIdx = makeSpaceAfter(mi);
 
     tii->storeRegToStackSlot(*mi->getParent(), nextInstItr, vreg,
                              true, ss, trc);
-    MachineBasicBlock::iterator storeInstItr(mi);
-    ++storeInstItr;
+    MachineBasicBlock::iterator storeInstItr(next(mi));
     MachineInstr *storeInst = &*storeInstItr;
     unsigned storeInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
 
@@ -107,37 +104,81 @@ protected:
     return storeInstIdx;
   }
 
-  void insertStoreOnInterval(LiveInterval *li,
-                             MachineInstr *mi, unsigned ss,
-                             unsigned vreg,
-                             const TargetRegisterClass *trc) {
+  /// Insert a store of the given vreg to the given stack slot immediately
+  /// before the given instruction. Returns the base index of the inserted
+  /// instruction.
+  unsigned insertStoreBefore(MachineInstr *mi, unsigned ss,
+                            unsigned vreg,
+                            const TargetRegisterClass *trc) {
+    unsigned miIdx = makeSpaceBefore(mi);
+  
+    tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc);
+    MachineBasicBlock::iterator storeInstItr(prior(mi));
+    MachineInstr *storeInst = &*storeInstItr;
+    unsigned storeInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
+
+    assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
+           "Store inst index already in use.");
+
+    lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
+
+    return storeInstIdx;
+  }
+
+  void insertStoreAfterInstOnInterval(LiveInterval *li,
+                                      MachineInstr *mi, unsigned ss,
+                                      unsigned vreg,
+                                      const TargetRegisterClass *trc) {
 
-    unsigned storeInstIdx = insertStoreFor(mi, ss, vreg, trc);
+    unsigned storeInstIdx = insertStoreAfter(mi, ss, vreg, trc);
     unsigned start = lis->getDefIndex(lis->getInstructionIndex(mi)),
              end = lis->getUseIndex(storeInstIdx);
 
     VNInfo *vni =
       li->getNextValue(storeInstIdx, 0, true, lis->getVNInfoAllocator());
     vni->kills.push_back(storeInstIdx);
+    DOUT << "    Inserting store range: [" << start << ", " << end << ")\n";
     LiveRange lr(start, end, vni);
       
     li->addRange(lr);
   }
 
-  /// Insert a load of the given veg from the given stack slot immediately
+  /// Insert a load of the given vreg from the given stack slot immediately
+  /// after the given instruction. Returns the base index of the inserted
+  /// instruction. The caller is responsible for adding/removing an appropriate
+  /// range in vreg's LiveInterval.
+  unsigned insertLoadAfter(MachineInstr *mi, unsigned ss,
+                          unsigned vreg,
+                          const TargetRegisterClass *trc) {
+
+    MachineBasicBlock::iterator nextInstItr(next(mi)); 
+
+    unsigned miIdx = makeSpaceAfter(mi);
+
+    tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc);
+    MachineBasicBlock::iterator loadInstItr(next(mi));
+    MachineInstr *loadInst = &*loadInstItr;
+    unsigned loadInstIdx = miIdx + LiveInterval::InstrSlots::NUM;
+
+    assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
+           "Load inst index already in use.");
+    
+    lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
+
+    return loadInstIdx;
+  }
+
+  /// Insert a load of the given vreg from the given stack slot immediately
   /// before the given instruction. Returns the base index of the inserted
   /// instruction. The caller is responsible for adding an appropriate
   /// LiveInterval to the LiveIntervals analysis.
-  unsigned insertLoadFor(MachineInstr *mi, unsigned ss,
-                         unsigned vreg,
-                         const TargetRegisterClass *trc) {
-    MachineBasicBlock::iterator useInstItr(mi);
-  
+  unsigned insertLoadBefore(MachineInstr *mi, unsigned ss,
+                            unsigned vreg,
+                            const TargetRegisterClass *trc) {  
     unsigned miIdx = makeSpaceBefore(mi);
   
-    tii->loadRegFromStackSlot(*mi->getParent(), useInstItr, vreg, ss, trc);
-    MachineBasicBlock::iterator loadInstItr(mi);
-    --loadInstItr;
+    tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc);
+    MachineBasicBlock::iterator loadInstItr(prior(mi));
     MachineInstr *loadInst = &*loadInstItr;
     unsigned loadInstIdx = miIdx - LiveInterval::InstrSlots::NUM;
 
@@ -149,18 +190,19 @@ protected:
     return loadInstIdx;
   }
 
-  void insertLoadOnInterval(LiveInterval *li,
-                            MachineInstr *mi, unsigned ss, 
-                            unsigned vreg,
-                            const TargetRegisterClass *trc) {
+  void insertLoadBeforeInstOnInterval(LiveInterval *li,
+                                      MachineInstr *mi, unsigned ss, 
+                                      unsigned vreg,
+                                      const TargetRegisterClass *trc) {
 
-    unsigned loadInstIdx = insertLoadFor(mi, ss, vreg, trc);
+    unsigned loadInstIdx = insertLoadBefore(mi, ss, vreg, trc);
     unsigned start = lis->getDefIndex(loadInstIdx),
              end = lis->getUseIndex(lis->getInstructionIndex(mi));
 
     VNInfo *vni =
       li->getNextValue(loadInstIdx, 0, true, lis->getVNInfoAllocator());
     vni->kills.push_back(lis->getInstructionIndex(mi));
+    DOUT << "    Intserting load range: [" << start << ", " << end << ")\n";
     LiveRange lr(start, end, vni);
 
     li->addRange(lr);
@@ -180,6 +222,8 @@ protected:
     assert(!li->isStackSlot() &&
            "Trying to spill a stack slot.");
 
+    DOUT << "Trivial spill everywhere of reg" << li->reg << "\n";
+
     std::vector<LiveInterval*> added;
     
     const TargetRegisterClass *trc = mri->getRegClass(li->reg);
@@ -189,6 +233,9 @@ protected:
          regItr = mri->reg_begin(li->reg); regItr != mri->reg_end();) {
 
       MachineInstr *mi = &*regItr;
+
+      DOUT << "  Processing " << *mi;
+
       do {
         ++regItr;
       } while (regItr != mri->reg_end() && (&*regItr == mi));
@@ -227,11 +274,11 @@ protected:
       assert(hasUse || hasDef);
 
       if (hasUse) {
-        insertLoadOnInterval(newLI, mi, ss, newVReg, trc);
+        insertLoadBeforeInstOnInterval(newLI, mi, ss, newVReg, trc);
       }
 
       if (hasDef) {
-        insertStoreOnInterval(newLI, mi, ss, newVReg, trc);
+        insertStoreAfterInstOnInterval(newLI, mi, ss, newVReg, trc);
       }
 
       added.push_back(newLI);
@@ -258,29 +305,53 @@ public:
 
   std::vector<LiveInterval*> intraBlockSplit(LiveInterval *li, VNInfo *valno)  {
     std::vector<LiveInterval*> spillIntervals;
-    MachineBasicBlock::iterator storeInsertPoint;
+
+    if (!valno->isDefAccurate() && !valno->isPHIDef()) {
+      // Early out for values which have no well defined def point.
+      return spillIntervals;
+    }
+
+    // Ok.. we should be able to proceed...
+    const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+    unsigned ss = vrm->assignVirt2StackSlot(li->reg);    
+    vrm->grow();
+    vrm->assignVirt2StackSlot(li->reg, ss);
+
+    MachineInstr *mi = 0;
+    unsigned storeIdx = 0;
 
     if (valno->isDefAccurate()) {
       // If we have an accurate def we can just grab an iterator to the instr
       // after the def.
-      storeInsertPoint =
-        next(MachineBasicBlock::iterator(lis->getInstructionFromIndex(valno->def)));
+      mi = lis->getInstructionFromIndex(valno->def);
+      storeIdx = insertStoreAfter(mi, ss, li->reg, trc) +
+        LiveInterval::InstrSlots::DEF;
     } else {
-      // If the def info isn't accurate we check if this is a PHI def.
-      // If it is then def holds the index of the defining Basic Block, and we
-      // can use that to get an insertion point.
-      if (valno->isPHIDef()) {
-
-      } else {
-        // We have no usable def info. We can't split this value sensibly.
-        // FIXME: Need sensible feedback for "failure to split", an empty
-        // set of spill intervals could be reasonably returned from a
-        // split where both the store and load are folded.
-        return spillIntervals;
-      }
+      // if we get here we have a PHI def.
+      mi = &lis->getMBBFromIndex(valno->def)->front();
+      storeIdx = insertStoreBefore(mi, ss, li->reg, trc) +
+        LiveInterval::InstrSlots::DEF;
+    }
+
+    MachineBasicBlock *defBlock = mi->getParent();
+    unsigned loadIdx = 0;
+
+    // Find the first reader of the reg, stopping at the end of the block.
+    MachineBasicBlock::iterator useItr(mi);
+    while (useItr != defBlock->end() && !useItr->readsRegister(li->reg))
+      ++useItr;
+    if (useItr != defBlock->end()) {
+      MachineInstr *loadInst = useItr;
+      loadIdx = insertLoadBefore(loadInst, ss, li->reg, trc) +
+        LiveInterval::InstrSlots::USE;
+    }
+    else {
+      MachineInstr *loadInst = &defBlock->back();
+      loadIdx = insertLoadAfter(loadInst, ss, li->reg, trc) +
+        LiveInterval::InstrSlots::USE;
+    }
 
-        
+    li->removeRange(storeIdx, loadIdx, true);
 
     return spillIntervals;
   }
diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h
index bc830f7..f9d7fbb 100644
--- a/lib/CodeGen/VirtRegRewriter.h
+++ b/lib/CodeGen/VirtRegRewriter.h
@@ -32,11 +32,6 @@
 #include "VirtRegMap.h"
 #include <map>
 
-// TODO:
-//       - Finish renaming Spiller -> Rewriter
-//         - SimpleSpiller
-//         - LocalSpiller
-
 namespace llvm {
   
   /// VirtRegRewriter interface: Implementations of this interface assign
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index c0a1b84..816f793 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CompilerDriver/Action.h"
+#include "llvm/CompilerDriver/BuiltinOptions.h"
 
-#include "llvm/Support/CommandLine.h"
 #include "llvm/System/Program.h"
 
 #include <iostream>
@@ -22,9 +22,6 @@
 using namespace llvm;
 using namespace llvmc;
 
-extern cl::opt<bool> DryRun;
-extern cl::opt<bool> VerboseMode;
-
 namespace {
   int ExecuteProgram(const std::string& name,
                      const StrVector& args) {
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index dece4e8..1212a21 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -11,11 +11,11 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CompilerDriver/BuiltinOptions.h"
 #include "llvm/CompilerDriver/CompilationGraph.h"
 #include "llvm/CompilerDriver/Error.h"
 
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/DOTGraphTraits.h"
 #include "llvm/Support/GraphWriter.h"
 
@@ -30,9 +30,6 @@
 using namespace llvm;
 using namespace llvmc;
 
-extern cl::list<std::string> InputFilenames;
-extern cl::list<std::string> Languages;
-
 namespace llvmc {
 
   const std::string& LanguageMap::GetLanguage(const sys::Path& File) const {
@@ -477,7 +474,8 @@ namespace llvm {
   {
 
     template<typename GraphType>
-    static std::string getNodeLabel(const Node* N, const GraphType&)
+    static std::string getNodeLabel(const Node* N, const GraphType&,
+                                    bool ShortNames)
     {
       if (N->ToolPtr)
         if (N->ToolPtr->IsJoin())
diff --git a/lib/CompilerDriver/Plugin.cpp b/lib/CompilerDriver/Plugin.cpp
index 75abbd0..cb3c7be 100644
--- a/lib/CompilerDriver/Plugin.cpp
+++ b/lib/CompilerDriver/Plugin.cpp
@@ -12,7 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/CompilerDriver/Plugin.h"
-
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/System/Mutex.h"
 #include <algorithm>
 #include <vector>
 
@@ -28,6 +29,7 @@ namespace {
   static bool pluginListInitialized = false;
   typedef std::vector<const llvmc::BasePlugin*> PluginList;
   static PluginList Plugins;
+  static llvm::ManagedStatic<llvm::sys::SmartMutex<true> > PluginMutex;
 
   struct ByPriority {
     bool operator()(const llvmc::BasePlugin* lhs,
@@ -40,6 +42,7 @@ namespace {
 namespace llvmc {
 
   PluginLoader::PluginLoader() {
+    llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
     if (!pluginListInitialized) {
       for (PluginRegistry::iterator B = PluginRegistry::begin(),
              E = PluginRegistry::end(); B != E; ++B)
@@ -50,6 +53,7 @@ namespace llvmc {
   }
 
   PluginLoader::~PluginLoader() {
+    llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
     if (pluginListInitialized) {
       for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
            B != E; ++B)
@@ -59,12 +63,14 @@ namespace llvmc {
   }
 
   void PluginLoader::PopulateLanguageMap(LanguageMap& langMap) {
+    llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
     for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
          B != E; ++B)
       (*B)->PopulateLanguageMap(langMap);
   }
 
   void PluginLoader::PopulateCompilationGraph(CompilationGraph& graph) {
+    llvm::sys::SmartScopedLock<true> Lock(&*PluginMutex);
     for (PluginList::iterator B = Plugins.begin(), E = Plugins.end();
          B != E; ++B)
       (*B)->PopulateCompilationGraph(graph);
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index 886b26b..e704dd9 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -11,16 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CompilerDriver/BuiltinOptions.h"
 #include "llvm/CompilerDriver/Tool.h"
 
 #include "llvm/System/Path.h"
-#include "llvm/Support/CommandLine.h"
 
 using namespace llvm;
 using namespace llvmc;
 
-extern cl::opt<std::string> OutputFilename;
-
 namespace {
   sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName,
                          const std::string& Suffix) {
@@ -39,7 +37,7 @@ namespace {
     // NOTE: makeUnique always *creates* a unique temporary file,
     // which is good, since there will be no races. However, some
     // tools do not like it when the output file already exists, so
-    // they have to be placated with -f or something like that.
+    // they need to be placated with -f or something like that.
     Out.makeUnique(true, NULL);
     return Out;
   }
@@ -52,7 +50,7 @@ sys::Path Tool::OutFilename(const sys::Path& In,
   sys::Path Out;
 
   if (StopCompilation) {
-    if (!OutputFilename.empty()) {
+    if (!OutputFilename.empty() && SaveTemps != SaveTempsEnum::Obj ) {
       Out.set(OutputFilename);
     }
     else if (IsJoin()) {
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 7dfeae0..bb3f64e 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -29,7 +29,6 @@
 using namespace llvm;
 
 STATISTIC(NumDynamicInsts, "Number of dynamic instructions executed");
-static Interpreter *TheEE = 0;
 
 static cl::opt<bool> PrintVolatile("interpreter-print-volatile", cl::Hidden,
           cl::desc("make the interpreter print every volatile load and store"));
@@ -51,10 +50,6 @@ static void SetValue(Value *V, GenericValue Val, ExecutionContext &SF) {
   SF.Values[V] = Val;
 }
 
-void Interpreter::initializeExecutionEngine() {
-  TheEE = this;
-}
-
 //===----------------------------------------------------------------------===//
 //                    Binary Instruction Implementations
 //===----------------------------------------------------------------------===//
@@ -815,7 +810,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
 
 void Interpreter::visitGetElementPtrInst(GetElementPtrInst &I) {
   ExecutionContext &SF = ECStack.back();
-  SetValue(&I, TheEE->executeGEPOperation(I.getPointerOperand(),
+  SetValue(&I, executeGEPOperation(I.getPointerOperand(),
                                    gep_type_begin(I), gep_type_end(I), SF), SF);
 }
 
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index ded65d5..d7f38ef5 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -29,10 +29,7 @@ static struct RegisterInterp {
 
 }
 
-namespace llvm {
-  void LinkInInterpreter() {
-  }
-}
+extern "C" void LLVMLinkInInterpreter() { }
 
 /// create - Create a new interpreter object.  This can never fail.
 ///
diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h
index 8a285ec..6b13c90 100644
--- a/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -202,7 +202,7 @@ private:  // Helper functions
 
   void *getPointerToFunction(Function *F) { return (void*)F; }
 
-  void initializeExecutionEngine();
+  void initializeExecutionEngine() { }
   void initializeExternalFunctions();
   GenericValue getConstantExprValue(ConstantExpr *CE, ExecutionContext &SF);
   GenericValue getOperandValue(Value *V, ExecutionContext &SF);
diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt
index e0c13a1..bf915f7 100644
--- a/lib/ExecutionEngine/JIT/CMakeLists.txt
+++ b/lib/ExecutionEngine/JIT/CMakeLists.txt
@@ -7,5 +7,6 @@ add_llvm_library(LLVMJIT
   JITDwarfEmitter.cpp
   JITEmitter.cpp
   JITMemoryManager.cpp
+  MacOSJITEventListener.cpp
   TargetSelect.cpp
   )
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 43995cb..db5a306 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -20,8 +20,9 @@
 #include "llvm/Instructions.h"
 #include "llvm/ModuleProvider.h"
 #include "llvm/CodeGen/JITCodeEmitter.h"
-#include "llvm/ExecutionEngine/GenericValue.h"
 #include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetJITInfo.h"
@@ -60,9 +61,7 @@ static struct RegisterJIT {
 
 }
 
-namespace llvm {
-  void LinkInJIT() {
-  }
+extern "C" void LLVMLinkInJIT() {
 }
 
 
@@ -509,6 +508,49 @@ GenericValue JIT::runFunction(Function *F,
   return runFunction(Stub, std::vector<GenericValue>());
 }
 
+/// RegisterJITEventListener - Append L to the list of listeners that the
+/// Notify* methods call.  A null L is ignored.
+void JIT::RegisterJITEventListener(JITEventListener *L) {
+  if (!L)
+    return;
+  MutexGuard locked(lock);
+  EventListeners.push_back(L);
+}
+
+/// UnregisterJITEventListener - Remove the last occurrence of L from the
+/// listener list, if any.  The freed slot is filled with the final listener,
+/// so the order of the remaining listeners may change.
+void JIT::UnregisterJITEventListener(JITEventListener *L) {
+  if (!L)
+    return;
+  MutexGuard locked(lock);
+  typedef std::vector<JITEventListener*>::reverse_iterator ListenerRIter;
+  ListenerRIter Pos =
+      std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+  if (Pos == EventListeners.rend())
+    return;
+  std::swap(*Pos, EventListeners.back());
+  EventListeners.pop_back();
+}
+
+/// NotifyFunctionEmitted - Forward the event to every registered listener, in
+/// list order, with 'lock' held for the whole walk.
+void JIT::NotifyFunctionEmitted(
+    const Function &F, void *Code, size_t Size,
+    const JITEvent_EmittedFunctionDetails &Details) {
+  MutexGuard locked(lock);
+  for (unsigned Idx = 0, Count = EventListeners.size(); Idx < Count; ++Idx)
+    EventListeners[Idx]->NotifyFunctionEmitted(F, Code, Size, Details);
+}
+
+/// NotifyFreeingMachineCode - Forward the event to every registered listener,
+/// in list order, with 'lock' held for the whole walk.
+void JIT::NotifyFreeingMachineCode(const Function &F, void *OldPtr) {
+  MutexGuard locked(lock);
+  for (unsigned Idx = 0, Count = EventListeners.size(); Idx < Count; ++Idx)
+    EventListeners[Idx]->NotifyFreeingMachineCode(F, OldPtr);
+}
+
 /// runJITOnFunction - Run the FunctionPassManager full of
 /// just-in-time compilation passes on F, hopefully filling in
 /// GlobalAddress[F] with the address of F's machine code.
@@ -516,11 +549,30 @@ GenericValue JIT::runFunction(Function *F,
 void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) {
   MutexGuard locked(lock);
 
-  registerMachineCodeInfo(MCI);
+  // MCIListener - Copies the address and size of the emitted code into the
+  // caller's MachineCodeInfo.
+  class MCIListener : public JITEventListener {
+    MachineCodeInfo *const MCI;
+   public:
+    MCIListener(MachineCodeInfo *mci) : MCI(mci) {}
+    virtual void NotifyFunctionEmitted(const Function &,
+                                       void *Code, size_t Size,
+                                       const EmittedFunctionDetails &) {
+      MCI->setAddress(Code);
+      MCI->setSize(Size);
+    }
+  };
+  MCIListener MCIL(MCI);
+  // MCI may be null (it defaults to 0).  Only listen when it is set, since the
+  // listener dereferences it unconditionally.
+  if (MCI)
+    RegisterJITEventListener(&MCIL);
 
   runJITOnFunctionUnlocked(F, locked);
 
-  registerMachineCodeInfo(0);
+  // Drop the listener before MCIL goes out of scope.
+  if (MCI)
+    UnregisterJITEventListener(&MCIL);
 }
 
 void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
@@ -711,3 +756,6 @@ void JIT::addPendingFunction(Function *F) {
   MutexGuard locked(lock);
   jitstate->getPendingFunctions(locked).push_back(F);
 }
+
+
+JITEventListener::~JITEventListener() {}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index 3ccb2dd..66417a7 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -20,10 +20,11 @@
 namespace llvm {
 
 class Function;
-class TargetMachine;
-class TargetJITInfo;
+class JITEvent_EmittedFunctionDetails;
 class MachineCodeEmitter;
 class MachineCodeInfo;
+class TargetJITInfo;
+class TargetMachine;
 
 class JITState {
 private:
@@ -52,6 +53,7 @@ class JIT : public ExecutionEngine {
   TargetMachine &TM;       // The current target we are compiling to
   TargetJITInfo &TJI;      // The JITInfo for the target we are compiling to
   JITCodeEmitter *JCE;     // JCE object
+  std::vector<JITEventListener*> EventListeners;
 
   JITState *jitstate;
 
@@ -157,9 +159,18 @@ public:
   // Run the JIT on F and return information about the generated code
   void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
 
+  virtual void RegisterJITEventListener(JITEventListener *L);
+  virtual void UnregisterJITEventListener(JITEventListener *L);
+  /// These functions correspond to the methods on JITEventListener.  They
+  /// iterate over the registered listeners and call the corresponding method on
+  /// each.
+  void NotifyFunctionEmitted(
+      const Function &F, void *Code, size_t Size,
+      const JITEvent_EmittedFunctionDetails &Details);
+  void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
+
 private:
   static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM);
-  void registerMachineCodeInfo(MachineCodeInfo *MCI);
   void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
   void updateFunctionStub(Function *F);
   void updateDlsymStubTable();
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 43f23e4..8fe7ab8 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -24,8 +24,9 @@
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineRelocation.h"
-#include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/JITMemoryManager.h"
 #include "llvm/CodeGen/MachineCodeInfo.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Target/TargetJITInfo.h"
@@ -411,136 +412,6 @@ void *JITResolver::JITCompilerFn(void *Stub) {
 }
 
 //===----------------------------------------------------------------------===//
-// Function Index Support
-
-// On MacOS we generate an index of currently JIT'd functions so that
-// performance tools can determine a symbol name and accurate code range for a
-// PC value.  Because performance tools are generally asynchronous, the code
-// below is written with the hope that it could be interrupted at any time and
-// have useful answers.  However, we don't go crazy with atomic operations, we
-// just do a "reasonable effort".
-#ifdef __APPLE__ 
-#define ENABLE_JIT_SYMBOL_TABLE 0
-#endif
-
-/// JitSymbolEntry - Each function that is JIT compiled results in one of these
-/// being added to an array of symbols.  This indicates the name of the function
-/// as well as the address range it occupies.  This allows the client to map
-/// from a PC value to the name of the function.
-struct JitSymbolEntry {
-  const char *FnName;   // FnName - a strdup'd string.
-  void *FnStart;
-  intptr_t FnSize;
-};
-
-
-struct JitSymbolTable {
-  /// NextPtr - This forms a linked list of JitSymbolTable entries.  This
-  /// pointer is not used right now, but might be used in the future.  Consider
-  /// it reserved for future use.
-  JitSymbolTable *NextPtr;
-  
-  /// Symbols - This is an array of JitSymbolEntry entries.  Only the first
-  /// 'NumSymbols' symbols are valid.
-  JitSymbolEntry *Symbols;
-  
-  /// NumSymbols - This indicates the number entries in the Symbols array that
-  /// are valid.
-  unsigned NumSymbols;
-  
-  /// NumAllocated - This indicates the amount of space we have in the Symbols
-  /// array.  This is a private field that should not be read by external tools.
-  unsigned NumAllocated;
-};
-
-#if ENABLE_JIT_SYMBOL_TABLE 
-JitSymbolTable *__jitSymbolTable;
-#endif
-
-static void AddFunctionToSymbolTable(const char *FnName, 
-                                     void *FnStart, intptr_t FnSize) {
-  assert(FnName != 0 && FnStart != 0 && "Bad symbol to add");
-  JitSymbolTable **SymTabPtrPtr = 0;
-#if !ENABLE_JIT_SYMBOL_TABLE
-  return;
-#else
-  SymTabPtrPtr = &__jitSymbolTable;
-#endif
-  
-  // If this is the first entry in the symbol table, add the JitSymbolTable
-  // index.
-  if (*SymTabPtrPtr == 0) {
-    JitSymbolTable *New = new JitSymbolTable();
-    New->NextPtr = 0;
-    New->Symbols = 0;
-    New->NumSymbols = 0;
-    New->NumAllocated = 0;
-    *SymTabPtrPtr = New;
-  }
-  
-  JitSymbolTable *SymTabPtr = *SymTabPtrPtr;
-  
-  // If we have space in the table, reallocate the table.
-  if (SymTabPtr->NumSymbols >= SymTabPtr->NumAllocated) {
-    // If we don't have space, reallocate the table.
-    unsigned NewSize = std::max(64U, SymTabPtr->NumAllocated*2);
-    JitSymbolEntry *NewSymbols = new JitSymbolEntry[NewSize];
-    JitSymbolEntry *OldSymbols = SymTabPtr->Symbols;
-    
-    // Copy the old entries over.
-    memcpy(NewSymbols, OldSymbols, SymTabPtr->NumSymbols*sizeof(OldSymbols[0]));
-    
-    // Swap the new symbols in, delete the old ones.
-    SymTabPtr->Symbols = NewSymbols;
-    SymTabPtr->NumAllocated = NewSize;
-    delete [] OldSymbols;
-  }
-  
-  // Otherwise, we have enough space, just tack it onto the end of the array.
-  JitSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols];
-  Entry.FnName = strdup(FnName);
-  Entry.FnStart = FnStart;
-  Entry.FnSize = FnSize;
-  ++SymTabPtr->NumSymbols;
-}
-
-static void RemoveFunctionFromSymbolTable(void *FnStart) {
-  assert(FnStart && "Invalid function pointer");
-  JitSymbolTable **SymTabPtrPtr = 0;
-#if !ENABLE_JIT_SYMBOL_TABLE
-  return;
-#else
-  SymTabPtrPtr = &__jitSymbolTable;
-#endif
-  
-  JitSymbolTable *SymTabPtr = *SymTabPtrPtr;
-  JitSymbolEntry *Symbols = SymTabPtr->Symbols;
-  
-  // Scan the table to find its index.  The table is not sorted, so do a linear
-  // scan.
-  unsigned Index;
-  for (Index = 0; Symbols[Index].FnStart != FnStart; ++Index)
-    assert(Index != SymTabPtr->NumSymbols && "Didn't find function!");
-  
-  // Once we have an index, we know to nuke this entry, overwrite it with the
-  // entry at the end of the array, making the last entry redundant.
-  const char *OldName = Symbols[Index].FnName;
-  Symbols[Index] = Symbols[SymTabPtr->NumSymbols-1];
-  free((void*)OldName);
-  
-  // Drop the number of symbols in the table.
-  --SymTabPtr->NumSymbols;
-
-  // Finally, if we deleted the final symbol, deallocate the table itself.
-  if (SymTabPtr->NumSymbols != 0) 
-    return;
-  
-  *SymTabPtrPtr = 0;
-  delete [] Symbols;
-  delete SymTabPtr;
-}
-
-//===----------------------------------------------------------------------===//
 // JITEmitter code.
 //
 namespace {
@@ -616,11 +487,8 @@ namespace {
     // in the JITResolver's ExternalFnToStubMap.
     StringMap<void *> ExtFnStubs;
 
-    // MCI - A pointer to a MachineCodeInfo object to update with information.
-    MachineCodeInfo *MCI;
-
   public:
-    JITEmitter(JIT &jit, JITMemoryManager *JMM) : Resolver(jit), CurFn(0), MCI(0) {
+    JITEmitter(JIT &jit, JITMemoryManager *JMM) : Resolver(jit), CurFn(0) {
       MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
       if (jit.getJITInfo().needsGOT()) {
         MemMgr->AllocateGOT();
@@ -716,10 +584,6 @@ namespace {
     
     JITMemoryManager *getMemMgr(void) const { return MemMgr; }
 
-    void setMachineCodeInfo(MachineCodeInfo *mci) {
-      MCI = mci;
-    }
-
   private:
     void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool NoNeedStub);
     void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
@@ -1157,21 +1021,16 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
 
   // Invalidate the icache if necessary.
   sys::Memory::InvalidateInstructionCache(FnStart, FnEnd-FnStart);
-  
-  // Add it to the JIT symbol table if the host wants it.
-  AddFunctionToSymbolTable(F.getFunction()->getNameStart(),
-                           FnStart, FnEnd-FnStart);
+
+  JITEvent_EmittedFunctionDetails Details;
+  TheJIT->NotifyFunctionEmitted(*F.getFunction(), FnStart, FnEnd-FnStart,
+                                Details);
 
   DOUT << "JIT: Finished CodeGen of [" << (void*)FnStart
        << "] Function: " << F.getFunction()->getName()
        << ": " << (FnEnd-FnStart) << " bytes of text, "
        << Relocations.size() << " relocations\n";
 
-  if (MCI) {
-    MCI->setAddress(FnStart);
-    MCI->setSize(FnEnd-FnStart);
-  }
-
   Relocations.clear();
   ConstPoolAddresses.clear();
 
@@ -1495,13 +1354,6 @@ void *JIT::getPointerToFunctionOrStub(Function *F) {
   return JE->getJITResolver().getFunctionStub(F);
 }
 
-void JIT::registerMachineCodeInfo(MachineCodeInfo *mc) {
-  assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
-  JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
-
-  JE->setMachineCodeInfo(mc);
-}
-
 void JIT::updateFunctionStub(Function *F) {
   // Get the empty stub we generated earlier.
   assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
@@ -1609,10 +1461,9 @@ void JIT::freeMachineCodeForFunction(Function *F) {
   void *OldPtr = updateGlobalMapping(F, 0);
 
   if (OldPtr)
-    RemoveFunctionFromSymbolTable(OldPtr);
+    TheJIT->NotifyFreeingMachineCode(*F, OldPtr);
 
   // Free the actual memory for the function body and related stuff.
   assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
   cast<JITEmitter>(JCE)->deallocateMemForFunction(F);
 }
-
diff --git a/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
new file mode 100644
index 0000000..3b8b84c
--- /dev/null
+++ b/lib/ExecutionEngine/JIT/MacOSJITEventListener.cpp
@@ -0,0 +1,173 @@
+//===-- MacOSJITEventListener.cpp - Save symbol table for OSX perf tools --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a JITEventListener object that records JITted functions to
+// a global __jitSymbolTable linked list.  Apple's performance tools use this to
+// determine a symbol name and accurate code range for a PC value.  Because
+// performance tools are generally asynchronous, the code below is written with
+// the hope that it could be interrupted at any time and have useful answers.
+// However, we don't go crazy with atomic operations, we just do a "reasonable
+// effort".
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "macos-jit-event-listener"
+#include "llvm/Function.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
+#include <stddef.h>
+using namespace llvm;
+
+#ifdef __APPLE__
+#define ENABLE_JIT_SYMBOL_TABLE 0
+#endif
+
+#if ENABLE_JIT_SYMBOL_TABLE
+
+namespace {
+
+/// JITSymbolEntry - Each function that is JIT compiled results in one of these
+/// being added to an array of symbols.  This indicates the name of the function
+/// as well as the address range it occupies.  This allows the client to map
+/// from a PC value to the name of the function.
+struct JITSymbolEntry {
+  const char *FnName;   // FnName - a strdup'd string.
+  void *FnStart;
+  intptr_t FnSize;
+};
+
+
+struct JITSymbolTable {
+  /// NextPtr - This forms a linked list of JitSymbolTable entries.  This
+  /// pointer is not used right now, but might be used in the future.  Consider
+  /// it reserved for future use.
+  JITSymbolTable *NextPtr;
+  
+  /// Symbols - This is an array of JitSymbolEntry entries.  Only the first
+  /// 'NumSymbols' symbols are valid.
+  JITSymbolEntry *Symbols;
+  
+  /// NumSymbols - This indicates the number entries in the Symbols array that
+  /// are valid.
+  unsigned NumSymbols;
+  
+  /// NumAllocated - This indicates the amount of space we have in the Symbols
+  /// array.  This is a private field that should not be read by external tools.
+  unsigned NumAllocated;
+};
+
+class MacOSJITEventListener : public JITEventListener {
+public:
+  virtual void NotifyFunctionEmitted(const Function &F,
+                                     void *FnStart, size_t FnSize,
+                                     const EmittedFunctionDetails &Details);
+  virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr);
+};
+
+}  // anonymous namespace.
+
+// This is a public symbol so the performance tools can find it.
+JITSymbolTable *__jitSymbolTable;
+
+namespace llvm {
+JITEventListener *createMacOSJITEventListener() {
+  return new MacOSJITEventListener;
+}
+}
+
+// Adds the just-emitted function to the symbol table.
+void MacOSJITEventListener::NotifyFunctionEmitted(
+    const Function &F, void *FnStart, size_t FnSize,
+    const EmittedFunctionDetails &) {
+  const char *const FnName = F.getNameStart();
+  assert(FnName != 0 && FnStart != 0 && "Bad symbol to add");
+  JITSymbolTable **SymTabPtrPtr = 0;
+  SymTabPtrPtr = &__jitSymbolTable;
+
+  // If this is the first entry in the symbol table, add the JITSymbolTable
+  // index.
+  if (*SymTabPtrPtr == 0) {
+    JITSymbolTable *New = new JITSymbolTable();
+    New->NextPtr = 0;
+    New->Symbols = 0;
+    New->NumSymbols = 0;
+    New->NumAllocated = 0;
+    *SymTabPtrPtr = New;
+  }
+
+  JITSymbolTable *SymTabPtr = *SymTabPtrPtr;
+
+  // If the table is full (or has no storage yet), reallocate it.
+  if (SymTabPtr->NumSymbols >= SymTabPtr->NumAllocated) {
+    // Double the capacity, starting from a minimum of 64 entries.
+    unsigned NewSize = std::max(64U, SymTabPtr->NumAllocated*2);
+    JITSymbolEntry *NewSymbols = new JITSymbolEntry[NewSize];
+    JITSymbolEntry *OldSymbols = SymTabPtr->Symbols;
+
+    // Copy the old entries over.  OldSymbols is null on the first allocation,
+    // which memcpy does not permit, so copy the (then empty) range instead.
+    std::copy(OldSymbols, OldSymbols + SymTabPtr->NumSymbols, NewSymbols);
+    // Swap the new symbols in, delete the old ones.
+    SymTabPtr->Symbols = NewSymbols;
+    SymTabPtr->NumAllocated = NewSize;
+    delete [] OldSymbols;
+  }
+
+  // There is now room for one more entry; tack it onto the end of the array.
+  JITSymbolEntry &Entry = SymTabPtr->Symbols[SymTabPtr->NumSymbols];
+  Entry.FnName = strdup(FnName);
+  Entry.FnStart = FnStart;
+  Entry.FnSize = FnSize;
+  ++SymTabPtr->NumSymbols;
+}
+
+// Removes the to-be-deleted function from the symbol table.  A function that
+// was never recorded (or an empty table) is ignored rather than scanned past.
+void MacOSJITEventListener::NotifyFreeingMachineCode(
+    const Function &, void *FnStart) {
+  assert(FnStart && "Invalid function pointer");
+  JITSymbolTable *SymTabPtr = __jitSymbolTable;
+  if (SymTabPtr == 0)
+    return;
+  JITSymbolEntry *Symbols = SymTabPtr->Symbols;
+
+  // Scan the table to find its index.  The table is not sorted, so do a linear
+  // scan, bounded by the number of valid entries.
+  unsigned Index = 0;
+  while (Index != SymTabPtr->NumSymbols && Symbols[Index].FnStart != FnStart)
+    ++Index;
+  assert(Index != SymTabPtr->NumSymbols && "Didn't find function!");
+  if (Index == SymTabPtr->NumSymbols)
+    return;
+
+  // Overwrite this entry with the one at the end of the array, making the
+  // last entry redundant, then release the name that was strdup'd for it.
+  const char *OldName = Symbols[Index].FnName;
+  Symbols[Index] = Symbols[SymTabPtr->NumSymbols-1];
+  free((void*)OldName);
+
+  // Drop the entry; if it was the last one, deallocate the table itself.
+  if (--SymTabPtr->NumSymbols != 0)
+    return;
+
+  __jitSymbolTable = 0;
+  delete [] Symbols;
+  delete SymTabPtr;
+}
+
+#else  // !ENABLE_JIT_SYMBOL_TABLE
+
+namespace llvm {
+// By defining this to return NULL, we can let clients call it unconditionally,
+// even if they aren't on an Apple system.
+JITEventListener *createMacOSJITEventListener() {
+  return NULL;
+}
+}  // namespace llvm
+
+#endif  // ENABLE_JIT_SYMBOL_TABLE
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
new file mode 100644
index 0000000..6307ffe
--- /dev/null
+++ b/lib/MC/CMakeLists.txt
@@ -0,0 +1,5 @@
+add_llvm_library(LLVMMC
+  MCAsmStreamer.cpp
+  MCContext.cpp
+  MCStreamer.cpp
+  )
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
new file mode 100644
index 0000000..e38f2b3
--- /dev/null
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -0,0 +1,206 @@
+//===- lib/MC/MCAsmStreamer.cpp - Text Assembly Output --------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+  /// MCAsmStreamer - MCStreamer that writes its events to OS as assembly text.
+  class MCAsmStreamer : public MCStreamer {
+    raw_ostream &OS;
+    /// CurSection - The section last passed to SwitchSection, or null if none.
+    MCSection *CurSection;
+
+  public:
+    MCAsmStreamer(MCContext &Context, raw_ostream &_OS)
+      : MCStreamer(Context), OS(_OS), CurSection(0) {}
+    ~MCAsmStreamer() {}
+
+    /// @name MCStreamer Interface
+    /// @{
+
+    virtual void SwitchSection(MCSection *Section);
+
+    virtual void EmitLabel(MCSymbol *Symbol);
+
+    virtual void EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
+                                bool MakeAbsolute = false);
+
+    virtual void EmitSymbolAttribute(MCSymbol *Symbol, SymbolAttr Attribute);
+
+    virtual void EmitBytes(const char *Data, unsigned Length);
+
+    virtual void EmitValue(const MCValue &Value, unsigned Size);
+
+    virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
+                                      unsigned ValueSize = 1,
+                                      unsigned MaxBytesToEmit = 0);
+
+    virtual void EmitValueToOffset(const MCValue &Offset, 
+                                   unsigned char Value = 0);
+    
+    virtual void EmitInstruction(const MCInst &Inst);
+
+    virtual void Finish();
+    
+    /// @}
+  };
+
+}
+
+/// Allow printing values directly to a raw_ostream.
+static inline raw_ostream &operator<<(raw_ostream &os, const MCValue &Value) {
+  if (Value.getSymA()) {
+    os << Value.getSymA()->getName();
+    if (Value.getSymB())
+      os << " - " << Value.getSymB()->getName();
+    if (Value.getCst())
+      os << " + " << Value.getCst();
+  } else {
+    assert(!Value.getSymB() && "Invalid machine code value!");
+    os << Value.getCst();
+  }
+
+  return os;
+}
+
+static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
+  assert(Bytes && "Invalid size!");
+  return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
+}
+
+static inline MCValue truncateToSize(const MCValue &Value, unsigned Bytes) {
+  return MCValue::get(Value.getSymA(), Value.getSymB(), 
+                      truncateToSize(Value.getCst(), Bytes));
+}
+
+void MCAsmStreamer::SwitchSection(MCSection *Section) {
+  if (Section != CurSection) {
+    CurSection = Section;
+
+    // FIXME: Really we would like the segment, flags, etc. to be separate
+    // values instead of embedded in the name. Not all assemblers understand all
+    // this stuff though.
+    OS << ".section " << Section->getName() << "\n";
+  }
+}
+
+void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
+  assert(Symbol->getSection() == 0 && "Cannot emit a symbol twice!");
+  assert(CurSection && "Cannot emit before setting section!");
+  assert(!getContext().GetSymbolValue(Symbol) && 
+         "Cannot emit symbol which was directly assigned to!");
+
+  OS << Symbol->getName() << ":\n";
+  Symbol->setSection(CurSection);
+}
+
+void MCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCValue &Value,
+                                   bool MakeAbsolute) {
+  assert(!Symbol->getSection() && "Cannot assign to a label!");
+
+  if (MakeAbsolute) {
+    OS << ".set " << Symbol->getName() << ", " << Value << '\n';
+  } else {
+    OS << Symbol->getName() << " = " << Value << '\n';
+  }
+
+  getContext().SetSymbolValue(Symbol, Value);
+}
+
+void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol, 
+                                        SymbolAttr Attribute) {
+  switch (Attribute) {
+  case Global: OS << ".globl"; break;
+  case Hidden: OS << ".hidden"; break;
+  case IndirectSymbol: OS << ".indirect_symbol"; break;
+  case Internal: OS << ".internal"; break;
+  case LazyReference: OS << ".lazy_reference"; break;
+  case NoDeadStrip: OS << ".no_dead_strip"; break;
+  case PrivateExtern: OS << ".private_extern"; break;
+  case Protected: OS << ".protected"; break;
+  case Reference: OS << ".reference"; break;
+  case Weak: OS << ".weak"; break;
+  case WeakDefinition: OS << ".weak_definition"; break;
+  case WeakReference: OS << ".weak_reference"; break;
+  }
+
+  OS << ' ' << Symbol->getName() << '\n';
+}
+
+void MCAsmStreamer::EmitBytes(const char *Data, unsigned Length) {
+  assert(CurSection && "Cannot emit contents before setting section!");
+  for (unsigned i = 0; i != Length; ++i)
+    OS << ".byte " << (unsigned)(unsigned char) Data[i] << '\n';  // zero-extend
+}
+
+void MCAsmStreamer::EmitValue(const MCValue &Value, unsigned Size) {
+  assert(CurSection && "Cannot emit contents before setting section!");
+  // Need target hooks to know how to print this.
+  switch (Size) {
+  default:
+    assert(0 && "Invalid size for machine code value!");
+  case 1: OS << ".byte"; break;
+  case 2: OS << ".short"; break;
+  case 4: OS << ".long"; break;
+  case 8: OS << ".quad"; break;
+  }
+
+  OS << ' ' << truncateToSize(Value, Size) << '\n';
+}
+
+void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
+                                         unsigned ValueSize,
+                                         unsigned MaxBytesToEmit) {
+  unsigned Pow2 = Log2_32(ByteAlignment);
+  assert((1U << Pow2) == ByteAlignment && "Invalid alignment!");
+
+  switch (ValueSize) {
+  default:
+    assert(0 && "Invalid size for machine code value!");
+  case 8:
+    assert(0 && "Unsupported alignment size!");
+  case 1: OS << ".p2align"; break;
+  case 2: OS << ".p2alignw"; break;
+  case 4: OS << ".p2alignl"; break;
+  }
+
+  OS << ' ' << Pow2;
+
+  OS << ", " << truncateToSize(Value, ValueSize);
+  if (MaxBytesToEmit) 
+    OS << ", " << MaxBytesToEmit;
+  OS << '\n';
+}
+
+void MCAsmStreamer::EmitValueToOffset(const MCValue &Offset, 
+                                      unsigned char Value) {
+  // FIXME: Verify that Offset is associated with the current section.
+  OS << ".org " << Offset << ", " << (unsigned) Value << '\n';
+}
+
+void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
+  assert(CurSection && "Cannot emit contents before setting section!");
+  // FIXME: Implement.
+  OS << "# FIXME: Implement instruction printing!\n";
+}
+
+void MCAsmStreamer::Finish() {
+  OS.flush();
+}
+    
+MCStreamer *llvm::createAsmStreamer(MCContext &Context, raw_ostream &OS) {
+  return new MCAsmStreamer(Context, OS);
+}
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
new file mode 100644
index 0000000..6c6019c
--- /dev/null
+++ b/lib/MC/MCContext.cpp
@@ -0,0 +1,80 @@
+//===- lib/MC/MCContext.cpp - Machine Code Context ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCContext.h"
+
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCValue.h"
+using namespace llvm;
+
+MCContext::MCContext()
+{
+}
+
+MCContext::~MCContext() {
+}
+
+MCSection *MCContext::GetSection(const char *Name) {
+  MCSection *&Entry = Sections[Name];
+  
+  if (!Entry)
+    Entry = new (*this) MCSection(Name);
+
+  return Entry;
+}
+
+MCSymbol *MCContext::CreateSymbol(const char *Name) {
+  assert(Name[0] != '\0' && "Normal symbols cannot be unnamed!");
+
+  // Create and bind the symbol, and ensure that names are unique.
+  MCSymbol *&Entry = Symbols[Name];
+  assert(!Entry && "Duplicate symbol definition!");
+  return Entry = new (*this) MCSymbol(Name, false);
+}
+
+MCSymbol *MCContext::GetOrCreateSymbol(const char *Name) {
+  MCSymbol *&Entry = Symbols[Name];
+  if (Entry) return Entry;
+
+  return Entry = new (*this) MCSymbol(Name, false);
+}
+
+
+MCSymbol *MCContext::CreateTemporarySymbol(const char *Name) {
+  // If unnamed, just create a symbol; it is not entered into the symbol table.
+  if (Name[0] == '\0')
+    return new (*this) MCSymbol("", true);
+
+  // Otherwise create as usual.
+  MCSymbol *&Entry = Symbols[Name];
+  assert(!Entry && "Duplicate symbol definition!");
+  return Entry = new (*this) MCSymbol(Name, true);
+}
+
+MCSymbol *MCContext::LookupSymbol(const char *Name) const {
+  return Symbols.lookup(Name);
+}
+
+void MCContext::ClearSymbolValue(MCSymbol *Sym) {
+  SymbolValues.erase(Sym);
+}
+
+void MCContext::SetSymbolValue(MCSymbol *Sym, const MCValue &Value) {
+  SymbolValues[Sym] = Value;
+}
+
+const MCValue *MCContext::GetSymbolValue(MCSymbol *Sym) const {
+  DenseMap<MCSymbol*, MCValue>::iterator it = SymbolValues.find(Sym);
+
+  if (it == SymbolValues.end())
+    return 0;
+
+  return &it->second;
+}
diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp
new file mode 100644
index 0000000..a634f33
--- /dev/null
+++ b/lib/MC/MCStreamer.cpp
@@ -0,0 +1,18 @@
+//===- lib/MC/MCStreamer.cpp - Streaming Machine Code Output --------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCStreamer.h"
+
+using namespace llvm;
+
+MCStreamer::MCStreamer(MCContext &_Context) : Context(_Context) {
+}
+
+MCStreamer::~MCStreamer() {
+}
diff --git a/lib/MC/Makefile b/lib/MC/Makefile
new file mode 100644
index 0000000..314a5b1
--- /dev/null
+++ b/lib/MC/Makefile
@@ -0,0 +1,15 @@
+##===- lib/MC/Makefile -------------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+LIBRARYNAME = LLVMMC
+BUILD_ARCHIVE := 1
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Makefile b/lib/Makefile
index 8dd67d9..7199da5 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@
 LEVEL = ..
 
 PARALLEL_DIRS = VMCore AsmParser Bitcode Archive Analysis Transforms CodeGen \
-                Target ExecutionEngine Debugger Linker CompilerDriver
+                Target ExecutionEngine Debugger Linker CompilerDriver MC
 
 include $(LEVEL)/Makefile.common
 
diff --git a/lib/Support/Annotation.cpp b/lib/Support/Annotation.cpp
index 9c3efa3..b778043 100644
--- a/lib/Support/Annotation.cpp
+++ b/lib/Support/Annotation.cpp
@@ -68,9 +68,12 @@ AnnotationID AnnotationManager::getID(const char *Name) {  // Name -> ID
   if (I == E) {
     sys::SmartScopedWriter<true> Writer(&*AnnotationsLock);
     I = IDMap->find(Name);
-    if (I == IDMap->end())
-      (*IDMap)[Name] = IDCounter++;   // Add a new element
-    return AnnotationID(IDCounter-1);
+    if (I == IDMap->end()) {
+      unsigned newCount = sys::AtomicIncrement(&IDCounter);
+      (*IDMap)[Name] = newCount-1;   // Add a new element
+      return AnnotationID(newCount-1);
+    } else
+      return AnnotationID(I->second);
   }
   return AnnotationID(I->second);
 }
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index e7a76cc..f26c2c0 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -30,3 +30,5 @@ add_llvm_library(LLVMSupport
   Triple.cpp
   raw_ostream.cpp
   )
+
+target_link_libraries (LLVMSupport LLVMSystem)
diff --git a/lib/Support/PrettyStackTrace.cpp b/lib/Support/PrettyStackTrace.cpp
index c111c5e..14290a1 100644
--- a/lib/Support/PrettyStackTrace.cpp
+++ b/lib/Support/PrettyStackTrace.cpp
@@ -15,11 +15,12 @@
 #include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/System/Signals.h"
+#include "llvm/System/ThreadLocal.h"
 #include "llvm/ADT/SmallString.h"
 using namespace llvm;
 
-// FIXME: This should be thread local when llvm supports threads.
+// Head of the calling thread's list of active PrettyStackTraceEntry objects.
-static const PrettyStackTraceEntry *PrettyStackTraceHead = 0;
+static sys::ThreadLocal<const PrettyStackTraceEntry> PrettyStackTraceHead;
 
 static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
   unsigned NextID = 0;
@@ -34,12 +35,12 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){
 /// PrintCurStackTrace - Print the current stack trace to the specified stream.
 static void PrintCurStackTrace(raw_ostream &OS) {
   // Don't print an empty trace.
-  if (PrettyStackTraceHead == 0) return;
+  if (PrettyStackTraceHead.get() == 0) return;
   
   // If there are pretty stack frames registered, walk and emit them.
   OS << "Stack dump:\n";
   
-  PrintStack(PrettyStackTraceHead, OS);
+  PrintStack(PrettyStackTraceHead.get(), OS);
   OS.flush();
 }
 
@@ -84,14 +85,14 @@ PrettyStackTraceEntry::PrettyStackTraceEntry() {
   HandlerRegistered = HandlerRegistered;
     
   // Link ourselves.
-  NextEntry = PrettyStackTraceHead;
-  PrettyStackTraceHead = this;
+  NextEntry = PrettyStackTraceHead.get();
+  PrettyStackTraceHead.set(this);
 }
 
 PrettyStackTraceEntry::~PrettyStackTraceEntry() {
-  assert(PrettyStackTraceHead == this &&
+  assert(PrettyStackTraceHead.get() == this &&
          "Pretty stack trace entry destruction is out of order");
-  PrettyStackTraceHead = getNextEntry();
+  PrettyStackTraceHead.set(getNextEntry());
 }
 
 void PrettyStackTraceString::print(raw_ostream &OS) const {
diff --git a/lib/Support/Statistic.cpp b/lib/Support/Statistic.cpp
index 6c652f8..33570b0 100644
--- a/lib/Support/Statistic.cpp
+++ b/lib/Support/Statistic.cpp
@@ -66,10 +66,14 @@ void Statistic::RegisterStatistic() {
   // If stats are enabled, inform StatInfo that this statistic should be
   // printed.
   sys::ScopedLock Writer(&*StatLock);
-  if (Enabled)
-    StatInfo->addStatistic(this);
-  // Remember we have been registered.
-  Initialized = true;
+  if (!Initialized) {
+    if (Enabled)
+      StatInfo->addStatistic(this);
+    
+    sys::MemoryFence();
+    // Remember we have been registered.
+    Initialized = true;
+  }
 }
 
 namespace {
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index 69f967c..ede1dc9 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -38,6 +38,8 @@ static std::string &getLibSupportInfoOutputFilename() {
   return *LibSupportInfoOutputFilename;
 }
 
+static ManagedStatic<sys::SmartMutex<true> > TimerLock;
+
 namespace {
   static cl::opt<bool>
   TrackSpace("track-memory", cl::desc("Enable -time-passes memory "
@@ -112,7 +114,8 @@ static inline size_t getMemUsage() {
 }
 
 struct TimeRecord {
-  int64_t Elapsed, UserTime, SystemTime, MemUsed;
+  double Elapsed, UserTime, SystemTime;
+  ssize_t MemUsed;
 };
 
 static TimeRecord getTimeRecord(bool Start) {
@@ -122,7 +125,7 @@ static TimeRecord getTimeRecord(bool Start) {
   sys::TimeValue user(0,0);
   sys::TimeValue sys(0,0);
 
-  int64_t MemUsed = 0;
+  ssize_t MemUsed = 0;
   if (Start) {
     MemUsed = getMemUsage();
     sys::Process::GetTimeUsage(now,user,sys);
@@ -131,9 +134,9 @@ static TimeRecord getTimeRecord(bool Start) {
     MemUsed = getMemUsage();
   }
 
-  Result.Elapsed  = now.seconds() * 1000000 + now.microseconds();
-  Result.UserTime = user.seconds() * 1000000 + user.microseconds();
-  Result.SystemTime  = sys.seconds() * 1000000 + sys.microseconds();
+  Result.Elapsed  = now.seconds()  + now.microseconds()  / 1000000.0;
+  Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
+  Result.SystemTime  = sys.seconds()  + sys.microseconds()  / 1000000.0;
   Result.MemUsed  = MemUsed;
 
   return Result;
@@ -142,6 +145,7 @@ static TimeRecord getTimeRecord(bool Start) {
 static ManagedStatic<std::vector<Timer*> > ActiveTimers;
 
 void Timer::startTimer() {
+  sys::SmartScopedLock<true> L(&Lock);
   Started = true;
   ActiveTimers->push_back(this);
   TimeRecord TR = getTimeRecord(true);
@@ -153,6 +157,7 @@ void Timer::startTimer() {
 }
 
 void Timer::stopTimer() {
+  sys::SmartScopedLock<true> L(&Lock);
   TimeRecord TR = getTimeRecord(false);
   Elapsed    += TR.Elapsed;
   UserTime   += TR.UserTime;
@@ -170,11 +175,27 @@ void Timer::stopTimer() {
 }
 
 void Timer::sum(const Timer &T) {
+  if (&T < this) {
+    T.Lock.acquire();
+    Lock.acquire();
+  } else {
+    Lock.acquire();
+    T.Lock.acquire();
+  }
+  
   Elapsed    += T.Elapsed;
   UserTime   += T.UserTime;
   SystemTime += T.SystemTime;
   MemUsed    += T.MemUsed;
   PeakMem    += T.PeakMem;
+  
+  if (&T < this) {
+    T.Lock.release();
+    Lock.release();
+  } else {
+    Lock.release();
+    T.Lock.release();
+  }
 }
 
 /// addPeakMemoryMeasurement - This method should be called whenever memory
@@ -185,8 +206,11 @@ void Timer::addPeakMemoryMeasurement() {
   size_t MemUsed = getMemUsage();
 
   for (std::vector<Timer*>::iterator I = ActiveTimers->begin(),
-         E = ActiveTimers->end(); I != E; ++I)
+         E = ActiveTimers->end(); I != E; ++I) {
+    (*I)->Lock.acquire();
     (*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase);
+    (*I)->Lock.release();
+  }
 }
 
 //===----------------------------------------------------------------------===//
@@ -205,6 +229,7 @@ static ManagedStatic<Name2Timer> NamedTimers;
 static ManagedStatic<Name2Pair> NamedGroupedTimers;
 
 static Timer &getNamedRegionTimer(const std::string &Name) {
+  sys::SmartScopedLock<true> L(&*TimerLock);
   Name2Timer::iterator I = NamedTimers->find(Name);
   if (I != NamedTimers->end())
     return I->second;
@@ -214,6 +239,7 @@ static Timer &getNamedRegionTimer(const std::string &Name) {
 
 static Timer &getNamedRegionTimer(const std::string &Name,
                                   const std::string &GroupName) {
+  sys::SmartScopedLock<true> L(&*TimerLock);
 
   Name2Pair::iterator I = NamedGroupedTimers->find(GroupName);
   if (I == NamedGroupedTimers->end()) {
@@ -275,14 +301,21 @@ static void printVal(double Val, double Total, std::ostream &OS) {
 }
 
 void Timer::print(const Timer &Total, std::ostream &OS) {
+  if (&Total < this) {
+    Total.Lock.acquire();
+    Lock.acquire();
+  } else {
+    Lock.acquire();
+    Total.Lock.acquire();
+  }
+  
   if (Total.UserTime)
-    printVal(UserTime / 1000000.0, Total.UserTime / 1000000.0, OS);
+    printVal(UserTime, Total.UserTime, OS);
   if (Total.SystemTime)
-    printVal(SystemTime / 1000000.0, Total.SystemTime / 1000000.0, OS);
+    printVal(SystemTime, Total.SystemTime, OS);
   if (Total.getProcessTime())
-    printVal(getProcessTime() / 1000000.0,
-             Total.getProcessTime() / 1000000.0, OS);
-  printVal(Elapsed / 1000000.0, Total.Elapsed / 1000000.0, OS);
+    printVal(getProcessTime(), Total.getProcessTime(), OS);
+  printVal(Elapsed, Total.Elapsed, OS);
 
   OS << "  ";
 
@@ -300,6 +333,14 @@ void Timer::print(const Timer &Total, std::ostream &OS) {
   OS << Name << "\n";
 
   Started = false;  // Once printed, don't print again
+  
+  if (&Total < this) {
+    Total.Lock.release();
+    Lock.release();
+  } else {
+    Lock.release();
+    Total.Lock.release();
+  }
 }
 
 // GetLibSupportInfoOutputFile - Return a file stream to print our output on...
@@ -324,6 +365,7 @@ llvm::GetLibSupportInfoOutputFile() {
 
 
 void TimerGroup::removeTimer() {
+  sys::SmartScopedLock<true> L(&*TimerLock);
   if (--NumTimers == 0 && !TimersToPrint.empty()) { // Print timing report...
     // Sort the timers in descending order by amount of time taken...
     std::sort(TimersToPrint.begin(), TimersToPrint.end(),
@@ -355,23 +397,23 @@ void TimerGroup::removeTimer() {
       if (this != DefaultTimerGroup) {
         *OutStream << "  Total Execution Time: ";
 
-        printAlignedFP(Total.getProcessTime() / 1000000.0, 4, 5, *OutStream);
+        printAlignedFP(Total.getProcessTime(), 4, 5, *OutStream);
         *OutStream << " seconds (";
-        printAlignedFP(Total.getWallTime() / 1000000.0, 4, 5, *OutStream);
+        printAlignedFP(Total.getWallTime(), 4, 5, *OutStream);
         *OutStream << " wall clock)\n";
       }
       *OutStream << "\n";
 
-      if (Total.UserTime / 1000000.0)
+      if (Total.UserTime)
         *OutStream << "   ---User Time---";
-      if (Total.SystemTime / 1000000.0)
+      if (Total.SystemTime)
         *OutStream << "   --System Time--";
-      if (Total.getProcessTime() / 1000000.0)
+      if (Total.getProcessTime())
         *OutStream << "   --User+System--";
       *OutStream << "   ---Wall Time---";
-      if (Total.getMemUsed() / 1000000.0)
+      if (Total.getMemUsed())
         *OutStream << "  ---Mem---";
-      if (Total.getPeakMem() / 1000000.0)
+      if (Total.getPeakMem())
         *OutStream << "  -PeakMem-";
       *OutStream << "  --- Name ---\n";
 
@@ -391,3 +433,13 @@ void TimerGroup::removeTimer() {
   }
 }
 
+void TimerGroup::addTimer() {
+  sys::SmartScopedLock<true> L(&*TimerLock);
+  ++NumTimers;
+}
+
+void TimerGroup::addTimerToPrint(const Timer &T) {
+  sys::SmartScopedLock<true> L(&*TimerLock);
+  TimersToPrint.push_back(Timer(true, T));
+}
+
diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp
index fda2708..f9b55a1 100644
--- a/lib/System/Atomic.cpp
+++ b/lib/System/Atomic.cpp
@@ -35,11 +35,11 @@ void sys::MemoryFence() {
 #endif
 }
 
-uint32_t sys::CompareAndSwap32(volatile uint32_t* ptr,
-                               uint32_t new_value,
-                               uint32_t old_value) {
+sys::cas_flag sys::CompareAndSwap(volatile sys::cas_flag* ptr,
+                                  sys::cas_flag new_value,
+                                  sys::cas_flag old_value) {
 #if LLVM_MULTITHREADED==0
-  uint32_t result = *ptr;
+  sys::cas_flag result = *ptr;
   if (result == old_value)
     *ptr = new_value;
   return result;
@@ -52,7 +52,7 @@ uint32_t sys::CompareAndSwap32(volatile uint32_t* ptr,
 #endif
 }
 
-int32_t sys::AtomicIncrement32(volatile int32_t* ptr) {
+sys::cas_flag sys::AtomicIncrement(volatile sys::cas_flag* ptr) {
 #if LLVM_MULTITHREADED==0
   ++(*ptr);
   return *ptr;
@@ -65,7 +65,7 @@ int32_t sys::AtomicIncrement32(volatile int32_t* ptr) {
 #endif
 }
 
-int32_t sys::AtomicDecrement32(volatile int32_t* ptr) {
+sys::cas_flag sys::AtomicDecrement(volatile sys::cas_flag* ptr) {
 #if LLVM_MULTITHREADED==0
   --(*ptr);
   return *ptr;
@@ -78,7 +78,7 @@ int32_t sys::AtomicDecrement32(volatile int32_t* ptr) {
 #endif
 }
 
-int32_t sys::AtomicAdd32(volatile int32_t* ptr, int32_t val) {
+sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
 #if LLVM_MULTITHREADED==0
   *ptr += val;
   return *ptr;
@@ -91,16 +91,22 @@ int32_t sys::AtomicAdd32(volatile int32_t* ptr, int32_t val) {
 #endif
 }
 
-int64_t sys::AtomicAdd64(volatile int64_t* ptr, int64_t val) {
-#if LLVM_MULTITHREADED==0
-  *ptr += val;
-  return *ptr;
-#elif defined(__GNUC__)
-  return __sync_add_and_fetch(ptr, val);
-#elif defined(_MSC_VER)
-  return InterlockedAdd64(ptr, val);
-#else
-#  error No atomic add implementation for your platform!
-#endif
+sys::cas_flag sys::AtomicMul(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+  sys::cas_flag original, result;
+  do {
+    original = *ptr;
+    result = original * val;
+  } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+  return result;
 }
 
+sys::cas_flag sys::AtomicDiv(volatile sys::cas_flag* ptr, sys::cas_flag val) {
+  sys::cas_flag original, result;
+  do {
+    original = *ptr;
+    result = original / val;
+  } while (sys::CompareAndSwap(ptr, result, original) != original);
+
+  return result;
+}
diff --git a/lib/System/CMakeLists.txt b/lib/System/CMakeLists.txt
index a5a56e8..431629a 100644
--- a/lib/System/CMakeLists.txt
+++ b/lib/System/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_library(LLVMSystem
   Signals.cpp
+  ThreadLocal.cpp
   Threading.cpp
   TimeValue.cpp
   )
 
 if( BUILD_SHARED_LIBS AND NOT WIN32 )
diff --git a/lib/System/DynamicLibrary.cpp b/lib/System/DynamicLibrary.cpp
index 3bf172c..ef5c9e6 100644
--- a/lib/System/DynamicLibrary.cpp
+++ b/lib/System/DynamicLibrary.cpp
@@ -12,20 +12,22 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/System/DynamicLibrary.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/System/RWMutex.h"
 #include "llvm/Config/config.h"
 #include <cstdio>
 #include <cstring>
 #include <map>
 
 // Collection of symbol name/value pairs to be searched prior to any libraries.
-std::map<std::string, void *> &g_symbols() {
-  static std::map<std::string, void *> symbols;
-  return symbols;
-}
+static std::map<std::string, void*> symbols;
+static llvm::sys::SmartRWMutex<true> SymbolsLock;
+
 
 void llvm::sys::DynamicLibrary::AddSymbol(const char* symbolName,
                                           void *symbolValue) {
-  g_symbols()[symbolName] = symbolValue;
+  llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
+  symbols[symbolName] = symbolValue;
 }
 
 // It is not possible to use ltdl.c on VC++ builds as the terms of its LGPL
@@ -57,6 +59,7 @@ static std::vector<void *> OpenedHandles;
 DynamicLibrary::DynamicLibrary() {}
 
 DynamicLibrary::~DynamicLibrary() {
+  SmartScopedWriter<true> Writer(&SymbolsLock);
   while(!OpenedHandles.empty()) {
     void *H = OpenedHandles.back();   OpenedHandles.pop_back(); 
     dlclose(H);
@@ -65,6 +68,7 @@ DynamicLibrary::~DynamicLibrary() {
 
 bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
                                             std::string *ErrMsg) {
+  SmartScopedWriter<true> Writer(&SymbolsLock);
   void *H = dlopen(Filename, RTLD_LAZY|RTLD_GLOBAL);
   if (H == 0) {
     if (ErrMsg)
@@ -77,20 +81,30 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *Filename,
 
 void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
   //  check_ltdl_initialization();
 
   // First check symbols added via AddSymbol().
-  std::map<std::string, void *>::iterator I = g_symbols().find(symbolName);
-  if (I != g_symbols().end())
-    return I->second;
+  SymbolsLock.reader_acquire();
+  std::map<std::string, void *>::iterator I = symbols.find(symbolName);
+  bool Found = I != symbols.end();
+  // Copy the value out while the lock is still held: once it is released a
+  // concurrent AddSymbol() may modify the map this iterator points into.
+  void *Explicit = Found ? I->second : 0;
+  SymbolsLock.reader_release();
+  if (Found)
+    return Explicit;
 
+  SymbolsLock.writer_acquire();
   // Now search the libraries.
   for (std::vector<void *>::iterator I = OpenedHandles.begin(),
        E = OpenedHandles.end(); I != E; ++I) {
     //lt_ptr ptr = lt_dlsym(*I, symbolName);
     void *ptr = dlsym(*I, symbolName);
-    if (ptr)
+    if (ptr) {
+      SymbolsLock.writer_release();
       return ptr;
+    }
   }
+  SymbolsLock.writer_release();
 
 #define EXPLICIT_SYMBOL(SYM) \
    extern void *SYM; if (!strcmp(symbolName, #SYM)) return &SYM
diff --git a/lib/System/ThreadLocal.cpp b/lib/System/ThreadLocal.cpp
new file mode 100644
index 0000000..8884e79
--- /dev/null
+++ b/lib/System/ThreadLocal.cpp
@@ -0,0 +1,86 @@
+//===- ThreadLocal.cpp - Thread Local Data ----------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the llvm::sys::ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Config/config.h"
+#include "llvm/System/ThreadLocal.h"
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only TRULY operating system
+//===          independent code.
+//===----------------------------------------------------------------------===//
+
+#if !defined(ENABLE_THREADS) || ENABLE_THREADS == 0
+// With threading disabled there is only one thread, so a single plain pointer
+// stored in 'data' serves as the "thread local" slot.
+namespace llvm {
+using namespace sys;
+// Start with an empty slot so getInstance() returns null until the first set.
+ThreadLocalImpl::ThreadLocalImpl() : data(0) { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+}
+#else
+
+#if defined(HAVE_PTHREAD_H) && defined(HAVE_PTHREAD_GETSPECIFIC)
+
+#include <cassert>
+#include <pthread.h>
+#include <stdlib.h>
+
+namespace llvm {
+using namespace sys;
+
+// 'data' holds a heap-allocated pthread_key_t identifying the per-thread slot.
+ThreadLocalImpl::ThreadLocalImpl() : data(0) {
+  pthread_key_t* key = new pthread_key_t;
+  int errorcode = pthread_key_create(key, NULL);
+  assert(errorcode == 0);
+  (void) errorcode;
+  data = key;
+}
+
+// Release the pthread key and the storage that held it.
+ThreadLocalImpl::~ThreadLocalImpl() {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  int errorcode = pthread_key_delete(*key);
+  assert(errorcode == 0);
+  (void) errorcode;
+  delete key;
+}
+
+// Bind 'd' to the calling thread's slot.
+void ThreadLocalImpl::setInstance(const void* d) {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  int errorcode = pthread_setspecific(*key, d);
+  assert(errorcode == 0);
+  (void) errorcode;
+}
+
+// Return the calling thread's value, or null if it never set one.
+const void* ThreadLocalImpl::getInstance() {
+  pthread_key_t* key = static_cast<pthread_key_t*>(data);
+  return pthread_getspecific(*key);
+}
+
+}
+
+#elif defined(LLVM_ON_UNIX)
+#include "Unix/ThreadLocal.inc"
+#elif defined( LLVM_ON_WIN32)
+#include "Win32/ThreadLocal.inc"
+#else
+#warning Neither LLVM_ON_UNIX nor LLVM_ON_WIN32 was set in System/ThreadLocal.cpp
+#endif
+#endif
+
diff --git a/lib/System/Unix/ThreadLocal.inc b/lib/System/Unix/ThreadLocal.inc
new file mode 100644
index 0000000..83d554d3
--- /dev/null
+++ b/lib/System/Unix/ThreadLocal.inc
@@ -0,0 +1,28 @@
+//=== llvm/System/Unix/ThreadLocal.inc - Unix Thread Local Data -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Unix specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic UNIX code that
+//===          is guaranteed to work on *all* UNIX variants.
+//===----------------------------------------------------------------------===//
+
+// Fallback without pthread thread-specific data: the value lives in the
+// object's single 'data' pointer rather than in a per-thread slot, so every
+// thread using the same object sees the same value.
+namespace llvm {
+using namespace sys;
+ThreadLocalImpl::ThreadLocalImpl() : data(0) { }
+ThreadLocalImpl::~ThreadLocalImpl() { }
+void ThreadLocalImpl::setInstance(const void* d) { data = const_cast<void*>(d);}
+const void* ThreadLocalImpl::getInstance() { return data; }
+}
diff --git a/lib/System/Win32/DynamicLibrary.inc b/lib/System/Win32/DynamicLibrary.inc
index 1ddf6ce..aa04268 100644
--- a/lib/System/Win32/DynamicLibrary.inc
+++ b/lib/System/Win32/DynamicLibrary.inc
@@ -67,6 +67,7 @@ extern "C" {
                                     PVOID UserContext)
 #endif
   {
+    llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
     // Ignore VC++ runtimes prior to 7.1.  Somehow some of them get loaded
     // into the process.
     if (stricmp(ModuleName, "msvci70") != 0 &&
@@ -89,11 +90,13 @@ extern "C" {
 }
 
 DynamicLibrary::DynamicLibrary() : handle(0) {
+  SmartScopedWriter<true> Writer(&SymbolsLock);
   handle = GetModuleHandle(NULL);
   OpenedHandles.push_back((HMODULE)handle);
 }
 
 DynamicLibrary::~DynamicLibrary() {
+  llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
   if (handle == 0)
     return;
 
@@ -113,8 +116,9 @@ DynamicLibrary::~DynamicLibrary() {
 }
  
 bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
                                             std::string *ErrMsg) {
   if (filename) {
+    llvm::sys::SmartScopedWriter<true> Writer(&SymbolsLock);
     HMODULE a_handle = LoadLibrary(filename);
 
     if (a_handle == 0)
@@ -166,17 +170,27 @@ bool DynamicLibrary::LoadLibraryPermanently(const char *filename,
 
 void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) {
   // First check symbols added via AddSymbol().
-  std::map<std::string, void *>::iterator I = g_symbols().find(symbolName);
-  if (I != g_symbols().end())
-    return I->second;
+  SymbolsLock.reader_acquire();
+  std::map<std::string, void *>::iterator I = symbols.find(symbolName);
+  bool Found = I != symbols.end();
+  // Copy the value out while the lock is still held: once it is released a
+  // concurrent AddSymbol() may modify the map this iterator points into.
+  void *Explicit = Found ? I->second : 0;
+  SymbolsLock.reader_release();
+  if (Found)
+    return Explicit;
 
   // Now search the libraries.
+  SymbolsLock.writer_acquire();
   for (std::vector<HMODULE>::iterator I = OpenedHandles.begin(),
        E = OpenedHandles.end(); I != E; ++I) {
     FARPROC ptr = GetProcAddress((HMODULE)*I, symbolName);
-    if (ptr)
+    if (ptr) {
+      SymbolsLock.writer_release();
       return (void *) ptr;
+    }
   }
+  SymbolsLock.writer_release();
 
 #if defined(__MINGW32__)
   {
diff --git a/lib/System/Win32/ThreadLocal.inc b/lib/System/Win32/ThreadLocal.inc
new file mode 100644
index 0000000..8ab37d9
--- /dev/null
+++ b/lib/System/Win32/ThreadLocal.inc
@@ -0,0 +1,56 @@
+//= llvm/System/Win32/ThreadLocal.inc - Win32 Thread Local Data -*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Win32 specific (non-pthread) ThreadLocal class.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//=== WARNING: Implementation here must contain only generic Win32 code that
+//===          is guaranteed to work on *all* Win32 variants.
+//===----------------------------------------------------------------------===//
+
+#include "Win32.h"
+#include "llvm/System/ThreadLocal.h"
+#include <cassert>
+
+namespace llvm {
+using namespace sys;
+
+// 'data' holds a heap-allocated DWORD containing the TLS index from TlsAlloc.
+ThreadLocalImpl::ThreadLocalImpl() {
+  DWORD* tls = new DWORD;
+  *tls = TlsAlloc();
+  assert(*tls != TLS_OUT_OF_INDEXES);
+  data = tls;
+}
+
+// Free the TLS index and the storage that held it.
+ThreadLocalImpl::~ThreadLocalImpl() {
+  DWORD* tls = static_cast<DWORD*>(data);
+  TlsFree(*tls);
+  delete tls;
+}
+
+// Return the calling thread's value for this TLS index.
+const void* ThreadLocalImpl::getInstance() {
+  DWORD* tls = static_cast<DWORD*>(data);
+  return TlsGetValue(*tls);
+}
+
+// Bind 'd' to the calling thread's slot.
+void ThreadLocalImpl::setInstance(const void* d){
+  DWORD* tls = static_cast<DWORD*>(data);
+  // TlsSetValue returns nonzero on success and zero on failure.
+  BOOL succeeded = TlsSetValue(*tls, const_cast<void*>(d));
+  assert(succeeded != 0);
+  (void) succeeded;  // Silence unused-variable warnings in NDEBUG builds.
+}
+
+}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 7edd118..8bf1b7c 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -20,7 +20,7 @@
 
 namespace llvm {
 
-class ARMTargetMachine;
+class ARMBaseTargetMachine;
 class FunctionPass;
 class MachineCodeEmitter;
 class JITCodeEmitter;
@@ -28,8 +28,8 @@ class raw_ostream;
 
 // Enums corresponding to ARM condition codes
 namespace ARMCC {
-  // The CondCodes constants map directly to the 4-bit encoding of the 
-  // condition field for predicated instructions. 
+  // The CondCodes constants map directly to the 4-bit encoding of the
+  // condition field for predicated instructions.
   enum CondCodes {
     EQ,
     NE,
@@ -47,7 +47,7 @@ namespace ARMCC {
     LE,
     AL
   };
-  
+
   inline static CondCodes getOppositeCondition(CondCodes CC){
     switch (CC) {
     default: assert(0 && "Unknown condition code");
@@ -90,17 +90,17 @@ inline static const char *ARMCondCodeToString(ARMCC::CondCodes CC) {
   }
 }
 
-FunctionPass *createARMISelDag(ARMTargetMachine &TM);
+FunctionPass *createARMISelDag(ARMBaseTargetMachine &TM);
 FunctionPass *createARMCodePrinterPass(raw_ostream &O,
-                                       ARMTargetMachine &TM,
+                                       ARMBaseTargetMachine &TM,
                                        CodeGenOpt::Level OptLevel,
                                        bool Verbose);
-FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
                                        MachineCodeEmitter &MCE);
 
-FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
                                        MachineCodeEmitter &MCE);
-FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM, 
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
                                           JITCodeEmitter &JCE);
 
 FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index f6629fe..8424c2e 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -176,11 +176,11 @@ namespace {
 
 namespace llvm {
 
-FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+FunctionPass *createARMCodeEmitterPass(ARMBaseTargetMachine &TM,
                                        MachineCodeEmitter &MCE) {
   return new Emitter<MachineCodeEmitter>(TM, MCE);
 }
-FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+FunctionPass *createARMJITCodeEmitterPass(ARMBaseTargetMachine &TM,
                                           JITCodeEmitter &JCE) {
   return new Emitter<JITCodeEmitter>(TM, JCE);
 }
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index be543a9..200371b 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -41,14 +41,14 @@ static const unsigned arm_dsubreg_1 = 6;
 ///
 namespace {
 class ARMDAGToDAGISel : public SelectionDAGISel {
-  ARMTargetMachine &TM;
+  ARMBaseTargetMachine &TM;
 
   /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
   /// make the right decision when generating code for different targets.
   const ARMSubtarget *Subtarget;
 
 public:
-  explicit ARMDAGToDAGISel(ARMTargetMachine &tm)
+  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm)
     : SelectionDAGISel(tm), TM(tm),
     Subtarget(&TM.getSubtarget<ARMSubtarget>()) {
   }
@@ -92,11 +92,10 @@ public:
   bool SelectThumbAddrModeSP(SDValue Op, SDValue N, SDValue &Base,
                              SDValue &OffImm);
 
-  bool SelectThumb2ShifterOperandReg(SDValue Op, SDValue N,
-                                     SDValue &BaseReg, SDValue &Opc);
-
   bool SelectShifterOperandReg(SDValue Op, SDValue N, SDValue &A,
                                SDValue &B, SDValue &C);
+  bool SelectT2ShifterOperandReg(SDValue Op, SDValue N,
+                                 SDValue &BaseReg, SDValue &Opc);
   
   // Include the pieces autogenerated from the target description.
 #include "ARMGenDAGISel.inc"
@@ -520,28 +519,6 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue Op, SDValue N,
   return false;
 }
 
-bool ARMDAGToDAGISel::SelectThumb2ShifterOperandReg(SDValue Op,
-                                                    SDValue N,
-                                                    SDValue &BaseReg,
-                                                    SDValue &Opc) {
-  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
-
-  // Don't match base register only case. That is matched to a separate
-  // lower complexity pattern with explicit register operand.
-  if (ShOpcVal == ARM_AM::no_shift) return false;
-
-  BaseReg = N.getOperand(0);
-  unsigned ShImmVal = 0;
-  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)))
-    ShImmVal = RHS->getZExtValue() & 31;
-  else
-    return false;
-
-  Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
-
-  return true;
-}
-
 bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
                                               SDValue N,
                                               SDValue &BaseReg,
@@ -566,6 +543,26 @@ bool ARMDAGToDAGISel::SelectShifterOperandReg(SDValue Op,
   return true;
 }
 
+bool ARMDAGToDAGISel::SelectT2ShifterOperandReg(SDValue Op, SDValue N,
+                                                SDValue &BaseReg,
+                                                SDValue &Opc) {
+  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N);
+
+  // Don't match base register only case. That is matched to a separate
+  // lower complexity pattern with explicit register operand.
+  if (ShOpcVal == ARM_AM::no_shift) return false;
+
+  BaseReg = N.getOperand(0);
+  unsigned ShImmVal = 0;
+  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+    ShImmVal = RHS->getZExtValue() & 31;
+    Opc = getI32Imm(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal));
+    return true;
+  }
+
+  return false;
+}
+
 /// getAL - Returns a ARMCC::AL immediate node.
 static inline SDValue getAL(SelectionDAG *CurDAG) {
   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, MVT::i32);
@@ -1003,6 +1000,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
 /// createARMISelDag - This pass converts a legalized DAG into a
 /// ARM-specific DAG, ready for instruction scheduling.
 ///
-FunctionPass *llvm::createARMISelDag(ARMTargetMachine &TM) {
+FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM) {
   return new ARMDAGToDAGISel(TM);
 }
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 29d3da2..c24bb2e 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -266,7 +266,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::ROTL,  MVT::i32, Expand);
   setOperationAction(ISD::CTTZ,  MVT::i32, Expand);
   setOperationAction(ISD::CTPOP, MVT::i32, Expand);
-  if (!Subtarget->hasV5TOps() || Subtarget->isThumb())
+  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
     setOperationAction(ISD::CTLZ, MVT::i32, Expand);
 
   // Only ARMv6 has BSWAP.
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index d70d2e2..d7371b0 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -742,32 +742,85 @@ class TIx2<dag outs, dag ins, string asm, list<dag> pattern>
 class TJTI<dag outs, dag ins, string asm, list<dag> pattern>
   : ThumbI<outs, ins, AddrModeNone, SizeSpecial, asm, "", pattern>;
 
-// ThumbPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
-class ThumbPat<dag pattern, dag result> : Pat<pattern, result> {
+// TPat - Same as Pat<>, but requires that the compiler be in Thumb mode.
+class TPat<dag pattern, dag result> : Pat<pattern, result> {
   list<Predicate> Predicates = [IsThumb];
 }
 
-class ThumbV5Pat<dag pattern, dag result> : Pat<pattern, result> {
+class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
   list<Predicate> Predicates = [IsThumb, HasV5T];
 }
 
-// T2I - Thumb2 instruction.
-
-class Thumb2I<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
+// Thumb1I - Thumb1-only instruction (predicated on IsThumb1Only).
+class Thumb1I<dag outs, dag ins, AddrMode am, SizeFlagVal sz,
              string asm, string cstr, list<dag> pattern>
   : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
   let OutOperandList = outs;
   let InOperandList = ins;
   let AsmString   = asm;
   let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb1Only];
+}
+
+class T1I<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "", pattern>;
+
+// T1It - Two-address Thumb1 instruction; $lhs is tied to $dst.
+class T1It<dag outs, dag ins, string asm, list<dag> pattern>
+  : Thumb1I<outs, ins, AddrModeNone, Size2Bytes, asm, "$lhs = $dst", pattern>;
+
+class T1Pat<dag pattern, dag result> : Pat<pattern, result> {
+  list<Predicate> Predicates = [IsThumb1Only];
+}
+
+// Thumb2I - Thumb2 instruction. Almost all Thumb2 instructions are predicable.
+class Thumb2I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+              string opc, string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ops pred:$p));
+  let AsmString = !strconcat(opc, !strconcat("${p}", asm));
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, HasThumb2];
+}
+
+// Same as Thumb2I except it can optionally modify CPSR. Note it's modeled as
+// an input operand since by default it's a zero register. It will
+// become an implicit def once it's "flipped".
+// FIXME: This uses unified syntax so {s} comes before {p}. We should make it
+// more consistent.
+class Thumb2sI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               string opc, string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+  let OutOperandList = oops;
+  let InOperandList = !con(iops, (ops pred:$p, cc_out:$s));
+  let AsmString   = !strconcat(opc, !strconcat("${s}${p}", asm));
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb, HasThumb2];
+}
+
+// Thumb2XI - Special cases: no predicate or cc_out operand is appended.
+class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, cstr> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString   = asm;
+  let Pattern = pattern;
   list<Predicate> Predicates = [IsThumb, HasThumb2];
 }
 
-class T2I<dag outs, dag ins, string asm, list<dag> pattern>
-  : Thumb2I<outs, ins, AddrModeNone, Size4Bytes, asm, "", pattern>;
+class T2I<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>;
+
+class T2sI<dag oops, dag iops, string opc, string asm, list<dag> pattern>
+  : Thumb2sI<oops, iops, AddrModeNone, Size4Bytes, opc, asm, "", pattern>;
+
+class T2XI<dag oops, dag iops, string asm, list<dag> pattern>
+  : Thumb2XI<oops, iops, AddrModeNone, Size4Bytes, asm, "", pattern>;
 
-// Thumb2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
-class Thumb2Pat<dag pattern, dag result> : Pat<pattern, result> {
+// T2Pat - Same as Pat<>, but requires that the compiler be in Thumb2 mode.
+class T2Pat<dag pattern, dag result> : Pat<pattern, result> {
   list<Predicate> Predicates = [IsThumb, HasThumb2];
 }
 
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index e8da927..d95089d 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -39,11 +39,14 @@ const MachineInstrBuilder &AddDefaultCC(const MachineInstrBuilder &MIB) {
   return MIB.addReg(0);
 }
 
-ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget &STI)
   : TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
     RI(*this, STI) {
 }
 
+ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI)
+  : ARMBaseInstrInfo(STI) {
+}
 
 /// Return true if the instruction is a register to register move and
 /// leave the source and dest operands in the passed parameters.
@@ -65,10 +68,6 @@ bool ARMInstrInfo::isMoveInstr(const MachineInstr &MI,
     DstReg = MI.getOperand(0).getReg();
     return true;
   case ARM::MOVr:
-  case ARM::tMOVr:
-  case ARM::tMOVhir2lor:
-  case ARM::tMOVlor2hir:
-  case ARM::tMOVhir2hir:
     assert(MI.getDesc().getNumOperands() >= 2 &&
            MI.getOperand(0).isReg() &&
            MI.getOperand(1).isReg() &&
@@ -102,14 +101,6 @@ unsigned ARMInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
       return MI->getOperand(0).getReg();
     }
     break;
-  case ARM::tRestore:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
   }
   return 0;
 }
@@ -137,22 +128,15 @@ unsigned ARMInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
       return MI->getOperand(0).getReg();
     }
     break;
-  case ARM::tSpill:
-    if (MI->getOperand(1).isFI() &&
-        MI->getOperand(2).isImm() &&
-        MI->getOperand(2).getImm() == 0) {
-      FrameIndex = MI->getOperand(1).getIndex();
-      return MI->getOperand(0).getReg();
-    }
-    break;
   }
+
   return 0;
 }
 
-void ARMInstrInfo::reMaterialize(MachineBasicBlock &MBB,
-                                 MachineBasicBlock::iterator I,
-                                 unsigned DestReg,
-                                 const MachineInstr *Orig) const {
+void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator I,
+                                     unsigned DestReg,
+                                     const MachineInstr *Orig) const {
   DebugLoc dl = Orig->getDebugLoc();
   if (Orig->getOpcode() == ARM::MOVi2pieces) {
     RI.emitLoadConstPool(MBB, I, DestReg, Orig->getOperand(1).getImm(),
@@ -198,9 +182,9 @@ static unsigned getUnindexedOpcode(unsigned Opc) {
 }
 
 MachineInstr *
-ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
-                                    MachineBasicBlock::iterator &MBBI,
-                                    LiveVariables *LV) const {
+ARMBaseInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
+                                        MachineBasicBlock::iterator &MBBI,
+                                        LiveVariables *LV) const {
   if (!EnableARM3Addr)
     return NULL;
 
@@ -261,7 +245,7 @@ ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                          get(isSub ? ARM::SUBrs : ARM::ADDrs), WBReg)
         .addReg(BaseReg).addReg(OffReg).addReg(0).addImm(SOOpc)
         .addImm(Pred).addReg(0).addReg(0);
-    } else 
+    } else
       UpdateMI = BuildMI(MF, MI->getDebugLoc(),
                          get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
         .addReg(BaseReg).addReg(OffReg)
@@ -312,7 +296,7 @@ ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     NewMIs.push_back(UpdateMI);
     NewMIs.push_back(MemMI);
   }
-  
+
   // Transfer LiveVariables states, kill / dead info.
   if (LV) {
     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -320,7 +304,7 @@ ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       if (MO.isReg() && MO.getReg() &&
           TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
         unsigned Reg = MO.getReg();
-      
+
         LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
         if (MO.isDef()) {
           MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
@@ -349,18 +333,19 @@ ARMInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
 }
 
 // Branch analysis.
-bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
-                                 MachineBasicBlock *&FBB,
-                                 SmallVectorImpl<MachineOperand> &Cond,
-                                 bool AllowModify) const {
+bool
+  ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
+                                  MachineBasicBlock *&FBB,
+                                  SmallVectorImpl<MachineOperand> &Cond,
+                                  bool AllowModify) const {
   // If the block has no terminators, it just falls into the block after it.
   MachineBasicBlock::iterator I = MBB.end();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
     return false;
-  
+
   // Get the last instruction in the block.
   MachineInstr *LastInst = I;
-  
+
   // If there is only one terminator instruction, process it.
   unsigned LastOpc = LastInst->getOpcode();
   if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
@@ -377,14 +362,14 @@ bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     }
     return true;  // Can't handle indirect branch.
   }
-  
+
   // Get the instruction before it if it is a terminator.
   MachineInstr *SecondLastInst = I;
-  
+
   // If there are three terminators, we don't know what sort of block this is.
   if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
     return true;
-  
+
   // If the block ends with ARM::B/ARM::tB and a ARM::Bcc/ARM::tBcc, handle it.
   unsigned SecondLastOpc = SecondLastInst->getOpcode();
   if ((SecondLastOpc == ARM::Bcc && LastOpc == ARM::B) ||
@@ -395,8 +380,8 @@ bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     FBB = LastInst->getOperand(0).getMBB();
     return false;
   }
-  
-  // If the block ends with two unconditional branches, handle it.  The second 
+
+  // If the block ends with two unconditional branches, handle it.  The second
   // one is not executed, so remove it.
   if ((SecondLastOpc == ARM::B || SecondLastOpc==ARM::tB) &&
       (LastOpc == ARM::B || LastOpc == ARM::tB)) {
@@ -417,14 +402,14 @@ bool ARMInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB,
     if (AllowModify)
       I->eraseFromParent();
     return true;
-  } 
+  }
 
   // Otherwise, can't handle this.
   return true;
 }
 
 
-unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   int BOpc   = AFI->isThumbFunction() ? ARM::tB : ARM::B;
@@ -435,26 +420,26 @@ unsigned ARMInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
   --I;
   if (I->getOpcode() != BOpc && I->getOpcode() != BccOpc)
     return 0;
-  
+
   // Remove the branch.
   I->eraseFromParent();
-  
+
   I = MBB.end();
-  
+
   if (I == MBB.begin()) return 1;
   --I;
   if (I->getOpcode() != BccOpc)
     return 1;
-  
+
   // Remove the branch.
   I->eraseFromParent();
   return 2;
 }
 
 unsigned
-ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
-                           MachineBasicBlock *FBB,
-                           const SmallVectorImpl<MachineOperand> &Cond) const {
+ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+                               MachineBasicBlock *FBB,
+                             const SmallVectorImpl<MachineOperand> &Cond) const {
   // FIXME this should probably have a DebugLoc argument
   DebugLoc dl = DebugLoc::getUnknownLoc();
   MachineFunction &MF = *MBB.getParent();
@@ -466,7 +451,7 @@ ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 2 || Cond.size() == 0) &&
          "ARM branch conditions have two components!");
-  
+
   if (FBB == 0) {
     if (Cond.empty()) // Unconditional branch?
       BuildMI(&MBB, dl, get(BOpc)).addMBB(TBB);
@@ -475,7 +460,7 @@ ARMInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
         .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
     return 1;
   }
-  
+
   // Two-way conditional branch.
   BuildMI(&MBB, dl, get(BccOpc)).addMBB(TBB)
     .addImm(Cond[0].getImm()).addReg(Cond[1].getReg());
@@ -488,43 +473,18 @@ bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                 unsigned DestReg, unsigned SrcReg,
                                 const TargetRegisterClass *DestRC,
                                 const TargetRegisterClass *SrcRC) const {
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc DL = DebugLoc::getUnknownLoc();
   if (I != MBB.end()) DL = I->getDebugLoc();
 
-  if (!AFI->isThumbFunction()) {
-    if (DestRC == ARM::GPRRegisterClass) {
-      AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
-                                  .addReg(SrcReg)));
-      return true;
-    }
-  } else {
-    if (DestRC == ARM::GPRRegisterClass) {
-      if (SrcRC == ARM::GPRRegisterClass) {
-        BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
-        return true;
-      } else if (SrcRC == ARM::tGPRRegisterClass) {
-        BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
-        return true;
-      }
-    } else if (DestRC == ARM::tGPRRegisterClass) {
-      if (SrcRC == ARM::GPRRegisterClass) {
-        BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
-        return true;
-      } else if (SrcRC == ARM::tGPRRegisterClass) {
-        BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
-        return true;
-      }
-    }
-  }
   if (DestRC != SrcRC) {
     // Not yet supported!
     return false;
   }
 
-
-  if (DestRC == ARM::SPRRegisterClass)
+  if (DestRC == ARM::GPRRegisterClass)
+    AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
+                                .addReg(SrcReg)));
+  else if (DestRC == ARM::SPRRegisterClass)
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
                    .addReg(SrcReg));
   else if (DestRC == ARM::DPRRegisterClass)
@@ -534,7 +494,7 @@ bool ARMInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
     BuildMI(MBB, I, DL, get(ARM::VMOVQ), DestReg).addReg(SrcReg);
   else
     return false;
-  
+
   return true;
 }
 
@@ -546,19 +506,9 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (RC == ARM::GPRRegisterClass) {
-    MachineFunction &MF = *MBB.getParent();
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    assert (!AFI->isThumbFunction());
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
                    .addReg(SrcReg, getKillRegState(isKill))
                    .addFrameIndex(FI).addReg(0).addImm(0));
-  } else if (RC == ARM::tGPRRegisterClass) {
-    MachineFunction &MF = *MBB.getParent();
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    assert (AFI->isThumbFunction());
-    BuildMI(MBB, I, DL, get(ARM::tSpill))
-      .addReg(SrcReg, getKillRegState(isKill))
-      .addFrameIndex(FI).addImm(0);
   } else if (RC == ARM::DPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
                    .addReg(SrcReg, getKillRegState(isKill))
@@ -579,16 +529,6 @@ void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
   DebugLoc DL = DebugLoc::getUnknownLoc();
   unsigned Opc = 0;
   if (RC == ARM::GPRRegisterClass) {
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    if (AFI->isThumbFunction()) {
-      Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR;
-      MachineInstrBuilder MIB = 
-        BuildMI(MF, DL,  get(Opc)).addReg(SrcReg, getKillRegState(isKill));
-      for (unsigned i = 0, e = Addr.size(); i != e; ++i)
-        MIB.addOperand(Addr[i]);
-      NewMIs.push_back(MIB);
-      return;
-    }
     Opc = ARM::STR;
   } else if (RC == ARM::DPRRegisterClass) {
     Opc = ARM::FSTD;
@@ -597,7 +537,7 @@ void ARMInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
     Opc = ARM::FSTS;
   }
 
-  MachineInstrBuilder MIB = 
+  MachineInstrBuilder MIB =
     BuildMI(MF, DL, get(Opc)).addReg(SrcReg, getKillRegState(isKill));
   for (unsigned i = 0, e = Addr.size(); i != e; ++i)
     MIB.addOperand(Addr[i]);
@@ -614,17 +554,8 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
   if (I != MBB.end()) DL = I->getDebugLoc();
 
   if (RC == ARM::GPRRegisterClass) {
-    MachineFunction &MF = *MBB.getParent();
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    assert (!AFI->isThumbFunction());
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
                    .addFrameIndex(FI).addReg(0).addImm(0));
-  } else if (RC == ARM::tGPRRegisterClass) {
-    MachineFunction &MF = *MBB.getParent();
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    assert (AFI->isThumbFunction());
-    BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
-      .addFrameIndex(FI).addImm(0);
   } else if (RC == ARM::DPRRegisterClass) {
     AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
                    .addFrameIndex(FI).addImm(0));
@@ -643,15 +574,6 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   DebugLoc DL = DebugLoc::getUnknownLoc();
   unsigned Opc = 0;
   if (RC == ARM::GPRRegisterClass) {
-    ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-    if (AFI->isThumbFunction()) {
-      Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR;
-      MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
-      for (unsigned i = 0, e = Addr.size(); i != e; ++i)
-        MIB.addOperand(Addr[i]);
-      NewMIs.push_back(MIB);
-      return;
-    }
     Opc = ARM::LDR;
   } else if (RC == ARM::DPRRegisterClass) {
     Opc = ARM::FLDD;
@@ -668,59 +590,6 @@ loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
   return;
 }
 
-bool ARMInstrInfo::
-spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                          MachineBasicBlock::iterator MI,
-                          const std::vector<CalleeSavedInfo> &CSI) const {
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  if (!AFI->isThumbFunction() || CSI.empty())
-    return false;
-
-  DebugLoc DL = DebugLoc::getUnknownLoc();
-  if (MI != MBB.end()) DL = MI->getDebugLoc();
-
-  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    // Add the callee-saved register as live-in. It's killed at the spill.
-    MBB.addLiveIn(Reg);
-    MIB.addReg(Reg, RegState::Kill);
-  }
-  return true;
-}
-
-bool ARMInstrInfo::
-restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                            MachineBasicBlock::iterator MI,
-                            const std::vector<CalleeSavedInfo> &CSI) const {
-  MachineFunction &MF = *MBB.getParent();
-  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-  if (!AFI->isThumbFunction() || CSI.empty())
-    return false;
-
-  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
-  MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
-  for (unsigned i = CSI.size(); i != 0; --i) {
-    unsigned Reg = CSI[i-1].getReg();
-    if (Reg == ARM::LR) {
-      // Special epilogue for vararg functions. See emitEpilogue
-      if (isVarArg)
-        continue;
-      Reg = ARM::PC;
-      PopMI->setDesc(get(ARM::tPOP_RET));
-      MI = MBB.erase(MI);
-    }
-    PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
-  }
-
-  // It's illegal to emit pop instruction without operands.
-  if (PopMI->getNumOperands() > 0)
-    MBB.insert(MI, PopMI);
-
-  return true;
-}
-
 MachineInstr *ARMInstrInfo::
 foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
                       const SmallVectorImpl<unsigned> &Ops, int FI) const {
@@ -752,31 +621,6 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
     }
     break;
   }
-  case ARM::tMOVr:
-  case ARM::tMOVlor2hir:
-  case ARM::tMOVhir2lor:
-  case ARM::tMOVhir2hir: {
-    if (OpNum == 0) { // move -> store
-      unsigned SrcReg = MI->getOperand(1).getReg();
-      bool isKill = MI->getOperand(1).isKill();
-      if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
-        // tSpill cannot take a high register operand.
-        break;
-      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
-        .addReg(SrcReg, getKillRegState(isKill))
-        .addFrameIndex(FI).addImm(0);
-    } else {          // move -> load
-      unsigned DstReg = MI->getOperand(0).getReg();
-      if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
-        // tRestore cannot target a high register operand.
-        break;
-      bool isDead = MI->getOperand(0).isDead();
-      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
-        .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
-        .addFrameIndex(FI).addImm(0);
-    }
-    break;
-  }
   case ARM::FCPYS: {
     unsigned Pred = MI->getOperand(2).getImm();
     unsigned PredReg = MI->getOperand(3).getReg();
@@ -816,7 +660,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
   return NewMI;
 }
 
-bool ARMInstrInfo::
+bool ARMBaseInstrInfo::
 canFoldMemoryOperand(const MachineInstr *MI,
                      const SmallVectorImpl<unsigned> &Ops) const {
   if (Ops.size() != 1) return false;
@@ -857,9 +701,10 @@ canFoldMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
-bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
+bool
+  ARMBaseInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
   if (MBB.empty()) return false;
-  
+
   switch (MBB.back().getOpcode()) {
   case ARM::BX_RET:   // Return.
   case ARM::LDM_RET:
@@ -877,19 +722,19 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
   }
 }
 
-bool ARMInstrInfo::
+bool ARMBaseInstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
   Cond[0].setImm(ARMCC::getOppositeCondition(CC));
   return false;
 }
 
-bool ARMInstrInfo::isPredicated(const MachineInstr *MI) const {
+bool ARMBaseInstrInfo::isPredicated(const MachineInstr *MI) const {
   int PIdx = MI->findFirstPredOperandIdx();
   return PIdx != -1 && MI->getOperand(PIdx).getImm() != ARMCC::AL;
 }
 
-bool ARMInstrInfo::
+bool ARMBaseInstrInfo::
 PredicateInstruction(MachineInstr *MI,
                      const SmallVectorImpl<MachineOperand> &Pred) const {
   unsigned Opc = MI->getOpcode();
@@ -910,7 +755,7 @@ PredicateInstruction(MachineInstr *MI,
   return false;
 }
 
-bool ARMInstrInfo::
+bool ARMBaseInstrInfo::
 SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                   const SmallVectorImpl<MachineOperand> &Pred2) const {
   if (Pred1.size() > 2 || Pred2.size() > 2)
@@ -937,7 +782,7 @@ SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
   }
 }
 
-bool ARMInstrInfo::DefinesPredicate(MachineInstr *MI,
+bool ARMBaseInstrInfo::DefinesPredicate(MachineInstr *MI,
                                     std::vector<MachineOperand> &Pred) const {
   const TargetInstrDesc &TID = MI->getDesc();
   if (!TID.getImplicitDefs() && !TID.hasOptionalDef())
@@ -966,7 +811,7 @@ static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
 
 /// GetInstSize - Return the size of the specified MachineInstr.
 ///
-unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
+unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   const MachineBasicBlock &MBB = *MI->getParent();
   const MachineFunction *MF = MBB.getParent();
   const TargetAsmInfo *TAI = MF->getTarget().getTargetAsmInfo();
@@ -974,7 +819,7 @@ unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   // Basic size info comes from the TSFlags field.
   const TargetInstrDesc &TID = MI->getDesc();
   unsigned TSFlags = TID.TSFlags;
-  
+
   switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
   default: {
     // If this machine instr is an inline asm, measure it.
@@ -1024,7 +869,7 @@ unsigned ARMInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       // FIXME: If we know the size of the function is less than (1 << 16) *2
       // bytes, we can use 16-bit entries instead. Then there won't be an
       // alignment issue.
-      return getNumJTEntries(JT, JTI) * 4 + 
+      return getNumJTEntries(JT, JTI) * 4 +
              (MI->getOpcode()==ARM::tBR_JTr ? 2 : 4);
     }
     default:
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 9658f3b..131960b 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -51,14 +51,14 @@ namespace ARMII {
     Size8Bytes    = 2,
     Size4Bytes    = 3,
     Size2Bytes    = 4,
-    
+
     // IndexMode - Unindex, pre-indexed, or post-indexed. Only valid for load
-    // and store ops 
+    // and store ops
     IndexModeShift = 7,
     IndexModeMask  = 3 << IndexModeShift,
     IndexModePre   = 1,
     IndexModePost  = 2,
-    
+
     //===------------------------------------------------------------------===//
     // Misc flags.
 
@@ -146,10 +146,12 @@ namespace ARMII {
   };
 }
 
-class ARMInstrInfo : public TargetInstrInfoImpl {
+class ARMBaseInstrInfo : public TargetInstrInfoImpl {
   const ARMRegisterInfo RI;
+protected:
+  // Protected so that this base class can only be constructed by subclasses.
+  explicit ARMBaseInstrInfo(const ARMSubtarget &STI);
 public:
-  explicit ARMInstrInfo(const ARMSubtarget &STI);
 
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
@@ -157,17 +159,6 @@ public:
   ///
   virtual const ARMRegisterInfo &getRegisterInfo() const { return RI; }
 
-  /// Return true if the instruction is a register to register move and return
-  /// the source and dest operands and their sub-register indices by reference.
-  virtual bool isMoveInstr(const MachineInstr &MI,
-                           unsigned &SrcReg, unsigned &DstReg,
-                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
-
-  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
-                                       int &FrameIndex) const;
-  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
-                                      int &FrameIndex) const;
-  
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, const MachineInstr *Orig) const;
 
@@ -184,6 +175,54 @@ public:
   virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                                 MachineBasicBlock *FBB,
                             const SmallVectorImpl<MachineOperand> &Cond) const;
+
+  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
+                                    const SmallVectorImpl<unsigned> &Ops) const;
+
+  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+  virtual
+  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
+
+  // Predication support.
+  virtual bool isPredicated(const MachineInstr *MI) const;
+
+  ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
+    int PIdx = MI->findFirstPredOperandIdx();
+    return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm()
+                      : ARMCC::AL;
+  }
+
+  virtual
+  bool PredicateInstruction(MachineInstr *MI,
+                            const SmallVectorImpl<MachineOperand> &Pred) const;
+
+  virtual
+  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
+                         const SmallVectorImpl<MachineOperand> &Pred2) const;
+
+  virtual bool DefinesPredicate(MachineInstr *MI,
+                                std::vector<MachineOperand> &Pred) const;
+
+  /// GetInstSizeInBytes - Returns the size, in bytes, of the specified
+  /// MachineInstr.
+  virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
+};
+
+class ARMInstrInfo : public ARMBaseInstrInfo {
+public:
+  explicit ARMInstrInfo(const ARMSubtarget &STI);
+
+  /// Return true if the instruction is a register to register move and return
+  /// the source and dest operands and their sub-register indices by reference.
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
   virtual bool copyRegToReg(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator I,
                             unsigned DestReg, unsigned SrcReg,
@@ -208,13 +247,7 @@ public:
                                SmallVectorImpl<MachineOperand> &Addr,
                                const TargetRegisterClass *RC,
                                SmallVectorImpl<MachineInstr*> &NewMIs) const;
-  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                         MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
-  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
-                                           MachineBasicBlock::iterator MI,
-                                 const std::vector<CalleeSavedInfo> &CSI) const;
-  
+
   virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                               MachineInstr* MI,
                                            const SmallVectorImpl<unsigned> &Ops,
@@ -226,37 +259,6 @@ public:
                                               MachineInstr* LoadMI) const {
     return 0;
   }
-
-  virtual bool canFoldMemoryOperand(const MachineInstr *MI,
-                                    const SmallVectorImpl<unsigned> &Ops) const;
-  
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-  virtual
-  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-
-  // Predication support.
-  virtual bool isPredicated(const MachineInstr *MI) const;
-
-  ARMCC::CondCodes getPredicate(const MachineInstr *MI) const {
-    int PIdx = MI->findFirstPredOperandIdx();
-    return PIdx != -1 ? (ARMCC::CondCodes)MI->getOperand(PIdx).getImm() 
-                      : ARMCC::AL;
-  }
-
-  virtual
-  bool PredicateInstruction(MachineInstr *MI,
-                            const SmallVectorImpl<MachineOperand> &Pred) const;
-
-  virtual
-  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
-                         const SmallVectorImpl<MachineOperand> &Pred2) const;
-
-  virtual bool DefinesPredicate(MachineInstr *MI,
-                                std::vector<MachineOperand> &Pred) const;
-    
-  /// GetInstSize - Returns the size of the specified MachineInstr.
-  ///
-  virtual unsigned GetInstSizeInBytes(const MachineInstr* MI) const;
 };
 
 }
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 7003a65..cb7b7b9 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -103,6 +103,8 @@ def HasThumb2 : Predicate<"Subtarget->hasThumb2()">;
 def IsARM     : Predicate<"!Subtarget->isThumb()">;
 def IsDarwin    : Predicate<"Subtarget->isTargetDarwin()">;
 def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
+def CarryDefIsUnused : Predicate<"!N.getNode()->hasAnyUseOfValue(1)">;
+def CarryDefIsUsed   : Predicate<"N.getNode()->hasAnyUseOfValue(1)">;
 
 //===----------------------------------------------------------------------===//
 // ARM Flag Definitions.
@@ -353,28 +355,34 @@ include "ARMInstrFormats.td"
 
 /// AsI1_bin_irs - Defines a set of (op r, {so_imm|r|so_reg}) patterns for a
 /// binop that produces a value.
-multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
+                        bit Commutable = 0> {
   def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
                opc, " $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
   def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
                opc, " $dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+    let isCommutable = Commutable;
+  }
   def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                opc, " $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
 }
 
-/// ASI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
+/// AI1_bin_s_irs - Similar to AsI1_bin_irs except it sets the 's' bit so the
 /// instruction modifies the CSPR register.
 let Defs = [CPSR] in {
-multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+                         bit Commutable = 0> {
   def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
                opc, "s $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
   def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
                opc, "s $dst, $a, $b",
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
+    let isCommutable = Commutable;
+  }
   def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                opc, "s $dst, $a, $b",
                [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
@@ -385,13 +393,16 @@ multiclass ASI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode> {
 /// patterns. Similar to AsI1_bin_irs except the instruction does not produce
 /// a explicit result, only implicitly set CPSR.
 let Defs = [CPSR] in {
-multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode> {
+multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
+                       bit Commutable = 0> {
   def ri : AI1<opcod, (outs), (ins GPR:$a, so_imm:$b), DPFrm,
                opc, " $a, $b",
                [(opnode GPR:$a, so_imm:$b)]>;
   def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm,
                opc, " $a, $b",
-               [(opnode GPR:$a, GPR:$b)]>;
+               [(opnode GPR:$a, GPR:$b)]> {
+    let isCommutable = Commutable;
+  }
   def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                opc, " $a, $b",
                [(opnode GPR:$a, so_reg:$b)]>;
@@ -430,19 +441,43 @@ multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
                   Requires<[IsARM, HasV6]>;
 }
 
-/// AsXI1_bin_c_irs - Same as AsI1_bin_irs but without the predicate operand and
-/// setting carry bit. But it can optionally set CPSR.
+/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
 let Uses = [CPSR] in {
-multiclass AsXI1_bin_c_irs<bits<4> opcod, string opc, PatFrag opnode> {
-  def ri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s),
-                DPFrm, !strconcat(opc, "${s} $dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>;
-  def rr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b, cc_out:$s),
-                DPFrm, !strconcat(opc, "${s} $dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>;
-  def rs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s),
-                DPSoRegFrm, !strconcat(opc, "${s} $dst, $a, $b"),
-               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>;
+multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+                             bit Commutable = 0> {
+  def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+                DPFrm, opc, " $dst, $a, $b",
+               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+               Requires<[IsARM, CarryDefIsUnused]>;
+  def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+                DPFrm, opc, " $dst, $a, $b",
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+               Requires<[IsARM, CarryDefIsUnused]> {
+    let isCommutable = Commutable;
+  }
+  def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
+                DPSoRegFrm, opc, " $dst, $a, $b",
+               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+               Requires<[IsARM, CarryDefIsUnused]>;
+  // Carry setting variants
+  def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+                DPFrm, !strconcat(opc, "s $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
+               Requires<[IsARM, CarryDefIsUsed]> {
+                 let Defs = [CPSR];
+  }
+  def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+                DPFrm, !strconcat(opc, "s $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
+               Requires<[IsARM, CarryDefIsUsed]> {
+                 let Defs = [CPSR];
+  }
+  def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
+                DPSoRegFrm, !strconcat(opc, "s $dst, $a, $b"),
+               [(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
+               Requires<[IsARM, CarryDefIsUsed]> {
+                 let Defs = [CPSR];
+  }
 }
 }
 
@@ -535,7 +570,8 @@ def LEApcrel : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, pred:$p), Pseudo,
                                          "add$p $dst, pc, #PCRELV${:uid}")),
                    []>;
 
-def LEApcrelJT : AXI1<0x0, (outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p),
+def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
+                           (ins i32imm:$label, i32imm:$id, pred:$p),
           Pseudo,
           !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
                                          "${:private}PCRELL${:uid}+8))\n"),
@@ -899,21 +935,20 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
 //
 
 defm ADD  : AsI1_bin_irs<0b0100, "add",
-                         BinOpFrag<(add  node:$LHS, node:$RHS)>>;
+                         BinOpFrag<(add  node:$LHS, node:$RHS)>, 1>;
 defm SUB  : AsI1_bin_irs<0b0010, "sub",
                          BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
 
 // ADD and SUB with 's' bit set.
-defm ADDS : ASI1_bin_s_irs<0b0100, "add",
-                           BinOpFrag<(addc node:$LHS, node:$RHS)>>;
-defm SUBS : ASI1_bin_s_irs<0b0010, "sub",
-                           BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm ADDS : AI1_bin_s_irs<0b0100, "add",
+                          BinOpFrag<(addc node:$LHS, node:$RHS)>>;
+defm SUBS : AI1_bin_s_irs<0b0010, "sub",
+                          BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
-// FIXME: Do not allow ADC / SBC to be predicated for now.
-defm ADC  : AsXI1_bin_c_irs<0b0101, "adc",
-                            BinOpFrag<(adde node:$LHS, node:$RHS)>>;
-defm SBC  : AsXI1_bin_c_irs<0b0110, "sbc",
-                            BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm ADC : AI1_adde_sube_irs<0b0101, "adc",
+                             BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
+                             BinOpFrag<(sube node:$LHS, node:$RHS)>>;
 
 // These don't define reg/reg forms, because they are handled above.
 def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
@@ -934,14 +969,27 @@ def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
                  [(set GPR:$dst, (subc so_reg:$b, GPR:$a))]>;
 }
 
-// FIXME: Do not allow RSC to be predicated for now. But they can set CPSR.
 let Uses = [CPSR] in {
-def RSCri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b, cc_out:$s),
-                 DPFrm, "rsc${s} $dst, $a, $b",
-                 [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>;
-def RSCrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b, cc_out:$s),
-                 DPSoRegFrm, "rsc${s} $dst, $a, $b",
-                 [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>;
+def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+                 DPFrm, "rsc", " $dst, $a, $b",
+                 [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
+                 Requires<[IsARM, CarryDefIsUnused]>;
+def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
+                 DPSoRegFrm, "rsc", " $dst, $a, $b",
+                 [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
+                 Requires<[IsARM, CarryDefIsUnused]>;
+}
+
+// Carry-setting RSC (carry def is used). FIXME: Allow these to be predicated.
+let Defs = [CPSR], Uses = [CPSR] in {
+def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
+                  DPFrm, "rscs $dst, $a, $b",
+                  [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
+                  Requires<[IsARM, CarryDefIsUsed]>;
+def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
+                  DPSoRegFrm, "rscs $dst, $a, $b",
+                  [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
+                  Requires<[IsARM, CarryDefIsUsed]>;
 }
 
 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
@@ -965,11 +1013,11 @@ def : ARMPat<(add    GPR:$src, so_imm_neg:$imm),
 //
 
 defm AND   : AsI1_bin_irs<0b0000, "and",
-                          BinOpFrag<(and node:$LHS, node:$RHS)>>;
+                          BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
 defm ORR   : AsI1_bin_irs<0b1100, "orr",
-                          BinOpFrag<(or  node:$LHS, node:$RHS)>>;
+                          BinOpFrag<(or  node:$LHS, node:$RHS)>, 1>;
 defm EOR   : AsI1_bin_irs<0b0001, "eor",
-                          BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+                          BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
 defm BIC   : AsI1_bin_irs<0b1110, "bic",
                           BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 
@@ -991,6 +1039,7 @@ def : ARMPat<(and   GPR:$src, so_imm_not:$imm),
 //  Multiply Instructions.
 //
 
+let isCommutable = 1 in
 def MUL   : AsMul1I<0b0000000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
                     "mul", " $dst, $a, $b",
                    [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
@@ -1001,6 +1050,7 @@ def MLA   : AsMul1I<0b0000001, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
 
 // Extra precision multiplies with low / high results
 let neverHasSideEffects = 1 in {
+let isCommutable = 1 in {
 def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
                                (ins GPR:$a, GPR:$b),
                     "smull", " $ldst, $hdst, $a, $b", []>;
@@ -1008,6 +1058,7 @@ def SMULL : AsMul1I<0b0000110, (outs GPR:$ldst, GPR:$hdst),
 def UMULL : AsMul1I<0b0000100, (outs GPR:$ldst, GPR:$hdst),
                                (ins GPR:$a, GPR:$b),
                     "umull", " $ldst, $hdst, $a, $b", []>;
+}
 
 // Multiply + accumulate
 def SMLAL : AsMul1I<0b0000111, (outs GPR:$ldst, GPR:$hdst),
@@ -1258,9 +1309,9 @@ defm CMN  : AI1_cmp_irs<0b1011, "cmn",
 
 // Note that TST/TEQ don't set all the same flags that CMP does!
 defm TST  : AI1_cmp_irs<0b1000, "tst",
-                        BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>>;
+                        BinOpFrag<(ARMcmpNZ (and node:$LHS, node:$RHS), 0)>, 1>;
 defm TEQ  : AI1_cmp_irs<0b1001, "teq",
-                        BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>>;
+                        BinOpFrag<(ARMcmpNZ (xor node:$LHS, node:$RHS), 0)>, 1>;
 
 defm CMPnz : AI1_cmp_irs<0b1010, "cmp",
                          BinOpFrag<(ARMcmpNZ node:$LHS, node:$RHS)>>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 1def093..7927ca5 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -128,10 +128,28 @@ PseudoInst<(outs), (ins i32imm:$amt),
 }
 
 let isNotDuplicable = 1 in
-def tPICADD : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
+def tPICADD : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
                   "$cp:\n\tadd $dst, pc",
                   [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
 
+// PC relative add.
+def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs),
+                  "add $dst, pc, $rhs * 4", []>;
+
+// ADD rd, sp, #imm8
+// FIXME: hard code sp?
+def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
+                  "add $dst, $sp, $rhs * 4 @ addrspi", []>;
+
+// ADD sp, sp, #imm7
+// FIXME: hard code sp?
+def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                  "add $dst, $rhs * 4", []>;
+
+// FIXME: Make use of the following?
+// ADD rm, sp, rm
+// ADD sp, rm
+
 //===----------------------------------------------------------------------===//
 //  Control Flow Instructions.
 //
@@ -276,113 +294,135 @@ def tPUSH : TI<(outs), (ins reglist:$src1, variable_ops),
 //  Arithmetic Instructions.
 //
 
-// Add with carry
-let isCommutable = 1 in
-def tADC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
-               "adc $dst, $rhs",
-               [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
+// Add with carry register
+let isCommutable = 1, Defs = [CPSR], Uses = [CPSR] in
+def tADCS : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+                "adc $dst, $rhs",
+                [(set tGPR:$dst, (adde tGPR:$lhs, tGPR:$rhs))]>;
 
-def tADDS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
-               "add $dst, $lhs, $rhs",
-               [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>;
-
-
-def tADDi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// Add immediate
+let Defs = [CPSR] in {
+def tADDi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "add $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (add tGPR:$lhs, imm0_7:$rhs))]>;
+def tADDSi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+                 "add $dst, $lhs, $rhs",
+                 [(set tGPR:$dst, (addc tGPR:$lhs, imm0_7:$rhs))]>;
+}
 
-def tADDi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+let Defs = [CPSR] in {
+def tADDi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                  "add $dst, $rhs",
                  [(set tGPR:$dst, (add tGPR:$lhs, imm8_255:$rhs))]>;
+def tADDSi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+                  "add $dst, $rhs",
+                  [(set tGPR:$dst, (addc tGPR:$lhs, imm8_255:$rhs))]>;
+}
 
-def tADDrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// Add register
+let isCommutable = 1, Defs = [CPSR] in {
+def tADDrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "add $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (add tGPR:$lhs, tGPR:$rhs))]>;
+def tADDSrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+                 "add $dst, $lhs, $rhs",
+                 [(set tGPR:$dst, (addc tGPR:$lhs, tGPR:$rhs))]>;
+}
 
 let neverHasSideEffects = 1 in
-def tADDhirr : TIt<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+def tADDhirr : T1It<(outs tGPR:$dst), (ins GPR:$lhs, GPR:$rhs),
                    "add $dst, $rhs @ addhirr", []>;
 
-def tADDrPCi : TI<(outs tGPR:$dst), (ins i32imm:$rhs),
-                  "add $dst, pc, $rhs * 4", []>;
-
-def tADDrSPi : TI<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs),
-                  "add $dst, $sp, $rhs * 4 @ addrspi", []>;
-
-def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
-                  "add $dst, $rhs * 4", []>;
-
-let isCommutable = 1 in
-def tAND : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// And register
+let isCommutable = 1, Defs = [CPSR] in
+def tAND : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "and $dst, $rhs",
                 [(set tGPR:$dst, (and tGPR:$lhs, tGPR:$rhs))]>;
 
-def tASRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// ASR immediate
+let Defs = [CPSR] in
+def tASRri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "asr $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (sra tGPR:$lhs, (i32 imm:$rhs)))]>;
 
-def tASRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// ASR register
+let Defs = [CPSR] in
+def tASRrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                  "asr $dst, $rhs",
                  [(set tGPR:$dst, (sra tGPR:$lhs, tGPR:$rhs))]>;
 
-def tBIC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// BIC register
+let Defs = [CPSR] in
+def tBIC : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "bic $dst, $rhs",
                [(set tGPR:$dst, (and tGPR:$lhs, (not tGPR:$rhs)))]>;
 
-
-def tCMN : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+// CMN register
+let Defs = [CPSR] in {
+def tCMN : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
               "cmn $lhs, $rhs",
               [(ARMcmp tGPR:$lhs, (ineg tGPR:$rhs))]>;
-
-def tCMPi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs),
-               "cmp $lhs, $rhs",
-               [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
-
-def tCMPr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
-               "cmp $lhs, $rhs",
-               [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
-
-def tTST  : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
-               "tst $lhs, $rhs",
-               [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
-
-def tCMNNZ : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+def tCMNNZ : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
                 "cmn $lhs, $rhs",
                 [(ARMcmpNZ tGPR:$lhs, (ineg tGPR:$rhs))]>;
+}
 
-def tCMPNZi8 : TI<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+// CMP immediate
+let Defs = [CPSR] in {
+def tCMPi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
+               "cmp $lhs, $rhs",
+               [(ARMcmp tGPR:$lhs, imm0_255:$rhs)]>;
+def tCMPNZi8 : T1I<(outs), (ins tGPR:$lhs, i32imm:$rhs),
                  "cmp $lhs, $rhs",
                  [(ARMcmpNZ tGPR:$lhs, imm0_255:$rhs)]>;
 
-def tCMPNZr : TI<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+}
+
+// CMP register
+let Defs = [CPSR] in {
+def tCMPr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+               "cmp $lhs, $rhs",
+               [(ARMcmp tGPR:$lhs, tGPR:$rhs)]>;
+def tCMPNZr : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
                  "cmp $lhs, $rhs",
                  [(ARMcmpNZ tGPR:$lhs, tGPR:$rhs)]>;
+}
 
 // TODO: A7-37: CMP(3) - cmp hi regs
 
-let isCommutable = 1 in
-def tEOR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// XOR register
+let isCommutable = 1, Defs = [CPSR] in
+def tEOR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "eor $dst, $rhs",
                [(set tGPR:$dst, (xor tGPR:$lhs, tGPR:$rhs))]>;
 
-def tLSLri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// LSL immediate
+let Defs = [CPSR] in
+def tLSLri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "lsl $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (shl tGPR:$lhs, (i32 imm:$rhs)))]>;
 
-def tLSLrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// LSL register
+let Defs = [CPSR] in
+def tLSLrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                  "lsl $dst, $rhs",
                  [(set tGPR:$dst, (shl tGPR:$lhs, tGPR:$rhs))]>;
 
-def tLSRri : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// LSR immediate
+let Defs = [CPSR] in
+def tLSRri : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "lsr $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (srl tGPR:$lhs, (i32 imm:$rhs)))]>;
 
-def tLSRrr : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// LSR register
+let Defs = [CPSR] in
+def tLSRrr : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                  "lsr $dst, $rhs",
                  [(set tGPR:$dst, (srl tGPR:$lhs, tGPR:$rhs))]>;
 
-// FIXME: This is not rematerializable because mov changes the condition code.
-def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
+// move immediate
+let Defs = [CPSR] in
+def tMOVi8 : T1I<(outs tGPR:$dst), (ins i32imm:$src),
                  "mov $dst, $src",
                  [(set tGPR:$dst, imm0_255:$src)]>;
 
@@ -392,41 +432,47 @@ def tMOVi8 : TI<(outs tGPR:$dst), (ins i32imm:$src),
 // Note: MOV(2) of two low regs updates the flags, so we emit this as 'cpy',
 // which is MOV(3).  This also supports high registers.
 let neverHasSideEffects = 1 in {
-def tMOVr       : TI<(outs tGPR:$dst), (ins tGPR:$src),
+def tMOVr       : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                       "cpy $dst, $src", []>;
-def tMOVhir2lor : TI<(outs tGPR:$dst), (ins GPR:$src),
+def tMOVhir2lor : T1I<(outs tGPR:$dst), (ins GPR:$src),
                       "cpy $dst, $src\t@ hir2lor", []>;
-def tMOVlor2hir : TI<(outs GPR:$dst), (ins tGPR:$src),
+def tMOVlor2hir : T1I<(outs GPR:$dst), (ins tGPR:$src),
                       "cpy $dst, $src\t@ lor2hir", []>;
-def tMOVhir2hir : TI<(outs GPR:$dst), (ins GPR:$src),
+def tMOVhir2hir : T1I<(outs GPR:$dst), (ins GPR:$src),
                       "cpy $dst, $src\t@ hir2hir", []>;
 } // neverHasSideEffects
 
-let isCommutable = 1 in
-def tMUL : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// multiply register
+let isCommutable = 1, Defs = [CPSR] in
+def tMUL : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "mul $dst, $rhs",
                [(set tGPR:$dst, (mul tGPR:$lhs, tGPR:$rhs))]>;
 
-def tMVN : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// move inverse register
+let Defs = [CPSR] in
+def tMVN : T1I<(outs tGPR:$dst), (ins tGPR:$src),
               "mvn $dst, $src",
               [(set tGPR:$dst, (not tGPR:$src))]>;
 
-def tNEG : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// negate register
+let Defs = [CPSR] in
+def tNEG : T1I<(outs tGPR:$dst), (ins tGPR:$src),
               "neg $dst, $src",
               [(set tGPR:$dst, (ineg tGPR:$src))]>;
 
-let isCommutable = 1 in
-def tORR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// bitwise or register
+let isCommutable = 1, Defs = [CPSR] in
+def tORR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                "orr $dst, $rhs",
                [(set tGPR:$dst, (or tGPR:$lhs, tGPR:$rhs))]>;
 
-
-def tREV : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// swaps
+def tREV : T1I<(outs tGPR:$dst), (ins tGPR:$src),
               "rev $dst, $src",
               [(set tGPR:$dst, (bswap tGPR:$src))]>,
               Requires<[IsThumb, HasV6]>;
 
-def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src),
+def tREV16 : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "rev16 $dst, $src",
                 [(set tGPR:$dst,
                     (or (and (srl tGPR:$src, (i32 8)), 0xFF),
@@ -435,7 +481,7 @@ def tREV16 : TI<(outs tGPR:$dst), (ins tGPR:$src),
                                 (and (shl tGPR:$src, (i32 8)), 0xFF000000)))))]>,
                 Requires<[IsThumb, HasV6]>;
 
-def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src),
+def tREVSH : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "revsh $dst, $src",
                 [(set tGPR:$dst,
                    (sext_inreg
@@ -443,53 +489,78 @@ def tREVSH : TI<(outs tGPR:$dst), (ins tGPR:$src),
                          (shl tGPR:$src, (i32 8))), i16))]>,
                 Requires<[IsThumb, HasV6]>;
 
-def tROR : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// rotate right register
+let Defs = [CPSR] in
+def tROR : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "ror $dst, $rhs",
                 [(set tGPR:$dst, (rotr tGPR:$lhs, tGPR:$rhs))]>;
 
-
-// Subtract with carry
-def tSBC : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// Subtract with carry register
+let Defs = [CPSR], Uses = [CPSR] in
+def tSBCS : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "sbc $dst, $rhs",
                 [(set tGPR:$dst, (sube tGPR:$lhs, tGPR:$rhs))]>;
 
-def tSUBS : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
-                "sub $dst, $lhs, $rhs",
-               [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>;
-
-
-// TODO: A7-96: STMIA - store multiple.
-
-def tSUBi3 : TI<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// Subtract immediate
+let Defs = [CPSR] in {
+def tSUBi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                 "sub $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (add tGPR:$lhs, imm0_7_neg:$rhs))]>;
+def tSUBSi3 : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+                 "sub $dst, $lhs, $rhs",
+                 [(set tGPR:$dst, (addc tGPR:$lhs, imm0_7_neg:$rhs))]>;
+}
 
-def tSUBi8 : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+let Defs = [CPSR] in {
+def tSUBi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                   "sub $dst, $rhs",
                   [(set tGPR:$dst, (add tGPR:$lhs, imm8_255_neg:$rhs))]>;
+def tSUBSi8 : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+                   "sub $dst, $rhs",
+                   [(set tGPR:$dst, (addc tGPR:$lhs, imm8_255_neg:$rhs))]>;
+}
 
-def tSUBrr : TI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+// subtract register
+let Defs = [CPSR] in {
+def tSUBrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
                 "sub $dst, $lhs, $rhs",
                 [(set tGPR:$dst, (sub tGPR:$lhs, tGPR:$rhs))]>;
+def tSUBSrr : T1I<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs),
+                "sub $dst, $lhs, $rhs",
+                [(set tGPR:$dst, (subc tGPR:$lhs, tGPR:$rhs))]>;
+}
 
-def tSUBspi : TIt<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
+// TODO: A7-96: STMIA - store multiple.
+
+def tSUBspi : T1It<(outs tGPR:$dst), (ins tGPR:$lhs, i32imm:$rhs),
                   "sub $dst, $rhs * 4", []>;
 
-def tSXTB  : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// sign-extend byte
+def tSXTB  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "sxtb $dst, $src",
                 [(set tGPR:$dst, (sext_inreg tGPR:$src, i8))]>,
                 Requires<[IsThumb, HasV6]>;
-def tSXTH  : TI<(outs tGPR:$dst), (ins tGPR:$src),
+
+// sign-extend short
+def tSXTH  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "sxth $dst, $src",
                 [(set tGPR:$dst, (sext_inreg tGPR:$src, i16))]>,
                 Requires<[IsThumb, HasV6]>;
 
+// test
+let isCommutable = 1, Defs = [CPSR] in
+def tTST  : T1I<(outs), (ins tGPR:$lhs, tGPR:$rhs),
+               "tst $lhs, $rhs",
+               [(ARMcmpNZ (and tGPR:$lhs, tGPR:$rhs), 0)]>;
 
-def tUXTB  : TI<(outs tGPR:$dst), (ins tGPR:$src),
+// zero-extend byte
+def tUXTB  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "uxtb $dst, $src",
                 [(set tGPR:$dst, (and tGPR:$src, 0xFF))]>,
                 Requires<[IsThumb, HasV6]>;
-def tUXTH  : TI<(outs tGPR:$dst), (ins tGPR:$src),
+
+// zero-extend short
+def tUXTH  : T1I<(outs tGPR:$dst), (ins tGPR:$src),
                 "uxth $dst, $src",
                 [(set tGPR:$dst, (and tGPR:$src, 0xFFFF))]>,
                 Requires<[IsThumb, HasV6]>;
@@ -536,35 +607,35 @@ let isCall = 1,
 //
 
 // ConstantPool, GlobalAddress
-def : ThumbPat<(ARMWrapper  tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
-def : ThumbPat<(ARMWrapper  tconstpool  :$dst), (tLEApcrel tconstpool  :$dst)>;
+def : TPat<(ARMWrapper  tglobaladdr :$dst), (tLEApcrel tglobaladdr :$dst)>;
+def : TPat<(ARMWrapper  tconstpool  :$dst), (tLEApcrel tconstpool  :$dst)>;
 
 // JumpTable
-def : ThumbPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
-               (tLEApcrelJT tjumptable:$dst, imm:$id)>;
+def : TPat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+           (tLEApcrelJT tjumptable:$dst, imm:$id)>;
 
 // Direct calls
-def : ThumbPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
-def : ThumbV5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
+def : TPat<(ARMtcall texternalsym:$func), (tBL texternalsym:$func)>;
+def : Tv5Pat<(ARMcall texternalsym:$func), (tBLXi texternalsym:$func)>;
 
 // Indirect calls to ARM routines
-def : ThumbV5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>;
+def : Tv5Pat<(ARMcall tGPR:$dst), (tBLXr tGPR:$dst)>;
 
 // zextload i1 -> zextload i8
-def : ThumbPat<(zextloadi1 t_addrmode_s1:$addr),
-               (tLDRB t_addrmode_s1:$addr)>;
+def : TPat<(zextloadi1 t_addrmode_s1:$addr),
+           (tLDRB t_addrmode_s1:$addr)>;
 
 // extload -> zextload
-def : ThumbPat<(extloadi1  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
-def : ThumbPat<(extloadi8  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
-def : ThumbPat<(extloadi16 t_addrmode_s2:$addr),  (tLDRH t_addrmode_s2:$addr)>;
+def : TPat<(extloadi1  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
+def : TPat<(extloadi8  t_addrmode_s1:$addr),  (tLDRB t_addrmode_s1:$addr)>;
+def : TPat<(extloadi16 t_addrmode_s2:$addr),  (tLDRH t_addrmode_s2:$addr)>;
 
 // Large immediate handling.
 
 // Two piece imms.
-def : ThumbPat<(i32 thumb_immshifted:$src),
-               (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
-                       (thumb_immshifted_shamt imm:$src))>;
+def : T1Pat<(i32 thumb_immshifted:$src),
+            (tLSLri (tMOVi8 (thumb_immshifted_val imm:$src)),
+                    (thumb_immshifted_shamt imm:$src))>;
 
-def : ThumbPat<(i32 imm0_255_comp:$src),
-               (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+def : T1Pat<(i32 imm0_255_comp:$src),
+            (tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index e0617e4..bfdf719 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -14,9 +14,9 @@
 // Shifted operands. No register controlled shifts for Thumb2.
 // Note: We do not support rrx shifted operands yet.
 def t2_so_reg : Operand<i32>,    // reg imm
-                ComplexPattern<i32, 2, "SelectThumb2ShifterOperandReg",
+                ComplexPattern<i32, 2, "SelectT2ShifterOperandReg",
                                [shl,srl,sra,rotr]> {
-  let PrintMethod = "printSOOperand";
+  let PrintMethod = "printT2SOOperand";
   let MIOperandInfo = (ops GPR, i32imm);
 }
 
@@ -69,6 +69,11 @@ def t2_so_imm_neg : Operand<i32>,
   let PrintMethod = "printT2SOImmOperand";
 }
 
+/// imm1_31 predicate - True if the 32-bit immediate is in the range [1,31].
+def imm1_31 : PatLeaf<(i32 imm), [{
+  return (int32_t)N->getZExtValue() >= 1 && (int32_t)N->getZExtValue() < 32;
+}]>;
+
 /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095].
 def imm0_4095 : PatLeaf<(i32 imm), [{
   return (uint32_t)N->getZExtValue() < 4096;
@@ -121,137 +126,287 @@ def t2_lo16AllZero : PatLeaf<(i32 imm), [{
   return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
   }], t2_hi16>;
 
+
 //===----------------------------------------------------------------------===//
-//  Thumb2 to cover the functionality of the ARM instruction set.
+// Multiclass helpers...
 //
 
-/// T2I_bin_is - Defines a set of (op reg, {so_imm|so_reg}) patterns for a
-//  binary operation that produces a value.
-multiclass T2I_bin_is<string opc, PatFrag opnode> {
+/// T2I_un_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// unary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_un_irs<string opc, PatFrag opnode, bit Cheap = 0, bit ReMat = 0>{
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
+   def i : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src),
+                opc, " $dst, $src",
+                [(set GPR:$dst, (opnode t2_so_imm:$src))]> {
+     let isAsCheapAsAMove = Cheap;
+     let isReMaterializable = ReMat;
+   }
+   // register
+   def r : T2I<(outs GPR:$dst), (ins GPR:$src),
+               opc, " $dst, $src",
+                [(set GPR:$dst, (opnode GPR:$src))]>;
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+   def s : T2I<(outs GPR:$dst), (ins t2_so_reg:$src),
+               opc, " $dst, $src",
+               [(set GPR:$dst, (opnode t2_so_reg:$src))]>;
 }
 
-/// T2I_2bin_is - Same as T2I_bin_is except the order of operands are reversed.
+/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value. These are predicable and can be
+/// changed to modify CPSR.
+multiclass T2I_bin_irs<string opc, PatFrag opnode, bit Commutable = 0> {
+   // shifted imm
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
+   // register
+   def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+     let isCommutable = Commutable;
+   }
+   // shifted register
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+}
+
+/// T2I_rbin_is - Same as T2I_bin_irs except the order of operands is
+/// reversed. It doesn't define the 'rr' form since it's handled by its
+/// T2I_bin_irs counterpart.
 multiclass T2I_rbin_is<string opc, PatFrag opnode> {
    // shifted imm
    def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
+                opc, " $dst, $rhs, $lhs",
                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
    // shifted register
    def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
+                opc, " $dst, $rhs, $lhs",
                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
 }
 
-/// T2I_bin_s_is - Similar to T2I_bin_is except it sets the 's' bit so the
+/// T2I_bin_s_irs - Similar to T2I_bin_irs except it sets the 's' bit so the
 /// instruction modifies the CPSR register.
 let Defs = [CPSR] in {
-multiclass T2I_bin_s_is<string opc, PatFrag opnode> {
+multiclass T2I_bin_s_irs<string opc, PatFrag opnode, bit Commutable = 0> {
    // shifted imm
    def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
-                !strconcat(opc, "s $dst, $lhs, $rhs"),
+                !strconcat(opc, "s"), " $dst, $lhs, $rhs",
                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
-
+   // register
+   def rr : T2I<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                !strconcat(opc, "s"), " $dst, $lhs, $rhs",
+                [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+     let isCommutable = Commutable;
+   }
    // shifted register
    def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
-                !strconcat(opc, "s $dst, $lhs, $rhs"),
+                !strconcat(opc, "s"), " $dst, $lhs, $rhs",
                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
 }
 }
 
-/// T2I_rbin_s_is - Same as T2I_bin_s_is except the order of operands are
-/// reversed.
-let Defs = [CPSR] in {
-multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
+/// T2I_bin_ii12rs - Defines a set of (op reg, {so_imm|imm0_4095|r|so_reg})
+/// patterns for a binary operation that produces a value.
+multiclass T2I_bin_ii12rs<string opc, PatFrag opnode, bit Commutable = 0> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
-                !strconcat(opc, "s $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
-
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
+   // 12-bit imm
+   def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                   !strconcat(opc, "w"), " $dst, $lhs, $rhs",
+                   [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
+   // register
+   def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]> {
+     let isCommutable = Commutable;
+   }
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
-                !strconcat(opc, "s $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
-}
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
 }
 
-/// T2I_bin_ii12s - Defines a set of (op reg, {so_imm|imm0_4095|so_reg}) patterns
-/// for a binary operation that produces a value.
-multiclass T2I_bin_ii12s<string opc, PatFrag opnode> {
+/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
+/// binary operation that produces a value and use and define the carry bit.
+/// It's not predicable.
+let Uses = [CPSR] in {
+multiclass T2I_adde_sube_irs<string opc, PatFrag opnode, bit Commutable = 0> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
-   // 12-bit imm
-   def ri12 : T2I<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
-                !strconcat(opc, "w $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>;
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+   // register
+   def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]> {
+     let isCommutable = Commutable;
+   }
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
-                !strconcat(opc, " $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+   // Carry setting variants
+   // shifted imm
+   def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs),
+                  !strconcat(opc, "s $dst, $lhs, $rhs"),
+                  [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                    let Defs = [CPSR];
+                  }
+   // register
+   def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                  !strconcat(opc, "s $dst, $lhs, $rhs"),
+                  [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                    let Defs = [CPSR];
+                    let isCommutable = Commutable;
+   }
+   // shifted register
+   def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs),
+                  !strconcat(opc, "s $dst, $lhs, $rhs"),
+                  [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+                  Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                    let Defs = [CPSR];
+   }
+}
 }
 
-/// T2I_bin_c_is - Defines a set of (op reg, {so_imm|reg}) patterns for a
-//  binary operation that produces a value and set the carry bit. It can also
-/// optionally set CPSR.
-let Uses = [CPSR] in {
-multiclass T2I_bin_c_is<string opc, PatFrag opnode> {
+/// T2I_rsc_is - Same as T2I_adde_sube_irs except the order of operands is
+/// reversed. It doesn't define the 'rr' form since it's handled by its
+/// T2I_adde_sube_irs counterpart.
+let Defs = [CPSR], Uses = [CPSR] in {
+multiclass T2I_rsc_is<string opc, PatFrag opnode> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs, cc_out:$s),
-                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>;
-
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
+                 opc, " $dst, $rhs, $lhs",
+                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs, cc_out:$s),
-                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>;
+   def rs : T2sI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
+                 opc, " $dst, $rhs, $lhs",
+                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUnused]>;
+   // shifted imm
+   def Sri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs),
+                 !strconcat(opc, "s $dst, $rhs, $lhs"),
+                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                   let Defs = [CPSR];
+   }
+   // shifted register
+   def Srs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs),
+                 !strconcat(opc, "s $dst, $rhs, $lhs"),
+                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>,
+                 Requires<[IsThumb, HasThumb2, CarryDefIsUsed]> {
+                   let Defs = [CPSR];
+   }
 }
 }
 
-/// T2I_rbin_c_is - Same as T2I_bin_c_is except the order of operands are
-/// reversed.
-let Uses = [CPSR] in {
-multiclass T2I_rbin_c_is<string opc, PatFrag opnode> {
+/// T2I_rbin_s_is - Same as T2I_bin_s_irs except the order of operands is
+/// reversed. It doesn't define the 'rr' form since it's handled by its
+/// T2I_bin_s_irs counterpart.
+let Defs = [CPSR] in {
+multiclass T2I_rbin_s_is<string opc, PatFrag opnode> {
    // shifted imm
-   def ri : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
-                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
-
+   def ri : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_imm:$lhs, cc_out:$s),
+                 !strconcat(opc, "${s} $dst, $rhs, $lhs"),
+                 [(set GPR:$dst, (opnode t2_so_imm:$lhs, GPR:$rhs))]>;
    // shifted register
-   def rs : T2I<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
-                !strconcat(opc, "${s} $dst, $lhs, $rhs"),
-                [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
+   def rs : T2XI<(outs GPR:$dst), (ins GPR:$rhs, t2_so_reg:$lhs, cc_out:$s),
+                 !strconcat(opc, "${s} $dst, $rhs, $lhs"),
+                 [(set GPR:$dst, (opnode t2_so_reg:$lhs, GPR:$rhs))]>;
 }
 }
 
+/// T2I_sh_ir - Defines a set of (op reg, {so_imm|r}) patterns for a shift /
+/// rotate operation that produces a value.
+multiclass T2I_sh_ir<string opc, PatFrag opnode> {
+   // 5-bit imm
+   def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, imm1_31:$rhs))]>;
+   // register
+   def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs),
+                 opc, " $dst, $lhs, $rhs",
+                 [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>;
+}
 
-/// T21_cmp_irs - Defines a set of (op r, {so_imm|so_reg}) cmp / test
-/// patterns. Similar to T2I_bin_is except the instruction does not produce
+/// T2I_cmp_is - Defines a set of (op r, {so_imm|r|so_reg}) cmp / test
+/// patterns. Similar to T2I_bin_irs except the instruction does not produce
 /// a explicit result, only implicitly set CPSR.
 let Uses = [CPSR] in {
 multiclass T2I_cmp_is<string opc, PatFrag opnode> {
    // shifted imm
    def ri : T2I<(outs), (ins GPR:$lhs, t2_so_imm:$rhs),
-                !strconcat(opc, " $lhs, $rhs"),
+                opc, " $lhs, $rhs",
                 [(opnode GPR:$lhs, t2_so_imm:$rhs)]>;
-
+   // register
+   def rr : T2I<(outs), (ins GPR:$lhs, GPR:$rhs),
+                opc, " $lhs, $rhs",
+                [(opnode GPR:$lhs, GPR:$rhs)]>;
    // shifted register
    def rs : T2I<(outs), (ins GPR:$lhs, t2_so_reg:$rhs),
-                !strconcat(opc, " $lhs, $rhs"),
+                opc, " $lhs, $rhs",
                 [(opnode GPR:$lhs, t2_so_reg:$rhs)]>;
 }
 }
 
 //===----------------------------------------------------------------------===//
-//  Arithmetic Instructions.
+// Instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//  Miscellaneous Instructions.
+//
+
+let isNotDuplicable = 1 in
+def t2PICADD : T2XI<(outs tGPR:$dst), (ins tGPR:$lhs, pclabel:$cp),
+                    "$cp:\n\tadd $dst, pc",
+                    [(set tGPR:$dst, (ARMpic_add tGPR:$lhs, imm:$cp))]>;
+
+
+// LEApcrel - Load a pc-relative address into a register without offending the
+// assembler.
+def t2LEApcrel : T2XI<(outs GPR:$dst), (ins i32imm:$label, pred:$p),
+                   !strconcat(!strconcat(".set PCRELV${:uid}, ($label-(",
+                                         "${:private}PCRELL${:uid}+8))\n"),
+                              !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                         "add$p $dst, pc, #PCRELV${:uid}")),
+                   []>;
+
+def t2LEApcrelJT : T2XI<(outs GPR:$dst),
+                       (ins i32imm:$label, i32imm:$id, pred:$p),
+          !strconcat(!strconcat(".set PCRELV${:uid}, (${label}_${id:no_hash}-(",
+                                         "${:private}PCRELL${:uid}+8))\n"),
+                              !strconcat("${:private}PCRELL${:uid}:\n\t",
+                                         "add$p $dst, pc, #PCRELV${:uid}")),
+                   []>;
+
+// ADD rd, sp, #so_imm
+def t2ADDrSPi : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
+                     "add $dst, $sp, $imm",
+                     []>;
+
+// ADD rd, sp, #imm12
+def t2ADDrSPi12 : T2XI<(outs GPR:$dst), (ins GPR:$sp, i32imm:$imm),
+                       "addw $dst, $sp, $imm",
+                       []>;
+
+// ADD rd, sp, so_reg ("addw" exists only for the imm12 form above)
+def t2ADDrSPs : T2XI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
+                     "add $dst, $sp, $rhs",
+                     []>;
+
+//===----------------------------------------------------------------------===//
+//  Load / store Instructions.
 //
 
 //===----------------------------------------------------------------------===//
@@ -259,90 +414,95 @@ multiclass T2I_cmp_is<string opc, PatFrag opnode> {
 //
 
 let neverHasSideEffects = 1 in
-def t2MOVr : T2I<(outs GPR:$dst), (ins GPR:$src),
-                  "mov $dst, $src", []>;
+def t2MOVr : T2sI<(outs GPR:$dst), (ins GPR:$src),
+                   "mov", " $dst, $src", []>;
+
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
+def t2MOVi : T2sI<(outs GPR:$dst), (ins t2_so_imm:$src),
+                   "mov", " $dst, $src",
+                   [(set GPR:$dst, t2_so_imm:$src)]>;
 
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in
 def t2MOVi16 : T2I<(outs GPR:$dst), (ins i32imm:$src),
-                   "movw $dst, $src",
+                   "movw", " $dst, $src",
                    [(set GPR:$dst, imm0_65535:$src)]>;
 
-
-// FIXME: Move (shifted register) is a pseudo-instruction for ASR, LSL, LSR,
-// ROR, and RRX. Consider splitting into multiple instructions.
-def t2MOVs  : T2I<(outs GPR:$dst), (ins t2_so_reg:$src),
-                  "mov $dst, $src",
-                  [(set GPR:$dst, t2_so_reg:$src)]>;
-def t2MOVrx : T2I<(outs GPR:$dst), (ins GPR:$src),
-                  "mov $dst, $src, rrx",
-                  [(set GPR:$dst, (ARMrrx GPR:$src))]>;
-
-
 // FIXME: Also available in ARM mode.
 let Constraints = "$src = $dst" in
-def t2MOVTi16 : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
-                   "movt $dst, $imm",
-                   [(set GPR:$dst,
-                         (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>;
+def t2MOVTi16 : T2sI<(outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
+                     "movt", " $dst, $imm",
+                     [(set GPR:$dst,
+                           (or (and GPR:$src, 0xffff), t2_lo16AllZero:$imm))]>;
 
 //===----------------------------------------------------------------------===//
 //  Arithmetic Instructions.
 //
 
-defm t2ADD  : T2I_bin_ii12s<"add", BinOpFrag<(add  node:$LHS, node:$RHS)>>;
-defm t2SUB  : T2I_bin_ii12s<"sub", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
+defm t2ADD  : T2I_bin_ii12rs<"add", BinOpFrag<(add  node:$LHS, node:$RHS)>, 1>;
+defm t2SUB  : T2I_bin_ii12rs<"sub", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
 
 // ADD and SUB with 's' bit set. No 12-bit immediate (T4) variants.
-defm t2ADDS : T2I_bin_s_is<"add", BinOpFrag<(addc node:$LHS, node:$RHS)>>;
-defm t2SUBS : T2I_bin_s_is<"sub", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm t2ADDS : T2I_bin_s_irs <"add",  BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm t2SUBS : T2I_bin_s_irs <"sub",  BinOpFrag<(subc node:$LHS, node:$RHS)>>;
 
-// FIXME: predication support
-defm t2ADC  : T2I_bin_c_is<"adc", BinOpFrag<(adde node:$LHS, node:$RHS)>>;
-defm t2SBC  : T2I_bin_c_is<"sbc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm t2ADC  : T2I_adde_sube_irs<"adc",BinOpFrag<(adde node:$LHS, node:$RHS)>,1>;
+defm t2SBC  : T2I_adde_sube_irs<"sbc",BinOpFrag<(sube node:$LHS, node:$RHS)>>;
 
 // RSB, RSC
-defm t2RSB  : T2I_rbin_is  <"rsb", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
-defm t2RSBS : T2I_rbin_c_is<"rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
-defm t2RSC  : T2I_rbin_s_is<"rsc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm t2RSB  : T2I_rbin_is   <"rsb", BinOpFrag<(sub  node:$LHS, node:$RHS)>>;
+defm t2RSBS : T2I_rbin_s_is <"rsb", BinOpFrag<(subc node:$LHS, node:$RHS)>>;
+defm t2RSC  : T2I_rsc_is    <"rsc", BinOpFrag<(sube node:$LHS, node:$RHS)>>;
 
 // (sub X, imm) gets canonicalized to (add X, -imm).  Match this form.
-def : Thumb2Pat<(add       GPR:$src, t2_so_imm_neg:$imm),
-                (t2SUBri   GPR:$src, t2_so_imm_neg:$imm)>;
-def : Thumb2Pat<(add       GPR:$src, imm0_4095_neg:$imm),
-                (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+def : T2Pat<(add       GPR:$src, t2_so_imm_neg:$imm),
+            (t2SUBri   GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(add       GPR:$src, imm0_4095_neg:$imm),
+            (t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
 
 
 //===----------------------------------------------------------------------===//
+//  Shift and rotate Instructions.
+//
+
+defm t2LSL  : T2I_sh_ir<"lsl", BinOpFrag<(shl  node:$LHS, node:$RHS)>>;
+defm t2LSR  : T2I_sh_ir<"lsr", BinOpFrag<(srl  node:$LHS, node:$RHS)>>;
+defm t2ASR  : T2I_sh_ir<"asr", BinOpFrag<(sra  node:$LHS, node:$RHS)>>;
+defm t2ROR  : T2I_sh_ir<"ror", BinOpFrag<(rotr node:$LHS, node:$RHS)>>;
+
+def t2MOVrx : T2sI<(outs GPR:$dst), (ins GPR:$src),
+                   "mov", " $dst, $src, rrx",
+                   [(set GPR:$dst, (ARMrrx GPR:$src))]>;
+
+//===----------------------------------------------------------------------===//
 //  Bitwise Instructions.
 //
 
-defm t2AND  : T2I_bin_is  <"and", BinOpFrag<(and node:$LHS, node:$RHS)>>;
-defm t2ORR  : T2I_bin_is  <"orr", BinOpFrag<(or  node:$LHS, node:$RHS)>>;
-defm t2EOR  : T2I_bin_is  <"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>>;
+defm t2AND  : T2I_bin_irs<"and", BinOpFrag<(and node:$LHS, node:$RHS)>, 1>;
+defm t2ORR  : T2I_bin_irs<"orr", BinOpFrag<(or  node:$LHS, node:$RHS)>, 1>;
+defm t2EOR  : T2I_bin_irs<"eor", BinOpFrag<(xor node:$LHS, node:$RHS)>, 1>;
 
-defm t2BIC  : T2I_bin_is  <"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
+defm t2BIC  : T2I_bin_irs<"bic", BinOpFrag<(and node:$LHS, (not node:$RHS))>>;
 
-def : Thumb2Pat<(and     GPR:$src, t2_so_imm_not:$imm),
-                (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(and     GPR:$src, t2_so_imm_not:$imm),
+            (t2BICri GPR:$src, t2_so_imm_not:$imm)>;
 
-defm t2ORN  : T2I_bin_is  <"orn", BinOpFrag<(or  node:$LHS, (not node:$RHS))>>;
+defm t2ORN  : T2I_bin_irs<"orn", BinOpFrag<(or  node:$LHS, (not node:$RHS))>>;
 
-def : Thumb2Pat<(or      GPR:$src, t2_so_imm_not:$imm),
-                (t2ORNri GPR:$src, t2_so_imm_not:$imm)>;
+def : T2Pat<(or      GPR:$src, t2_so_imm_not:$imm),
+            (t2ORNri GPR:$src, t2_so_imm_not:$imm)>;
 
+// Prefer this over t2EORri ra, rb, -1 because mvn has a 16-bit version
+let AddedComplexity = 1 in
+defm t2MVN  : T2I_un_irs  <"mvn", UnOpFrag<(not node:$Src)>, 1, 1>;
 
-def t2MVNr : T2I<(outs GPR:$dst), (ins t2_so_reg:$rhs),
-                  "mvn $dst, $rhs",
-                 [(set GPR:$dst, (not t2_so_reg:$rhs))]>;
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def t2MVNi : T2I<(outs GPR:$dst), (ins t2_so_imm_not:$rhs),
-                  "mvn $dst, $rhs",
-                 [(set GPR:$dst, t2_so_imm_not:$rhs)]>;
+def : T2Pat<(t2_so_imm_not:$src),
+            (t2MVNi t2_so_imm_not:$src)>;
 
 // A8.6.17  BFC - Bitfield clear
 // FIXME: Also available in ARM mode.
 let Constraints = "$src = $dst" in
 def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
-                "bfc $dst, $imm",
+                "bfc", " $dst, $imm",
                 [(set GPR:$dst, (and GPR:$src, bf_inv_mask_imm:$imm))]>;
 
 // FIXME: A8.6.18  BFI - Bitfield insert (Encoding T1)
@@ -350,16 +510,17 @@ def t2BFC : T2I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
 //===----------------------------------------------------------------------===//
 //  Multiply Instructions.
 //
+let isCommutable = 1 in
 def t2MUL: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
-                "mul $dst, $a, $b",
+                "mul", " $dst, $a, $b",
                 [(set GPR:$dst, (mul GPR:$a, GPR:$b))]>;
 
 def t2MLA: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-		"mla $dst, $a, $b, $c",
+		"mla", " $dst, $a, $b, $c",
 		[(set GPR:$dst, (add (mul GPR:$a, GPR:$b), GPR:$c))]>;
 
 def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
-		"mls $dst, $a, $b, $c",
+		"mls", " $dst, $a, $b, $c",
                 [(set GPR:$dst, (sub GPR:$c, (mul GPR:$a, GPR:$b)))]>;
 
 // FIXME: SMULL, etc.
@@ -368,20 +529,16 @@ def t2MLS: T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
 //  Misc. Arithmetic Instructions.
 //
 
-/////
-/// A8.6.31  CLZ
-/////
-// FIXME not firing? but ARM version does...
 def t2CLZ : T2I<(outs GPR:$dst), (ins GPR:$src),
-                "clz $dst, $src",
+                "clz", " $dst, $src",
                 [(set GPR:$dst, (ctlz GPR:$src))]>;
 
 def t2REV : T2I<(outs GPR:$dst), (ins GPR:$src),
-                "rev $dst, $src",
+                "rev", " $dst, $src",
                 [(set GPR:$dst, (bswap GPR:$src))]>;
 
 def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src),
-                "rev16 $dst, $src",
+                "rev16", " $dst, $src",
                 [(set GPR:$dst,
                     (or (and (srl GPR:$src, (i32 8)), 0xFF),
                         (or (and (shl GPR:$src, (i32 8)), 0xFF00),
@@ -392,7 +549,7 @@ def t2REV16 : T2I<(outs GPR:$dst), (ins GPR:$src),
 /// A8.6.137  REVSH
 /////
 def t2REVSH : T2I<(outs GPR:$dst), (ins GPR:$src),
-                 "revsh $dst, $src",
+                 "revsh", " $dst, $src",
                  [(set GPR:$dst,
                     (sext_inreg
                       (or (srl (and GPR:$src, 0xFFFF), (i32 8)),
@@ -414,11 +571,11 @@ defm t2CMN   : T2I_cmp_is<"cmn",
 defm t2CMNnz : T2I_cmp_is<"cmn",
                           BinOpFrag<(ARMcmpNZ node:$LHS,(ineg node:$RHS))>>;
 
-def : Thumb2Pat<(ARMcmp  GPR:$src, t2_so_imm_neg:$imm),
-                (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMcmp  GPR:$src, t2_so_imm_neg:$imm),
+            (t2CMNri GPR:$src, t2_so_imm_neg:$imm)>;
 
-def : Thumb2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
-                (t2CMNri   GPR:$src, t2_so_imm_neg:$imm)>;
+def : T2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
+            (t2CMNri   GPR:$src, t2_so_imm_neg:$imm)>;
 
 // FIXME: TST, TEQ, etc.
 
@@ -433,8 +590,13 @@ def : Thumb2Pat<(ARMcmpNZ  GPR:$src, t2_so_imm_neg:$imm),
 // Non-Instruction Patterns
 //
 
+// ConstantPool, GlobalAddress, and JumpTable
+def : T2Pat<(ARMWrapper  tglobaladdr :$dst), (t2LEApcrel tglobaladdr :$dst)>;
+def : T2Pat<(ARMWrapper  tconstpool  :$dst), (t2LEApcrel tconstpool  :$dst)>;
+def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
+            (t2LEApcrelJT tjumptable:$dst, imm:$id)>;
+
 // Large immediate handling.
 
-def : Thumb2Pat<(i32 imm:$src),
-                (t2MOVTi16 (t2MOVi16 (t2_lo16 imm:$src)),
-                           (t2_hi16 imm:$src))>;
+def : T2Pat<(i32 imm:$src),
+            (t2MOVTi16 (t2MOVi16 (t2_lo16 imm:$src)), (t2_hi16 imm:$src))>;
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.cpp b/lib/Target/ARM/ARMTargetAsmInfo.cpp
index 42b8eae..bf2c14e 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.cpp
+++ b/lib/Target/ARM/ARMTargetAsmInfo.cpp
@@ -43,7 +43,7 @@ const char *const llvm::arm_asm_table[] = {
   0,0
 };
 
-ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
+ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMBaseTargetMachine &TM):
   ARMTargetAsmInfo<DarwinTargetAsmInfo>(TM) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
 
@@ -55,7 +55,7 @@ ARMDarwinTargetAsmInfo::ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM):
   SupportsDebugInformation = true;
 }
 
-ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMTargetMachine &TM):
+ARMELFTargetAsmInfo::ARMELFTargetAsmInfo(const ARMBaseTargetMachine &TM):
   ARMTargetAsmInfo<ELFTargetAsmInfo>(TM) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
 
diff --git a/lib/Target/ARM/ARMTargetAsmInfo.h b/lib/Target/ARM/ARMTargetAsmInfo.h
index 683692f..d3f2da0 100644
--- a/lib/Target/ARM/ARMTargetAsmInfo.h
+++ b/lib/Target/ARM/ARMTargetAsmInfo.h
@@ -26,7 +26,7 @@ namespace llvm {
 
   template <class BaseTAI>
   struct ARMTargetAsmInfo : public BaseTAI {
-    explicit ARMTargetAsmInfo(const ARMTargetMachine &TM) : BaseTAI(TM) {
+    explicit ARMTargetAsmInfo(const ARMBaseTargetMachine &TM) : BaseTAI(TM) {
       BaseTAI::AsmTransCBE = arm_asm_table;
 
       BaseTAI::AlignmentIsInBytes = false;
@@ -51,11 +51,11 @@ namespace llvm {
   EXTERN_TEMPLATE_INSTANTIATION(class ARMTargetAsmInfo<TargetAsmInfo>);
 
   struct ARMDarwinTargetAsmInfo : public ARMTargetAsmInfo<DarwinTargetAsmInfo> {
-    explicit ARMDarwinTargetAsmInfo(const ARMTargetMachine &TM);
+    explicit ARMDarwinTargetAsmInfo(const ARMBaseTargetMachine &TM);
   };
 
   struct ARMELFTargetAsmInfo : public ARMTargetAsmInfo<ELFTargetAsmInfo> {
-    explicit ARMELFTargetAsmInfo(const ARMTargetMachine &TM);
+    explicit ARMELFTargetAsmInfo(const ARMBaseTargetMachine &TM);
   };
 
 } // namespace llvm
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 8006b9b..f7b8215 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -39,13 +39,11 @@ int ARMTargetMachineModule = 0;
 static RegisterTarget<ARMTargetMachine>   X("arm",   "ARM");
 static RegisterTarget<ThumbTargetMachine> Y("thumb", "Thumb");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeARMTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeARMTarget() { }
 
 // No assembler printer by default
-ARMTargetMachine::AsmPrinterCtorFn ARMTargetMachine::AsmPrinterCtor = 0;
+ARMBaseTargetMachine::AsmPrinterCtorFn ARMBaseTargetMachine::AsmPrinterCtor = 0;
 
 /// ThumbTargetMachine - Create an Thumb architecture model.
 ///
@@ -76,34 +74,36 @@ unsigned ThumbTargetMachine::getModuleMatchQuality(const Module &M) {
   return getJITMatchQuality()/2;
 }
 
-ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
-  : ARMTargetMachine(M, FS, true) {
-}
-
 /// TargetMachine ctor - Create an ARM architecture model.
 ///
-ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS,
-                                   bool isThumb)
+ARMBaseTargetMachine::ARMBaseTargetMachine(const Module &M,
+                                           const std::string &FS,
+                                           bool isThumb)
   : Subtarget(M, FS, isThumb),
-    DataLayout(Subtarget.isAPCS_ABI() ?
-               // APCS ABI
-          (isThumb ?
-           std::string("e-p:32:32-f64:32:32-i64:32:32-"
-                       "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
-           std::string("e-p:32:32-f64:32:32-i64:32:32")) :
-               // AAPCS ABI
-          (isThumb ?
-           std::string("e-p:32:32-f64:64:64-i64:64:64-"
-                       "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
-           std::string("e-p:32:32-f64:64:64-i64:64:64"))),
-    InstrInfo(Subtarget),
     FrameInfo(Subtarget),
     JITInfo(),
-    TLInfo(*this),
     InstrItins(Subtarget.getInstrItineraryData()) {
   DefRelocModel = getRelocationModel();
 }
 
+ARMTargetMachine::ARMTargetMachine(const Module &M, const std::string &FS)
+  : ARMBaseTargetMachine(M, FS, false), InstrInfo(Subtarget),
+    DataLayout(Subtarget.isAPCS_ABI() ?
+               std::string("e-p:32:32-f64:32:32-i64:32:32") :
+               std::string("e-p:32:32-f64:64:64-i64:64:64")),
+    TLInfo(*this) {
+}
+
+ThumbTargetMachine::ThumbTargetMachine(const Module &M, const std::string &FS)
+  : ARMBaseTargetMachine(M, FS, true), InstrInfo(Subtarget),
+    DataLayout(Subtarget.isAPCS_ABI() ?
+               std::string("e-p:32:32-f64:32:32-i64:32:32-"
+                           "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+               std::string("e-p:32:32-f64:64:64-i64:64:64-"
+                           "i16:16:32-i8:8:32-i1:8:32-a:0:32")),
+    TLInfo(*this) {
+}
+
 unsigned ARMTargetMachine::getJITMatchQuality() {
 #if defined(__arm__)
   return 10;
@@ -131,7 +131,7 @@ unsigned ARMTargetMachine::getModuleMatchQuality(const Module &M) {
 }
 
 
-const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
+const TargetAsmInfo *ARMBaseTargetMachine::createTargetAsmInfo() const {
   switch (Subtarget.TargetType) {
    case ARMSubtarget::isDarwin:
     return new ARMDarwinTargetAsmInfo(*this);
@@ -144,22 +144,22 @@ const TargetAsmInfo *ARMTargetMachine::createTargetAsmInfo() const {
 
 
 // Pass Pipeline Configuration
-bool ARMTargetMachine::addInstSelector(PassManagerBase &PM,
-                                       CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addInstSelector(PassManagerBase &PM,
+                                           CodeGenOpt::Level OptLevel) {
   PM.add(createARMISelDag(*this));
   return false;
 }
 
-bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
   if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
     PM.add(createARMLoadStoreOptimizationPass(true));
   return true;
 }
 
-bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel) {
+bool ARMBaseTargetMachine::addPreEmitPass(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel) {
   // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
   if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
     PM.add(createARMLoadStoreOptimizationPass());
@@ -172,10 +172,10 @@ bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
   return true;
 }
 
-bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
-                                          CodeGenOpt::Level OptLevel,
-                                          bool Verbose,
-                                          raw_ostream &Out) {
+bool ARMBaseTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
+                                              CodeGenOpt::Level OptLevel,
+                                              bool Verbose,
+                                              raw_ostream &Out) {
   // Output assembly language.
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
   if (AsmPrinterCtor)
@@ -185,10 +185,10 @@ bool ARMTargetMachine::addAssemblyEmitter(PassManagerBase &PM,
 }
 
 
-bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel,
-                                      bool DumpAsm,
-                                      MachineCodeEmitter &MCE) {
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel,
+                                          bool DumpAsm,
+                                          MachineCodeEmitter &MCE) {
   // FIXME: Move this to TargetJITInfo!
   if (DefRelocModel == Reloc::Default)
     setRelocationModel(Reloc::Static);
@@ -204,10 +204,10 @@ bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
   return false;
 }
 
-bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
-                                      CodeGenOpt::Level OptLevel,
-                                      bool DumpAsm,
-                                      JITCodeEmitter &JCE) {
+bool ARMBaseTargetMachine::addCodeEmitter(PassManagerBase &PM,
+                                          CodeGenOpt::Level OptLevel,
+                                          bool DumpAsm,
+                                          JITCodeEmitter &JCE) {
   // FIXME: Move this to TargetJITInfo!
   if (DefRelocModel == Reloc::Default)
     setRelocationModel(Reloc::Static);
@@ -223,10 +223,10 @@ bool ARMTargetMachine::addCodeEmitter(PassManagerBase &PM,
   return false;
 }
 
-bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
-                                            CodeGenOpt::Level OptLevel,
-                                            bool DumpAsm,
-                                            MachineCodeEmitter &MCE) {
+bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+                                                CodeGenOpt::Level OptLevel,
+                                                bool DumpAsm,
+                                                MachineCodeEmitter &MCE) {
   // Machine code emitter pass for ARM.
   PM.add(createARMCodeEmitterPass(*this, MCE));
   if (DumpAsm) {
@@ -238,10 +238,10 @@ bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   return false;
 }
 
-bool ARMTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
-                                            CodeGenOpt::Level OptLevel,
-                                            bool DumpAsm,
-                                            JITCodeEmitter &JCE) {
+bool ARMBaseTargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
+                                                CodeGenOpt::Level OptLevel,
+                                                bool DumpAsm,
+                                                JITCodeEmitter &JCE) {
   // Machine code emitter pass for ARM.
   PM.add(createARMJITCodeEmitterPass(*this, JCE));
   if (DumpAsm) {
diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h
index c4c8e6c..0b49b92 100644
--- a/lib/Target/ARM/ARMTargetMachine.h
+++ b/lib/Target/ARM/ARMTargetMachine.h
@@ -22,18 +22,19 @@
 #include "ARMJITInfo.h"
 #include "ARMSubtarget.h"
 #include "ARMISelLowering.h"
+#include "ThumbInstrInfo.h"
 
 namespace llvm {
 
 class Module;
 
-class ARMTargetMachine : public LLVMTargetMachine {
+class ARMBaseTargetMachine : public LLVMTargetMachine {
+protected:
   ARMSubtarget        Subtarget;
-  const TargetData    DataLayout;       // Calculates type size & alignment
-  ARMInstrInfo        InstrInfo;
+
+private:
   ARMFrameInfo        FrameInfo;
   ARMJITInfo          JITInfo;
-  ARMTargetLowering   TLInfo;
   InstrItineraryData  InstrItins;
   Reloc::Model        DefRelocModel;    // Reloc model before it's overridden.
 
@@ -41,26 +42,18 @@ protected:
   // To avoid having target depend on the asmprinter stuff libraries, asmprinter
   // set this functions to ctor pointer at startup time if they are linked in.
   typedef FunctionPass *(*AsmPrinterCtorFn)(raw_ostream &o,
-                                            ARMTargetMachine &tm,
+                                            ARMBaseTargetMachine &tm,
                                             CodeGenOpt::Level OptLevel,
                                             bool verbose);
   static AsmPrinterCtorFn AsmPrinterCtor;
 
 public:
-  ARMTargetMachine(const Module &M, const std::string &FS, bool isThumb = false);
+  ARMBaseTargetMachine(const Module &M, const std::string &FS, bool isThumb);
 
-  virtual const ARMInstrInfo     *getInstrInfo() const { return &InstrInfo; }
   virtual const ARMFrameInfo     *getFrameInfo() const { return &FrameInfo; }
   virtual       ARMJITInfo       *getJITInfo()         { return &JITInfo; }
-  virtual const ARMRegisterInfo  *getRegisterInfo() const {
-    return &InstrInfo.getRegisterInfo();
-  }
-  virtual const TargetData       *getTargetData() const { return &DataLayout; }
   virtual const ARMSubtarget  *getSubtargetImpl() const { return &Subtarget; }
-  virtual       ARMTargetLowering *getTargetLowering() const {
-    return const_cast<ARMTargetLowering*>(&TLInfo);
-  }
-  virtual const InstrItineraryData getInstrItineraryData() const {  
+  virtual const InstrItineraryData getInstrItineraryData() const {
     return InstrItins;
   }
 
@@ -94,12 +87,50 @@ public:
                                     JITCodeEmitter &MCE);
 };
 
+/// ARMTargetMachine - ARM target machine.
+///
+class ARMTargetMachine : public ARMBaseTargetMachine {
+  ARMInstrInfo        InstrInfo;
+  const TargetData    DataLayout;       // Calculates type size & alignment
+  ARMTargetLowering   TLInfo;
+public:
+  ARMTargetMachine(const Module &M, const std::string &FS);
+
+  virtual const ARMRegisterInfo  *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  virtual       ARMTargetLowering *getTargetLowering() const {
+    return const_cast<ARMTargetLowering*>(&TLInfo);
+  }
+
+  virtual const ARMInstrInfo     *getInstrInfo() const { return &InstrInfo; }
+  virtual const TargetData       *getTargetData() const { return &DataLayout; }
+
+  static unsigned getJITMatchQuality();
+  static unsigned getModuleMatchQuality(const Module &M);
+};
+
 /// ThumbTargetMachine - Thumb target machine.
 ///
-class ThumbTargetMachine : public ARMTargetMachine {
+class ThumbTargetMachine : public ARMBaseTargetMachine {
+  ThumbInstrInfo      InstrInfo;
+  const TargetData    DataLayout;       // Calculates type size & alignment
+  ARMTargetLowering   TLInfo;
 public:
   ThumbTargetMachine(const Module &M, const std::string &FS);
 
+  virtual const ARMRegisterInfo  *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+
+  virtual       ARMTargetLowering *getTargetLowering() const {
+    return const_cast<ARMTargetLowering*>(&TLInfo);
+  }
+
+  virtual const ThumbInstrInfo   *getInstrInfo() const { return &InstrInfo; }
+  virtual const TargetData       *getTargetData() const { return &DataLayout; }
+
   static unsigned getJITMatchQuality();
   static unsigned getModuleMatchQuality(const Module &M);
 };
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index fe1c980..400f628a 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -21,6 +21,7 @@
 #include "ARMMachineFunctionInfo.h"
 #include "llvm/Constants.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
@@ -96,9 +97,7 @@ namespace {
                       const char *Modifier = 0);
     void printSOImmOperand(const MachineInstr *MI, int opNum);
     void printSOImm2PartOperand(const MachineInstr *MI, int opNum);
-    void printSOOperand(const MachineInstr *MI, int OpNum);
     void printSORegOperand(const MachineInstr *MI, int opNum);
-    void printT2SOImmOperand(const MachineInstr *MI, int opNum);
     void printAddrMode2Operand(const MachineInstr *MI, int OpNo);
     void printAddrMode2OffsetOperand(const MachineInstr *MI, int OpNo);
     void printAddrMode3Operand(const MachineInstr *MI, int OpNo);
@@ -110,6 +109,7 @@ namespace {
     void printAddrModePCOperand(const MachineInstr *MI, int OpNo,
                                 const char *Modifier = 0);
     void printBitfieldInvMaskImmOperand (const MachineInstr *MI, int OpNo);
+
     void printThumbAddrModeRROperand(const MachineInstr *MI, int OpNo);
     void printThumbAddrModeRI5Operand(const MachineInstr *MI, int OpNo,
                                       unsigned Scale);
@@ -117,6 +117,10 @@ namespace {
     void printThumbAddrModeS2Operand(const MachineInstr *MI, int OpNo);
     void printThumbAddrModeS4Operand(const MachineInstr *MI, int OpNo);
     void printThumbAddrModeSPOperand(const MachineInstr *MI, int OpNo);
+
+    void printT2SOImmOperand(const MachineInstr *MI, int opNum);
+    void printT2SOOperand(const MachineInstr *MI, int OpNum);
+
     void printPredicateOperand(const MachineInstr *MI, int opNum);
     void printSBitModifierOperand(const MachineInstr *MI, int opNum);
     void printPCLabel(const MachineInstr *MI, int opNum);
@@ -169,11 +173,6 @@ namespace {
          O << ")";
       }
       O << "\n";
-
-      // If the constant pool value is a extern weak symbol, remember to emit
-      // the weak reference.
-      if (GV && GV->hasExternalWeakLinkage())
-        ExtWeakSymbols.insert(GV);
     }
     
     void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -331,8 +330,6 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
     if (isCallOp && Subtarget->isTargetELF() &&
         TM.getRelocationModel() == Reloc::PIC_)
       O << "(PLT)";
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
     break;
   }
   case MachineOperand::MO_ExternalSymbol: {
@@ -408,32 +405,10 @@ void ARMAsmPrinter::printSOImm2PartOperand(const MachineInstr *MI, int OpNum) {
   printSOImm(O, ARM_AM::getSOImmVal(V2), VerboseAsm, TAI);
 }
 
-// Constant shifts so_reg is a 3-operand unit corresponding to register forms of
-// the A5.1 "Addressing Mode 1 - Data-processing operands" forms.  This
-// includes:
-// REG 0 - e.g. R5
-// REG IMM, SH_OPC - e.g. R5, LSL #3
-void ARMAsmPrinter::printSOOperand(const MachineInstr *MI, int OpNum) {
-  const MachineOperand &MO1 = MI->getOperand(OpNum);
-  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
-
-  unsigned Reg = MO1.getReg();
-  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
-  O << TM.getRegisterInfo()->getAsmName(Reg);
-
-  // Print the shift opc.
-  O << ", "
-    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
-    << " ";
-
-  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
-  O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
-}
-
 // so_reg is a 4-operand unit corresponding to register forms of the A5.1
 // "Addressing Mode 1 - Data-processing operands" forms.  This includes:
-//    REG 0   0    - e.g. R5
-//    REG REG 0,SH_OPC     - e.g. R5, ROR R3
+//    REG 0   0           - e.g. R5
+//    REG REG 0,SH_OPC    - e.g. R5, ROR R3
 //    REG 0   IMM,SH_OPC  - e.g. R5, LSL #3
 void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO1 = MI->getOperand(Op);
@@ -457,24 +432,6 @@ void ARMAsmPrinter::printSORegOperand(const MachineInstr *MI, int Op) {
   }
 }
 
-static void printT2SOImm(raw_ostream &O, int64_t V) {
-  unsigned Imm = ARM_AM::getT2SOImmValDecode(V);
-  
-  // Always print the immediate directly, as the "rotate" form
-  // is deprecated in some contexts.
-  O << "#" << Imm;
-}
-
-/// printT2SOImmOperand - T2SOImm is:
-///  1. a 4-bit splat control value and 8 bit immediate value
-///  2. a 5-bit rotate amount and a non-zero 8-bit immediate value
-///     represented by a normalizedin 7-bit value (msb is always 1)
-void ARMAsmPrinter::printT2SOImmOperand(const MachineInstr *MI, int OpNum) {
-  const MachineOperand &MO = MI->getOperand(OpNum);
-  assert(MO.isImm() && "Not a valid so_imm value!");
-  printT2SOImm(O, MO.getImm());
-}
-
 void ARMAsmPrinter::printAddrMode2Operand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO1 = MI->getOperand(Op);
   const MachineOperand &MO2 = MI->getOperand(Op+1);
@@ -643,8 +600,8 @@ void
 ARMAsmPrinter::printBitfieldInvMaskImmOperand(const MachineInstr *MI, int Op) {
   const MachineOperand &MO = MI->getOperand(Op);
   uint32_t v = ~MO.getImm();
-  int32_t lsb = ffs (v) - 1;
-  int32_t width = fls (v) - lsb;
+  int32_t lsb = CountTrailingZeros_32(v);
+  int32_t width = (32 - CountLeadingZeros_32 (v)) - lsb;
   assert(MO.isImm() && "Not a valid bf_inv_mask_imm value!");
   O << "#" << lsb << ", #" << width;
 }
@@ -702,6 +659,42 @@ void ARMAsmPrinter::printThumbAddrModeSPOperand(const MachineInstr *MI,int Op) {
   O << "]";
 }
 
+/// printT2SOImmOperand - T2SOImm is:
+///  1. a 4-bit splat control value and 8 bit immediate value
+///  2. a 5-bit rotate amount and a non-zero 8-bit immediate value
+///     represented by a normalized 7-bit value (msb is always 1)
+void ARMAsmPrinter::printT2SOImmOperand(const MachineInstr *MI, int OpNum) {
+  const MachineOperand &MO = MI->getOperand(OpNum);
+  assert(MO.isImm() && "Not a valid so_imm value!");
+
+  unsigned Imm = ARM_AM::getT2SOImmValDecode(MO.getImm());  
+  // Always print the immediate directly, as the "rotate" form
+  // is deprecated in some contexts.
+  O << "#" << Imm;
+}
+
+// Constant shifts t2_so_reg is a 2-operand unit corresponding to the Thumb2
+// register with shift forms.
+// REG 0               - e.g. R5
+// REG IMM, SH_OPC     - e.g. R5, LSL #3
+void ARMAsmPrinter::printT2SOOperand(const MachineInstr *MI, int OpNum) {
+  const MachineOperand &MO1 = MI->getOperand(OpNum);
+  const MachineOperand &MO2 = MI->getOperand(OpNum+1);
+  // Check the shift operand before any getImm() call below decodes it.
+  assert(MO2.isImm() && "Not a valid t2_so_reg value!");
+
+  unsigned Reg = MO1.getReg();
+  assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+  O << TM.getRegisterInfo()->getAsmName(Reg);
+
+  // Print the shift opc, followed by the constant shift amount.
+  O << ", "
+    << ARM_AM::getShiftOpcStr(ARM_AM::getSORegShOp(MO2.getImm()))
+    << " ";
+  O << "#" << ARM_AM::getSORegOffset(MO2.getImm());
+}
+
+
 void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int opNum) {
   ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(opNum).getImm();
   if (CC != ARMCC::AL)
@@ -749,10 +742,6 @@ void ARMAsmPrinter::printCPInstOperand(const MachineInstr *MI, int OpNo,
       EmitMachineConstantPoolValue(MCPE.Val.MachineCPVal);
     } else {
       EmitGlobalConstant(MCPE.Val.ConstVal);
-      // remember to emit the weak reference
-      if (const GlobalValue *GV = dyn_cast<GlobalValue>(MCPE.Val.ConstVal))
-        if (GV->hasExternalWeakLinkage())
-          ExtWeakSymbols.insert(GV);
     }
   }
 }
@@ -934,6 +923,8 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
 
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *Type = C->getType();
   unsigned Size = TD->getTypeAllocSize(Type);
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -1046,12 +1037,6 @@ void ARMAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   if (TAI->hasDotTypeDotSizeDirective())
     O << "\t.size " << name << ", " << Size << "\n";
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -1135,18 +1120,12 @@ bool ARMAsmPrinter::doFinalization(Module &M) {
     }
 
 
-    // Emit initial debug information.
-    DW->EndModule();
-
     // Funny Darwin hack: This flag tells the linker that no global symbols
     // contain code that falls through to other global symbols (e.g. the obvious
     // implementation of multiple entry points).  If this doesn't occur, the
     // linker can safely perform dead code stripping.  Since LLVM never
     // generates code that does this, it is always safe to set.
     O << "\t.subsections_via_symbols\n";
-  } else {
-    // Emit final debug information for ELF.
-    DW->EndModule();
   }
 
   return AsmPrinter::doFinalization(M);
@@ -1158,7 +1137,7 @@ bool ARMAsmPrinter::doFinalization(Module &M) {
 /// regardless of whether the function is in SSA form.
 ///
 FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o,
-                                             ARMTargetMachine &tm,
+                                             ARMBaseTargetMachine &tm,
                                              CodeGenOpt::Level OptLevel,
                                              bool verbose) {
   return new ARMAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
@@ -1167,13 +1146,10 @@ FunctionPass *llvm::createARMCodePrinterPass(raw_ostream &o,
 namespace {
   static struct Register {
     Register() {
-      ARMTargetMachine::registerAsmPrinter(createARMCodePrinterPass);
+      ARMBaseTargetMachine::registerAsmPrinter(createARMCodePrinterPass);
     }
   } Registrator;
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeARMAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeARMAsmPrinter() { }
diff --git a/lib/Target/ARM/AsmPrinter/CMakeLists.txt b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
index c22964f..a67fc84 100644
--- a/lib/Target/ARM/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/ARM/AsmPrinter/CMakeLists.txt
@@ -3,3 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMARMAsmPrinter
   ARMAsmPrinter.cpp
   )
+add_dependencies(LLVMARMAsmPrinter ARMCodeGenTable_gen)
\ No newline at end of file
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index 2ac40f5..e665ed9 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -24,4 +24,5 @@ add_llvm_target(ARMCodeGen
   ARMSubtarget.cpp
   ARMTargetAsmInfo.cpp
   ARMTargetMachine.cpp
+  ThumbInstrInfo.cpp
   )
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 0252a4a..4223699 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -96,20 +96,7 @@ Which would be better.  This occurs in png decode.
 //===---------------------------------------------------------------------===//
 
 More load / store optimizations:
-1) Look past instructions without side-effects (not load, store, branch, etc.)
-   when forming the list of loads / stores to optimize.
-
-2) Smarter register allocation?
-We are probably missing some opportunities to use ldm / stm. Consider:
-
-ldr r5, [r0]
-ldr r4, [r0, #4]
-
-This cannot be merged into a ldm. Perhaps we will need to do the transformation
-before register allocation. Then teach the register allocator to allocate a
-chunk of consecutive registers.
-
-3) Better representation for block transfer? This is from Olden/power:
+1) Better representation for block transfer? This is from Olden/power:
 
 	fldd d0, [r4]
 	fstd d0, [r4, #+32]
@@ -123,7 +110,7 @@ chunk of consecutive registers.
 If we can spare the registers, it would be better to use fldm and fstm here.
 Need major register allocator enhancement though.
 
-4) Can we recognize the relative position of constantpool entries? i.e. Treat
+2) Can we recognize the relative position of constantpool entries? i.e. Treat
 
 	ldr r0, LCPI17_3
 	ldr r1, LCPI17_4
@@ -147,13 +134,7 @@ L6:
 	.long	-858993459
 	.long	1074318540
 
-5) Can we make use of ldrd and strd? Instead of generating ldm / stm, use
-ldrd/strd instead if there are only two destination registers that form an
-odd/even pair. However, we probably would pay a penalty if the address is not
-aligned on 8-byte boundary. This requires more information on load / store
-nodes (and MI's?) then we currently carry.
-
-6) struct copies appear to be done field by field 
+3) struct copies appear to be done field by field 
 instead of by words, at least sometimes:
 
 struct foo { int x; short s; char c1; char c2; };
@@ -313,11 +294,6 @@ See McCat/18-imp/ComputeBoundingBoxes for an example.
 
 //===---------------------------------------------------------------------===//
 
-Register scavenging is now implemented.  The example in the previous version
-of this document produces optimal code at -O2.
-
-//===---------------------------------------------------------------------===//
-
 Pre-/post- indexed load / stores:
 
 1) We should not make the pre/post- indexed load/store transform if the base ptr
@@ -353,20 +329,6 @@ time.
 
 //===---------------------------------------------------------------------===//
 
-We should add i64 support to take advantage of the 64-bit load / stores.
-We can add a pseudo i64 register class containing pseudo registers that are
-register pairs. All other ops (e.g. add, sub) would be expanded as usual.
-
-We need to add pseudo instructions (i.e. gethi / getlo) to extract i32 registers
-from the i64 register. These are single moves which can be eliminated if the
-destination register is a sub-register of the source. We should implement proper
-subreg support in the register allocator to coalesce these away.
-
-There are other minor issues such as multiple instructions for a spill / restore
-/ move.
-
-//===---------------------------------------------------------------------===//
-
 Implement support for some more tricky ways to materialize immediates.  For
 example, to get 0xffff8000, we can use:
 
@@ -465,12 +427,6 @@ More register scavenging work:
 1. Use the register scavenger to track frame index materialized into registers
    (those that do not fit in addressing modes) to allow reuse in the same BB.
 2. Finish scavenging for Thumb.
-3. We know some spills and restores are unnecessary. The issue is once live
-   intervals are merged, they are not never split. So every def is spilled
-   and every use requires a restore if the register allocator decides the
-   resulting live interval is not assigned a physical register. It may be
-   possible (with the help of the scavenger) to turn some spill / restore
-   pairs into register copies.
 
 //===---------------------------------------------------------------------===//
 
@@ -572,3 +528,5 @@ those operations and the ARMv6 scalar versions.
 
 //===---------------------------------------------------------------------===//
 
+ARM::MOVCCr is commutable (by flipping the condition). But we need to implement
+ARMInstrInfo::commuteInstruction() to support it.
diff --git a/lib/Target/ARM/ThumbInstrInfo.cpp b/lib/Target/ARM/ThumbInstrInfo.cpp
new file mode 100644
index 0000000..075d940
--- /dev/null
+++ b/lib/Target/ARM/ThumbInstrInfo.cpp
@@ -0,0 +1,282 @@
+//===- ThumbInstrInfo.cpp - Thumb Instruction Information --------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstrInfo.h"
+#include "ARM.h"
+#include "ARMGenInstrInfo.inc"
+#include "ARMMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/ADT/SmallVector.h"
+#include "ThumbInstrInfo.h"
+
+using namespace llvm;
+
+ThumbInstrInfo::ThumbInstrInfo(const ARMSubtarget &STI)
+  : ARMBaseInstrInfo(STI) {
+}
+
+bool ThumbInstrInfo::isMoveInstr(const MachineInstr &MI,
+                                 unsigned &SrcReg, unsigned &DstReg,
+                                 unsigned& SrcSubIdx, unsigned& DstSubIdx) const {
+  SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+  unsigned oc = MI.getOpcode();
+  switch (oc) {
+  default:
+    return false;
+  // FIXME: Thumb2
+  case ARM::tMOVr:
+  case ARM::tMOVhir2lor:
+  case ARM::tMOVlor2hir:
+  case ARM::tMOVhir2hir:
+    assert(MI.getDesc().getNumOperands() >= 2 &&
+           MI.getOperand(0).isReg() &&
+           MI.getOperand(1).isReg() &&
+           "Invalid Thumb MOV instruction");
+    SrcReg = MI.getOperand(1).getReg();
+    DstReg = MI.getOperand(0).getReg();
+    return true;
+  }
+}
+
+unsigned ThumbInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+                                             int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  // FIXME: Thumb2
+  case ARM::tRestore:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+unsigned ThumbInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+                                            int &FrameIndex) const {
+  switch (MI->getOpcode()) {
+  default: break;
+  // FIXME: Thumb2
+  case ARM::tSpill:
+    if (MI->getOperand(1).isFI() &&
+        MI->getOperand(2).isImm() &&
+        MI->getOperand(2).getImm() == 0) {
+      FrameIndex = MI->getOperand(1).getIndex();
+      return MI->getOperand(0).getReg();
+    }
+    break;
+  }
+  return 0;
+}
+
+bool ThumbInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
+                                  MachineBasicBlock::iterator I,
+                                  unsigned DestReg, unsigned SrcReg,
+                                  const TargetRegisterClass *DestRC,
+                                  const TargetRegisterClass *SrcRC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  // FIXME: Thumb2
+  if (DestRC == ARM::GPRRegisterClass) {
+    if (SrcRC == ARM::GPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVhir2hir), DestReg).addReg(SrcReg);
+      return true;
+    } else if (SrcRC == ARM::tGPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVlor2hir), DestReg).addReg(SrcReg);
+      return true;
+    }
+  } else if (DestRC == ARM::tGPRRegisterClass) {
+    if (SrcRC == ARM::GPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVhir2lor), DestReg).addReg(SrcReg);
+      return true;
+    } else if (SrcRC == ARM::tGPRRegisterClass) {
+      BuildMI(MBB, I, DL, get(ARM::tMOVr), DestReg).addReg(SrcReg);
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void ThumbInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                    unsigned SrcReg, bool isKill, int FI,
+                    const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
+
+  // FIXME: Thumb2
+  if (RC == ARM::tGPRRegisterClass) {
+    BuildMI(MBB, I, DL, get(ARM::tSpill))
+      .addReg(SrcReg, getKillRegState(isKill))
+      .addFrameIndex(FI).addImm(0);
+  }
+}
+
+void ThumbInstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
+                                    bool isKill,
+                                    SmallVectorImpl<MachineOperand> &Addr,
+                                    const TargetRegisterClass *RC,
+                                   SmallVectorImpl<MachineInstr*> &NewMIs) const{
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  unsigned Opc = 0;
+
+  // FIXME: Thumb2. Is GPRRegClass here correct?
+  assert(RC == ARM::GPRRegisterClass && "Unknown regclass!");
+  if (RC == ARM::GPRRegisterClass) {
+    Opc = Addr[0].isFI() ? ARM::tSpill : ARM::tSTR;
+  }
+
+  MachineInstrBuilder MIB =
+    BuildMI(MF, DL,  get(Opc)).addReg(SrcReg, getKillRegState(isKill));
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  NewMIs.push_back(MIB);
+  return;
+}
+
+void ThumbInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+                     unsigned DestReg, int FI,
+                     const TargetRegisterClass *RC) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (I != MBB.end()) DL = I->getDebugLoc();
+
+  // FIXME: Thumb2
+  assert(RC == ARM::tGPRRegisterClass && "Unknown regclass!");
+
+  if (RC == ARM::tGPRRegisterClass) {
+    BuildMI(MBB, I, DL, get(ARM::tRestore), DestReg)
+      .addFrameIndex(FI).addImm(0);
+  }
+}
+
+void ThumbInstrInfo::
+loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                SmallVectorImpl<MachineOperand> &Addr,
+                const TargetRegisterClass *RC,
+                SmallVectorImpl<MachineInstr*> &NewMIs) const {
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  unsigned Opc = 0;
+
+  // FIXME: Thumb2. Is GPRRegClass ok here?
+  // Mirror storeRegToAddr: any other class would leave Opc == 0 below.
+  assert(RC == ARM::GPRRegisterClass && "Unknown regclass!");
+  if (RC == ARM::GPRRegisterClass)
+    Opc = Addr[0].isFI() ? ARM::tRestore : ARM::tLDR;
+
+  MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
+  for (unsigned i = 0, e = Addr.size(); i != e; ++i)
+    MIB.addOperand(Addr[i]);
+  NewMIs.push_back(MIB);
+}
+
+bool ThumbInstrInfo::
+spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MI,
+                          const std::vector<CalleeSavedInfo> &CSI) const {
+  if (CSI.empty())
+    return false;
+
+  DebugLoc DL = DebugLoc::getUnknownLoc();
+  if (MI != MBB.end()) DL = MI->getDebugLoc();
+
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, get(ARM::tPUSH));
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    // Add the callee-saved register as live-in. It's killed at the spill.
+    MBB.addLiveIn(Reg);
+    MIB.addReg(Reg, RegState::Kill);
+  }
+  return true;
+}
+
+bool ThumbInstrInfo::
+restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator MI,
+                            const std::vector<CalleeSavedInfo> &CSI) const {
+  MachineFunction &MF = *MBB.getParent();
+  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+  if (CSI.empty())
+    return false;
+
+  bool isVarArg = AFI->getVarArgsRegSaveSize() > 0;
+  MachineInstr *PopMI = MF.CreateMachineInstr(get(ARM::tPOP),MI->getDebugLoc());
+  for (unsigned i = CSI.size(); i != 0; --i) {
+    unsigned Reg = CSI[i-1].getReg();
+    if (Reg == ARM::LR) {
+      // Special epilogue for vararg functions. See emitEpilogue
+      if (isVarArg)
+        continue;
+      Reg = ARM::PC;
+      PopMI->setDesc(get(ARM::tPOP_RET));
+      MI = MBB.erase(MI);
+    }
+    PopMI->addOperand(MachineOperand::CreateReg(Reg, true));
+  }
+
+  // It's illegal to emit pop instruction without operands.
+  if (PopMI->getNumOperands() > 0)
+    MBB.insert(MI, PopMI);
+
+  return true;
+}
+
+MachineInstr *ThumbInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
+                      const SmallVectorImpl<unsigned> &Ops, int FI) const {
+  if (Ops.size() != 1) return NULL;
+  const ARMRegisterInfo &RI = getRegisterInfo();
+
+  unsigned OpNum = Ops[0];
+  unsigned Opc = MI->getOpcode();
+  MachineInstr *NewMI = NULL;
+  switch (Opc) {
+  default: break;
+  case ARM::tMOVr:
+  case ARM::tMOVlor2hir:
+  case ARM::tMOVhir2lor:
+  case ARM::tMOVhir2hir: {
+    if (OpNum == 0) { // move -> store
+      unsigned SrcReg = MI->getOperand(1).getReg();
+      bool isKill = MI->getOperand(1).isKill();
+      if (RI.isPhysicalRegister(SrcReg) && !RI.isLowRegister(SrcReg))
+        // tSpill cannot take a high register operand.
+        break;
+      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tSpill))
+        .addReg(SrcReg, getKillRegState(isKill))
+        .addFrameIndex(FI).addImm(0);
+    } else {          // move -> load
+      unsigned DstReg = MI->getOperand(0).getReg();
+      if (RI.isPhysicalRegister(DstReg) && !RI.isLowRegister(DstReg))
+        // tRestore cannot target a high register operand.
+        break;
+      bool isDead = MI->getOperand(0).isDead();
+      NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::tRestore))
+        .addReg(DstReg, RegState::Define | getDeadRegState(isDead))
+        .addFrameIndex(FI).addImm(0);
+    }
+    break;
+  }
+  }
+
+  return NewMI;
+}
diff --git a/lib/Target/ARM/ThumbInstrInfo.h b/lib/Target/ARM/ThumbInstrInfo.h
new file mode 100644
index 0000000..dcf1095
--- /dev/null
+++ b/lib/Target/ARM/ThumbInstrInfo.h
@@ -0,0 +1,85 @@
+//===- ThumbInstrInfo.h - Thumb Instruction Information ----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the Thumb implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef THUMBINSTRUCTIONINFO_H
+#define THUMBINSTRUCTIONINFO_H
+
+#include "llvm/Target/TargetInstrInfo.h"
+#include "ARMRegisterInfo.h"
+#include "ARM.h"
+#include "ARMInstrInfo.h"
+
+namespace llvm {
+  class ARMSubtarget;
+
+class ThumbInstrInfo : public ARMBaseInstrInfo { // Thumb instruction info.
+public:
+  explicit ThumbInstrInfo(const ARMSubtarget &STI);
+
+  /// Return true if the instruction is a register to register move and return
+  /// the source and dest operands and their sub-register indices by reference.
+  virtual bool isMoveInstr(const MachineInstr &MI,
+                           unsigned &SrcReg, unsigned &DstReg,
+                           unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+  virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+                                       int &FrameIndex) const;
+  virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+                                      int &FrameIndex) const;
+
+  virtual bool copyRegToReg(MachineBasicBlock &MBB,
+                            MachineBasicBlock::iterator I,
+                            unsigned DestReg, unsigned SrcReg,
+                            const TargetRegisterClass *DestRC,
+                            const TargetRegisterClass *SrcRC) const;
+  virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+                                   MachineBasicBlock::iterator MBBI,
+                                   unsigned SrcReg, bool isKill, int FrameIndex,
+                                   const TargetRegisterClass *RC) const;
+
+  virtual void storeRegToAddr(MachineFunction &MF, unsigned SrcReg, bool isKill,
+                              SmallVectorImpl<MachineOperand> &Addr,
+                              const TargetRegisterClass *RC,
+                              SmallVectorImpl<MachineInstr*> &NewMIs) const;
+
+  virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+                                    MachineBasicBlock::iterator MBBI,
+                                    unsigned DestReg, int FrameIndex,
+                                    const TargetRegisterClass *RC) const;
+
+  virtual void loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
+                               SmallVectorImpl<MachineOperand> &Addr,
+                               const TargetRegisterClass *RC,
+                               SmallVectorImpl<MachineInstr*> &NewMIs) const;
+  virtual bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                         MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+  virtual bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
+                                           MachineBasicBlock::iterator MI,
+                                 const std::vector<CalleeSavedInfo> &CSI) const;
+
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                           const SmallVectorImpl<unsigned> &Ops,
+                                              MachineInstr* LoadMI) const {
+    return 0; // This LoadMI overload never folds: it always returns null.
+  }
+  // Frame-index variant of the fold hook; only declared here, no inline body.
+  virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+                                              MachineInstr* MI,
+                                           const SmallVectorImpl<unsigned> &Ops,
+                                              int FrameIndex) const;
+};
+}
+
+#endif // THUMBINSTRUCTIONINFO_H
diff --git a/lib/Target/Alpha/AlphaJITInfo.cpp b/lib/Target/Alpha/AlphaJITInfo.cpp
index 3fecb19..ba7478e 100644
--- a/lib/Target/Alpha/AlphaJITInfo.cpp
+++ b/lib/Target/Alpha/AlphaJITInfo.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Config/alloca.h"
 #include "llvm/Support/Debug.h"
 #include <cstdlib>
-#include <map>
 using namespace llvm;
 
 #define BUILD_OFormatI(Op, RA, LIT, FUN, RC) \
@@ -237,11 +236,6 @@ static long getLower16(long l)
 
 void AlphaJITInfo::relocate(void *Function, MachineRelocation *MR,
                             unsigned NumRelocs, unsigned char* GOTBase) {
-  //because gpdist are paired and relative to the pc of the first inst,
-  //we need to have some state
-
-  static std::map<std::pair<void*, int>, void*> gpdistmap;
-
   for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
     unsigned *RelocPos = (unsigned*)Function + MR->getMachineCodeOffset()/4;
     long idx = 0;
diff --git a/lib/Target/Alpha/AlphaJITInfo.h b/lib/Target/Alpha/AlphaJITInfo.h
index edff990..ecb467f 100644
--- a/lib/Target/Alpha/AlphaJITInfo.h
+++ b/lib/Target/Alpha/AlphaJITInfo.h
@@ -15,6 +15,7 @@
 #define ALPHA_JITINFO_H
 
 #include "llvm/Target/TargetJITInfo.h"
+#include <map>
 
 namespace llvm {
   class TargetMachine;
@@ -22,6 +23,10 @@ namespace llvm {
   class AlphaJITInfo : public TargetJITInfo {
   protected:
     TargetMachine &TM;
+    
+    // Because gpdist are paired and relative to the PC of the first inst,
+    // we need to have some state.
+    std::map<std::pair<void*, int>, void*> gpdistmap;
   public:
     explicit AlphaJITInfo(TargetMachine &tm) : TM(tm)
     { useGOT = true; }
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index feee6e4..0ff53c7 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -54,7 +54,7 @@ static long getLower16(long l)
 
 AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii)
   : AlphaGenRegisterInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP),
-    TII(tii)
+    TII(tii), curgpdist(0)
 {
 }
 
@@ -206,8 +206,6 @@ void AlphaRegisterInfo::emitPrologue(MachineFunction &MF) const {
                  MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
   bool FP = hasFP(MF);
 
-  static int curgpdist = 0;
-
   //handle GOP offset
   BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29)
     .addGlobalAddress(const_cast<Function*>(MF.getFunction()))
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index c4f5f7b..5012fe8 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -60,6 +60,9 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
   int getDwarfRegNum(unsigned RegNum, bool isEH) const;
 
   static std::string getPrettyName(unsigned reg);
+  
+private:
+  mutable int curgpdist;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index cdd4fa4..10952eb 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -27,10 +27,8 @@ static RegisterTarget<AlphaTargetMachine> X("alpha", "Alpha [experimental]");
 // No assembler printer by default
 AlphaTargetMachine::AsmPrinterCtorFn AlphaTargetMachine::AsmPrinterCtor = 0;
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeAlphaTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaTarget() { }
 
 const TargetAsmInfo *AlphaTargetMachine::createTargetAsmInfo() const {
   return new AlphaTargetAsmInfo(*this);
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 7b73bb3..e0c0a64 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -17,6 +17,7 @@
 #include "AlphaInstrInfo.h"
 #include "AlphaTargetMachine.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Type.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
@@ -121,8 +122,6 @@ void AlphaAsmPrinter::printOp(const MachineOperand &MO, bool IsCallOp) {
   case MachineOperand::MO_GlobalAddress: {
     GlobalValue *GV = MO.getGlobal();
     O << Mang->getValueName(GV);
-    if (GV->isDeclaration() && GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
     return;
   }
 
@@ -224,6 +223,8 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
 
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   unsigned Size = TD->getTypeAllocSize(C->getType());
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
 
@@ -265,12 +266,6 @@ void AlphaAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
 
   O << name << ":\n";
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -304,11 +299,8 @@ bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
   return false;
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeAlphaAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeAlphaAsmPrinter() { }
 
 namespace {
   static struct Register {
diff --git a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
index bf04762..992c218 100644
--- a/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Alpha/AsmPrinter/CMakeLists.txt
@@ -3,3 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMAlphaAsmPrinter
   AlphaAsmPrinter.cpp
   )
+add_dependencies(LLVMAlphaAsmPrinter AlphaCodeGenTable_gen)
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index c3554f6..294c6d3 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -59,10 +59,8 @@ int CBackendTargetMachineModule = 0;
 // Register the target.
 static RegisterTarget<CTargetMachine> X("c", "C backend");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeCBackendTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeCBackendTarget() { }
 
 namespace {
   /// CBackendNameAllUsedStructsAndMergeFunctions - This pass inserts names for
@@ -102,12 +100,13 @@ namespace {
     std::set<Function*> intrinsicPrototypesAlreadyGenerated;
     std::set<const Argument*> ByValParams;
     unsigned FPCounter;
+    unsigned OpaqueCounter;
 
   public:
     static char ID;
     explicit CWriter(raw_ostream &o)
       : FunctionPass(&ID), Out(o), IL(0), Mang(0), LI(0), 
-        TheModule(0), TAsm(0), TD(0) {
+        TheModule(0), TAsm(0), TD(0), OpaqueCounter(0) {
       FPCounter = 0;
     }
 
@@ -647,8 +646,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
   }
 
   case Type::OpaqueTyID: {
-    static int Count = 0;
-    std::string TyName = "struct opaque_" + itostr(Count++);
+    std::string TyName = "struct opaque_" + itostr(OpaqueCounter++);
     assert(TypeNames.find(Ty) == TypeNames.end());
     TypeNames[Ty] = TyName;
     return Out << TyName << ' ' << NameSoFar;
@@ -752,8 +750,7 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
   }
 
   case Type::OpaqueTyID: {
-    static int Count = 0;
-    std::string TyName = "struct opaque_" + itostr(Count++);
+    std::string TyName = "struct opaque_" + itostr(OpaqueCounter++);
     assert(TypeNames.find(Ty) == TypeNames.end());
     TypeNames[Ty] = TyName;
     return Out << TyName << ' ' << NameSoFar;
diff --git a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
index 0dad083..9684e63 100644
--- a/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/CellSPU/AsmPrinter/CMakeLists.txt
@@ -6,3 +6,4 @@ include_directories(
 add_llvm_library(LLVMCellSPUAsmPrinter
   SPUAsmPrinter.cpp
   )
+add_dependencies(LLVMCellSPUAsmPrinter CellSPUCodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 26a8ece..02b625b 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
@@ -361,9 +362,6 @@ void SPUAsmPrinter::printOp(const MachineOperand &MO) {
       }
     }
     O << Name;
-
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
     return;
   }
 
@@ -524,6 +522,8 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   printVisibility(name, GVar->getVisibility());
 
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *Type = C->getType();
   unsigned Size = TD->getTypeAllocSize(Type);
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -584,12 +584,6 @@ void LinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   PrintUnmangledNameSafely(GVar, O);
   O << "'\n";
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -600,9 +594,6 @@ bool LinuxAsmPrinter::doFinalization(Module &M) {
        I != E; ++I)
     printModuleLevelGV(I);
 
-  // Emit initial debug information.
-  DW->EndModule();
-
   return AsmPrinter::doFinalization(M);
 }
 
@@ -617,11 +608,8 @@ FunctionPass *llvm::createSPUAsmPrinterPass(raw_ostream &o,
   return new LinuxAsmPrinter(o, tm, tm.getTargetAsmInfo(), OptLevel, verbose);
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeCellSPUAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUAsmPrinter() { }
 
 namespace {
   static struct Register {
diff --git a/lib/Target/CellSPU/SPUTargetMachine.cpp b/lib/Target/CellSPU/SPUTargetMachine.cpp
index c675ebb..256d63d 100644
--- a/lib/Target/CellSPU/SPUTargetMachine.cpp
+++ b/lib/Target/CellSPU/SPUTargetMachine.cpp
@@ -32,10 +32,8 @@ namespace {
 // No assembler printer by default
 SPUTargetMachine::AsmPrinterCtorFn SPUTargetMachine::AsmPrinterCtor = 0;
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeCellSPUTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeCellSPUTarget() { }
 
 const std::pair<unsigned, int> *
 SPUFrameInfo::getCalleeSaveSpillSlots(unsigned &NumEntries) const {
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 1feea96..28f58e8 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -82,10 +82,8 @@ int CppBackendTargetMachineModule = 0;
 // Register the target.
 static RegisterTarget<CPPTargetMachine> X("cpp", "C++ backend");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeCppBackendTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeCppBackendTarget() { }
 
 namespace {
   typedef std::vector<const Type*> TypeList;
@@ -1836,7 +1834,9 @@ namespace {
                               const std::string& mName) {
     nl(Out) << "Module* " << fname << "() {";
     nl(Out,1) << "// Module Construction";
-    nl(Out) << "Module* mod = new Module(\"" << mName << "\");";
+    nl(Out) << "Module* mod = new Module(\"";
+    printEscapedString(mName);
+    Out << "\");";
     if (!TheModule->getTargetTriple().empty()) {
       nl(Out) << "mod->setDataLayout(\"" << TheModule->getDataLayout() << "\");";
     }
@@ -1869,7 +1869,9 @@ namespace {
   void CppWriter::printContents(const std::string& fname,
                                 const std::string& mName) {
     Out << "\nModule* " << fname << "(Module *mod) {\n";
-    Out << "\nmod->setModuleIdentifier(\"" << mName << "\");\n";
+    Out << "\nmod->setModuleIdentifier(\"";
+    printEscapedString(mName);
+    Out << "\");\n";
     printModuleBody();
     Out << "\nreturn mod;\n";
     Out << "\n}\n";
diff --git a/lib/Target/IA64/AsmPrinter/CMakeLists.txt b/lib/Target/IA64/AsmPrinter/CMakeLists.txt
index b81ed4a..ffe0eed 100644
--- a/lib/Target/IA64/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/IA64/AsmPrinter/CMakeLists.txt
@@ -6,3 +6,4 @@ include_directories(
 add_llvm_library(LLVMIA64AsmPrinter
   IA64AsmPrinter.cpp
   )
+add_dependencies(LLVMIA64AsmPrinter IA64CodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
index 662c667..6b34a4e 100644
--- a/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
+++ b/lib/Target/IA64/AsmPrinter/IA64AsmPrinter.cpp
@@ -20,6 +20,7 @@
 #include "IA64.h"
 #include "IA64TargetMachine.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Type.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
@@ -269,6 +270,8 @@ void IA64AsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   O << "\n\n";
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   unsigned Size = TD->getTypeAllocSize(C->getType());
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
 
@@ -384,8 +387,5 @@ namespace {
 }
 
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeIA64AsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeIA64AsmPrinter() { }
diff --git a/lib/Target/IA64/IA64TargetMachine.cpp b/lib/Target/IA64/IA64TargetMachine.cpp
index 0b93ee5..4b05e1d 100644
--- a/lib/Target/IA64/IA64TargetMachine.cpp
+++ b/lib/Target/IA64/IA64TargetMachine.cpp
@@ -26,10 +26,8 @@ static RegisterTarget<IA64TargetMachine> X("ia64",
 // No assembler printer by default
 IA64TargetMachine::AsmPrinterCtorFn IA64TargetMachine::AsmPrinterCtor = 0;
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeIA64Target() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeIA64Target() { }
 
 const TargetAsmInfo *IA64TargetMachine::createTargetAsmInfo() const {
   return new IA64TargetAsmInfo(*this);
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 0aff14f..ee73c38 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -55,10 +55,8 @@ int MSILTargetMachineModule = 0;
 
 static RegisterTarget<MSILTarget> X("msil", "MSIL backend");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeMSILTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeMSILTarget() { }
 
 bool MSILModule::runOnModule(Module &M) {
   ModulePtr = &M;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index 0f5244d..b1fe758 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -35,10 +35,8 @@ int MSP430TargetMachineModule = 0;
 static RegisterTarget<MSP430TargetMachine>
 X("msp430", "MSP430 [experimental]");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeMSP430Target() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeMSP430Target() { }
 
 MSP430TargetMachine::MSP430TargetMachine(const Module &M,
                                          const std::string &FS) :
diff --git a/lib/Target/Mips/AsmPrinter/CMakeLists.txt b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
index 942548d..197cc29 100644
--- a/lib/Target/Mips/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Mips/AsmPrinter/CMakeLists.txt
@@ -6,3 +6,4 @@ include_directories(
 add_llvm_library(LLVMMipsAsmPrinter
   MipsAsmPrinter.cpp
   )
+add_dependencies(LLVMMipsAsmPrinter MipsCodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 077ec96..431630b 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -483,6 +484,8 @@ printModuleLevelGV(const GlobalVariable* GVar) {
   O << "\n\n";
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *CTy = C->getType();
   unsigned Size = TD->getTypeAllocSize(CTy);
   const ConstantArray *CVA = dyn_cast<ConstantArray>(C);
@@ -587,8 +590,5 @@ namespace {
   } Registrator;
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeMipsAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeMipsAsmPrinter() { }
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 83b9b62..c5f117b 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -34,10 +34,8 @@ static RegisterTarget<MipselTargetMachine>  Y("mipsel", "Mipsel");
 MipsTargetMachine::AsmPrinterCtorFn MipsTargetMachine::AsmPrinterCtor = 0;
 
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeMipsTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeMipsTarget() { }
 
 const TargetAsmInfo *MipsTargetMachine::
 createTargetAsmInfo() const 
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index 27551cd..4300588 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -300,7 +300,7 @@ void PIC16DbgInfo::EmitCompositeTypeElements (DICompositeType CTy,
     // Get mangleddd name for this structure/union  element.
     std::string MangMemName = ElementName + UniqueSuffix;
     PopulateDebugInfo(DITy, TypeNo, HasAux, ElementAux, TagName);
-    short Class;
+    short Class = 0;
     if( CTy.getTag() == dwarf::DW_TAG_union_type)
       Class = PIC16Dbg::C_MOU;
     else if  (CTy.getTag() == dwarf::DW_TAG_structure_type)
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 122af70..ec1db90 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -97,6 +97,16 @@ static const char *getIntrinsicName(unsigned opcode) {
   case RTLIB::SUB_F32: Basename = "sub.f32"; break;
   case RTLIB::MUL_F32: Basename = "mul.f32"; break;
   case RTLIB::DIV_F32: Basename = "div.f32"; break;
+
+  // Floating point comparison
+  case RTLIB::O_F32: Basename = "unordered.f32"; break;
+  case RTLIB::UO_F32: Basename = "unordered.f32"; break;
+  case RTLIB::OLE_F32: Basename = "le.f32"; break;
+  case RTLIB::OGE_F32: Basename = "ge.f32"; break;
+  case RTLIB::OLT_F32: Basename = "lt.f32"; break;
+  case RTLIB::OGT_F32: Basename = "gt.f32"; break;
+  case RTLIB::OEQ_F32: Basename = "eq.f32"; break;
+  case RTLIB::UNE_F32: Basename = "neq.f32"; break;
   }
   
   std::string prefix = PAN::getTagName(PAN::PREFIX_SYMBOL);
@@ -187,6 +197,25 @@ PIC16TargetLowering::PIC16TargetLowering(PIC16TargetMachine &TM)
   setLibcallName(RTLIB::MUL_F32, getIntrinsicName(RTLIB::MUL_F32));
   setLibcallName(RTLIB::DIV_F32, getIntrinsicName(RTLIB::DIV_F32));
 
+  // Floating point comparison
+  setLibcallName(RTLIB::UO_F32, getIntrinsicName(RTLIB::UO_F32));
+  setLibcallName(RTLIB::OLE_F32, getIntrinsicName(RTLIB::OLE_F32));
+  setLibcallName(RTLIB::OGE_F32, getIntrinsicName(RTLIB::OGE_F32));
+  setLibcallName(RTLIB::OLT_F32, getIntrinsicName(RTLIB::OLT_F32));
+  setLibcallName(RTLIB::OGT_F32, getIntrinsicName(RTLIB::OGT_F32));
+  setLibcallName(RTLIB::OEQ_F32, getIntrinsicName(RTLIB::OEQ_F32));
+  setLibcallName(RTLIB::UNE_F32, getIntrinsicName(RTLIB::UNE_F32));
+
+  // Return value comparisons of floating point calls. 
+  setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE);
+  setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ);
+
   setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
   setOperationAction(ISD::ExternalSymbol, MVT::i16, Custom);
 
diff --git a/lib/Target/PIC16/PIC16MemSelOpt.cpp b/lib/Target/PIC16/PIC16MemSelOpt.cpp
index 20f926d..43d47ae 100644
--- a/lib/Target/PIC16/PIC16MemSelOpt.cpp
+++ b/lib/Target/PIC16/PIC16MemSelOpt.cpp
@@ -109,7 +109,7 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
 
   // If this insn is not going to access any memory, return.
   const TargetInstrDesc &TID = TII->get(MI->getOpcode());
-  if (! (TID.isCall() || TID.mayLoad() || TID.mayStore()))
+  if (!(TID.isBranch() || TID.isCall() || TID.mayLoad() || TID.mayStore()))
     return false;
 
   // Scan for the memory address operand.
@@ -119,8 +119,9 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
   for (unsigned i = 0; i < NumOperands; i++) {
     MachineOperand Op = MI->getOperand(i);
     if (Op.getType() ==  MachineOperand::MO_GlobalAddress ||
-        Op.getType() ==  MachineOperand::MO_ExternalSymbol) {
-      // We found one mem operand. Next one should be BS.
+        Op.getType() ==  MachineOperand::MO_ExternalSymbol || 
+        Op.getType() ==  MachineOperand::MO_MachineBasicBlock) {
+      // We found one mem operand. Next one may be BS.
       MemOpPos = i;
       break;
     }
@@ -133,7 +134,8 @@ bool MemSelOpt::processInstruction(MachineInstr *MI) {
   MachineOperand &Op = MI->getOperand(MemOpPos);
 
   // If this is a pagesel material, handle it first.
-  if (MI->getOpcode() == PIC16::CALL) {
+  if (MI->getOpcode() == PIC16::CALL ||
+      MI->getOpcode() == PIC16::br_uncond) {
     DebugLoc dl = MI->getDebugLoc();
     BuildMI(*MBB, MI, dl, TII->get(PIC16::pagesel)).
       addOperand(Op);
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index d4f46a4..4304732 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -37,10 +37,8 @@ X("pic16", "PIC16 14-bit [experimental].");
 static RegisterTarget<CooperTargetMachine> 
 Y("cooper", "PIC16 Cooper [experimental].");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializePIC16Target() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializePIC16Target() { }
 
 // PIC16TargetMachine - Traditional PIC16 Machine.
 PIC16TargetMachine::PIC16TargetMachine(const Module &M, const std::string &FS,
diff --git a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
index 1f80b17..236b264 100644
--- a/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/PowerPC/AsmPrinter/CMakeLists.txt
@@ -3,3 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMPowerPCAsmPrinter
   PPCAsmPrinter.cpp
   )
+add_dependencies(LLVMPowerPCAsmPrinter PowerPCCodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index c7bfb6d..c5aa6ae 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -24,6 +24,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Assembly/Writer.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
@@ -194,8 +195,6 @@ namespace {
             std::string Name = Mang->getValueName(GV);
             FnStubs.insert(Name);
             printSuffixedName(Name, "$stub");
-            if (GV->hasExternalWeakLinkage())
-              ExtWeakSymbols.insert(GV);
             return;
           }
         }
@@ -295,20 +294,17 @@ namespace {
 
   /// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
   class VISIBILITY_HIDDEN PPCLinuxAsmPrinter : public PPCAsmPrinter {
-    DwarfWriter *DW;
-    MachineModuleInfo *MMI;
   public:
     explicit PPCLinuxAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
                                 const TargetAsmInfo *T, CodeGenOpt::Level OL,
                                 bool V)
-      : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0) {}
+      : PPCAsmPrinter(O, TM, T, OL, V){}
 
     virtual const char *getPassName() const {
       return "Linux PPC Assembly Printer";
     }
 
     bool runOnMachineFunction(MachineFunction &F);
-    bool doInitialization(Module &M);
     bool doFinalization(Module &M);
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
@@ -324,14 +320,12 @@ namespace {
   /// PPCDarwinAsmPrinter - PowerPC assembly printer, customized for Darwin/Mac
   /// OS X
   class VISIBILITY_HIDDEN PPCDarwinAsmPrinter : public PPCAsmPrinter {
-    DwarfWriter *DW;
-    MachineModuleInfo *MMI;
     raw_ostream &OS;
   public:
     explicit PPCDarwinAsmPrinter(raw_ostream &O, PPCTargetMachine &TM,
                                  const TargetAsmInfo *T, CodeGenOpt::Level OL,
                                  bool V)
-      : PPCAsmPrinter(O, TM, T, OL, V), DW(0), MMI(0), OS(O) {}
+      : PPCAsmPrinter(O, TM, T, OL, V), OS(O) {}
 
     virtual const char *getPassName() const {
       return "Darwin PPC Assembly Printer";
@@ -403,17 +397,12 @@ void PPCAsmPrinter::printOp(const MachineOperand &MO) {
           GVStubs.insert(Name);
           printSuffixedName(Name, "$non_lazy_ptr");
         }
-        if (GV->hasExternalWeakLinkage())
-          ExtWeakSymbols.insert(GV);
         return;
       }
     }
     O << Name;
 
     printOffset(MO.getOffset());
-
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
     return;
   }
 
@@ -644,15 +633,6 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
 
-bool PPCLinuxAsmPrinter::doInitialization(Module &M) {
-  bool Result = AsmPrinter::doInitialization(M);
-  DW = getAnalysisIfAvailable<DwarfWriter>();
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
-  assert(MMI);
-  SwitchToSection(TAI->getTextSection());
-  return Result;
-}
-
 /// PrintUnmangledNameSafely - Print out the printable characters in the name.
 /// Don't print things like \\n or \\0.
 static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
@@ -677,6 +657,8 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   printVisibility(name, GVar->getVisibility());
 
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *Type = C->getType();
   unsigned Size = TD->getTypeAllocSize(Type);
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -743,12 +725,6 @@ void PPCLinuxAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   }
   O << '\n';
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -759,11 +735,6 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
        I != E; ++I)
     printModuleLevelGV(I);
 
-  // TODO
-
-  // Emit initial debug information.
-  DW->EndModule();
-
   return AsmPrinter::doFinalization(M);
 }
 
@@ -866,8 +837,6 @@ bool PPCDarwinAsmPrinter::doInitialization(Module &M) {
   O << "\t.machine " << CPUDirectives[Directive] << '\n';
 
   bool Result = AsmPrinter::doInitialization(M);
-  DW = getAnalysisIfAvailable<DwarfWriter>();
-  MMI = getAnalysisIfAvailable<MachineModuleInfo>();
   assert(MMI);
 
   // Prime text sections so they are adjacent.  This reduces the likelihood a
@@ -987,12 +956,6 @@ void PPCDarwinAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   }
   O << '\n';
 
-  // If the initializer is a extern weak symbol, remember to emit the weak
-  // reference!
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
   EmitGlobalConstant(C);
   O << '\n';
 }
@@ -1100,8 +1063,7 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
   if (TAI->doesSupportExceptionHandling() && MMI) {
     // Add the (possibly multiple) personalities to the set of global values.
     // Only referenced functions get into the Personalities list.
-    const std::vector<Function *>& Personalities = MMI->getPersonalities();
-
+    const std::vector<Function *> &Personalities = MMI->getPersonalities();
     for (std::vector<Function *>::const_iterator I = Personalities.begin(),
            E = Personalities.end(); I != E; ++I)
       if (*I) GVStubs.insert("_" + (*I)->getName());
@@ -1139,10 +1101,6 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
     }
   }
 
-
-  // Emit initial debug information.
-  DW->EndModule();
-
   // Funny Darwin hack: This flag tells the linker that no global symbols
   // contain code that falls through to other global symbols (e.g. the obvious
   // implementation of multiple entry points).  If this doesn't occur, the
@@ -1185,8 +1143,5 @@ namespace {
 extern "C" int PowerPCAsmPrinterForceLink;
 int PowerPCAsmPrinterForceLink = 0;
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializePowerPCAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCAsmPrinter() { }
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3e89885..2f95d7e 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -35,10 +35,8 @@ X("ppc32", "PowerPC 32");
 static RegisterTarget<PPC64TargetMachine>
 Y("ppc64", "PowerPC 64");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializePowerPCTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializePowerPCTarget() { }
 
 // No assembler printer by default
 PPCTargetMachine::AsmPrinterCtorFn PPCTargetMachine::AsmPrinterCtor = 0;
diff --git a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
index de905a9..e3ca18e 100644
--- a/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/Sparc/AsmPrinter/CMakeLists.txt
@@ -3,3 +3,4 @@ include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/
 add_llvm_library(LLVMSparcAsmPrinter
   SparcAsmPrinter.cpp
   )
+add_dependencies(LLVMSparcAsmPrinter SparcCodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 6a2fdca..cb23f62 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -19,6 +19,7 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -46,11 +47,12 @@ namespace {
     ///
     typedef std::map<const Value *, unsigned> ValueMapTy;
     ValueMapTy NumberForBB;
+    unsigned BBNumber;
   public:
     explicit SparcAsmPrinter(raw_ostream &O, TargetMachine &TM,
                              const TargetAsmInfo *T, CodeGenOpt::Level OL,
                              bool V)
-      : AsmPrinter(O, TM, T, OL, V) {}
+      : AsmPrinter(O, TM, T, OL, V), BBNumber(0) {}
 
     virtual const char *getPassName() const {
       return "Sparc Assembly Printer";
@@ -101,7 +103,6 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
 
   // BBNumber is used here so that a given Printer will never give two
   // BBs the same name. (If you have a better way, please let me know!)
-  static unsigned BBNumber = 0;
 
   O << "\n\n";
 
@@ -253,6 +254,8 @@ void SparcAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   O << "\n\n";
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   unsigned Size = TD->getTypeAllocSize(C->getType());
   unsigned Align = TD->getPreferredAlignment(GVar);
 
@@ -362,8 +365,5 @@ namespace {
   } Registrator;
 }
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeSparcAsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeSparcAsmPrinter() { }
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index fd0f124..aef238d 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -25,10 +25,8 @@ static RegisterTarget<SparcTargetMachine> X("sparc", "SPARC");
 SparcTargetMachine::AsmPrinterCtorFn SparcTargetMachine::AsmPrinterCtor = 0;
 
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeSparcTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeSparcTarget() { }
 
 const TargetAsmInfo *SparcTargetMachine::createTargetAsmInfo() const {
   // FIXME: Handle Solaris subtarget someday :)
diff --git a/lib/Target/X86/AsmPrinter/CMakeLists.txt b/lib/Target/X86/AsmPrinter/CMakeLists.txt
index 2079a9f..a28c826 100644
--- a/lib/Target/X86/AsmPrinter/CMakeLists.txt
+++ b/lib/Target/X86/AsmPrinter/CMakeLists.txt
@@ -6,3 +6,4 @@ add_llvm_library(LLVMX86AsmPrinter
   X86AsmPrinter.cpp
   X86IntelAsmPrinter.cpp
   )
+add_dependencies(LLVMX86AsmPrinter X86CodeGenTable_gen)
+\ No newline at end of file
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
index 60ed4f0..e75cfc5 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.cpp
@@ -23,10 +23,13 @@
 #include "llvm/CallingConv.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Module.h"
+#include "llvm/MDNode.h"
 #include "llvm/Type.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/CodeGen/DwarfWriter.h"
 #include "llvm/CodeGen/MachineJumpTableInfo.h"
 #include "llvm/Support/CommandLine.h"
@@ -41,18 +44,26 @@ STATISTIC(EmittedInsts, "Number of machine instrs printed");
 static cl::opt<bool> NewAsmPrinter("experimental-asm-printer",
                                    cl::Hidden);
 
-static std::string getPICLabelString(unsigned FnNum,
-                                     const TargetAsmInfo *TAI,
-                                     const X86Subtarget* Subtarget) {
-  std::string label;
+//===----------------------------------------------------------------------===//
+// Primitive Helper Functions.
+//===----------------------------------------------------------------------===//
+
+void X86ATTAsmPrinter::PrintPICBaseSymbol() const {
   if (Subtarget->isTargetDarwin())
-    label =  "\"L" + utostr_32(FnNum) + "$pb\"";
+    O << "\"L" << getFunctionNumber() << "$pb\"";
   else if (Subtarget->isTargetELF())
-    label = ".Lllvm$" + utostr_32(FnNum) + "." "$piclabel";
+    O << ".Lllvm$" << getFunctionNumber() << "." "$piclabel";
   else
     assert(0 && "Don't know how to print PIC label!\n");
+}
 
-  return label;
+/// PrintUnmangledNameSafely - Print out the printable characters in the name.
+/// Don't print things like \\n or \\0.
+static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
+  for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
+       Name != E; ++Name)
+    if (isprint(*Name))
+      OS << *Name;
 }
 
 static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
@@ -89,15 +100,6 @@ static X86MachineFunctionInfo calculateFunctionInfo(const Function *F,
   return Info;
 }
 
-/// PrintUnmangledNameSafely - Print out the printable characters in the name.
-/// Don't print things like \\n or \\0.
-static void PrintUnmangledNameSafely(const Value *V, raw_ostream &OS) {
-  for (const char *Name = V->getNameStart(), *E = Name+V->getNameLen();
-       Name != E; ++Name)
-    if (isprint(*Name))
-      OS << *Name;
-}
-
 /// decorateName - Query FunctionInfoMap and use this information for various
 /// name decoration.
 void X86ATTAsmPrinter::decorateName(std::string &Name,
@@ -152,6 +154,8 @@ void X86ATTAsmPrinter::decorateName(std::string &Name,
   }
 }
 
+
+
 void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
   const Function *F = MF.getFunction();
 
@@ -159,9 +163,12 @@ void X86ATTAsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
 
   SwitchToSection(TAI->SectionForGlobal(F));
 
+  // FIXME: A function's alignment should be part of MachineFunction.  There
+  // shouldn't be a policy decision here.
   unsigned FnAlign = 4;
   if (F->hasFnAttr(Attribute::OptimizeForSize))
     FnAlign = 1;
+  
   switch (F->getLinkage()) {
   default: assert(0 && "Unknown linkage type!");
   case Function::InternalLinkage:  // Symbols default to internal.
@@ -283,13 +290,8 @@ bool X86ATTAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
   return false;
 }
 
-static inline bool shouldPrintGOT(TargetMachine &TM, const X86Subtarget* ST) {
-  return ST->isPICStyleGOT() && TM.getRelocationModel() == Reloc::PIC_;
-}
-
 static inline bool shouldPrintPLT(TargetMachine &TM, const X86Subtarget* ST) {
-  return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_ &&
-      (ST->isPICStyleRIPRel() || ST->isPICStyleGOT());
+  return ST->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_;
 }
 
 static inline bool shouldPrintStub(TargetMachine &TM, const X86Subtarget* ST) {
@@ -324,6 +326,8 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
     }
     
     if (shouldPrintStub(TM, Subtarget)) {
+      // Darwin/X86-32 in non-static mode.
+      
       // Link-once, declaration, or Weakly-linked global variables need
       // non-lazily-resolved stubs
       if (GV->isDeclaration() || GV->isWeakForLinker()) {
@@ -354,9 +358,8 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
         O << Name;
       }
     } else {
-      if (GV->hasDLLImportLinkage()) {
+      if (GV->hasDLLImportLinkage())
         O << "__imp_";
-      }
       O << Name;
       
       if (shouldPrintPLT(TM, Subtarget)) {
@@ -370,9 +373,6 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
         FnStubs.insert(Name);
     }
     
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-    
     printOffset(MO.getOffset());
     
     if (needCloseParen)
@@ -386,7 +386,9 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
     Name += MO.getSymbolName();
     // Print function stub suffix unless it's Mac OS X 10.5 and up.
     if (shouldPrintStub(TM, Subtarget) && 
+        // Darwin/X86-32 in non-static mode.
         !(Subtarget->isTargetDarwin() && Subtarget->getDarwinVers() >= 9)) {
+      
       FnStubs.insert(Name);
       printSuffixedName(Name, "$stub");
       return;
@@ -401,23 +403,15 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
     
     O << Name;
     
-    if (shouldPrintPLT(TM, Subtarget)) {
-      std::string GOTName(TAI->getGlobalPrefix());
-      GOTName+="_GLOBAL_OFFSET_TABLE_";
-      if (Name == GOTName)
-        // HACK! Emit extra offset to PC during printing GOT offset to
-        // compensate for the size of popl instruction. The resulting code
-        // should look like:
-        //   call .piclabel
-        // piclabel:
-        //   popl %some_register
-        //   addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
-        O << " + [.-"
-          << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
-      
-      O << "@PLT";
+    if (MO.getTargetFlags() == X86II::MO_GOT_ABSOLUTE_ADDRESS) {
+      O << " + [.-";
+      PrintPICBaseSymbol();
+      O << ']';
     }
     
+    if (shouldPrintPLT(TM, Subtarget))
+      O << "@PLT";
+    
     if (needCloseParen)
       O << ')';
     
@@ -427,9 +421,10 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MachineInstr *MI, unsigned OpNo) {
 }
 
 void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
-                                    const char *Modifier, bool NotRIPRel) {
+                                    const char *Modifier) {
   const MachineOperand &MO = MI->getOperand(OpNo);
   switch (MO.getType()) {
+  default: assert(0 && "unknown operand type!");
   case MachineOperand::MO_Register: {
     assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) &&
            "Virtual registers should not make it this far!");
@@ -456,18 +451,7 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     if (!isMemOp) O << '$';
     O << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_'
       << MO.getIndex();
-
-    if (TM.getRelocationModel() == Reloc::PIC_) {
-      if (Subtarget->isPICStyleStub())
-        O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
-          << "$pb\"";
-      else if (Subtarget->isPICStyleGOT())
-        O << "@GOTOFF";
-    }
-
-    if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
-      O << "(%rip)";
-    return;
+    break;
   }
   case MachineOperand::MO_ConstantPoolIndex: {
     bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
@@ -475,38 +459,17 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     O << TAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_'
       << MO.getIndex();
 
-    if (TM.getRelocationModel() == Reloc::PIC_) {
-      if (Subtarget->isPICStyleStub())
-        O << "-\"" << TAI->getPrivateGlobalPrefix() << getFunctionNumber()
-          << "$pb\"";
-      else if (Subtarget->isPICStyleGOT())
-        O << "@GOTOFF";
-    }
-
     printOffset(MO.getOffset());
-
-    if (isMemOp && Subtarget->isPICStyleRIPRel() && !NotRIPRel)
-      O << "(%rip)";
-    return;
+    break;
   }
   case MachineOperand::MO_GlobalAddress: {
     bool isMemOp = Modifier && !strcmp(Modifier, "mem");
-    bool needCloseParen = false;
 
     const GlobalValue *GV = MO.getGlobal();
-    const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
-    if (!GVar) {
-      // If GV is an alias then use the aliasee for determining
-      // thread-localness.
-      if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
-        GVar =dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false));
-    }
-
-    bool isThreadLocal = GVar && GVar->isThreadLocal();
-
     std::string Name = Mang->getValueName(GV);
     decorateName(Name, GV);
 
+    bool needCloseParen = false;
     if (!isMemOp)
       O << '$';
     else if (Name[0] == '$') {
@@ -517,6 +480,8 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
     }
 
     if (shouldPrintStub(TM, Subtarget)) {
+      // Darwin/X86-32 in non-static mode.
+
       // Link-once, declaration, or Weakly-linked global variables need
       // non-lazily-resolved stubs
       if (GV->isDeclaration() || GV->isWeakForLinker()) {
@@ -539,118 +504,59 @@ void X86ATTAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
         O << Name;
       }
 
-      if (TM.getRelocationModel() == Reloc::PIC_)
-        O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget);
+      if (TM.getRelocationModel() == Reloc::PIC_) {
+        O << '-';
+        PrintPICBaseSymbol();
+      }        
     } else {
       if (GV->hasDLLImportLinkage())
         O << "__imp_";
       O << Name;
     }
 
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
-
     printOffset(MO.getOffset());
 
     if (needCloseParen)
       O << ')';
     
-    bool isRIPRelative = false;
-    if (isThreadLocal) {
-      TLSModel::Model model = getTLSModel(GVar, TM.getRelocationModel());
-      switch (model) {
-      case TLSModel::GeneralDynamic:
-        O << "@TLSGD";
-        break;
-      case TLSModel::LocalDynamic:
-        // O << "@TLSLD"; // local dynamic not implemented
-        O << "@TLSGD";
-        break;
-      case TLSModel::InitialExec:
-        if (Subtarget->is64Bit()) {
-          assert (!NotRIPRel);
-          O << "@GOTTPOFF";
-          isRIPRelative = true;
-        } else {
-          O << "@INDNTPOFF";
-        }
-        break;
-      case TLSModel::LocalExec:
-        if (Subtarget->is64Bit())
-          O << "@TPOFF";
-        else
-          O << "@NTPOFF";
-        break;
-      default:
-        assert (0 && "Unknown TLS model");
-      }
-    } else if (isMemOp) {
-      if (shouldPrintGOT(TM, Subtarget)) {
-        if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
-          O << "@GOT";
-        else
-          O << "@GOTOFF";
-      } else if (Subtarget->isPICStyleRIPRel() &&
-                 !NotRIPRel) {
-        if (TM.getRelocationModel() != Reloc::Static) {
-          if (Subtarget->GVRequiresExtraLoad(GV, TM, false))
-            O << "@GOTPCREL";
-        }
-        
-        isRIPRelative = true;
-      }
-    }
-
-    // Use rip when possible to reduce code size, except when
-    // index or base register are also part of the address. e.g.
-    // foo(%rip)(%rcx,%rax,4) is not legal.
-    if (isRIPRelative)
-      O << "(%rip)";
-    
-    return;
+    break;
   }
-  case MachineOperand::MO_ExternalSymbol: {
-    bool isMemOp  = Modifier && !strcmp(Modifier, "mem");
-    bool needCloseParen = false;
-    std::string Name(TAI->getGlobalPrefix());
-    Name += MO.getSymbolName();
-
-    // Print function stub suffix unless it's Mac OS X 10.5 and up.
-    if (!isMemOp)
-      O << '$';
-    else if (Name[0] == '$') {
-      // The name begins with a dollar-sign. In order to avoid having it look
-      // like an integer immediate to the assembler, enclose it in parens.
-      O << '(';
-      needCloseParen = true;
-    }
-
-    O << Name;
-
-    if (shouldPrintPLT(TM, Subtarget)) {
-      std::string GOTName(TAI->getGlobalPrefix());
-      GOTName+="_GLOBAL_OFFSET_TABLE_";
-      if (Name == GOTName)
-        // HACK! Emit extra offset to PC during printing GOT offset to
-        // compensate for the size of popl instruction. The resulting code
-        // should look like:
-        //   call .piclabel
-        // piclabel:
-        //   popl %some_register
-        //   addl $_GLOBAL_ADDRESS_TABLE_ + [.-piclabel], %some_register
-        O << " + [.-"
-          << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << ']';
-    }
-
-    if (needCloseParen)
-      O << ')';
-
-    if (Subtarget->isPICStyleRIPRel())
-      O << "(%rip)";
-    return;
+  case MachineOperand::MO_ExternalSymbol:
+    /// NOTE: MO_ExternalSymbol in a non-pcrel_imm context is *only* generated
+    /// by _GLOBAL_OFFSET_TABLE_ on X86-32.  All others are call operands, which
+    /// are pcrel_imm's.
+    assert(!Subtarget->is64Bit() && !Subtarget->isPICStyleRIPRel());
+    // These are never used as memory operands.
+    assert(!(Modifier && !strcmp(Modifier, "mem")));
+    
+    O << '$';
+    O << TAI->getGlobalPrefix();
+    O << MO.getSymbolName();
+    break;
   }
+  
+  switch (MO.getTargetFlags()) {
   default:
-    O << "<unknown operand type>"; return;
+    assert(0 && "Unknown target flag on GV operand");
+  case X86II::MO_NO_FLAG:
+    break;
+  case X86II::MO_GOT_ABSOLUTE_ADDRESS:
+    O << " + [.-";
+    PrintPICBaseSymbol();
+    O << ']';
+    break;      
+  case X86II::MO_PIC_BASE_OFFSET:
+    O << '-';
+    PrintPICBaseSymbol();
+    break;
+  case X86II::MO_TLSGD:     O << "@TLSGD";     break;
+  case X86II::MO_GOTTPOFF:  O << "@GOTTPOFF";  break;
+  case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
+  case X86II::MO_TPOFF:     O << "@TPOFF";     break;
+  case X86II::MO_NTPOFF:    O << "@NTPOFF";    break;
+  case X86II::MO_GOTPCREL:  O << "@GOTPCREL";  break;
+  case X86II::MO_GOT:       O << "@GOT";       break;
+  case X86II::MO_GOTOFF:    O << "@GOTOFF";    break;
   }
 }
 
@@ -670,25 +576,24 @@ void X86ATTAsmPrinter::printSSECC(const MachineInstr *MI, unsigned Op) {
 }
 
 void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
-                                            const char *Modifier,
-                                            bool NotRIPRel) {
+                                            const char *Modifier) {
   MachineOperand BaseReg  = MI->getOperand(Op);
   MachineOperand IndexReg = MI->getOperand(Op+2);
   const MachineOperand &DispSpec = MI->getOperand(Op+3);
 
-  NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
   if (DispSpec.isGlobal() ||
       DispSpec.isCPI() ||
       DispSpec.isJTI() ||
       DispSpec.isSymbol()) {
-    printOperand(MI, Op+3, "mem", NotRIPRel);
+    printOperand(MI, Op+3, "mem");
   } else {
     int DispVal = DispSpec.getImm();
     if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
       O << DispVal;
   }
 
-  if (IndexReg.getReg() || BaseReg.getReg()) {
+  if ((IndexReg.getReg() || BaseReg.getReg()) &&
+      (Modifier == 0 || strcmp(Modifier, "no-rip"))) {
     unsigned ScaleVal = MI->getOperand(Op+1).getImm();
     unsigned BaseRegOperand = 0, IndexRegOperand = 2;
 
@@ -716,14 +621,14 @@ void X86ATTAsmPrinter::printLeaMemReference(const MachineInstr *MI, unsigned Op,
 }
 
 void X86ATTAsmPrinter::printMemReference(const MachineInstr *MI, unsigned Op,
-                                         const char *Modifier, bool NotRIPRel){
+                                         const char *Modifier) {
   assert(isMem(MI, Op) && "Invalid memory reference!");
   MachineOperand Segment = MI->getOperand(Op+4);
   if (Segment.getReg()) {
       printOperand(MI, Op+4, Modifier);
       O << ':';
     }
-  printLeaMemReference(MI, Op, Modifier, NotRIPRel);
+  printLeaMemReference(MI, Op, Modifier);
 }
 
 void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
@@ -741,13 +646,19 @@ void X86ATTAsmPrinter::printPICJumpTableSetLabel(unsigned uid,
   if (Subtarget->isPICStyleRIPRel())
     O << '-' << TAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
       << '_' << uid << '\n';
-  else
-    O << '-' << getPICLabelString(getFunctionNumber(), TAI, Subtarget) << '\n';
+  else {
+    O << '-';
+    PrintPICBaseSymbol();
+    O << '\n';
+  }
 }
 
+
 void X86ATTAsmPrinter::printPICLabel(const MachineInstr *MI, unsigned Op) {
-  std::string label = getPICLabelString(getFunctionNumber(), TAI, Subtarget);
-  O << label << '\n' << label << ':';
+  PrintPICBaseSymbol();
+  O << '\n';
+  PrintPICBaseSymbol();
+  O << ':';
 }
 
 
@@ -810,7 +721,7 @@ bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
     switch (ExtraCode[0]) {
     default: return true;  // Unknown modifier.
     case 'c': // Don't print "$" before a global var name or constant.
-      printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      printOperand(MI, OpNo, "mem");
       return false;
     case 'b': // Print QImode register
     case 'h': // Print QImode high register
@@ -823,8 +734,19 @@ bool X86ATTAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
       return false;
 
     case 'P': // Don't print @PLT, but do print as memory.
-      printOperand(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      printOperand(MI, OpNo, "mem");
       return false;
+
+      case 'n': { // Negate the immediate or print a '-' before the operand.
+      // Note: this is a temporary solution. It should be handled in a
+      // target-independent way as part of the 'MC' work.
+      const MachineOperand &MO = MI->getOperand(OpNo);
+      if (MO.isImm()) {
+        O << -MO.getImm();
+        return false;
+      }
+      O << '-';
+    }
     }
   }
 
@@ -849,7 +771,7 @@ bool X86ATTAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
       // These only apply to registers, ignore on mem.
       break;
     case 'P': // Don't print @PLT, but do print as memory.
-      printMemReference(MI, OpNo, "mem", /*NotRIPRel=*/true);
+      printMemReference(MI, OpNo, "no-rip");
       return false;
     }
   }
@@ -931,8 +853,13 @@ void X86ATTAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
 
 /// doInitialization
 bool X86ATTAsmPrinter::doInitialization(Module &M) {
-  if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling()) 
-    MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+  if (NewAsmPrinter) {
+    Context = new MCContext();
+    // FIXME: Send this to "O" instead of outs().  For now, we force it to
+    // stdout to make it easy to compare.
+    Streamer = createAsmStreamer(*Context, outs());
+  }
+  
   return AsmPrinter::doInitialization(M);
 }
 
@@ -956,6 +883,8 @@ void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
 
   std::string name = Mang->getValueName(GVar);
   Constant *C = GVar->getInitializer();
+  if (isa<MDNode>(C) || isa<MDString>(C))
+    return;
   const Type *Type = C->getType();
   unsigned Size = TD->getTypeAllocSize(Type);
   unsigned Align = TD->getPreferredAlignmentLog(GVar);
@@ -1068,25 +997,6 @@ void X86ATTAsmPrinter::printModuleLevelGV(const GlobalVariable* GVar) {
   EmitGlobalConstant(C);
 }
 
-/// printGVStub - Print stub for a global value.
-///
-void X86ATTAsmPrinter::printGVStub(const char *GV, const char *Prefix) {
-  printSuffixedName(GV, "$non_lazy_ptr", Prefix);
-  O << ":\n\t.indirect_symbol ";
-  if (Prefix) O << Prefix;
-  O << GV << "\n\t.long\t0\n";
-}
-
-/// printHiddenGVStub - Print stub for a hidden global value.
-///
-void X86ATTAsmPrinter::printHiddenGVStub(const char *GV, const char *Prefix) {
-  EmitAlignment(2);
-  printSuffixedName(GV, "$non_lazy_ptr", Prefix);
-  if (Prefix) O << Prefix;
-  O << ":\n" << TAI->getData32bitsDirective() << GV << '\n';
-}
-
-
 bool X86ATTAsmPrinter::doFinalization(Module &M) {
   // Print out module-level global variables here.
   for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
@@ -1095,100 +1005,62 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
 
     if (I->hasDLLExportLinkage())
       DLLExportedGVs.insert(Mang->makeNameProper(I->getName(),""));
-
-    // If the global is a extern weak symbol, remember to emit the weak
-    // reference!
-    // FIXME: This is rather hacky, since we'll emit references to ALL weak
-    // stuff, not used. But currently it's the only way to deal with extern weak
-    // initializers hidden deep inside constant expressions.
-    if (I->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(I);
-  }
-
-  for (Module::const_iterator I = M.begin(), E = M.end();
-       I != E; ++I) {
-    // If the global is a extern weak symbol, remember to emit the weak
-    // reference!
-    // FIXME: This is rather hacky, since we'll emit references to ALL weak
-    // stuff, not used. But currently it's the only way to deal with extern weak
-    // initializers hidden deep inside constant expressions.
-    if (I->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(I);
   }
 
-  // Output linker support code for dllexported globals
-  if (!DLLExportedGVs.empty())
-    SwitchToDataSection(".section .drectve");
-
-  for (StringSet<>::iterator i = DLLExportedGVs.begin(),
-         e = DLLExportedGVs.end();
-         i != e; ++i)
-    O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n";
-
-  if (!DLLExportedFns.empty()) {
-    SwitchToDataSection(".section .drectve");
-  }
-
-  for (StringSet<>::iterator i = DLLExportedFns.begin(),
-         e = DLLExportedFns.end();
-         i != e; ++i)
-    O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n";
-
   if (Subtarget->isTargetDarwin()) {
     SwitchToDataSection("");
-
-    // Output stubs for dynamically-linked functions
-    for (StringSet<>::iterator i = FnStubs.begin(), e = FnStubs.end();
-         i != e; ++i) {
-      SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs,"
-                          "self_modifying_code+pure_instructions,5", 0);
-      const char *p = i->getKeyData();
-      printSuffixedName(p, "$stub");
-      O << ":\n"
-           "\t.indirect_symbol " << p << "\n"
-           "\thlt ; hlt ; hlt ; hlt ; hlt\n";
-    }
-
-    O << '\n';
-
-    // Print global value stubs.
-    bool InStubSection = false;
+    
+    // Add the (possibly multiple) personalities to the set of global value
+    // stubs.  Only referenced functions get into the Personalities list.
     if (TAI->doesSupportExceptionHandling() && MMI && !Subtarget->is64Bit()) {
-      // Add the (possibly multiple) personalities to the set of global values.
-      // Only referenced functions get into the Personalities list.
-      const std::vector<Function *>& Personalities = MMI->getPersonalities();
-      for (std::vector<Function *>::const_iterator I = Personalities.begin(),
-             E = Personalities.end(); I != E; ++I) {
-        if (!*I)
+      const std::vector<Function*> &Personalities = MMI->getPersonalities();
+      for (unsigned i = 0, e = Personalities.size(); i != e; ++i) {
+        if (Personalities[i] == 0)
           continue;
-        if (!InStubSection) {
-          SwitchToDataSection(
-                     "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
-          InStubSection = true;
-        }
-        printGVStub((*I)->getNameStart(), "_");
+        std::string Name = Mang->getValueName(Personalities[i]);
+        decorateName(Name, Personalities[i]);
+        GVStubs.insert(Name);
       }
     }
 
+    // Output stubs for dynamically-linked functions
+    if (!FnStubs.empty()) {
+      for (StringSet<>::iterator I = FnStubs.begin(), E = FnStubs.end();
+           I != E; ++I) {
+        SwitchToDataSection("\t.section __IMPORT,__jump_table,symbol_stubs,"
+                            "self_modifying_code+pure_instructions,5", 0);
+        const char *Name = I->getKeyData();
+        printSuffixedName(Name, "$stub");
+        O << ":\n"
+             "\t.indirect_symbol " << Name << "\n"
+             "\thlt ; hlt ; hlt ; hlt ; hlt\n";
+      }
+      O << '\n';
+    }
+
     // Output stubs for external and common global variables.
-    if (!InStubSection && !GVStubs.empty())
+    if (!GVStubs.empty()) {
       SwitchToDataSection(
                     "\t.section __IMPORT,__pointers,non_lazy_symbol_pointers");
-    for (StringSet<>::iterator i = GVStubs.begin(), e = GVStubs.end();
-         i != e; ++i)
-      printGVStub(i->getKeyData());
+      for (StringSet<>::iterator I = GVStubs.begin(), E = GVStubs.end();
+           I != E; ++I) {
+        const char *Name = I->getKeyData();
+        printSuffixedName(Name, "$non_lazy_ptr");
+        O << ":\n\t.indirect_symbol " << Name << "\n\t.long\t0\n";
+      }
+    }
 
     if (!HiddenGVStubs.empty()) {
       SwitchToSection(TAI->getDataSection());
-      for (StringSet<>::iterator i = HiddenGVStubs.begin(), e = HiddenGVStubs.end();
-           i != e; ++i)
-        printHiddenGVStub(i->getKeyData());
+      EmitAlignment(2);
+      for (StringSet<>::iterator I = HiddenGVStubs.begin(),
+           E = HiddenGVStubs.end(); I != E; ++I) {
+        const char *Name = I->getKeyData();
+        printSuffixedName(Name, "$non_lazy_ptr");
+        O << ":\n" << TAI->getData32bitsDirective() << Name << '\n';
+      }
     }
 
-    // Emit final debug information.
-    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
-      DW->EndModule();
-
     // Funny Darwin hack: This flag tells the linker that no global symbols
     // contain code that falls through to other global symbols (e.g. the obvious
     // implementation of multiple entry points).  If this doesn't occur, the
@@ -1204,17 +1076,40 @@ bool X86ATTAsmPrinter::doFinalization(Module &M) {
         << ";\t.type\t" << (COFF::DT_FCN << COFF::N_BTSHFT)
         << ";\t.endef\n";
     }
-
-    // Emit final debug information.
-    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
-      DW->EndModule();
-  } else if (Subtarget->isTargetELF()) {
-    // Emit final debug information.
-    if (TAI->doesSupportDebugInformation() || TAI->doesSupportExceptionHandling())
-      DW->EndModule();
   }
-
-  return AsmPrinter::doFinalization(M);
+  
+  
+  // Output linker support code for dllexported globals on windows.
+  if (!DLLExportedGVs.empty()) {
+    SwitchToDataSection(".section .drectve");
+  
+    for (StringSet<>::iterator i = DLLExportedGVs.begin(),
+         e = DLLExportedGVs.end(); i != e; ++i)
+      O << "\t.ascii \" -export:" << i->getKeyData() << ",data\"\n";
+  }
+  
+  if (!DLLExportedFns.empty()) {
+    SwitchToDataSection(".section .drectve");
+  
+    for (StringSet<>::iterator i = DLLExportedFns.begin(),
+         e = DLLExportedFns.end();
+         i != e; ++i)
+      O << "\t.ascii \" -export:" << i->getKeyData() << "\"\n";
+  }
+  
+  // Do common shutdown.
+  bool Changed = AsmPrinter::doFinalization(M);
+  
+  if (NewAsmPrinter) {
+    Streamer->Finish();
+    
+    delete Streamer;
+    delete Context;
+    Streamer = 0;
+    Context = 0;
+  }
+  
+  return Changed;
 }
 
 // Include the auto-generated portion of the assembly writer.
diff --git a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
index 68a6bc8..bd96115 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
+++ b/lib/Target/X86/AsmPrinter/X86ATTAsmPrinter.h
@@ -27,17 +27,23 @@
 namespace llvm {
 
 class MachineJumpTableInfo;
+class MCContext;
 class MCInst;
+class MCStreamer;
 
 class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
-  MachineModuleInfo *MMI;
   const X86Subtarget *Subtarget;
+  
+  MCContext *Context;
+  MCStreamer *Streamer;
  public:
   explicit X86ATTAsmPrinter(raw_ostream &O, X86TargetMachine &TM,
                             const TargetAsmInfo *T, CodeGenOpt::Level OL,
                             bool V)
-    : AsmPrinter(O, TM, T, OL, V), MMI(0) {
+    : AsmPrinter(O, TM, T, OL, V) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
+    Context = 0;
+    Streamer = 0;
   }
 
   virtual const char *getPassName() const {
@@ -69,7 +75,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   bool printInstruction(const MCInst *MI);
 
   void printOperand(const MCInst *MI, unsigned OpNo,
-                    const char *Modifier = 0, bool NotRIPRel = false);
+                    const char *Modifier = 0);
   void printMemReference(const MCInst *MI, unsigned Op);
   void printLeaMemReference(const MCInst *MI, unsigned Op);
   void printSSECC(const MCInst *MI, unsigned Op);
@@ -117,7 +123,7 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
 
   // These methods are used by the tablegen'erated instruction printer.
   void printOperand(const MachineInstr *MI, unsigned OpNo,
-                    const char *Modifier = 0, bool NotRIPRel = false);
+                    const char *Modifier = 0);
   void print_pcrel_imm(const MachineInstr *MI, unsigned OpNo);
   void printi8mem(const MachineInstr *MI, unsigned OpNo) {
     printMemReference(MI, OpNo);
@@ -165,9 +171,9 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   void printMachineInstruction(const MachineInstr *MI);
   void printSSECC(const MachineInstr *MI, unsigned Op);
   void printMemReference(const MachineInstr *MI, unsigned Op,
-                         const char *Modifier=NULL, bool NotRIPRel = false);
+                         const char *Modifier=NULL);
   void printLeaMemReference(const MachineInstr *MI, unsigned Op,
-                            const char *Modifier=NULL, bool NotRIPRel = false);
+                            const char *Modifier=NULL);
   void printPICJumpTableSetLabel(unsigned uid,
                                  const MachineBasicBlock *MBB) const;
   void printPICJumpTableSetLabel(unsigned uid, unsigned uid2,
@@ -181,9 +187,8 @@ class VISIBILITY_HIDDEN X86ATTAsmPrinter : public AsmPrinter {
   void printPICLabel(const MachineInstr *MI, unsigned Op);
   void printModuleLevelGV(const GlobalVariable* GVar);
 
-  void printGVStub(const char *GV, const char *Prefix = NULL);
-  void printHiddenGVStub(const char *GV, const char *Prefix = NULL);
-
+  void PrintPICBaseSymbol() const;
+  
   bool runOnMachineFunction(MachineFunction &F);
 
   void emitFunctionHeader(const MachineFunction &MF);
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index 9d50edc..fa0ee75 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -65,7 +65,7 @@ void X86ATTAsmPrinter::print_pcrel_imm(const MCInst *MI, unsigned OpNo) {
 
 
 void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
-                                    const char *Modifier, bool NotRIPRel) {
+                                    const char *Modifier) {
   assert(Modifier == 0 && "Modifiers should not be used");
   
   const MCOperand &Op = MI->getOperand(OpNo);
@@ -93,13 +93,11 @@ void X86ATTAsmPrinter::printOperand(const MCInst *MI, unsigned OpNo,
 }
 
 void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
-  bool NotRIPRel = false;
 
   const MCOperand &BaseReg  = MI->getOperand(Op);
   const MCOperand &IndexReg = MI->getOperand(Op+2);
   const MCOperand &DispSpec = MI->getOperand(Op+3);
   
-  NotRIPRel |= IndexReg.getReg() || BaseReg.getReg();
   if (DispSpec.isImm()) {
     int64_t DispVal = DispSpec.getImm();
     if (DispVal || (!IndexReg.getReg() && !BaseReg.getReg()))
@@ -108,7 +106,7 @@ void X86ATTAsmPrinter::printLeaMemReference(const MCInst *MI, unsigned Op) {
     abort();
     //assert(DispSpec.isGlobal() || DispSpec.isCPI() ||
     //       DispSpec.isJTI() || DispSpec.isSymbol());
-    //printOperand(MI, Op+3, "mem", NotRIPRel);
+    //printOperand(MI, Op+3, "mem");
   }
   
   if (IndexReg.getReg() || BaseReg.getReg()) {
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index a39203b..d1623d6 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -47,8 +47,5 @@ namespace {
 extern "C" int X86AsmPrinterForceLink;
 int X86AsmPrinterForceLink = 0;
 
-// Force static initialization when called from
-// llvm/InitializeAllAsmPrinters.h
-namespace llvm {
-  void InitializeX86AsmPrinter() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeX86AsmPrinter() { }
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 8df138d..4d26364 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -55,6 +55,13 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
 def FeatureSSE4A   : SubtargetFeature<"sse4a", "HasSSE4A", "true",
                                       "Support SSE 4a instructions">;
 
+def FeatureAVX     : SubtargetFeature<"avx", "HasAVX", "true",
+                                      "Enable AVX instructions">;
+def FeatureFMA3    : SubtargetFeature<"fma3", "HasFMA3", "true",
+                                      "Enable three-operand fused multiply-add">;
+def FeatureFMA4    : SubtargetFeature<"fma4", "HasFMA4", "true",
+                                      "Enable four-operand fused multiply-add">;
+
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
 //===----------------------------------------------------------------------===//
@@ -82,6 +89,9 @@ def : Proc<"core2",           [FeatureSSSE3,  Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"penryn",          [FeatureSSE41,  Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"atom",            [FeatureSSE3,   Feature64Bit, FeatureSlowBTMem]>;
 def : Proc<"corei7",          [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"nehalem",         [FeatureSSE42,  Feature64Bit, FeatureSlowBTMem]>;
+// Sandy Bridge does not have FMA
+def : Proc<"sandybridge",     [FeatureSSE42,  FeatureAVX,   Feature64Bit]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [FeatureMMX,    Feature3DNow]>;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index e988a5c..d5846a0 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -301,7 +301,7 @@ bool Emitter<CodeEmitter>::gvNeedsNonLazyPtr(const GlobalValue *GV) {
 
 template<class CodeEmitter>
 void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
-                                    int DispVal, intptr_t PCAdj) {
+                                                 int DispVal, intptr_t PCAdj) {
   // If this is a simple integer displacement that doesn't require a relocation,
   // emit it now.
   if (!RelocOp) {
@@ -371,8 +371,10 @@ void Emitter<CodeEmitter>::emitMemModRMByte(const MachineInstr &MI,
   // Is a SIB byte needed?
   if ((!Is64BitMode || DispForReloc || BaseReg != 0) &&
       IndexReg.getReg() == 0 &&
-      (BaseReg == 0 || getX86RegNum(BaseReg) != N86::ESP)) {
-    if (BaseReg == 0) {  // Just a displacement?
+      (BaseReg == 0 || BaseReg == X86::RIP ||
+       getX86RegNum(BaseReg) != N86::ESP)) {
+    if (BaseReg == 0 ||
+        BaseReg == X86::RIP) {  // Just a displacement?
       // Emit special case [disp32] encoding
       MCE.emitByte(ModRMByte(0, RegOpcodeField, 5));
       
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 2bcfd76..8a21b35 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -396,8 +396,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
           // Constant-offset addressing.
           Disp += CI->getSExtValue() * S;
         } else if (IndexReg == 0 &&
-                   (!AM.GV ||
-                    !getTargetMachine()->symbolicAddressesAreRIPRel()) &&
+                   (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                    (S == 1 || S == 2 || S == 4 || S == 8)) {
           // Scaled-index addressing.
           Scale = S;
@@ -432,7 +431,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
       return false;
 
     // RIP-relative addresses can't have additional register operands.
-    if (getTargetMachine()->symbolicAddressesAreRIPRel() &&
+    if (Subtarget->isPICStyleRIPRel() &&
         (AM.Base.Reg != 0 || AM.IndexReg != 0))
       return false;
 
@@ -443,6 +442,7 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
 
     // Set up the basic address.
     AM.GV = GV;
+    
     if (!isCall &&
         TM.getRelocationModel() == Reloc::PIC_ &&
         !Subtarget->is64Bit())
@@ -481,12 +481,16 @@ bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
 
       // Prevent loading GV stub multiple times in same MBB.
       LocalValueMap[V] = AM.Base.Reg;
+    } else if (Subtarget->isPICStyleRIPRel()) {
+      // Use rip-relative addressing if we can.
+      AM.Base.Reg = X86::RIP;
     }
+    
     return true;
   }
 
   // If all else fails, try to materialize the value in a register.
-  if (!AM.GV || !getTargetMachine()->symbolicAddressesAreRIPRel()) {
+  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
     if (AM.Base.Reg == 0) {
       AM.Base.Reg = getRegForValue(V);
       return AM.Base.Reg != 0;
@@ -1140,12 +1144,10 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
     return false;
   unsigned CalleeOp = 0;
   GlobalValue *GV = 0;
-  if (CalleeAM.Base.Reg != 0) {
-    assert(CalleeAM.GV == 0);
-    CalleeOp = CalleeAM.Base.Reg;
-  } else if (CalleeAM.GV != 0) {
-    assert(CalleeAM.GV != 0);
+  if (CalleeAM.GV != 0) {
     GV = CalleeAM.GV;
+  } else if (CalleeAM.Base.Reg != 0) {
+    CalleeOp = CalleeAM.Base.Reg;
   } else
     return false;
 
@@ -1493,15 +1495,22 @@ unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
   
   // x86-32 PIC requires a PIC base register for constant pools.
   unsigned PICBase = 0;
-  if (TM.getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->is64Bit())
-    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+  unsigned char OpFlag = 0;
+  if (TM.getRelocationModel() == Reloc::PIC_) {
+    if (Subtarget->isPICStyleStub()) {
+      OpFlag = X86II::MO_PIC_BASE_OFFSET;
+      PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+    } else if (Subtarget->isPICStyleGOT()) {
+      OpFlag = X86II::MO_GOTOFF;
+      PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
+    }
+  }
 
   // Create the load from the constant pool.
   unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
   unsigned ResultReg = createResultReg(RC);
-  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), MCPOffset,
-                           PICBase);
+  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
+                           MCPOffset, PICBase, OpFlag);
 
   return ResultReg;
 }
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 9cedafc..1336177 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -65,7 +65,6 @@ namespace {
       int FrameIndex;
     } Base;
 
-    bool isRIPRel;     // RIP as base?
     unsigned Scale;
     SDValue IndexReg; 
     int32_t Disp;
@@ -75,15 +74,35 @@ namespace {
     const char *ES;
     int JT;
     unsigned Align;    // CP alignment.
+    unsigned char SymbolFlags;  // X86II::MO_*
 
     X86ISelAddressMode()
-      : BaseType(RegBase), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
-        Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0) {
+      : BaseType(RegBase), Scale(1), IndexReg(), Disp(0),
+        Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0), SymbolFlags(0) {
     }
 
     bool hasSymbolicDisplacement() const {
       return GV != 0 || CP != 0 || ES != 0 || JT != -1;
     }
+    
+    bool hasBaseOrIndexReg() const {
+      return IndexReg.getNode() != 0 || Base.Reg.getNode() != 0;
+    }
+    
+    /// isRIPRelative - Return true if this addressing mode is already RIP
+    /// relative.
+    bool isRIPRelative() const {
+      if (BaseType != RegBase) return false;
+      if (RegisterSDNode *RegNode =
+            dyn_cast_or_null<RegisterSDNode>(Base.Reg.getNode()))
+        return RegNode->getReg() == X86::RIP;
+      return false;
+    }
+    
+    void setBaseReg(SDValue Reg) {
+      BaseType = RegBase;
+      Base.Reg = Reg;
+    }
 
     void dump() {
       cerr << "X86ISelAddressMode " << this << "\n";
@@ -91,7 +110,7 @@ namespace {
               if (Base.Reg.getNode() != 0) Base.Reg.getNode()->dump(); 
               else cerr << "nul";
       cerr << " Base.FrameIndex " << Base.FrameIndex << "\n";
-      cerr << "isRIPRel " << isRIPRel << " Scale" << Scale << "\n";
+      cerr << " Scale" << Scale << "\n";
       cerr << "IndexReg ";
               if (IndexReg.getNode() != 0) IndexReg.getNode()->dump();
               else cerr << "nul"; 
@@ -200,14 +219,15 @@ namespace {
       // These are 32-bit even in 64-bit mode since RIP relative offset
       // is 32-bit.
       if (AM.GV)
-        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp);
+        Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i32, AM.Disp,
+                                              AM.SymbolFlags);
       else if (AM.CP)
         Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
-                                             AM.Align, AM.Disp);
+                                             AM.Align, AM.Disp, AM.SymbolFlags);
       else if (AM.ES)
-        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32);
+        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
       else if (AM.JT != -1)
-        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32);
+        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
       else
         Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);
 
@@ -683,61 +703,80 @@ bool X86DAGToDAGISel::MatchLoad(SDValue N, X86ISelAddressMode &AM) {
   return true;
 }
 
+/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
+/// into an addressing mode.  These wrap things that will resolve down into a
+/// symbol reference.  If no match is possible, this returns true, otherwise it
+/// returns false.  
 bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
-  bool SymbolicAddressesAreRIPRel =
-    getTargetMachine().symbolicAddressesAreRIPRel();
-  bool is64Bit = Subtarget->is64Bit();
-  DOUT << "Wrapper: 64bit " << is64Bit;
-  DOUT << " AM "; DEBUG(AM.dump()); DOUT << "\n";
-
-  // Under X86-64 non-small code model, GV (and friends) are 64-bits.
-  if (is64Bit && (TM.getCodeModel() != CodeModel::Small))
-    return true;
-
-  // Base and index reg must be 0 in order to use rip as base.
-  bool canUsePICRel = !AM.Base.Reg.getNode() && !AM.IndexReg.getNode();
-  if (is64Bit && !canUsePICRel && SymbolicAddressesAreRIPRel)
-    return true;
-
+  // If the addressing mode already has a symbol as the displacement, we can
+  // never match another symbol.
   if (AM.hasSymbolicDisplacement())
     return true;
-  // If value is available in a register both base and index components have
-  // been picked, we can't fit the result available in the register in the
-  // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
 
   SDValue N0 = N.getOperand(0);
-  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
-    uint64_t Offset = G->getOffset();
-    if (!is64Bit || isInt32(AM.Disp + Offset)) {
-      GlobalValue *GV = G->getGlobal();
-      bool isRIPRel = SymbolicAddressesAreRIPRel;
-      if (N0.getOpcode() == llvm::ISD::TargetGlobalTLSAddress) {
-        TLSModel::Model model =
-          getTLSModel (GV, TM.getRelocationModel());
-        if (is64Bit && model == TLSModel::InitialExec)
-          isRIPRel = true;
-      }
-      AM.GV = GV;
-      AM.Disp += Offset;
-      AM.isRIPRel = isRIPRel;
-      return false;
-    }
-  } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
-    uint64_t Offset = CP->getOffset();
-    if (!is64Bit || isInt32(AM.Disp + Offset)) {
+  
+  // Handle X86-64 rip-relative addresses.  We check this before checking direct
+  // folding because RIP is preferable to non-RIP accesses.
+  if (Subtarget->is64Bit() &&
+      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
+      // they cannot be folded into immediate fields.
+      // FIXME: This can be improved for kernel and other models?
+      TM.getCodeModel() == CodeModel::Small &&
+      
+      // Base and index reg must be 0 in order to use %rip as base and lowering
+      // must allow RIP.
+      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
+  
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+      int64_t Offset = AM.Disp + G->getOffset();
+      if (!isInt32(Offset)) return true;
+      AM.GV = G->getGlobal();
+      AM.Disp = Offset;
+      AM.SymbolFlags = G->getTargetFlags();
+    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+      int64_t Offset = AM.Disp + CP->getOffset();
+      if (!isInt32(Offset)) return true;
       AM.CP = CP->getConstVal();
       AM.Align = CP->getAlignment();
-      AM.Disp += Offset;
-      AM.isRIPRel = SymbolicAddressesAreRIPRel;
-      return false;
+      AM.Disp = Offset;
+      AM.SymbolFlags = CP->getTargetFlags();
+    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+      AM.ES = S->getSymbol();
+      AM.SymbolFlags = S->getTargetFlags();
+    } else {
+      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
+      AM.JT = J->getIndex();
+      AM.SymbolFlags = J->getTargetFlags();
     }
-  } else if (ExternalSymbolSDNode *S =dyn_cast<ExternalSymbolSDNode>(N0)) {
-    AM.ES = S->getSymbol();
-    AM.isRIPRel = SymbolicAddressesAreRIPRel;
+  
+    // The enclosing condition already guarantees N is a WrapperRIP node.
+    AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
     return false;
-  } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
-    AM.JT = J->getIndex();
-    AM.isRIPRel = SymbolicAddressesAreRIPRel;
+  }
+
+  // Handle the case when globals fit in our immediate field: This is true for
+  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
+  // mode, this results in a non-RIP-relative computation.
+  if (!Subtarget->is64Bit() ||
+      (TM.getCodeModel() == CodeModel::Small &&
+       TM.getRelocationModel() == Reloc::Static)) {
+    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+      AM.GV = G->getGlobal();
+      AM.Disp += G->getOffset();
+      AM.SymbolFlags = G->getTargetFlags();
+    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+      AM.CP = CP->getConstVal();
+      AM.Align = CP->getAlignment();
+      AM.Disp += CP->getOffset();
+      AM.SymbolFlags = CP->getTargetFlags();
+    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+      AM.ES = S->getSymbol();
+      AM.SymbolFlags = S->getTargetFlags();
+    } else {
+      JumpTableSDNode *J = cast<JumpTableSDNode>(N0);
+      AM.JT = J->getIndex();
+      AM.SymbolFlags = J->getTargetFlags();
+    }
     return false;
   }
 
@@ -756,12 +795,19 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
   if (Depth > 5)
     return MatchAddressBase(N, AM);
   
+  // If this is already a %rip relative address, we can only merge immediates
+  // into it.  Instead of handling this in every case, we handle it here.
   // RIP relative addressing: %rip + 32-bit displacement!
-  if (AM.isRIPRel) {
-    if (!AM.ES && AM.JT != -1 && N.getOpcode() == ISD::Constant) {
-      uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
-      if (!is64Bit || isInt32(AM.Disp + Val)) {
-        AM.Disp += Val;
+  if (AM.isRIPRelative()) {
+    // FIXME: JumpTable and ExternalSymbol addresses currently don't like
+    // displacements (none is attached when the operand is built), so never
+    // fold a constant into them.  This should be fixed for consistency.
+    if (AM.ES || AM.JT != -1) return true;
+    
+    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
+      int64_t Val = AM.Disp + Cst->getSExtValue();
+      if (isInt32(Val)) {
+        AM.Disp = Val;
         return false;
       }
     }
@@ -785,6 +831,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     break;
 
   case X86ISD::Wrapper:
+  case X86ISD::WrapperRIP:
     if (!MatchWrapper(N, AM))
       return false;
     break;
@@ -804,7 +851,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     break;
 
   case ISD::SHL:
-    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1 || AM.isRIPRel)
+    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
       break;
       
     if (ConstantSDNode
@@ -845,8 +892,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     // X*[3,5,9] -> X+X*[2,4,8]
     if (AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base.Reg.getNode() == 0 &&
-        AM.IndexReg.getNode() == 0 &&
-        !AM.isRIPRel) {
+        AM.IndexReg.getNode() == 0) {
       if (ConstantSDNode
             *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
         if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
@@ -895,7 +941,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
       break;
     }
     // Test if the index field is free for use.
-    if (AM.IndexReg.getNode() || AM.isRIPRel) {
+    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
       AM = Backup;
       break;
     }
@@ -966,8 +1012,7 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     // the add.
     if (AM.BaseType == X86ISelAddressMode::RegBase &&
         !AM.Base.Reg.getNode() &&
-        !AM.IndexReg.getNode() &&
-        !AM.isRIPRel) {
+        !AM.IndexReg.getNode()) {
       AM.Base.Reg = N.getNode()->getOperand(0);
       AM.IndexReg = N.getNode()->getOperand(1);
       AM.Scale = 1;
@@ -1006,9 +1051,6 @@ bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM,
     // Scale must not be used already.
     if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;
 
-    // Not when RIP is used as the base.
-    if (AM.isRIPRel) break;
-
     SDValue X = Shift.getOperand(0);
     ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
     ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
@@ -1130,7 +1172,7 @@ bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
   // Is the base register already occupied?
   if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
     // If so, check to see if the scale index register is set.
-    if (AM.IndexReg.getNode() == 0 && !AM.isRIPRel) {
+    if (AM.IndexReg.getNode() == 0) {
       AM.IndexReg = N;
       AM.Scale = 1;
       return false;
@@ -1157,7 +1199,7 @@ bool X86DAGToDAGISel::SelectAddr(SDValue Op, SDValue N, SDValue &Base,
   if (AvoidDupAddrCompute && !N.hasOneUse()) {
     unsigned Opcode = N.getOpcode();
     if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex &&
-        Opcode != X86ISD::Wrapper) {
+        Opcode != X86ISD::Wrapper && Opcode != X86ISD::WrapperRIP) {
       // If we are able to fold N into addressing mode, then we'll allow it even
       // if N has multiple uses. In general, addressing computation is used as
       // addresses by all of its uses. But watch out for CopyToReg uses, that
@@ -1307,7 +1349,8 @@ bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue Op, SDValue N, SDValue &Base,
   AM.GV = GA->getGlobal();
   AM.Disp += GA->getOffset();
   AM.Base.Reg = CurDAG->getRegister(0, N.getValueType());
-  
+  AM.SymbolFlags = GA->getTargetFlags();
+
   if (N.getValueType() == MVT::i32) {
     AM.Scale = 1;
     AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
@@ -1687,7 +1730,8 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
       
       // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled
       // somehow, just ignore it.
-      if (N2.getOpcode() != X86ISD::Wrapper) {
+      if (N2.getOpcode() != X86ISD::Wrapper &&
+          N2.getOpcode() != X86ISD::WrapperRIP) {
         ReplaceUses(N.getValue(0), Chain);
         return NULL;
       }
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 8d0ea66..9614e69 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -19,6 +19,7 @@
 #include "llvm/CallingConv.h"
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
+#include "llvm/GlobalAlias.h"
 #include "llvm/GlobalVariable.h"
 #include "llvm/Function.h"
 #include "llvm/Intrinsics.h"
@@ -4311,21 +4312,102 @@ X86TargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
 SDValue
 X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
   ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
-  // FIXME there isn't really any debug info here, should come from the parent
-  DebugLoc dl = CP->getDebugLoc();
+  
+  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+  // global base reg.
+  unsigned char OpFlag = 0;
+  unsigned WrapperKind = X86ISD::Wrapper;
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    if (Subtarget->isPICStyleStub())
+      OpFlag = X86II::MO_PIC_BASE_OFFSET;
+    else if (Subtarget->isPICStyleGOT())
+      OpFlag = X86II::MO_GOTOFF;
+    else if (Subtarget->isPICStyleRIPRel() &&
+             getTargetMachine().getCodeModel() == CodeModel::Small)
+      WrapperKind = X86ISD::WrapperRIP;
+  }
+  
   SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(),
-                                             CP->getAlignment());
-  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+                                             CP->getAlignment(),
+                                             CP->getOffset(), OpFlag);
+  DebugLoc DL = CP->getDebugLoc();
+  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+  // With PIC, the address is actually $g + Offset.
+  if (OpFlag) {
+    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg,
+                                     DebugLoc::getUnknownLoc(), getPointerTy()),
+                         Result);
+  }
+
+  return Result;
+}
+
+SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+  
+  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+  // global base reg.
+  unsigned char OpFlag = 0;
+  unsigned WrapperKind = X86ISD::Wrapper;
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    if (Subtarget->isPICStyleStub())
+      OpFlag = X86II::MO_PIC_BASE_OFFSET;
+    else if (Subtarget->isPICStyleGOT())
+      OpFlag = X86II::MO_GOTOFF;
+    else if (Subtarget->isPICStyleRIPRel())
+      WrapperKind = X86ISD::WrapperRIP;
+  }
+  
+  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy(),
+                                          OpFlag);
+  DebugLoc DL = JT->getDebugLoc();
+  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+  
+  // With PIC, the address is actually $g + Offset.
+  if (OpFlag) {
+    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
+                         DAG.getNode(X86ISD::GlobalBaseReg,
+                                     DebugLoc::getUnknownLoc(), getPointerTy()),
+                         Result);
+  }
+  
+  return Result;
+}
+
+SDValue
+X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
+  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
+  
+  // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
+  // global base reg.
+  unsigned char OpFlag = 0;
+  unsigned WrapperKind = X86ISD::Wrapper;
+  if (getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+    if (Subtarget->isPICStyleStub())
+      OpFlag = X86II::MO_PIC_BASE_OFFSET;
+    else if (Subtarget->isPICStyleGOT())
+      OpFlag = X86II::MO_GOTOFF;
+    else if (Subtarget->isPICStyleRIPRel())
+      WrapperKind = X86ISD::WrapperRIP;
+  }
+  
+  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlag);
+  
+  DebugLoc DL = Op.getDebugLoc();
+  Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result);
+  
+  
   // With PIC, the address is actually $g + Offset.
   if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
       !Subtarget->isPICStyleRIPRel()) {
-    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
+    Result = DAG.getNode(ISD::ADD, DL, getPointerTy(),
                          DAG.getNode(X86ISD::GlobalBaseReg,
                                      DebugLoc::getUnknownLoc(),
                                      getPointerTy()),
                          Result);
   }
-
+  
   return Result;
 }
 
@@ -4343,9 +4425,29 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
   if (!IsPic && !ExtraLoadRequired && isInt32(Offset)) {
     Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), Offset);
     Offset = 0;
-  } else
-    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0);
-  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
+  } else {
+    unsigned char OpFlags = 0;
+    
+    if (Subtarget->isPICStyleRIPRel() &&
+        getTargetMachine().getRelocationModel() != Reloc::Static) {
+      if (ExtraLoadRequired)
+        OpFlags = X86II::MO_GOTPCREL;
+    } else if (Subtarget->isPICStyleGOT() &&
+               getTargetMachine().getRelocationModel() == Reloc::PIC_) {
+      if (ExtraLoadRequired)
+        OpFlags = X86II::MO_GOT;
+      else
+        OpFlags = X86II::MO_GOTOFF;
+    }
+    
+    Result = DAG.getTargetGlobalAddress(GV, getPointerTy(), 0, OpFlags);
+  }
+  
+  if (Subtarget->isPICStyleRIPRel() &&
+      getTargetMachine().getCodeModel() == CodeModel::Small)
+    Result = DAG.getNode(X86ISD::WrapperRIP, dl, getPointerTy(), Result);
+  else
+    Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
 
   // With PIC, the address is actually $g + Offset.
   if (IsPic && !Subtarget->isPICStyleRIPRel()) {
@@ -4381,12 +4483,14 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
 
 static SDValue
 GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
-           SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg) {
+           SDValue *InFlag, const MVT PtrVT, unsigned ReturnReg,
+           unsigned char OperandFlags) {
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
   DebugLoc dl = GA->getDebugLoc();
   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
                                            GA->getValueType(0),
-                                           GA->getOffset());
+                                           GA->getOffset(),
+                                           OperandFlags);
   if (InFlag) {
     SDValue Ops[] = { Chain,  TGA, *InFlag };
     Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
@@ -4410,14 +4514,15 @@ LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                                  PtrVT), InFlag);
   InFlag = Chain.getValue(1);
 
-  return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX);
+  return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD);
 }
 
 // Lower ISD::GlobalTLSAddress using the "general dynamic" model, 64 bit
 static SDValue
 LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                 const MVT PtrVT) {
-  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX);
+  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT,
+                    X86::RAX, X86II::MO_TLSGD);
 }
 
 // Lower ISD::GlobalTLSAddress using the "initial exec" (for no-pic) or
@@ -4435,11 +4540,26 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
   SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Base,
                                       NULL, 0);
 
+  unsigned char OperandFlags = 0;
+  // Most TLS accesses are not RIP relative, even on x86-64.  One exception is
+  // initialexec.
+  unsigned WrapperKind = X86ISD::Wrapper;
+  if (model == TLSModel::LocalExec) {
+    OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
+  } else if (is64Bit) {
+    assert(model == TLSModel::InitialExec);
+    OperandFlags = X86II::MO_GOTTPOFF;
+    WrapperKind = X86ISD::WrapperRIP;
+  } else {
+    assert(model == TLSModel::InitialExec);
+    OperandFlags = X86II::MO_INDNTPOFF;
+  }
+  
   // emit "addl x@ntpoff,%eax" (local exec) or "addl x@indntpoff,%eax" (initial
   // exec)
   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), GA->getValueType(0),
-                                           GA->getOffset());
-  SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA);
+                                           GA->getOffset(), OperandFlags);
+  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
 
   if (model == TLSModel::InitialExec)
     Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
@@ -4457,72 +4577,33 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
   assert(Subtarget->isTargetELF() &&
          "TLS not implemented for non-ELF targets");
   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
-  GlobalValue *GV = GA->getGlobal();
-  TLSModel::Model model =
-    getTLSModel (GV, getTargetMachine().getRelocationModel());
-  if (Subtarget->is64Bit()) {
-    switch (model) {
-    case TLSModel::GeneralDynamic:
-    case TLSModel::LocalDynamic: // not implemented
+  const GlobalValue *GV = GA->getGlobal();
+  
+  // If GV is an alias then use the aliasee for determining
+  // thread-localness.
+  if (const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV))
+    GV = Alias->resolveAliasedGlobal(false);
+  
+  TLSModel::Model model = getTLSModel(GV,
+                                      getTargetMachine().getRelocationModel());
+  
+  switch (model) {
+  case TLSModel::GeneralDynamic:
+  case TLSModel::LocalDynamic: // not implemented
+    if (Subtarget->is64Bit())
       return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
-
-    case TLSModel::InitialExec:
-    case TLSModel::LocalExec:
-      return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, true);
-    }
-  } else {
-    switch (model) {
-    case TLSModel::GeneralDynamic:
-    case TLSModel::LocalDynamic: // not implemented
-      return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
-
-    case TLSModel::InitialExec:
-    case TLSModel::LocalExec:
-      return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, false);
-    }
+    return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
+    
+  case TLSModel::InitialExec:
+  case TLSModel::LocalExec:
+    return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
+                               Subtarget->is64Bit());
   }
+  
   assert(0 && "Unreachable");
   return SDValue();
 }
 
-SDValue
-X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) {
-  // FIXME there isn't really any debug info here
-  DebugLoc dl = Op.getDebugLoc();
-  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
-  SDValue Result = DAG.getTargetExternalSymbol(Sym, getPointerTy());
-  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
-  // With PIC, the address is actually $g + Offset.
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->isPICStyleRIPRel()) {
-    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
-                         DAG.getNode(X86ISD::GlobalBaseReg,
-                                     DebugLoc::getUnknownLoc(),
-                                     getPointerTy()),
-                         Result);
-  }
-
-  return Result;
-}
-
-SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
-  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
-  // FIXME there isn't really any debug into here
-  DebugLoc dl = JT->getDebugLoc();
-  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy());
-  Result = DAG.getNode(X86ISD::Wrapper, dl, getPointerTy(), Result);
-  // With PIC, the address is actually $g + Offset.
-  if (getTargetMachine().getRelocationModel() == Reloc::PIC_ &&
-      !Subtarget->isPICStyleRIPRel()) {
-    Result = DAG.getNode(ISD::ADD, dl, getPointerTy(),
-                         DAG.getNode(X86ISD::GlobalBaseReg,
-                                     DebugLoc::getUnknownLoc(),
-                                     getPointerTy()),
-                         Result);
-  }
-
-  return Result;
-}
 
 /// LowerShift - Lower SRA_PARTS and friends, which return two i32 values and
 /// take a 2 x i32 value to shift plus a shift amount.
@@ -6779,6 +6860,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::REP_MOVS:           return "X86ISD::REP_MOVS";
   case X86ISD::GlobalBaseReg:      return "X86ISD::GlobalBaseReg";
   case X86ISD::Wrapper:            return "X86ISD::Wrapper";
+  case X86ISD::WrapperRIP:         return "X86ISD::WrapperRIP";
   case X86ISD::PEXTRB:             return "X86ISD::PEXTRB";
   case X86ISD::PEXTRW:             return "X86ISD::PEXTRW";
   case X86ISD::INSERTPS:           return "X86ISD::INSERTPS";
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 063913f..472ba4c 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -45,7 +45,8 @@ def lea64_32mem : Operand<i32> {
 // Complex Pattern Definitions.
 //
 def lea64addr : ComplexPattern<i64, 4, "SelectLEAAddr",
-                        [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper],
+                        [add, mul, X86mul_imm, shl, or, frameindex, X86Wrapper,
+                         X86WrapperRIP],
                         []>;
 
 def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
@@ -1418,6 +1419,9 @@ def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
 def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
           (MOV64ri texternalsym:$dst)>, Requires<[NotSmallCode]>;
 
+// If we have small model and -static mode, it is safe to store global addresses
+// directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
+// should handle this sort of thing.
 def : Pat<(store (i64 (X86Wrapper tconstpool:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, tconstpool:$src)>,
           Requires<[SmallCode, IsStatic]>;
@@ -1431,6 +1435,23 @@ def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
           (MOV64mi32 addr:$dst, texternalsym:$src)>,
           Requires<[SmallCode, IsStatic]>;
 
+// If we have small model and -static mode, it is safe to store global addresses
+// directly as immediates.  FIXME: This is really a hack, the 'imm' predicate
+// should handle this sort of thing.
+def : Pat<(store (i64 (X86WrapperRIP tconstpool:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tconstpool:$src)>,
+          Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86WrapperRIP tjumptable:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tjumptable:$src)>,
+          Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86WrapperRIP tglobaladdr:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, tglobaladdr:$src)>,
+          Requires<[SmallCode, IsStatic]>;
+def : Pat<(store (i64 (X86WrapperRIP texternalsym:$src)), addr:$dst),
+          (MOV64mi32 addr:$dst, texternalsym:$src)>,
+          Requires<[SmallCode, IsStatic]>;
+
+
 // Calls
 // Direct PC relative function call for small code model. 32-bit displacement
 // sign extended to 64-bit.
diff --git a/lib/Target/X86/X86InstrBuilder.h b/lib/Target/X86/X86InstrBuilder.h
index 39504cd..b50dd65 100644
--- a/lib/Target/X86/X86InstrBuilder.h
+++ b/lib/Target/X86/X86InstrBuilder.h
@@ -157,10 +157,10 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
 ///
 inline const MachineInstrBuilder &
 addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
-                         unsigned GlobalBaseReg = 0) {
+                         unsigned GlobalBaseReg, unsigned char OpFlags) {
   //FIXME: factor this
   return MIB.addReg(GlobalBaseReg).addImm(1).addReg(0)
-    .addConstantPoolIndex(CPI).addReg(0);
+    .addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
 }
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 8a9b7c9..21f71ec 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -18,6 +18,7 @@
 #include "X86MachineFunctionInfo.h"
 #include "X86Subtarget.h"
 #include "X86TargetMachine.h"
+#include "llvm/GlobalVariable.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/CodeGen/MachineConstantPool.h"
@@ -28,7 +29,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetAsmInfo.h"
-
 using namespace llvm;
 
 namespace {
@@ -781,6 +781,29 @@ static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
 static inline bool isGVStub(GlobalValue *GV, X86TargetMachine &TM) {
   return TM.getSubtarget<X86Subtarget>().GVRequiresExtraLoad(GV, TM, false);
 }
+
+/// CanRematLoadWithDispOperand - Return true if a load with the specified
+/// operand is a candidate for remat: for this to be true we need to know that
+/// the load will always return the same value, even if moved.
+static bool CanRematLoadWithDispOperand(const MachineOperand &MO,
+                                        X86TargetMachine &TM) {
+  // Loads from constant pool entries can be remat'd.
+  if (MO.isCPI()) return true;
+  
+  // We can remat globals in some cases.
+  if (MO.isGlobal()) {
+    // If this is a load of a stub, not of the global, we can remat it.  This
+    // access will always return the address of the global.
+    if (isGVStub(MO.getGlobal(), TM))
+      return true;
+    
+    // If the global itself is constant, we can remat the load.
+    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal()))
+      if (GV->isConstant())
+        return true;
+  }
+  return false;
+}
  
 bool
 X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
@@ -802,11 +825,9 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
       if (MI->getOperand(1).isReg() &&
           MI->getOperand(2).isImm() &&
           MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
-          (MI->getOperand(4).isCPI() ||
-           (MI->getOperand(4).isGlobal() &&
-            isGVStub(MI->getOperand(4).getGlobal(), TM)))) {
+          CanRematLoadWithDispOperand(MI->getOperand(4), TM)) {
         unsigned BaseReg = MI->getOperand(1).getReg();
-        if (BaseReg == 0)
+        if (BaseReg == 0 || BaseReg == X86::RIP)
           return true;
         // Allow re-materialization of PIC load.
         if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
@@ -3190,9 +3211,8 @@ unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
   bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_);
   bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
   unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
-  if (Desc.getOpcode() == X86::MOVPC32r) {
+  if (Desc.getOpcode() == X86::MOVPC32r)
     Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
-  }
   return Size;
 }
 
@@ -3220,17 +3240,17 @@ unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
   const TargetInstrInfo *TII = TM.getInstrInfo();
   // Operand of MovePCtoStack is completely ignored by asm printer. It's
   // only used in JIT code emission as displacement to pc.
-  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC)
-    .addImm(0);
+  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);
   
   // If we're using vanilla 'GOT' PIC style, we should use relative addressing
-  // not to pc, but to _GLOBAL_ADDRESS_TABLE_ external
+  // not to pc, but to _GLOBAL_OFFSET_TABLE_ external.
   if (TM.getRelocationModel() == Reloc::PIC_ &&
       TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
-    GlobalBaseReg =
-      RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+    GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
+    // Generate addl $_GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register
     BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
-      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_");
+      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0,
+                                    X86II::MO_GOT_ABSOLUTE_ADDRESS);
   } else {
     GlobalBaseReg = PC;
   }
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index e09769e..83f0194 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -71,7 +71,86 @@ namespace X86 {
 namespace X86II {
   enum {
     //===------------------------------------------------------------------===//
-    // Instruction types.  These are the standard/most common forms for X86
+    // X86 Specific MachineOperand flags.
+    
+    MO_NO_FLAG = 0,
+    
+    /// MO_GOT_ABSOLUTE_ADDRESS - On a symbol operand, this represents a
+    /// relocation of:
+    ///    SYMBOL_LABEL + [. - PICBASELABEL]
+    MO_GOT_ABSOLUTE_ADDRESS = 1,
+    
+    /// MO_PIC_BASE_OFFSET - On a symbol operand this indicates that the
+    /// immediate should get the value of the symbol minus the PIC base label:
+    ///    SYMBOL_LABEL - PICBASELABEL
+    MO_PIC_BASE_OFFSET = 2,
+
+    /// MO_GOT - On a symbol operand this indicates that the immediate is the
+    /// offset to the GOT entry for the symbol name from the base of the GOT.
+    ///
+    /// See the X86-64 ELF ABI supplement for more details. 
+    ///    SYMBOL_LABEL @GOT
+    MO_GOT = 3,
+    
+    /// MO_GOTOFF - On a symbol operand this indicates that the immediate is
+    /// the offset to the location of the symbol name from the base of the GOT. 
+    ///
+    /// See the X86-64 ELF ABI supplement for more details. 
+    ///    SYMBOL_LABEL @GOTOFF
+    MO_GOTOFF = 4,
+    
+    /// MO_GOTPCREL - On a symbol operand this indicates that the immediate is
+    /// the offset to the GOT entry for the symbol name from the current code
+    /// location.
+    ///
+    /// See the X86-64 ELF ABI supplement for more details. 
+    ///    SYMBOL_LABEL @GOTPCREL
+    MO_GOTPCREL = 5,
+    
+    /// MO_PLT - On a symbol operand this indicates that the immediate is the
+    /// offset to the PLT entry of symbol name from the current code location.
+    ///
+    /// See the X86-64 ELF ABI supplement for more details. 
+    ///    SYMBOL_LABEL @PLT
+    MO_PLT = 6,
+    
+    /// MO_TLSGD - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @TLSGD
+    MO_TLSGD = 7,
+    
+    /// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @GOTTPOFF
+    MO_GOTTPOFF = 8,
+   
+    /// MO_INDNTPOFF - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @INDNTPOFF
+    MO_INDNTPOFF = 9,
+    
+    /// MO_TPOFF - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @TPOFF
+    MO_TPOFF = 10,
+    
+    /// MO_NTPOFF - On a symbol operand this indicates that the immediate is
+    /// some TLS offset.
+    ///
+    /// See 'ELF Handling for Thread-Local Storage' for more details. 
+    ///    SYMBOL_LABEL @NTPOFF
+    MO_NTPOFF = 11,
+    
+    //===------------------------------------------------------------------===//
+    // Instruction encodings.  These are the standard/most common forms for X86
     // instructions.
     //
 
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 2d8f55f..a6b0880 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -50,9 +50,9 @@ def SDTX86atomicBinary : SDTypeProfile<2, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                 SDTCisPtrTy<2>, SDTCisInt<3>,SDTCisInt<4>]>;
 def SDTX86Ret     : SDTypeProfile<0, -1, [SDTCisVT<0, i16>]>;
 
-def SDT_X86CallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-def SDT_X86CallSeqEnd   : SDCallSeqEnd<[ SDTCisVT<0, i32>,
-                                         SDTCisVT<1, i32> ]>;
+def SDT_X86CallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_X86CallSeqEnd   : SDCallSeqEnd<[SDTCisVT<0, i32>,
+                                        SDTCisVT<1, i32>]>;
 
 def SDT_X86Call   : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;
 
@@ -236,6 +236,10 @@ def HasSSE3      : Predicate<"Subtarget->hasSSE3()">;
 def HasSSSE3     : Predicate<"Subtarget->hasSSSE3()">;
 def HasSSE41     : Predicate<"Subtarget->hasSSE41()">;
 def HasSSE42     : Predicate<"Subtarget->hasSSE42()">;
+def HasSSE4A     : Predicate<"Subtarget->hasSSE4A()">;
+def HasAVX       : Predicate<"Subtarget->hasAVX()">;
+def HasFMA3      : Predicate<"Subtarget->hasFMA3()">;
+def HasFMA4      : Predicate<"Subtarget->hasFMA4()">;
 def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
 def In32BitMode  : Predicate<"!Subtarget->is64Bit()">;
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 43fadc2..b79a006 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -481,11 +481,11 @@ def MMX_PMOVMSKBrr : MMXI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR64:$src),
 
 // Misc.
 let Uses = [EDI] in
-def MMX_MASKMOVQ : MMXI<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
+def MMX_MASKMOVQ : MMXI<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                         "maskmovq\t{$mask, $src|$src, $mask}",
                         [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, EDI)]>;
 let Uses = [RDI] in
-def MMX_MASKMOVQ64: MMXI64<0xF7, MRMDestMem, (outs), (ins VR64:$src, VR64:$mask),
+def MMX_MASKMOVQ64: MMXI64<0xF7, MRMSrcReg, (outs), (ins VR64:$src, VR64:$mask),
                            "maskmovq\t{$mask, $src|$src, $mask}",
                            [(int_x86_mmx_maskmovq VR64:$src, VR64:$mask, RDI)]>;
 
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index d552cb3..996baa0 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -439,7 +439,7 @@ def GR32 : RegisterClass<"X86", [i32], 32,
 
 def GR64 : RegisterClass<"X86", [i64], 64, 
                          [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
-                          RBX, R14, R15, R12, R13, RBP, RSP]> {
+                          RBX, R14, R15, R12, R13, RBP, RSP, RIP]> {
   let SubRegClassList = [GR8, GR8, GR16, GR32];
   let MethodProtos = [{
     iterator allocation_order_end(const MachineFunction &MF) const;
@@ -453,9 +453,9 @@ def GR64 : RegisterClass<"X86", [i64], 64,
       if (!Subtarget.is64Bit())
         return begin();  // None of these are allocatable in 32-bit.
       if (RI->hasFP(MF)) // Does the function dedicate RBP to being a frame ptr?
-        return end()-2;  // If so, don't allocate RSP or RBP
+        return end()-3;  // If so, don't allocate RIP, RSP or RBP
       else
-        return end()-1;  // If not, just don't allocate RSP
+        return end()-2;  // If not, just don't allocate RIP or RSP
     }
   }];
 }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 56983ce..8506fa6 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -207,6 +207,10 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
 
   bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
   bool IsAMD   = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+
+  HasFMA3 = IsIntel && ((ECX >> 12) & 0x1);
+  HasAVX = ((ECX >> 28) & 0x1);
+
   if (IsIntel || IsAMD) {
     // Determine if bit test memory instructions are slow.
     unsigned Family = 0;
@@ -217,6 +221,7 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
     X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
     HasX86_64 = (EDX >> 29) & 0x1;
     HasSSE4A = IsAMD && ((ECX >> 6) & 0x1);
+    HasFMA4 = IsAMD && ((ECX >> 16) & 0x1);
   }
 }
 
@@ -342,6 +347,10 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
   , X86SSELevel(NoMMXSSE)
   , X863DNowLevel(NoThreeDNow)
   , HasX86_64(false)
+  , HasSSE4A(false)
+  , HasAVX(false)
+  , HasFMA3(false)
+  , HasFMA4(false)
   , IsBTMemSlow(false)
   , DarwinVers(0)
   , IsLinux(false)
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 694b0eb..f4f6cce 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -64,12 +64,21 @@ protected:
   ///
   bool HasX86_64;
 
-  /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
-  bool IsBTMemSlow;
-  
   /// HasSSE4A - True if the processor supports SSE4A instructions.
   bool HasSSE4A;
 
+  /// HasAVX - Target has AVX instructions
+  bool HasAVX;
+
+  /// HasFMA3 - Target has 3-operand fused multiply-add
+  bool HasFMA3;
+
+  /// HasFMA4 - Target has 4-operand fused multiply-add
+  bool HasFMA4;
+
+  /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+  bool IsBTMemSlow;
+  
   /// DarwinVers - Nonzero if this is a darwin platform: the numeric
   /// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
   unsigned char DarwinVers; // Is any darwin-x86 platform.
@@ -133,6 +142,9 @@ public:
   bool hasSSE4A() const { return HasSSE4A; }
   bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
   bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+  bool hasAVX() const { return HasAVX; }
+  bool hasFMA3() const { return HasFMA3; }
+  bool hasFMA4() const { return HasFMA4; }
 
   bool isBTMemSlow() const { return IsBTMemSlow; }
 
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 53c46c3..67dcd01 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -36,10 +36,8 @@ X("x86",    "32-bit X86: Pentium-Pro and above");
 static RegisterTarget<X86_64TargetMachine>
 Y("x86-64", "64-bit X86: EM64T and AMD64");
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeX86Target() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeX86Target() { }
 
 // No assembler printer by default
 X86TargetMachine::AsmPrinterCtorFn X86TargetMachine::AsmPrinterCtor = 0;
@@ -222,7 +220,8 @@ bool X86TargetMachine::addAssemblyEmitter(PassManagerBase &PM,
   // On Darwin, override 64-bit static relocation to pic_ since the
   // assembler doesn't support it.
   if (DefRelocModel == Reloc::Static &&
-      Subtarget.isTargetDarwin() && Subtarget.is64Bit())
+      Subtarget.isTargetDarwin() && Subtarget.is64Bit() &&
+      getCodeModel() == CodeModel::Small)
     setRelocationModel(Reloc::PIC_);
 
   assert(AsmPrinterCtor && "AsmPrinter was not linked in");
@@ -319,11 +318,3 @@ bool X86TargetMachine::addSimpleCodeEmitter(PassManagerBase &PM,
   return false;
 }
 
-/// symbolicAddressesAreRIPRel - Return true if symbolic addresses are
-/// RIP-relative on this machine, taking into consideration the relocation
-/// model and subtarget. RIP-relative addresses cannot have a separate
-/// base or index register.
-bool X86TargetMachine::symbolicAddressesAreRIPRel() const {
-  return getRelocationModel() != Reloc::Static &&
-         Subtarget.isPICStyleRIPRel();
-}
diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h
index ecc1d39..ba73ca8 100644
--- a/lib/Target/X86/X86TargetMachine.h
+++ b/lib/Target/X86/X86TargetMachine.h
@@ -91,12 +91,6 @@ public:
   virtual bool addSimpleCodeEmitter(PassManagerBase &PM,
                                     CodeGenOpt::Level OptLevel,
                                     bool DumpAsm, JITCodeEmitter &JCE);
-
-  /// symbolicAddressesAreRIPRel - Return true if symbolic addresses are
-  /// RIP-relative on this machine, taking into consideration the relocation
-  /// model and subtarget. RIP-relative addresses cannot have a separate
-  /// base or index register.
-  bool symbolicAddressesAreRIPRel() const;
 };
 
 /// X86_32TargetMachine - X86 32-bit target machine.
diff --git a/lib/Target/XCore/XCoreAsmPrinter.cpp b/lib/Target/XCore/XCoreAsmPrinter.cpp
index ed4c101..4ab5d75 100644
--- a/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -244,9 +244,6 @@ emitGlobal(const GlobalVariable *GV)
     
     // Mark the end of the global
     O << "\t.cc_bottom " << name << ".data\n";
-  } else {
-    if (GV->hasExternalWeakLinkage())
-      ExtWeakSymbols.insert(GV);
   }
 }
 
@@ -375,12 +372,7 @@ void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
     printBasicBlockLabel(MO.getMBB());
     break;
   case MachineOperand::MO_GlobalAddress:
-    {
-      const GlobalValue *GV = MO.getGlobal();
-      O << Mang->getValueName(GV);
-      if (GV->hasExternalWeakLinkage())
-        ExtWeakSymbols.insert(GV);
-    }
+    O << Mang->getValueName(MO.getGlobal());
     break;
   case MachineOperand::MO_ExternalSymbol:
     O << MO.getSymbolName();
@@ -430,25 +422,8 @@ bool XCoreAsmPrinter::doInitialization(Module &M) {
   bool Result = AsmPrinter::doInitialization(M);
   DW = getAnalysisIfAvailable<DwarfWriter>();
   
-  if (!FileDirective.empty()) {
+  if (!FileDirective.empty())
     emitFileDirective(FileDirective);
-  }
-  
-  // Print out type strings for external functions here
-  for (Module::const_iterator I = M.begin(), E = M.end();
-       I != E; ++I) {
-    if (I->isDeclaration() && !I->isIntrinsic()) {
-      switch (I->getLinkage()) {
-      default:
-        assert(0 && "Unexpected linkage");
-      case Function::ExternalWeakLinkage:
-        ExtWeakSymbols.insert(I);
-        // fallthrough
-      case Function::ExternalLinkage:
-        break;
-      }
-    }
-  }
 
   return Result;
 }
@@ -461,8 +436,5 @@ bool XCoreAsmPrinter::doFinalization(Module &M) {
     emitGlobal(I);
   }
   
-  // Emit final debug information.
-  DW->EndModule();
-
   return AsmPrinter::doFinalization(M);
 }
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index cfd3cd3..09227d9 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -31,10 +31,8 @@ namespace {
   RegisterTarget<XCoreTargetMachine> X("xcore", "XCore");
 }
 
-// Force static initialization when called from llvm/InitializeAllTargets.h
-namespace llvm {
-  void InitializeXCoreTarget() { }
-}
+// Force static initialization.
+extern "C" void LLVMInitializeXCoreTarget() { }
 
 const TargetAsmInfo *XCoreTargetMachine::createTargetAsmInfo() const {
   return new XCoreTargetAsmInfo(*this);
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index ab8fe5f..046e044 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Instructions.h"
 #include "llvm/Module.h"
 #include "llvm/Pass.h"
+#include "llvm/Analysis/DebugInfo.h"
 #include "llvm/ValueSymbolTable.h"
 #include "llvm/TypeSymbolTable.h"
 #include "llvm/Transforms/Utils/Local.h"
@@ -210,7 +211,25 @@ bool StripDebugInfo(Module &M) {
   SmallPtrSet<const GlobalValue*, 8> llvmUsedValues;
   findUsedValues(M, llvmUsedValues);
 
-  // Delete all dbg variables.
+  SmallVector<GlobalVariable *, 2> CUs;
+  SmallVector<GlobalVariable *, 4> GVs;
+  SmallVector<GlobalVariable *, 4> SPs;
+  CollectDebugInfoAnchors(M, CUs, GVs, SPs);
+  // These anchors use LinkOnce linkage so that the optimizer does not
+  // remove them accidentally. Set InternalLinkage for all these debug
+  // info anchors.
+  for (SmallVector<GlobalVariable *, 2>::iterator I = CUs.begin(),
+         E = CUs.end(); I != E; ++I)
+    (*I)->setLinkage(GlobalValue::InternalLinkage);
+  for (SmallVector<GlobalVariable *, 4>::iterator I = GVs.begin(),
+         E = GVs.end(); I != E; ++I)
+    (*I)->setLinkage(GlobalValue::InternalLinkage);
+  for (SmallVector<GlobalVariable *, 4>::iterator I = SPs.begin(),
+         E = SPs.end(); I != E; ++I)
+    (*I)->setLinkage(GlobalValue::InternalLinkage);
+
+
+  // Delete all dbg variables.
   for (Module::global_iterator I = M.global_begin(), E = M.global_end(); 
        I != E; ++I) {
     GlobalVariable *GV = dyn_cast<GlobalVariable>(I);
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index 7a7c48b..8a8f83f 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -31,3 +31,5 @@ add_llvm_library(LLVMScalarOpts
   TailDuplication.cpp
   TailRecursionElimination.cpp
   )
+
+target_link_libraries (LLVMScalarOpts LLVMTransformUtils)
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 6c20e7d..27e377f 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -70,6 +70,7 @@ namespace {
     IVUsers         *IU;
     LoopInfo        *LI;
     ScalarEvolution *SE;
+    DominatorTree   *DT;
     bool Changed;
   public:
 
@@ -101,14 +102,13 @@ namespace {
                                    BasicBlock *ExitingBlock,
                                    BranchInst *BI,
                                    SCEVExpander &Rewriter);
-    void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount);
+    void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount,
+                               SCEVExpander &Rewriter);
 
     void RewriteIVExpressions(Loop *L, const Type *LargestType,
                               SCEVExpander &Rewriter);
 
-    void SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter);
-
-    void FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter);
+    void SinkUnusedInvariants(Loop *L);
 
     void HandleFloatingPointIV(Loop *L, PHINode *PH);
   };
@@ -169,10 +169,10 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
     CmpIndVar = IndVar;
   }
 
-  // Expand the code for the iteration count into the preheader of the loop.
-  BasicBlock *Preheader = L->getLoopPreheader();
-  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(),
-                                          Preheader->getTerminator());
+  // Expand the code for the iteration count.
+  assert(RHS->isLoopInvariant(L) &&
+         "Computed iteration count is not loop invariant!");
+  Value *ExitCnt = Rewriter.expandCodeFor(RHS, IndVar->getType(), BI);
 
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   ICmpInst::Predicate Opcode;
@@ -214,28 +214,13 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
 /// able to brute-force evaluate arbitrary instructions as long as they have
 /// constant operands at the beginning of the loop.
 void IndVarSimplify::RewriteLoopExitValues(Loop *L,
-                                           const SCEV *BackedgeTakenCount) {
+                                           const SCEV *BackedgeTakenCount,
+                                           SCEVExpander &Rewriter) {
   // Verify the input to the pass in already in LCSSA form.
   assert(L->isLCSSAForm());
 
-  BasicBlock *Preheader = L->getLoopPreheader();
-
-  // Scan all of the instructions in the loop, looking at those that have
-  // extra-loop users and which are recurrences.
-  SCEVExpander Rewriter(*SE);
-
-  // We insert the code into the preheader of the loop if the loop contains
-  // multiple exit blocks, or in the exit block if there is exactly one.
-  BasicBlock *BlockToInsertInto;
   SmallVector<BasicBlock*, 8> ExitBlocks;
   L->getUniqueExitBlocks(ExitBlocks);
-  if (ExitBlocks.size() == 1)
-    BlockToInsertInto = ExitBlocks[0];
-  else
-    BlockToInsertInto = Preheader;
-  BasicBlock::iterator InsertPt = BlockToInsertInto->getFirstNonPHI();
-
-  std::map<Instruction*, Value*> ExitValues;
 
   // Find all values that are computed inside the loop, but used outside of it.
   // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
@@ -285,11 +270,7 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
         Changed = true;
         ++NumReplaced;
 
-        // See if we already computed the exit value for the instruction, if so,
-        // just reuse it.
-        Value *&ExitVal = ExitValues[Inst];
-        if (!ExitVal)
-          ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), InsertPt);
+        Value *ExitVal = Rewriter.expandCodeFor(ExitValue, PN->getType(), Inst);
 
         DOUT << "INDVARS: RLEV: AfterLoopVal = " << *ExitVal
              << "  LoopVal = " << *Inst << "\n";
@@ -309,6 +290,15 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
           break;
         }
       }
+      if (ExitBlocks.size() != 1) {
+        // Clone the PHI and delete the original one. This lets IVUsers and
+        // any other maps purge the original user from their records.
+        PHINode *NewPN = PN->clone();
+        NewPN->takeName(PN);
+        NewPN->insertBefore(PN);
+        PN->replaceAllUsesWith(NewPN);
+        PN->eraseFromParent();
+      }
     }
   }
 }
@@ -340,16 +330,19 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   IU = &getAnalysis<IVUsers>();
   LI = &getAnalysis<LoopInfo>();
   SE = &getAnalysis<ScalarEvolution>();
+  DT = &getAnalysis<DominatorTree>();
   Changed = false;
 
   // If there are any floating-point recurrences, attempt to
   // transform them to use integer recurrences.
   RewriteNonIntegerIVs(L);
 
-  BasicBlock *Header       = L->getHeader();
   BasicBlock *ExitingBlock = L->getExitingBlock(); // may be null
   const SCEV* BackedgeTakenCount = SE->getBackedgeTakenCount(L);
 
+  // Create a rewriter object which we'll use to transform the code with.
+  SCEVExpander Rewriter(*SE);
+
   // Check to see if this loop has a computable loop-invariant execution count.
   // If so, this means that we can compute the final value of any expressions
   // that are recurrent in the loop, and substitute the exit values from the
@@ -357,7 +350,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
   // the current expressions.
   //
   if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
-    RewriteLoopExitValues(L, BackedgeTakenCount);
+    RewriteLoopExitValues(L, BackedgeTakenCount, Rewriter);
 
   // Compute the type of the largest recurrence expression, and decide whether
   // a canonical induction variable should be inserted.
@@ -388,9 +381,6 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
       NeedCannIV = true;
   }
 
-  // Create a rewriter object which we'll use to transform the code with.
-  SCEVExpander Rewriter(*SE);
-
   // Now that we know the largest of of the induction variable expressions
   // in this loop, insert a canonical induction variable of the largest size.
   Value *IndVar = 0;
@@ -408,7 +398,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
         OldCannIV = 0;
     }
 
-    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L,LargestType);
+    IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
 
     ++NumInserted;
     Changed = true;
@@ -434,20 +424,14 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
                                           ExitingBlock, BI, Rewriter);
   }
 
-  Rewriter.setInsertionPoint(Header->getFirstNonPHI());
-
   // Rewrite IV-derived expressions. Clears the rewriter cache.
   RewriteIVExpressions(L, LargestType, Rewriter);
 
-  // The Rewriter may only be used for isInsertedInstruction queries from this
-  // point on.
+  // The Rewriter may not be used from this point on.
 
   // Loop-invariant instructions in the preheader that aren't used in the
   // loop may be sunk below the loop to reduce register pressure.
-  SinkUnusedInvariants(L, Rewriter);
-
-  // Reorder instructions to avoid use-before-def conditions.
-  FixUsesBeforeDefs(L, Rewriter);
+  SinkUnusedInvariants(L);
 
   // For completeness, inform IVUsers of the IV use in the newly-created
   // loop exit test instruction.
@@ -488,29 +472,35 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
       // Compute the final addrec to expand into code.
       const SCEV* AR = IU->getReplacementExpr(*UI);
 
-      Value *NewVal = 0;
-      if (AR->isLoopInvariant(L)) {
-        BasicBlock::iterator I = Rewriter.getInsertionPoint();
-        // Expand loop-invariant values in the loop preheader. They will
-        // be sunk to the exit block later, if possible.
-        NewVal =
-          Rewriter.expandCodeFor(AR, UseTy,
-                                 L->getLoopPreheader()->getTerminator());
-        Rewriter.setInsertionPoint(I);
-        ++NumReplaced;
-      } else {
-        // FIXME: It is an extremely bad idea to indvar substitute anything more
-        // complex than affine induction variables.  Doing so will put expensive
-        // polynomial evaluations inside of the loop, and the str reduction pass
-        // currently can only reduce affine polynomials.  For now just disable
-        // indvar subst on anything more complex than an affine addrec, unless
-        // it can be expanded to a trivial value.
-        if (!Stride->isLoopInvariant(L))
-          continue;
-
-        // Now expand it into actual Instructions and patch it into place.
-        NewVal = Rewriter.expandCodeFor(AR, UseTy);
-      }
+      // FIXME: It is an extremely bad idea to indvar substitute anything more
+      // complex than affine induction variables.  Doing so will put expensive
+      // polynomial evaluations inside of the loop, and the str reduction pass
+      // currently can only reduce affine polynomials.  For now just disable
+      // indvar subst on anything more complex than an affine addrec, unless
+      // it can be expanded to a trivial value.
+      if (!AR->isLoopInvariant(L) && !Stride->isLoopInvariant(L))
+        continue;
+
+      // Determine the insertion point for this user. By default, insert
+      // immediately before the user. The SCEVExpander class will automatically
+      // hoist loop invariants out of the loop. For PHI nodes, there may be
+      // multiple uses, so compute the nearest common dominator for the
+      // incoming blocks.
+      Instruction *InsertPt = User;
+      if (PHINode *PHI = dyn_cast<PHINode>(InsertPt))
+        for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i)
+          if (PHI->getIncomingValue(i) == Op) {
+            if (InsertPt == User)
+              InsertPt = PHI->getIncomingBlock(i)->getTerminator();
+            else
+              InsertPt =
+                DT->findNearestCommonDominator(InsertPt->getParent(),
+                                               PHI->getIncomingBlock(i))
+                      ->getTerminator();
+          }
+
+      // Now expand it into actual Instructions and patch it into place.
+      Value *NewVal = Rewriter.expandCodeFor(AR, UseTy, InsertPt);
 
       // Patch the new value into place.
       if (Op->hasName())
@@ -543,19 +533,20 @@ void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
 /// If there's a single exit block, sink any loop-invariant values that
 /// were defined in the preheader but not used inside the loop into the
 /// exit block to reduce register pressure in the loop.
-void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
+void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
   BasicBlock *ExitBlock = L->getExitBlock();
   if (!ExitBlock) return;
 
-  Instruction *NonPHI = ExitBlock->getFirstNonPHI();
+  Instruction *InsertPt = ExitBlock->getFirstNonPHI();
   BasicBlock *Preheader = L->getLoopPreheader();
   BasicBlock::iterator I = Preheader->getTerminator();
   while (I != Preheader->begin()) {
     --I;
-    // New instructions were inserted at the end of the preheader. Only
-    // consider those new instructions.
-    if (!Rewriter.isInsertedInstruction(I))
+    // New instructions were inserted at the end of the preheader.
+    if (isa<PHINode>(I))
       break;
+    if (I->isTrapping())
+      continue;
     // Determine if there is a use in or before the loop (direct or
     // otherwise).
     bool UsedInLoop = false;
@@ -582,75 +573,13 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L, SCEVExpander &Rewriter) {
       --I;
     else
       Done = true;
-    ToMove->moveBefore(NonPHI);
+    ToMove->moveBefore(InsertPt);
     if (Done)
       break;
+    InsertPt = ToMove;
   }
 }
 
-/// Re-schedule the inserted instructions to put defs before uses. This
-/// fixes problems that arrise when SCEV expressions contain loop-variant
-/// values unrelated to the induction variable which are defined inside the
-/// loop. FIXME: It would be better to insert instructions in the right
-/// place so that this step isn't needed.
-void IndVarSimplify::FixUsesBeforeDefs(Loop *L, SCEVExpander &Rewriter) {
-  // Visit all the blocks in the loop in pre-order dom-tree dfs order.
-  DominatorTree *DT = &getAnalysis<DominatorTree>();
-  std::map<Instruction *, unsigned> NumPredsLeft;
-  SmallVector<DomTreeNode *, 16> Worklist;
-  Worklist.push_back(DT->getNode(L->getHeader()));
-  do {
-    DomTreeNode *Node = Worklist.pop_back_val();
-    for (DomTreeNode::iterator I = Node->begin(), E = Node->end(); I != E; ++I)
-      if (L->contains((*I)->getBlock()))
-        Worklist.push_back(*I);
-    BasicBlock *BB = Node->getBlock();
-    // Visit all the instructions in the block top down.
-    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
-      // Count the number of operands that aren't properly dominating.
-      unsigned NumPreds = 0;
-      if (Rewriter.isInsertedInstruction(I) && !isa<PHINode>(I))
-        for (User::op_iterator OI = I->op_begin(), OE = I->op_end();
-             OI != OE; ++OI)
-          if (Instruction *Inst = dyn_cast<Instruction>(OI))
-            if (L->contains(Inst->getParent()) && !NumPredsLeft.count(Inst))
-              ++NumPreds;
-      NumPredsLeft[I] = NumPreds;
-      // Notify uses of the position of this instruction, and move the
-      // users (and their dependents, recursively) into place after this
-      // instruction if it is their last outstanding operand.
-      for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
-           UI != UE; ++UI) {
-        Instruction *Inst = cast<Instruction>(UI);
-        std::map<Instruction *, unsigned>::iterator Z = NumPredsLeft.find(Inst);
-        if (Z != NumPredsLeft.end() && Z->second != 0 && --Z->second == 0) {
-          SmallVector<Instruction *, 4> UseWorkList;
-          UseWorkList.push_back(Inst);
-          BasicBlock::iterator InsertPt = I;
-          if (InvokeInst *II = dyn_cast<InvokeInst>(InsertPt))
-            InsertPt = II->getNormalDest()->begin();
-          else
-            ++InsertPt;
-          while (isa<PHINode>(InsertPt)) ++InsertPt;
-          do {
-            Instruction *Use = UseWorkList.pop_back_val();
-            Use->moveBefore(InsertPt);
-            NumPredsLeft.erase(Use);
-            for (Value::use_iterator IUI = Use->use_begin(),
-                 IUE = Use->use_end(); IUI != IUE; ++IUI) {
-              Instruction *IUIInst = cast<Instruction>(IUI);
-              if (L->contains(IUIInst->getParent()) &&
-                  Rewriter.isInsertedInstruction(IUIInst) &&
-                  !isa<PHINode>(IUIInst))
-                UseWorkList.push_back(IUIInst);
-            }
-          } while (!UseWorkList.empty());
-        }
-      }
-    }
-  } while (!Worklist.empty());
-}
-
 /// Return true if it is OK to use SIToFPInst for an inducation variable
 /// with given inital and exit values.
 static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index a088230..7a24b35 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -108,7 +108,7 @@ static RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops");
 Pass *llvm::createLoopRotatePass() { return new LoopRotate(); }
 
 /// Rotate Loop L as many times as possible. Return true if
-/// loop is rotated at least once.
+/// the loop is rotated at least once.
 bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
 
   bool RotatedOneLoop = false;
@@ -132,15 +132,15 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
   OrigPreHeader = L->getLoopPreheader();
   OrigLatch = L->getLoopLatch();
 
-  // If loop has only one block then there is not much to rotate.
+  // If the loop has only one block then there is not much to rotate.
   if (L->getBlocks().size() == 1)
     return false;
 
   assert(OrigHeader && OrigLatch && OrigPreHeader &&
          "Loop is not in canonical form");
 
-  // If loop header is not one of the loop exit block then
-  // either this loop is already rotated or it is not 
+  // If the loop header is not one of the loop exiting blocks then
+  // either this loop is already rotated or it is not
   // suitable for loop rotation transformations.
   if (!L->isLoopExit(OrigHeader))
     return false;
@@ -189,19 +189,19 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
   assert(L->contains(NewHeader) && !L->contains(Exit) && 
          "Unable to determine loop header and exit blocks");
   
-  // This code assumes that new header has exactly one predecessor.  Remove any
-  // single entry PHI nodes in it.
+  // This code assumes that the new header has exactly one predecessor.
+  // Remove any single-entry PHI nodes in it.
   assert(NewHeader->getSinglePredecessor() &&
          "New header doesn't have one pred!");
   FoldSingleEntryPHINodes(NewHeader);
 
-  // Copy PHI nodes and other instructions from original header
-  // into original pre-header. Unlike original header, original pre-header is
-  // not a member of loop. 
+  // Copy PHI nodes and other instructions from the original header
+  // into the original pre-header. Unlike the original header, the original
+  // pre-header is not a member of the loop.
   //
-  // New loop header is one and only successor of original header that 
+  // The new loop header is the one and only successor of original header that
   // is inside the loop. All other original header successors are outside 
-  // the loop. Copy PHI Nodes from original header into new loop header. 
+  // the loop. Copy PHI Nodes from the original header into the new loop header.
   // Add second incoming value, from original loop pre-header into these phi 
   // nodes. If a value defined in original header is used outside original 
   // header then new loop header will need new phi nodes with two incoming 
@@ -218,8 +218,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
     // are directly propagated.
     Value *NPV = PN->getIncomingValueForBlock(OrigPreHeader);
 
-    // Create new PHI node with two incoming values for NewHeader.
-    // One incoming value is from OrigLatch (through OrigHeader) and 
+    // Create a new PHI node with two incoming values for NewHeader.
+    // One incoming value is from OrigLatch (through OrigHeader) and the
     // second incoming value is from original pre-header.
     PHINode *NH = PHINode::Create(PN->getType(), PN->getName(),
                                   NewHeader->begin());
@@ -334,8 +334,8 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
         // Add second incoming argument from new Pre header.
         UPhi->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
       } else {
-        // Used outside Exit block. Create a new PHI node from exit block
-        // to receive value from ne new header ane pre header.
+        // Used outside Exit block. Create a new PHI node in the exit block
+        // to receive the value from the new header and pre-header.
         PHINode *PN = PHINode::Create(U->getType(), U->getName(),
                                       Exit->begin());
         PN->addIncoming(ILoopHeaderInfo.PreHeader, OrigPreHeader);
@@ -367,16 +367,13 @@ bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
 }
 
 /// Make sure all Exit block PHINodes have required incoming values.
-/// If incoming value is constant or defined outside the loop then
-/// PHINode may not have an entry for original pre-header. 
+/// If an incoming value is constant or defined outside the loop then
+/// PHINode may not have an entry for the original pre-header.
 void LoopRotate::updateExitBlock() {
 
-  for (BasicBlock::iterator I = Exit->begin(), E = Exit->end();
-       I != E; ++I) {
-
-    PHINode *PN = dyn_cast<PHINode>(I);
-    if (!PN)
-      break;
+  PHINode *PN;
+  for (BasicBlock::iterator I = Exit->begin();
+       (PN = dyn_cast<PHINode>(I)); ++I) {
 
     // There is already one incoming value from original pre-header block.
     if (PN->getBasicBlockIndex(OrigPreHeader) != -1)
@@ -384,7 +381,7 @@ void LoopRotate::updateExitBlock() {
 
     const RenameData *ILoopHeaderInfo;
     Value *V = PN->getIncomingValueForBlock(OrigHeader);
-    if (isa<Instruction>(V) && 
+    if (isa<Instruction>(V) &&
         (ILoopHeaderInfo = findReplacementData(cast<Instruction>(V)))) {
       assert(ILoopHeaderInfo->PreHeader && "Missing New Preheader Instruction");
       PN->addIncoming(ILoopHeaderInfo->PreHeader, OrigPreHeader);
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index ba60058..a877c4e 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -409,16 +409,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV* const &NewBase,
 
   const SCEV* NewValSCEV = SE->getUnknown(Base);
 
-  // If there is no immediate value, skip the next part.
-  if (!Imm->isZero()) {
-    // If we are inserting the base and imm values in the same block, make sure
-    // to adjust the IP position if insertion reused a result.
-    if (IP == BaseInsertPt)
-      IP = Rewriter.getInsertionPoint();
-
-    // Always emit the immediate (if non-zero) into the same block as the user.
-    NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
-  }
+  // Always emit the immediate into the same block as the user.
+  NewValSCEV = SE->getAddExpr(NewValSCEV, Imm);
 
   return Rewriter.expandCodeFor(NewValSCEV, Ty, IP);
 }
@@ -1642,7 +1634,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV* const &Stride,
       // the preheader, instead of being forward substituted into the uses.  We
       // do this by forcing a BitCast (noop cast) to be inserted into the
       // preheader in this case.
-      if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false)) {
+      if (!fitsInAddressMode(Base, getAccessType(Inst), TLI, false) &&
+          !isa<Instruction>(BaseV)) {
         // We want this constant emitted into the preheader! This is just
         // using cast as a copy so BitCast (no-op cast) is appropriate
         BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
diff --git a/lib/Transforms/Scalar/PredicateSimplifier.cpp b/lib/Transforms/Scalar/PredicateSimplifier.cpp
index b9b5688..a3cb751 100644
--- a/lib/Transforms/Scalar/PredicateSimplifier.cpp
+++ b/lib/Transforms/Scalar/PredicateSimplifier.cpp
@@ -110,6 +110,8 @@ STATISTIC(NumSimple      , "Number of simple replacements");
 STATISTIC(NumBlocks      , "Number of blocks marked unreachable");
 STATISTIC(NumSnuggle     , "Number of comparisons snuggled");
 
+static const ConstantRange empty(1, false);
+
 namespace {
   class DomTreeDFS {
   public:
@@ -939,7 +941,6 @@ namespace {
       const_iterator end()   const { return RangeList.end(); }
 
       iterator find(DomTreeDFS::Node *Subtree) {
-        static ConstantRange empty(1, false);
         iterator E = end();
         iterator I = std::lower_bound(begin(), E,
                                       std::make_pair(Subtree, empty), swo);
@@ -949,7 +950,6 @@ namespace {
       }
 
       const_iterator find(DomTreeDFS::Node *Subtree) const {
-        static const ConstantRange empty(1, false);
         const_iterator E = end();
         const_iterator I = std::lower_bound(begin(), E,
                                             std::make_pair(Subtree, empty), swo);
@@ -962,7 +962,6 @@ namespace {
         assert(!CR.isEmptySet() && "Empty ConstantRange.");
         assert(!CR.isSingleElement() && "Refusing to store single element.");
 
-        static ConstantRange empty(1, false);
         iterator E = end();
         iterator I =
             std::lower_bound(begin(), E, std::make_pair(Subtree, empty), swo);
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 6628b4b..d68bf02 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -25,3 +25,5 @@ add_llvm_library(LLVMTransformUtils
   ValueMapper.cpp
   InstructionNamer.cpp
   )
+
+target_link_libraries (LLVMTransformUtils LLVMSupport)
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 7d4f3a3..d5e7303 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -149,7 +149,16 @@ void LCSSA::ProcessInstruction(Instruction *Instr,
   // Keep track of the blocks that have the value available already.
   DenseMap<DomTreeNode*, Value*> Phis;
 
-  DomTreeNode *InstrNode = DT->getNode(Instr->getParent());
+  BasicBlock *DomBB = Instr->getParent();
+
+  // Invoke instructions are special in that their result value is not available
+  // along their unwind edge. The code below tests whether DomBB dominates
+  // each exit block, so adjust DomBB to the normal destination block, which is
+  // effectively where the value is first usable.
+  if (InvokeInst *Inv = dyn_cast<InvokeInst>(Instr))
+    DomBB = Inv->getNormalDest();
+
+  DomTreeNode *DomNode = DT->getNode(DomBB);
 
   // Insert the LCSSA phi's into the exit blocks (dominated by the value), and
   // add them to the Phi's map.
@@ -158,7 +167,7 @@ void LCSSA::ProcessInstruction(Instruction *Instr,
     BasicBlock *BB = *BBI;
     DomTreeNode *ExitBBNode = DT->getNode(BB);
     Value *&Phi = Phis[ExitBBNode];
-    if (!Phi && DT->dominates(InstrNode, ExitBBNode)) {
+    if (!Phi && DT->dominates(DomNode, ExitBBNode)) {
       PHINode *PN = PHINode::Create(Instr->getType(), Instr->getName()+".lcssa",
                                     BB->begin());
       PN->reserveOperandSpace(PredCache.GetNumPreds(BB));
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index c7fff54..8c08638 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -340,44 +340,3 @@ bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I,
   return true;
 }
 
-/// UserIsDebugInfo - Return true if U is a constant expr used by 
-/// llvm.dbg.variable or llvm.dbg.global_variable
-bool llvm::UserIsDebugInfo(User *U) {
-  ConstantExpr *CE = dyn_cast<ConstantExpr>(U);
-
-  if (!CE || CE->getNumUses() != 1)
-    return false;
-
-  Constant *Init = dyn_cast<Constant>(CE->use_back());
-  if (!Init || Init->getNumUses() != 1)
-    return false;
-
-  GlobalVariable *GV = dyn_cast<GlobalVariable>(Init->use_back());
-  if (!GV || !GV->hasInitializer() || GV->getInitializer() != Init)
-    return false;
-
-  DIVariable DV(GV);
-  if (!DV.isNull()) 
-    return true; // User is llvm.dbg.variable
-
-  DIGlobalVariable DGV(GV);
-  if (!DGV.isNull())
-    return true; // User is llvm.dbg.global_variable
-
-  return false;
-}
-
-/// RemoveDbgInfoUser - Remove an User which is representing debug info.
-void llvm::RemoveDbgInfoUser(User *U) {
-  assert (UserIsDebugInfo(U) && "Unexpected User!");
-  ConstantExpr *CE = cast<ConstantExpr>(U);
-  while (!CE->use_empty()) {
-    Constant *C = cast<Constant>(CE->use_back());
-    while (!C->use_empty()) {
-      GlobalVariable *GV = cast<GlobalVariable>(C->use_back());
-      GV->eraseFromParent();
-    }
-    C->destroyConstant();
-  }
-  CE->destroyConstant();
-}
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index c164a3b..a9e4e78 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -128,8 +128,8 @@ bool Constant::ContainsRelocations(unsigned Kind) const {
 }
 
 // Static constructor to create a '0' constant of arbitrary type...
+static const uint64_t zero[2] = {0, 0};
 Constant *Constant::getNullValue(const Type *Ty) {
-  static uint64_t zero[2] = {0, 0};
   switch (Ty->getTypeID()) {
   case Type::IntegerTyID:
     return ConstantInt::get(Ty, 0);
@@ -1803,6 +1803,17 @@ MDString *MDString::get(const char *StrBegin, const char *StrEnd) {
   return S;
 }
 
+MDString *MDString::get(const std::string &Str) {
+  sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
+  StringMapEntry<MDString *> &Entry = MDStringCache->GetOrCreateValue(
+                                        Str.data(), Str.data() + Str.size());
+  MDString *&S = Entry.getValue();
+  if (!S) S = new MDString(Entry.getKeyData(),
+                           Entry.getKeyData() + Entry.getKeyLength());
+
+  return S;
+}
+
 void MDString::destroyConstant() {
   sys::SmartScopedWriter<true> Writer(&*ConstantsLock);
   MDStringCache->erase(MDStringCache->find(StrBegin, StrEnd));
diff --git a/lib/VMCore/Mangler.cpp b/lib/VMCore/Mangler.cpp
index 6be06d22..1a68b89 100644
--- a/lib/VMCore/Mangler.cpp
+++ b/lib/VMCore/Mangler.cpp
@@ -165,10 +165,10 @@ std::string Mangler::getValueName(const GlobalValue *GV, const char * Suffix) {
   } else if (!GV->hasName()) {
     // Must mangle the global into a unique ID.
     unsigned TypeUniqueID = getTypeID(GV->getType());
-    static int32_t GlobalID = 0;
+    static uint32_t GlobalID = 0;
     
-    int32_t OldID = GlobalID;
-    sys::AtomicIncrement32(&GlobalID);
+    unsigned OldID = GlobalID;
+    sys::AtomicIncrement(&GlobalID);
     
     Name = "__unnamed_" + utostr(TypeUniqueID) + "_" + utostr(OldID);
   } else {
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index e943e31..b037994 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -20,6 +20,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/System/Atomic.h"
+#include "llvm/System/Mutex.h"
 #include "llvm/System/Threading.h"
 #include <algorithm>
 #include <map>
@@ -187,6 +188,7 @@ public:
 }
 
 static std::vector<PassRegistrationListener*> *Listeners = 0;
+static sys::SmartMutex<true> ListenersLock;
 
 // FIXME: This should use ManagedStatic to manage the pass registrar.
 // Unfortunately, we can't do this, because passes are registered with static
@@ -231,6 +233,7 @@ void PassInfo::registerPass() {
   getPassRegistrar()->RegisterPass(*this);
 
   // Notify any listeners.
+  sys::SmartScopedLock<true> Lock(&ListenersLock);
   if (Listeners)
     for (std::vector<PassRegistrationListener*>::iterator
            I = Listeners->begin(), E = Listeners->end(); I != E; ++I)
@@ -283,12 +286,14 @@ RegisterAGBase::RegisterAGBase(const char *Name, intptr_t InterfaceID,
 // PassRegistrationListener ctor - Add the current object to the list of
 // PassRegistrationListeners...
 PassRegistrationListener::PassRegistrationListener() {
+  sys::SmartScopedLock<true> Lock(&ListenersLock);
   if (!Listeners) Listeners = new std::vector<PassRegistrationListener*>();
   Listeners->push_back(this);
 }
 
 // dtor - Remove object from list of listeners...
 PassRegistrationListener::~PassRegistrationListener() {
+  sys::SmartScopedLock<true> Lock(&ListenersLock);
   std::vector<PassRegistrationListener*>::iterator I =
     std::find(Listeners->begin(), Listeners->end(), this);
   assert(Listeners && I != Listeners->end() &&
diff --git a/projects/sample/lib/sample/Makefile b/projects/sample/lib/sample/Makefile
index 05bca61..af63399 100644
--- a/projects/sample/lib/sample/Makefile
+++ b/projects/sample/lib/sample/Makefile
@@ -9,8 +9,6 @@ LEVEL=../..
 # Give the name of a library.  This will build a dynamic version.
 #
 LIBRARYNAME=sample
-DONT_BUILD_RELINKED=1
-BUILD_ARCHIVE=1
 
 #
 # Include Makefile.common so we know what to do.
diff --git a/test/Analysis/ScalarEvolution/trip-count6.ll b/test/Analysis/ScalarEvolution/trip-count6.ll
new file mode 100644
index 0000000..a667409
--- /dev/null
+++ b/test/Analysis/ScalarEvolution/trip-count6.ll
@@ -0,0 +1,37 @@
+; RUN: llvm-as < %s | opt -analyze -disable-output -scalar-evolution \
+; RUN:  | grep {max backedge-taken count is 1\$}
+
+@mode_table = global [4 x i32] zeroinitializer          ; <[4 x i32]*> [#uses=1]
+
+define i8 @f() {
+entry:
+  tail call i32 @fegetround( )          ; <i32>:0 [#uses=1]
+  br label %bb
+
+bb:             ; preds = %bb4, %entry
+  %mode.0 = phi i8 [ 0, %entry ], [ %indvar.next, %bb4 ]                ; <i8> [#uses=4]
+  zext i8 %mode.0 to i32                ; <i32>:1 [#uses=1]
+  getelementptr [4 x i32]* @mode_table, i32 0, i32 %1           ; <i32*>:2 [#uses=1]
+  load i32* %2, align 4         ; <i32>:3 [#uses=1]
+  icmp eq i32 %3, %0            ; <i1>:4 [#uses=1]
+  br i1 %4, label %bb1, label %bb2
+
+bb1:            ; preds = %bb
+  ret i8 %mode.0
+
+bb2:            ; preds = %bb
+  icmp eq i8 %mode.0, 1         ; <i1>:5 [#uses=1]
+  br i1 %5, label %bb5, label %bb4
+
+bb4:            ; preds = %bb2
+  %indvar.next = add i8 %mode.0, 1              ; <i8> [#uses=1]
+  br label %bb
+
+bb5:            ; preds = %bb2
+  tail call void @raise_exception( ) noreturn 
+  unreachable
+}
+
+declare i32 @fegetround()
+
+declare void @raise_exception() noreturn 
diff --git a/test/CodeGen/ARM/bic.ll b/test/CodeGen/ARM/bic.ll
new file mode 100644
index 0000000..b4ea433
--- /dev/null
+++ b/test/CodeGen/ARM/bic.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=arm | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = xor i32 %b, 4294967295
+    %tmp1 = and i32 %a, %tmp
+    ret i32 %tmp1
+}
+
+define i32 @f2(i32 %a, i32 %b) {
+    %tmp = xor i32 %b, 4294967295
+    %tmp1 = and i32 %tmp, %a
+    ret i32 %tmp1
+}
diff --git a/test/CodeGen/ARM/carry.ll b/test/CodeGen/ARM/carry.ll
new file mode 100644
index 0000000..3bf2dc0
--- /dev/null
+++ b/test/CodeGen/ARM/carry.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as < %s | llc -march=arm | grep "subs r" | count 2
+; RUN: llvm-as < %s | llc -march=arm | grep "adc r"
+; RUN: llvm-as < %s | llc -march=arm | grep "sbc r"  | count 2
+
+define i64 @f1(i64 %a, i64 %b) {
+entry:
+	%tmp = sub i64 %a, %b
+	ret i64 %tmp
+}
+
+define i64 @f2(i64 %a, i64 %b) {
+entry:
+        %tmp1 = shl i64 %a, 1
+	%tmp2 = sub i64 %tmp1, %b
+	ret i64 %tmp2
+}
diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll
index 602fd9b..e0cd4e1 100644
--- a/test/CodeGen/ARM/dyn-stackalloc.ll
+++ b/test/CodeGen/ARM/dyn-stackalloc.ll
@@ -1,8 +1,4 @@
 ; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=thumb | not grep {ldr sp}
-; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin | \
-; RUN:   not grep {sub.*r7}
-; RUN: llvm-as < %s | llc -march=thumb | grep 4294967280
 
 	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
 	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
diff --git a/test/CodeGen/ARM/fpconv.ll b/test/CodeGen/ARM/fpconv.ll
index 2385007..218b25f 100644
--- a/test/CodeGen/ARM/fpconv.ll
+++ b/test/CodeGen/ARM/fpconv.ll
@@ -20,7 +20,6 @@
 ; RUN: grep floatsidf %t
 ; RUN: grep floatunsisf %t
 ; RUN: grep floatunsidf %t
-; RUN: llvm-as < %s | llc -march=thumb
 
 define float @f1(double %x) {
 entry:
diff --git a/test/CodeGen/ARM/fpow.ll b/test/CodeGen/ARM/fpow.ll
index 155763c..461a2c9 100644
--- a/test/CodeGen/ARM/fpow.ll
+++ b/test/CodeGen/ARM/fpow.ll
@@ -1,5 +1,4 @@
 ; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=thumb
 
 define double @t(double %x, double %y) nounwind optsize {
 entry:
diff --git a/test/CodeGen/ARM/frame_thumb.ll b/test/CodeGen/ARM/frame_thumb.ll
deleted file mode 100644
index fe82db9..0000000
--- a/test/CodeGen/ARM/frame_thumb.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; RUN: llvm-as < %s | llc -march=thumb -mtriple=arm-apple-darwin \
-; RUN:     -disable-fp-elim | not grep {r11}
-; RUN: llvm-as < %s | llc -march=thumb -mtriple=arm-linux-gnueabi \
-; RUN:     -disable-fp-elim | not grep {r11}
-
-define i32 @f() {
-entry:
-	ret i32 10
-}
diff --git a/test/CodeGen/ARM/iabs.ll b/test/CodeGen/ARM/iabs.ll
index f10591f..ede6d74 100644
--- a/test/CodeGen/ARM/iabs.ll
+++ b/test/CodeGen/ARM/iabs.ll
@@ -1,17 +1,10 @@
 ; RUN: llvm-as < %s | llc -march=arm -stats |& \
 ; RUN:   grep {3 .*Number of machine instrs printed}
-; RUN: llvm-as < %s | llc -march=thumb -stats |& \
-; RUN:   grep {4 .*Number of machine instrs printed}
 
 ;; Integer absolute value, should produce something as good as: ARM:
 ;;   add r3, r0, r0, asr #31
 ;;   eor r0, r3, r0, asr #31
 ;;   bx lr
-;; Thumb:
-;;   asr r2, r0, #31
-;;   add r0, r0, r2
-;;   eor r0, r2
-;;   bx lr
 
 define i32 @test(i32 %a) {
         %tmp1neg = sub i32 0, %a
diff --git a/test/CodeGen/ARM/ispositive.ll b/test/CodeGen/ARM/ispositive.ll
index 8dcac30..7e8eb42 100644
--- a/test/CodeGen/ARM/ispositive.ll
+++ b/test/CodeGen/ARM/ispositive.ll
@@ -1,5 +1,4 @@
 ; RUN: llvm-as < %s | llc -march=arm | grep {mov r0, r0, lsr #31}
-; RUN: llvm-as < %s | llc -march=thumb | grep {lsr r0, r0, #31}
 
 define i32 @test1(i32 %X) {
 entry:
diff --git a/test/CodeGen/ARM/large-stack.ll b/test/CodeGen/ARM/large-stack.ll
index 42d7d09..b1738a4 100644
--- a/test/CodeGen/ARM/large-stack.ll
+++ b/test/CodeGen/ARM/large-stack.ll
@@ -1,5 +1,4 @@
 ; RUN: llvm-as < %s | llc -march=arm
-; RUN: llvm-as < %s | llc -march=thumb | grep {ldr.*LCP} | count 5
 
 define void @test1() {
     %tmp = alloca [ 64 x i32 ] , align 4
diff --git a/test/CodeGen/ARM/ldr_ext.ll b/test/CodeGen/ARM/ldr_ext.ll
index edb70d5..b99c721 100644
--- a/test/CodeGen/ARM/ldr_ext.ll
+++ b/test/CodeGen/ARM/ldr_ext.ll
@@ -2,10 +2,6 @@
 ; RUN: llvm-as < %s | llc -march=arm | grep ldrh | count 1
 ; RUN: llvm-as < %s | llc -march=arm | grep ldrsb | count 1
 ; RUN: llvm-as < %s | llc -march=arm | grep ldrsh | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrb | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrh | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrsb | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep ldrsh | count 1
 
 define i32 @test1(i8* %v.pntr.s0.u1) {
     %tmp.u = load i8* %v.pntr.s0.u1
diff --git a/test/CodeGen/ARM/ldr_frame.ll b/test/CodeGen/ARM/ldr_frame.ll
index 56acc90..4431506 100644
--- a/test/CodeGen/ARM/ldr_frame.ll
+++ b/test/CodeGen/ARM/ldr_frame.ll
@@ -1,5 +1,4 @@
 ; RUN: llvm-as < %s | llc -march=arm | not grep mov
-; RUN: llvm-as < %s | llc -march=thumb | grep cpy | count 2
 
 define i32 @f1() {
 	%buf = alloca [32 x i32], align 4
diff --git a/test/CodeGen/ARM/long-setcc.ll b/test/CodeGen/ARM/long-setcc.ll
index 12af8b8..4bab330 100644
--- a/test/CodeGen/ARM/long-setcc.ll
+++ b/test/CodeGen/ARM/long-setcc.ll
@@ -1,5 +1,4 @@
 ; RUN: llvm-as < %s | llc -march=arm | grep cmp | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep cmp | count 1
 
 
 define i1 @t1(i64 %x) {
diff --git a/test/CodeGen/ARM/long.ll b/test/CodeGen/ARM/long.ll
index c7bb386..fe0ee54 100644
--- a/test/CodeGen/ARM/long.ll
+++ b/test/CodeGen/ARM/long.ll
@@ -9,13 +9,6 @@
 ; RUN:   grep smull | count 1
 ; RUN: llvm-as < %s | llc -march=arm | \
 ; RUN:   grep umull | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
-; RUN:   grep mvn | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
-; RUN:   grep adc | count 1
-; RUN: llvm-as < %s | llc -march=thumb | \
-; RUN:   grep sbc | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep __muldi3
 
 define i64 @f1() {
 entry:
diff --git a/test/CodeGen/ARM/long_shift.ll b/test/CodeGen/ARM/long_shift.ll
index 8d5d2f3..55d0cdc 100644
--- a/test/CodeGen/ARM/long_shift.ll
+++ b/test/CodeGen/ARM/long_shift.ll
@@ -1,4 +1,3 @@
-; RUN: llvm-as < %s | llc -march=thumb
 ; RUN: llvm-as < %s | llc -march=arm > %t
 ; RUN: grep rrx %t | count 1
 ; RUN: grep __ashldi3 %t
diff --git a/test/CodeGen/ARM/mul.ll b/test/CodeGen/ARM/mul.ll
index f4f0a04..3543b5d 100644
--- a/test/CodeGen/ARM/mul.ll
+++ b/test/CodeGen/ARM/mul.ll
@@ -1,7 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=arm | grep mul | count 2
 ; RUN: llvm-as < %s | llc -march=arm | grep lsl | count 2
-; RUN: llvm-as < %s | llc -march=thumb | grep mul | count 3
-; RUN: llvm-as < %s | llc -march=thumb | grep lsl | count 1
 
 define i32 @f1(i32 %u) {
     %tmp = mul i32 %u, %u
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index ba29c30..5148a5b 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -6,13 +6,6 @@
 ; RUN: llvm-as < %s | llc -march=arm | grep movhi | count 1
 ; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 | \
 ; RUN:   grep fcpydmi | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bgt | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep blt | count 3
-; RUN: llvm-as < %s | llc -march=thumb | grep ble | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bls | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bhi | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep __ltdf2
 
 define i32 @f1(i32 %a.s) {
 entry:
diff --git a/test/CodeGen/ARM/stack-frame.ll b/test/CodeGen/ARM/stack-frame.ll
index 73ae11b..c3dd65a 100644
--- a/test/CodeGen/ARM/stack-frame.ll
+++ b/test/CodeGen/ARM/stack-frame.ll
@@ -1,7 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=arm
 ; RUN: llvm-as < %s | llc -march=arm | grep add | count 1
-; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -march=thumb | grep add | count 1
 
 define void @f1() {
 	%c = alloca i8, align 1
diff --git a/test/CodeGen/ARM/tst_teq.ll b/test/CodeGen/ARM/tst_teq.ll
index e5aa029..bdeee3f 100644
--- a/test/CodeGen/ARM/tst_teq.ll
+++ b/test/CodeGen/ARM/tst_teq.ll
@@ -1,6 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=arm | grep tst
 ; RUN: llvm-as < %s | llc -march=arm | grep teq
-; RUN: llvm-as < %s | llc -march=thumb | grep tst
 
 define i32 @f(i32 %a) {
 entry:
diff --git a/test/CodeGen/ARM/unord.ll b/test/CodeGen/ARM/unord.ll
index e177423..149afc4 100644
--- a/test/CodeGen/ARM/unord.ll
+++ b/test/CodeGen/ARM/unord.ll
@@ -1,7 +1,5 @@
 ; RUN: llvm-as < %s | llc -march=arm | grep movne | count 1
 ; RUN: llvm-as < %s | llc -march=arm | grep moveq | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep bne | count 1
-; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
 
 define i32 @f1(float %X, float %Y) {
 	%tmp = fcmp uno float %X, %Y
diff --git a/test/CodeGen/PowerPC/int-fp-conv-0.ll b/test/CodeGen/PowerPC/int-fp-conv-0.ll
index 3dfc4b1..82a1826 100644
--- a/test/CodeGen/PowerPC/int-fp-conv-0.ll
+++ b/test/CodeGen/PowerPC/int-fp-conv-0.ll
@@ -1,4 +1,3 @@
-; RUN: llvm-as < %s | llc -march=ppc64 -debug |& not grep {= store}
 ; RUN: llvm-as < %s | llc -march=ppc64 > %t
 ; RUN: grep  __floattitf %t
 ; RUN: grep  __fixunstfti %t
diff --git a/test/CodeGen/ARM/2007-01-31-RegInfoAssert.ll b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
index 19c156d..19c156d 100644
--- a/test/CodeGen/ARM/2007-01-31-RegInfoAssert.ll
+++ b/test/CodeGen/Thumb/2007-01-31-RegInfoAssert.ll
diff --git a/test/CodeGen/ARM/2007-02-02-JoinIntervalsCrash.ll b/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
index ee52cf0..ee52cf0 100644
--- a/test/CodeGen/ARM/2007-02-02-JoinIntervalsCrash.ll
+++ b/test/CodeGen/Thumb/2007-02-02-JoinIntervalsCrash.ll
diff --git a/test/CodeGen/ARM/2007-03-06-AddR7.ll b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
index ad3e195..ad3e195 100644
--- a/test/CodeGen/ARM/2007-03-06-AddR7.ll
+++ b/test/CodeGen/Thumb/2007-03-06-AddR7.ll
diff --git a/test/CodeGen/ARM/2007-05-05-InvalidPushPop.ll b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
index 159be4e..159be4e 100644
--- a/test/CodeGen/ARM/2007-05-05-InvalidPushPop.ll
+++ b/test/CodeGen/Thumb/2007-05-05-InvalidPushPop.ll
diff --git a/test/CodeGen/ARM/2009-06-18-ThumbCommuteMul.ll b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
index 9b2aba9..9b2aba9 100644
--- a/test/CodeGen/ARM/2009-06-18-ThumbCommuteMul.ll
+++ b/test/CodeGen/Thumb/2009-06-18-ThumbCommuteMul.ll
diff --git a/test/CodeGen/Thumb/dg.exp b/test/CodeGen/Thumb/dg.exp
new file mode 100644
index 0000000..3ff359a
--- /dev/null
+++ b/test/CodeGen/Thumb/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target ARM] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/Thumb/dyn-stackalloc.ll b/test/CodeGen/Thumb/dyn-stackalloc.ll
new file mode 100644
index 0000000..cd76250
--- /dev/null
+++ b/test/CodeGen/Thumb/dyn-stackalloc.ll
@@ -0,0 +1,59 @@
+; RUN: llvm-as < %s | llc -march=thumb | not grep {ldr sp}
+; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin | \
+; RUN:   not grep {sub.*r7}
+; RUN: llvm-as < %s | llc -march=thumb | grep 4294967280
+
+	%struct.state = type { i32, %struct.info*, float**, i32, i32, i32, i32, i32, i32, i32, i32, i32, i64, i64, i64, i64, i64, i64, i8* }
+	%struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* }
+
+define void @t1(%struct.state* %v) {
+	%tmp6 = load i32* null
+	%tmp8 = alloca float, i32 %tmp6
+	store i32 1, i32* null
+	br i1 false, label %bb123.preheader, label %return
+
+bb123.preheader:
+	br i1 false, label %bb43, label %return
+
+bb43:
+	call fastcc void @f1( float* %tmp8, float* null, i32 0 )
+	%tmp70 = load i32* null
+	%tmp85 = getelementptr float* %tmp8, i32 0
+	call fastcc void @f2( float* null, float* null, float* %tmp85, i32 %tmp70 )
+	ret void
+
+return:
+	ret void
+}
+
+declare fastcc void @f1(float*, float*, i32)
+
+declare fastcc void @f2(float*, float*, float*, i32)
+
+	%struct.comment = type { i8**, i32*, i32, i8* }
+@str215 = external global [2 x i8]
+
+define void @t2(%struct.comment* %vc, i8* %tag, i8* %contents) {
+	%tmp1 = call i32 @strlen( i8* %tag )
+	%tmp3 = call i32 @strlen( i8* %contents )
+	%tmp4 = add i32 %tmp1, 2
+	%tmp5 = add i32 %tmp4, %tmp3
+	%tmp6 = alloca i8, i32 %tmp5
+	%tmp9 = call i8* @strcpy( i8* %tmp6, i8* %tag )
+	%tmp6.len = call i32 @strlen( i8* %tmp6 )
+	%tmp6.indexed = getelementptr i8* %tmp6, i32 %tmp6.len
+	call void @llvm.memcpy.i32( i8* %tmp6.indexed, i8* getelementptr ([2 x i8]* @str215, i32 0, i32 0), i32 2, i32 1 )
+	%tmp15 = call i8* @strcat( i8* %tmp6, i8* %contents )
+	call fastcc void @comment_add( %struct.comment* %vc, i8* %tmp6 )
+	ret void
+}
+
+declare i32 @strlen(i8*)
+
+declare i8* @strcat(i8*, i8*)
+
+declare fastcc void @comment_add(%struct.comment*, i8*)
+
+declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
+
+declare i8* @strcpy(i8*, i8*)
diff --git a/test/CodeGen/Thumb/fpconv.ll b/test/CodeGen/Thumb/fpconv.ll
new file mode 100644
index 0000000..2003131
--- /dev/null
+++ b/test/CodeGen/Thumb/fpconv.ll
@@ -0,0 +1,61 @@
+; RUN: llvm-as < %s | llc -march=thumb
+
+define float @f1(double %x) {
+entry:
+	%tmp1 = fptrunc double %x to float		; <float> [#uses=1]
+	ret float %tmp1
+}
+
+define double @f2(float %x) {
+entry:
+	%tmp1 = fpext float %x to double		; <double> [#uses=1]
+	ret double %tmp1
+}
+
+define i32 @f3(float %x) {
+entry:
+	%tmp = fptosi float %x to i32		; <i32> [#uses=1]
+	ret i32 %tmp
+}
+
+define i32 @f4(float %x) {
+entry:
+	%tmp = fptoui float %x to i32		; <i32> [#uses=1]
+	ret i32 %tmp
+}
+
+define i32 @f5(double %x) {
+entry:
+	%tmp = fptosi double %x to i32		; <i32> [#uses=1]
+	ret i32 %tmp
+}
+
+define i32 @f6(double %x) {
+entry:
+	%tmp = fptoui double %x to i32		; <i32> [#uses=1]
+	ret i32 %tmp
+}
+
+define float @f7(i32 %a) {
+entry:
+	%tmp = sitofp i32 %a to float		; <float> [#uses=1]
+	ret float %tmp
+}
+
+define double @f8(i32 %a) {
+entry:
+	%tmp = sitofp i32 %a to double		; <double> [#uses=1]
+	ret double %tmp
+}
+
+define float @f9(i32 %a) {
+entry:
+	%tmp = uitofp i32 %a to float		; <float> [#uses=1]
+	ret float %tmp
+}
+
+define double @f10(i32 %a) {
+entry:
+	%tmp = uitofp i32 %a to double		; <double> [#uses=1]
+	ret double %tmp
+}
diff --git a/test/CodeGen/Thumb/fpow.ll b/test/CodeGen/Thumb/fpow.ll
new file mode 100644
index 0000000..e5b92ad
--- /dev/null
+++ b/test/CodeGen/Thumb/fpow.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=thumb
+
+define double @t(double %x, double %y) nounwind optsize {
+entry:
+	%0 = tail call double @llvm.pow.f64( double %x, double %y )		; <double> [#uses=1]
+	ret double %0
+}
+
+declare double @llvm.pow.f64(double, double) nounwind readonly
diff --git a/test/CodeGen/Thumb/frame_thumb.ll b/test/CodeGen/Thumb/frame_thumb.ll
new file mode 100644
index 0000000..270e331
--- /dev/null
+++ b/test/CodeGen/Thumb/frame_thumb.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -mtriple=thumb-apple-darwin \
+; RUN:     -disable-fp-elim | not grep {r11}
+; RUN: llvm-as < %s | llc -mtriple=thumb-linux-gnueabi \
+; RUN:     -disable-fp-elim | not grep {r11}
+
+define i32 @f() {
+entry:
+	ret i32 10
+}
diff --git a/test/CodeGen/Thumb/iabs.ll b/test/CodeGen/Thumb/iabs.ll
new file mode 100644
index 0000000..13084f6
--- /dev/null
+++ b/test/CodeGen/Thumb/iabs.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=thumb -stats |& \
+; RUN:   grep {4 .*Number of machine instrs printed}
+
+;; Integer absolute value, should produce something as good as:
+;; Thumb:
+;;   asr r2, r0, #31
+;;   add r0, r0, r2
+;;   eor r0, r2
+;;   bx lr
+
+define i32 @test(i32 %a) {
+        %tmp1neg = sub i32 0, %a
+        %b = icmp sgt i32 %a, -1
+        %abs = select i1 %b, i32 %a, i32 %tmp1neg
+        ret i32 %abs
+}
+
diff --git a/test/CodeGen/ARM/inlineasm-imm-thumb.ll b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
index 2c872e7..2c872e7 100644
--- a/test/CodeGen/ARM/inlineasm-imm-thumb.ll
+++ b/test/CodeGen/Thumb/inlineasm-imm-thumb.ll
diff --git a/test/CodeGen/Thumb/ispositive.ll b/test/CodeGen/Thumb/ispositive.ll
new file mode 100644
index 0000000..91f5970
--- /dev/null
+++ b/test/CodeGen/Thumb/ispositive.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep {lsr r0, r0, #31}
+
+define i32 @test1(i32 %X) {
+entry:
+        icmp slt i32 %X, 0              ; <i1>:0 [#uses=1]
+        zext i1 %0 to i32               ; <i32>:1 [#uses=1]
+        ret i32 %1
+}
+
diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll
new file mode 100644
index 0000000..f7c9ed0
--- /dev/null
+++ b/test/CodeGen/Thumb/large-stack.ll
@@ -0,0 +1,20 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep {ldr.*LCP} | count 5
+
+define void @test1() {
+    %tmp = alloca [ 64 x i32 ] , align 4
+    ret void
+}
+
+define void @test2() {
+    %tmp = alloca [ 4168 x i8 ] , align 4
+    ret void
+}
+
+define i32 @test3() {
+	%retval = alloca i32, align 4
+	%tmp = alloca i32, align 4
+	%a = alloca [805306369 x i8], align 16
+	store i32 0, i32* %tmp
+	%tmp1 = load i32* %tmp
+        ret i32 %tmp1
+}
diff --git a/test/CodeGen/Thumb/ldr_ext.ll b/test/CodeGen/Thumb/ldr_ext.ll
new file mode 100644
index 0000000..4b2a7b2
--- /dev/null
+++ b/test/CodeGen/Thumb/ldr_ext.ll
@@ -0,0 +1,28 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep ldrb | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep ldrh | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep ldrsb | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep ldrsh | count 1
+
+define i32 @test1(i8* %v.pntr.s0.u1) {
+    %tmp.u = load i8* %v.pntr.s0.u1
+    %tmp1.s = zext i8 %tmp.u to i32
+    ret i32 %tmp1.s
+}
+
+define i32 @test2(i16* %v.pntr.s0.u1) {
+    %tmp.u = load i16* %v.pntr.s0.u1
+    %tmp1.s = zext i16 %tmp.u to i32
+    ret i32 %tmp1.s
+}
+
+define i32 @test3(i8* %v.pntr.s1.u0) {
+    %tmp.s = load i8* %v.pntr.s1.u0
+    %tmp1.s = sext i8 %tmp.s to i32
+    ret i32 %tmp1.s
+}
+
+define i32 @test4() {
+    %tmp.s = load i16* null
+    %tmp1.s = sext i16 %tmp.s to i32
+    ret i32 %tmp1.s
+}
diff --git a/test/CodeGen/Thumb/ldr_frame.ll b/test/CodeGen/Thumb/ldr_frame.ll
new file mode 100644
index 0000000..0043fb5
--- /dev/null
+++ b/test/CodeGen/Thumb/ldr_frame.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep cpy | count 2
+
+define i32 @f1() {
+	%buf = alloca [32 x i32], align 4
+	%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 0
+	%tmp1 = load i32* %tmp
+	ret i32 %tmp1
+}
+
+define i32 @f2() {
+	%buf = alloca [32 x i8], align 4
+	%tmp = getelementptr [32 x i8]* %buf, i32 0, i32 0
+	%tmp1 = load i8* %tmp
+        %tmp2 = zext i8 %tmp1 to i32
+	ret i32 %tmp2
+}
+
+define i32 @f3() {
+	%buf = alloca [32 x i32], align 4
+	%tmp = getelementptr [32 x i32]* %buf, i32 0, i32 32
+	%tmp1 = load i32* %tmp
+	ret i32 %tmp1
+}
+
+define i32 @f4() {
+	%buf = alloca [32 x i8], align 4
+	%tmp = getelementptr [32 x i8]* %buf, i32 0, i32 2
+	%tmp1 = load i8* %tmp
+        %tmp2 = zext i8 %tmp1 to i32
+	ret i32 %tmp2
+}
diff --git a/test/CodeGen/Thumb/long-setcc.ll b/test/CodeGen/Thumb/long-setcc.ll
new file mode 100644
index 0000000..df6d137
--- /dev/null
+++ b/test/CodeGen/Thumb/long-setcc.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep cmp | count 1
+
+
+define i1 @t1(i64 %x) {
+	%B = icmp slt i64 %x, 0
+	ret i1 %B
+}
+
+define i1 @t2(i64 %x) {
+	%tmp = icmp ult i64 %x, 4294967296
+	ret i1 %tmp
+}
+
+define i1 @t3(i32 %x) {
+	%tmp = icmp ugt i32 %x, -1
+	ret i1 %tmp
+}
diff --git a/test/CodeGen/Thumb/long.ll b/test/CodeGen/Thumb/long.ll
new file mode 100644
index 0000000..2287443
--- /dev/null
+++ b/test/CodeGen/Thumb/long.ll
@@ -0,0 +1,76 @@
+; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN:   grep mvn | count 1
+; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN:   grep adc | count 1
+; RUN: llvm-as < %s | llc -march=thumb | \
+; RUN:   grep sbc | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep __muldi3
+
+define i64 @f1() {
+entry:
+        ret i64 0
+}
+
+define i64 @f2() {
+entry:
+        ret i64 1
+}
+
+define i64 @f3() {
+entry:
+        ret i64 2147483647
+}
+
+define i64 @f4() {
+entry:
+        ret i64 2147483648
+}
+
+define i64 @f5() {
+entry:
+        ret i64 9223372036854775807
+}
+
+define i64 @f6(i64 %x, i64 %y) {
+entry:
+        %tmp1 = add i64 %y, 1           ; <i64> [#uses=1]
+        ret i64 %tmp1
+}
+
+define void @f7() {
+entry:
+        %tmp = call i64 @f8( )          ; <i64> [#uses=0]
+        ret void
+}
+
+declare i64 @f8()
+
+define i64 @f9(i64 %a, i64 %b) {
+entry:
+        %tmp = sub i64 %a, %b           ; <i64> [#uses=1]
+        ret i64 %tmp
+}
+
+define i64 @f(i32 %a, i32 %b) {
+entry:
+        %tmp = sext i32 %a to i64               ; <i64> [#uses=1]
+        %tmp1 = sext i32 %b to i64              ; <i64> [#uses=1]
+        %tmp2 = mul i64 %tmp1, %tmp             ; <i64> [#uses=1]
+        ret i64 %tmp2
+}
+
+define i64 @g(i32 %a, i32 %b) {
+entry:
+        %tmp = zext i32 %a to i64               ; <i64> [#uses=1]
+        %tmp1 = zext i32 %b to i64              ; <i64> [#uses=1]
+        %tmp2 = mul i64 %tmp1, %tmp             ; <i64> [#uses=1]
+        ret i64 %tmp2
+}
+
+define i64 @f10() {
+entry:
+        %a = alloca i64, align 8                ; <i64*> [#uses=1]
+        %retval = load i64* %a          ; <i64> [#uses=1]
+        ret i64 %retval
+}
+
diff --git a/test/CodeGen/Thumb/select.ll b/test/CodeGen/Thumb/select.ll
new file mode 100644
index 0000000..ae75549
--- /dev/null
+++ b/test/CodeGen/Thumb/select.ll
@@ -0,0 +1,55 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep bgt | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep blt | count 3
+; RUN: llvm-as < %s | llc -march=thumb | grep ble | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep bls | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep bhi | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep __ltdf2
+
+define i32 @f1(i32 %a.s) {
+entry:
+    %tmp = icmp eq i32 %a.s, 4
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f2(i32 %a.s) {
+entry:
+    %tmp = icmp sgt i32 %a.s, 4
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f3(i32 %a.s, i32 %b.s) {
+entry:
+    %tmp = icmp slt i32 %a.s, %b.s
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f4(i32 %a.s, i32 %b.s) {
+entry:
+    %tmp = icmp sle i32 %a.s, %b.s
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f5(i32 %a.u, i32 %b.u) {
+entry:
+    %tmp = icmp ule i32 %a.u, %b.u
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define i32 @f6(i32 %a.u, i32 %b.u) {
+entry:
+    %tmp = icmp ugt i32 %a.u, %b.u
+    %tmp1.s = select i1 %tmp, i32 2, i32 3
+    ret i32 %tmp1.s
+}
+
+define double @f7(double %a, double %b) {
+    %tmp = fcmp olt double %a, 1.234e+00
+    %tmp1 = select i1 %tmp, double -1.000e+00, double %b
+    ret double %tmp1
+}
diff --git a/test/CodeGen/Thumb/stack-frame.ll b/test/CodeGen/Thumb/stack-frame.ll
new file mode 100644
index 0000000..756d257
--- /dev/null
+++ b/test/CodeGen/Thumb/stack-frame.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=thumb
+; RUN: llvm-as < %s | llc -march=thumb | grep add | count 1
+
+define void @f1() {
+	%c = alloca i8, align 1
+	ret void
+}
+
+define i32 @f2() {
+	ret i32 1
+}
+
+
diff --git a/test/CodeGen/ARM/thumb-imm.ll b/test/CodeGen/Thumb/thumb-imm.ll
index 2be393a..2be393a 100644
--- a/test/CodeGen/ARM/thumb-imm.ll
+++ b/test/CodeGen/Thumb/thumb-imm.ll
diff --git a/test/CodeGen/Thumb/tst_teq.ll b/test/CodeGen/Thumb/tst_teq.ll
new file mode 100644
index 0000000..0456951
--- /dev/null
+++ b/test/CodeGen/Thumb/tst_teq.ll
@@ -0,0 +1,17 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep tst
+
+define i32 @f(i32 %a) {
+entry:
+	%tmp2 = and i32 %a, 255		; <i32> [#uses=1]
+	icmp eq i32 %tmp2, 0		; <i1>:0 [#uses=1]
+	%retval = select i1 %0, i32 20, i32 10		; <i32> [#uses=1]
+	ret i32 %retval
+}
+
+define i32 @g(i32 %a) {
+entry:
+        %tmp2 = xor i32 %a, 255
+	icmp eq i32 %tmp2, 0		; <i1>:0 [#uses=1]
+	%retval = select i1 %0, i32 20, i32 10		; <i32> [#uses=1]
+	ret i32 %retval
+}
diff --git a/test/CodeGen/Thumb/unord.ll b/test/CodeGen/Thumb/unord.ll
new file mode 100644
index 0000000..4202d26
--- /dev/null
+++ b/test/CodeGen/Thumb/unord.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=thumb | grep bne | count 1
+; RUN: llvm-as < %s | llc -march=thumb | grep beq | count 1
+
+define i32 @f1(float %X, float %Y) {
+	%tmp = fcmp uno float %X, %Y
+	%retval = select i1 %tmp, i32 1, i32 -1
+	ret i32 %retval
+}
+
+define i32 @f2(float %X, float %Y) {
+	%tmp = fcmp ord float %X, %Y
+	%retval = select i1 %tmp, i32 1, i32 -1
+	ret i32 %retval
+}
diff --git a/test/CodeGen/ARM/vargs2.ll b/test/CodeGen/Thumb/vargs.ll
index 5cc86a9..a18010f 100644
--- a/test/CodeGen/ARM/vargs2.ll
+++ b/test/CodeGen/Thumb/vargs.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s | llc -march=thumb
-; RUN: llvm-as < %s | llc -mtriple=arm-linux -march=thumb | grep pop | count 1
-; RUN: llvm-as < %s | llc -mtriple=arm-darwin -march=thumb | grep pop | count 2
+; RUN: llvm-as < %s | llc -mtriple=thumb-linux | grep pop | count 1
+; RUN: llvm-as < %s | llc -mtriple=thumb-darwin | grep pop | count 2
 
 @str = internal constant [4 x i8] c"%d\0A\00"           ; <[4 x i8]*> [#uses=1]
 
diff --git a/test/CodeGen/Thumb2/carry.ll b/test/CodeGen/Thumb2/carry.ll
new file mode 100644
index 0000000..3450c5a
--- /dev/null
+++ b/test/CodeGen/Thumb2/carry.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "subs r" | count 2
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "adc r"
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "sbc r"  | count 2
+
+define i64 @f1(i64 %a, i64 %b) {
+entry:
+	%tmp = sub i64 %a, %b
+	ret i64 %tmp
+}
+
+define i64 @f2(i64 %a, i64 %b) {
+entry:
+        %tmp1 = shl i64 %a, 1
+	%tmp2 = sub i64 %tmp1, %b
+	ret i64 %tmp2
+}
diff --git a/test/CodeGen/Thumb2/dg.exp b/test/CodeGen/Thumb2/dg.exp
new file mode 100644
index 0000000..3ff359a
--- /dev/null
+++ b/test/CodeGen/Thumb2/dg.exp
@@ -0,0 +1,5 @@
+load_lib llvm.exp
+
+if { [llvm_supports_target ARM] } {
+  RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{ll,c,cpp}]]
+}
diff --git a/test/CodeGen/Thumb2/load-global.ll b/test/CodeGen/Thumb2/load-global.ll
new file mode 100644
index 0000000..0ffcb95
--- /dev/null
+++ b/test/CodeGen/Thumb2/load-global.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin
+; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -relocation-model=pic | grep add | grep pc
+
+@G = external global i32
+
+define i32 @test1() {
+	%tmp = load i32* @G
+	ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/pic-jtbl.ll b/test/CodeGen/Thumb2/pic-jtbl.ll
new file mode 100644
index 0000000..701d308
--- /dev/null
+++ b/test/CodeGen/Thumb2/pic-jtbl.ll
@@ -0,0 +1,55 @@
+; RUN: llvm-as < %s | llc -mtriple=thumbv7-apple-darwin -relocation-model=pic \
+; RUN:   -o %t -f
+; RUN: grep add %t | grep pc
+;; NOT YET: grep "add pc"
+
+define void @bar(i32 %n.u) {
+entry:
+    switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
+bb:
+    tail call void(...)* @foo1()
+    ret void
+bb1:
+    tail call void(...)* @foo2()
+    ret void
+bb2:
+    tail call void(...)* @foo6()
+    ret void
+bb3:
+    tail call void(...)* @foo3()
+    ret void
+bb4:
+    tail call void(...)* @foo4()
+    ret void
+bb5:
+    tail call void(...)* @foo5()
+    ret void
+bb6:
+    tail call void(...)* @foo1()
+    ret void
+bb7:
+    tail call void(...)* @foo2()
+    ret void
+bb8:
+    tail call void(...)* @foo6()
+    ret void
+bb9:
+    tail call void(...)* @foo3()
+    ret void
+bb10:
+    tail call void(...)* @foo4()
+    ret void
+bb11:
+    tail call void(...)* @foo5()
+    ret void
+bb12:
+    tail call void(...)* @foo6()
+    ret void
+}
+
+declare void @foo1(...)
+declare void @foo2(...)
+declare void @foo6(...)
+declare void @foo3(...)
+declare void @foo4(...)
+declare void @foo5(...)
diff --git a/test/CodeGen/Thumb2/thumb2-adc.ll b/test/CodeGen/Thumb2/thumb2-adc.ll
new file mode 100644
index 0000000..4424c1a
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-adc.ll
@@ -0,0 +1,32 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adc\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+
+; 734439407617 = 0x000000ab00000001
+define i64 @f1(i64 %a) {
+    %tmp = add i64 %a, 734439407617
+    ret i64 %tmp
+}
+
+; 5066626890203137 = 0x0012001200000001
+define i64 @f2(i64 %a) {
+    %tmp = add i64 %a, 5066626890203137
+    ret i64 %tmp
+}
+
+; 3747052064576897025 = 0x3400340000000001
+define i64 @f3(i64 %a) {
+    %tmp = add i64 %a, 3747052064576897025
+    ret i64 %tmp
+}
+
+; 6221254862626095105 = 0x5656565600000001
+define i64 @f4(i64 %a) {
+    %tmp = add i64 %a, 6221254862626095105 
+    ret i64 %tmp
+}
+
+; 287104476244869121 = 0x03fc000000000001
+define i64 @f5(i64 %a) {
+    %tmp = add i64 %a, 287104476244869121 
+    ret i64 %tmp
+}
+
diff --git a/test/CodeGen/Thumb2/thumb2-adc2.ll b/test/CodeGen/Thumb2/thumb2-adc2.ll
new file mode 100644
index 0000000..2530d8d
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-adc2.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adc\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]*} | count 1
+
+define i64 @f1(i64 %a, i64 %b) {
+    %tmp = add i64 %a, %b
+    ret i64 %tmp
+}
diff --git a/test/CodeGen/ARM/thumb2-add.ll b/test/CodeGen/Thumb2/thumb2-add.ll
index d4f408f..d4f408f 100644
--- a/test/CodeGen/ARM/thumb2-add.ll
+++ b/test/CodeGen/Thumb2/thumb2-add.ll
diff --git a/test/CodeGen/Thumb2/thumb2-add2.ll b/test/CodeGen/Thumb2/thumb2-add2.ll
new file mode 100644
index 0000000..f94b3c1
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-add2.ll
@@ -0,0 +1,36 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#510} | count 5
+
+; 171 = 0x000000ab
+define i32 @f1(i32 %a) {
+    %tmp = add i32 %a, 171
+    ret i32 %tmp
+}
+
+; 1179666 = 0x00120012
+define i32 @f2(i32 %a) {
+    %tmp = add i32 %a, 1179666
+    ret i32 %tmp
+}
+
+; 872428544 = 0x34003400
+define i32 @f3(i32 %a) {
+    %tmp = add i32 %a, 872428544
+    ret i32 %tmp
+}
+
+; 1448498774 = 0x56565656
+define i32 @f4(i32 %a) {
+    %tmp = add i32 %a, 1448498774
+    ret i32 %tmp
+}
+
+; 510 = 0x000001fe
+define i32 @f5(i32 %a) {
+    %tmp = add i32 %a, 510
+    ret i32 %tmp
+}
+
+define i32 @f6(i32 %a) { ; not part of the expected count of 5: #4095 is not in this test's immediate alternation
+    %tmp = add i32 %a, 4095 ; the same add of 4095 is what thumb2-add3.ll expects to be emitted as addw
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-add3.ll b/test/CodeGen/Thumb2/thumb2-add3.ll
new file mode 100644
index 0000000..1e6341e
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-add3.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {addw\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#4095} | count 1
+
+define i32 @f1(i32 %a) {
+    %tmp = add i32 %a, 4095
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-add4.ll b/test/CodeGen/Thumb2/thumb2-add4.ll
new file mode 100644
index 0000000..b74a33c
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-add4.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adds\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+
+; 171 = 0x000000ab
+define i64 @f1(i64 %a) {
+    %tmp = add i64 %a, 171
+    ret i64 %tmp
+}
+
+; 1179666 = 0x00120012
+define i64 @f2(i64 %a) {
+    %tmp = add i64 %a, 1179666
+    ret i64 %tmp
+}
+
+; 872428544 = 0x34003400
+define i64 @f3(i64 %a) {
+    %tmp = add i64 %a, 872428544
+    ret i64 %tmp
+}
+
+; 1448498774 = 0x56565656
+define i64 @f4(i64 %a) {
+    %tmp = add i64 %a, 1448498774
+    ret i64 %tmp
+}
+
+; 66846720 = 0x03fc0000
+define i64 @f5(i64 %a) {
+    %tmp = add i64 %a, 66846720
+    ret i64 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-add5.ll b/test/CodeGen/Thumb2/thumb2-add5.ll
new file mode 100644
index 0000000..5870be2
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-add5.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {add\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = add i32 %a, %b
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-add6.ll b/test/CodeGen/Thumb2/thumb2-add6.ll
new file mode 100644
index 0000000..9dd3efc
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-add6.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {adds\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i64 @f1(i64 %a, i64 %b) {
+    %tmp = add i64 %a, %b
+    ret i64 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-and.ll b/test/CodeGen/Thumb2/thumb2-and.ll
new file mode 100644
index 0000000..360c977
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-and.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = and i32 %a, %b
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-and2.ll b/test/CodeGen/Thumb2/thumb2-and2.ll
new file mode 100644
index 0000000..266d256
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-and2.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {and\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+
+; 171 = 0x000000ab
+define i32 @f1(i32 %a) {
+    %tmp = and i32 %a, 171
+    ret i32 %tmp
+}
+
+; 1179666 = 0x00120012
+define i32 @f2(i32 %a) {
+    %tmp = and i32 %a, 1179666
+    ret i32 %tmp
+}
+
+; 872428544 = 0x34003400
+define i32 @f3(i32 %a) {
+    %tmp = and i32 %a, 872428544
+    ret i32 %tmp
+}
+
+; 1448498774 = 0x56565656
+define i32 @f4(i32 %a) {
+    %tmp = and i32 %a, 1448498774
+    ret i32 %tmp
+}
+
+; 66846720 = 0x03fc0000
+define i32 @f5(i32 %a) {
+    %tmp = and i32 %a, 66846720
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-asr.ll b/test/CodeGen/Thumb2/thumb2-asr.ll
new file mode 100644
index 0000000..4edf92b
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-asr.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {asr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = ashr i32 %a, %b
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-asr2.ll b/test/CodeGen/Thumb2/thumb2-asr2.ll
new file mode 100644
index 0000000..7007948
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-asr2.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {asr\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#17} | count 1
+
+define i32 @f1(i32 %a) {
+    %tmp = ashr i32 %a, 17
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bfc.ll b/test/CodeGen/Thumb2/thumb2-bfc.ll
new file mode 100644
index 0000000..1e5016c
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-bfc.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "bfc " | count 3
+
+; 4278190095 = 0xff00000f
+define i32 @f1(i32 %a) {
+    %tmp = and i32 %a, 4278190095
+    ret i32 %tmp
+}
+
+; 4286578688 = 0xff800000
+define i32 @f2(i32 %a) {
+    %tmp = and i32 %a, 4286578688
+    ret i32 %tmp
+}
+
+; 4095 = 0x00000fff
+define i32 @f3(i32 %a) {
+    %tmp = and i32 %a, 4095
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bic.ll b/test/CodeGen/Thumb2/thumb2-bic.ll
new file mode 100644
index 0000000..ac15ad6
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-bic.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {bic\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 4
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = xor i32 %b, 4294967295 ; 4294967295 is 0xffffffff, so this xor is a bitwise NOT of %b
+    %tmp1 = and i32 %a, %tmp ; %a & ~%b with the inverted value as the right operand; the test expects 4 register-form bic matches over f1..f4
+    ret i32 %tmp1
+}
+
+define i32 @f2(i32 %a, i32 %b) {
+    %tmp = xor i32 %b, 4294967295 ; xor with all-ones (0xffffffff): bitwise NOT of %b
+    %tmp1 = and i32 %tmp, %a ; ~%b & %a: variant with the inverted value as the left operand of the and
+    ret i32 %tmp1
+}
+
+define i32 @f3(i32 %a, i32 %b) {
+    %tmp = xor i32 4294967295, %b ; variant with the all-ones constant written as the first xor operand; still a bitwise NOT of %b
+    %tmp1 = and i32 %a, %tmp ; %a & ~%b with the inverted value as the right operand
+    ret i32 %tmp1
+}
+
+define i32 @f4(i32 %a, i32 %b) {
+    %tmp = xor i32 4294967295, %b ; all-ones constant as the first xor operand: bitwise NOT of %b
+    %tmp1 = and i32 %tmp, %a ; ~%b & %a: both the xor and the and have their operands in swapped order
+    ret i32 %tmp1
+}
diff --git a/test/CodeGen/Thumb2/thumb2-bic2.ll b/test/CodeGen/Thumb2/thumb2-bic2.ll
new file mode 100644
index 0000000..b8abdba
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-bic2.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "bic "  | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+
+; ~0x000000bb = 4294967108
+define i32 @f1(i32 %a) {
+    %tmp = and i32 %a, 4294967108
+    ret i32 %tmp
+}
+
+; ~0x00aa00aa = 4283826005
+define i32 @f2(i32 %a) {
+    %tmp = and i32 %a, 4283826005
+    ret i32 %tmp
+}
+
+; ~0xcc00cc00 = 872363007
+define i32 @f3(i32 %a) {
+    %tmp = and i32 %a, 872363007
+    ret i32 %tmp
+}
+
+; ~0x00110000 = 4293853183
+define i32 @f4(i32 %a) {
+    %tmp = and i32 %a, 4293853183
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-clz.ll b/test/CodeGen/Thumb2/thumb2-clz.ll
new file mode 100644
index 0000000..e5f94a6
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-clz.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2,+v7a | grep "clz " | count 1
+
+define i32 @f1(i32 %a) {
+    %tmp = tail call i32 @llvm.ctlz.i32(i32 %a)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.ctlz.i32(i32) nounwind readnone
diff --git a/test/CodeGen/Thumb2/thumb2-cmn2.ll b/test/CodeGen/Thumb2/thumb2-cmn2.ll
new file mode 100644
index 0000000..9763dea
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-cmn2.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "cmn "  | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+
+; -0x000000bb = 4294967109
+define i1 @f1(i32 %a) {
+    %tmp = icmp ne i32 %a, 4294967109
+    ret i1 %tmp
+}
+
+; -0x00aa00aa = 4283826006
+define i1 @f2(i32 %a) {
+    %tmp = icmp eq i32 %a, 4283826006
+    ret i1 %tmp
+}
+
+; -0xcc00cc00 = 872363008
+define i1 @f3(i32 %a) {
+    %tmp = icmp ne i32 %a, 872363008
+    ret i1 %tmp
+}
+
+; -0x00110000 = 4293853184
+define i1 @f4(i32 %a) {
+    %tmp = icmp eq i32 %a, 4293853184
+    ret i1 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-cmp.ll b/test/CodeGen/Thumb2/thumb2-cmp.ll
new file mode 100644
index 0000000..cd2442b
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-cmp.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "cmp "  | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 5
+
+; 0x000000bb = 187
+define i1 @f1(i32 %a) {
+    %tmp = icmp ne i32 %a, 187
+    ret i1 %tmp
+}
+
+; 0x00aa00aa = 11141290
+define i1 @f2(i32 %a) {
+    %tmp = icmp eq i32 %a, 11141290 
+    ret i1 %tmp
+}
+
+; 0xcc00cc00 = 3422604288
+define i1 @f3(i32 %a) {
+    %tmp = icmp ne i32 %a, 3422604288
+    ret i1 %tmp
+}
+
+; 0xdddddddd = 3722304989
+define i1 @f4(i32 %a) {
+    %tmp = icmp ne i32 %a, 3722304989
+    ret i1 %tmp
+}
+
+; 0x00110000 = 1114112
+define i1 @f5(i32 %a) {
+    %tmp = icmp eq i32 %a, 1114112
+    ret i1 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-cmp2.ll b/test/CodeGen/Thumb2/thumb2-cmp2.ll
new file mode 100644
index 0000000..8c60b46
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-cmp2.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {cmp\\W*r\[0-9\],\\W*r\[0-9\]} | count 2
+
+define i1 @f1(i32 %a, i32 %b) {
+    %tmp = icmp ne i32 %a, %b
+    ret i1 %tmp
+}
+
+define i1 @f2(i32 %a, i32 %b) {
+    %tmp = icmp eq i32 %a, %b
+    ret i1 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-eor.ll b/test/CodeGen/Thumb2/thumb2-eor.ll
new file mode 100644
index 0000000..ec98f64
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-eor.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {eor\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = xor i32 %a, %b
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-eor2.ll b/test/CodeGen/Thumb2/thumb2-eor2.ll
new file mode 100644
index 0000000..11784ca
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-eor2.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep "eor "  | grep {#187\\|#11141290\\|#3422604288\\|#1114112\\|#3722304989} | count 5
+
+; 0x000000bb = 187
+define i32 @f1(i32 %a) {
+    %tmp = xor i32 %a, 187
+    ret i32 %tmp
+}
+
+; 0x00aa00aa = 11141290
+define i32 @f2(i32 %a) {
+    %tmp = xor i32 %a, 11141290 
+    ret i32 %tmp
+}
+
+; 0xcc00cc00 = 3422604288
+define i32 @f3(i32 %a) {
+    %tmp = xor i32 %a, 3422604288
+    ret i32 %tmp
+}
+
+; 0xdddddddd = 3722304989
+define i32 @f4(i32 %a) {
+    %tmp = xor i32 %a, 3722304989
+    ret i32 %tmp
+}
+
+; 0x00110000 = 1114112
+define i32 @f5(i32 %a) {
+    %tmp = xor i32 %a, 1114112
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-lsl.ll b/test/CodeGen/Thumb2/thumb2-lsl.ll
new file mode 100644
index 0000000..666963a
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-lsl.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsl\\W*r\[0-9\],\\W*r\[0-9\],\\W*\[0-9\]} | count 1
+
+define i32 @f1(i32 %a) {
+    %tmp = shl i32 %a, 5
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-lsl2.ll b/test/CodeGen/Thumb2/thumb2-lsl2.ll
new file mode 100644
index 0000000..eb7a279
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-lsl2.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsl\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = shl i32 %a, %b
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-lsr.ll b/test/CodeGen/Thumb2/thumb2-lsr.ll
new file mode 100644
index 0000000..cf4d2f8
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-lsr.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsr\\W*r\[0-9\],\\W*r\[0-9\],\\W*\[0-9\]} | count 1
+
+define i32 @f1(i32 %a) {
+    %tmp = lshr i32 %a, 13
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-lsr2.ll b/test/CodeGen/Thumb2/thumb2-lsr2.ll
new file mode 100644
index 0000000..01fd56d
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-lsr2.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {lsr\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = lshr i32 %a, %b
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mla.ll b/test/CodeGen/Thumb2/thumb2-mla.ll
new file mode 100644
index 0000000..0772d7f
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mla.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mla\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 2
+
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+    %tmp1 = mul i32 %a, %b
+    %tmp2 = add i32 %c, %tmp1 ; c + a*b with the product as the second add operand; the test expects 2 mla matches over f1 and f2
+    ret i32 %tmp2
+}
+
+define i32 @f2(i32 %a, i32 %b, i32 %c) {
+    %tmp1 = mul i32 %a, %b
+    %tmp2 = add i32 %tmp1, %c ; a*b + c: same multiply-accumulate with the product as the first add operand
+    ret i32 %tmp2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mls.ll b/test/CodeGen/Thumb2/thumb2-mls.ll
new file mode 100644
index 0000000..6d1640f
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mls.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mls\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+    %tmp1 = mul i32 %a, %b
+    %tmp2 = sub i32 %c, %tmp1 ; c - a*b: the product is the value being subtracted; this is the single mls match the test counts
+    ret i32 %tmp2
+}
+
+; sub doesn't commute, so no mls for this one
+define i32 @f2(i32 %a, i32 %b, i32 %c) {
+    %tmp1 = mul i32 %a, %b
+    %tmp2 = sub i32 %tmp1, %c
+    ret i32 %tmp2
+}
diff --git a/test/CodeGen/ARM/thumb2-mov.ll b/test/CodeGen/Thumb2/thumb2-mov.ll
index 0c4c596..0c4c596 100644
--- a/test/CodeGen/ARM/thumb2-mov.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov.ll
diff --git a/test/CodeGen/ARM/thumb2-mov2.ll b/test/CodeGen/Thumb2/thumb2-mov2.ll
index d2f8c0b..d2f8c0b 100644
--- a/test/CodeGen/ARM/thumb2-mov2.ll
+++ b/test/CodeGen/Thumb2/thumb2-mov2.ll
diff --git a/test/CodeGen/Thumb2/thumb2-mov3.ll b/test/CodeGen/Thumb2/thumb2-mov3.ll
new file mode 100644
index 0000000..74418c1
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mov3.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mov\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+
+; 171 = 0x000000ab
+define i32 @f1(i32 %a) {
+    %tmp = add i32 0, 171
+    ret i32 %tmp
+}
+
+; 1179666 = 0x00120012
+define i32 @f2(i32 %a) {
+    %tmp = add i32 0, 1179666
+    ret i32 %tmp
+}
+
+; 872428544 = 0x34003400
+define i32 @f3(i32 %a) {
+    %tmp = add i32 0, 872428544
+    ret i32 %tmp
+}
+
+; 1448498774 = 0x56565656
+define i32 @f4(i32 %a) {
+    %tmp = add i32 0, 1448498774
+    ret i32 %tmp
+}
+
+; 66846720 = 0x03fc0000
+define i32 @f5(i32 %a) {
+    %tmp = add i32 0, 66846720
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mov4.ll b/test/CodeGen/Thumb2/thumb2-mov4.ll
new file mode 100644
index 0000000..74c522f
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mov4.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {movw\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#65535} | count 1
+
+define i32 @f6(i32 %a) {
+    %tmp = add i32 0, 65535
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mul.ll b/test/CodeGen/Thumb2/thumb2-mul.ll
new file mode 100644
index 0000000..e976e66
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mul.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mul\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]} | count 1
+
+define i32 @f1(i32 %a, i32 %b, i32 %c) {
+    %tmp = mul i32 %a, %b
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mvn.ll b/test/CodeGen/Thumb2/thumb2-mvn.ll
new file mode 100644
index 0000000..95694d6
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mvn.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-as < %s | llc | grep {mvn\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+
+target triple = "thumbv7-apple-darwin"
+
+; 0x000000bb = 187
+define i32 @f1(i32 %a) {
+    %tmp = xor i32 4294967295, 187
+    ret i32 %tmp
+}
+
+; 0x00aa00aa = 11141290
+define i32 @f2(i32 %a) {
+    %tmp = xor i32 4294967295, 11141290 
+    ret i32 %tmp
+}
+
+; 0xcc00cc00 = 3422604288
+define i32 @f3(i32 %a) {
+    %tmp = xor i32 4294967295, 3422604288
+    ret i32 %tmp
+}
+
+; 0x00110000 = 1114112
+define i32 @f5(i32 %a) {
+    %tmp = xor i32 4294967295, 1114112
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-mvn2.ll b/test/CodeGen/Thumb2/thumb2-mvn2.ll
new file mode 100644
index 0000000..178f02b
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-mvn2.ll
@@ -0,0 +1,11 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {mvn\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 2
+
+define i32 @f1(i32 %a) {
+    %tmp = xor i32 4294967295, %a
+    ret i32 %tmp
+}
+
+define i32 @f2(i32 %a) {
+    %tmp = xor i32 %a, 4294967295
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-neg.ll b/test/CodeGen/Thumb2/thumb2-neg.ll
new file mode 100644
index 0000000..8f938d5
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-neg.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*#0} | count 1
+
+define i32 @f1(i32 %a) {
+    %tmp = sub i32 0, %a ; 0 - %a (negation); the test's grep expects this as an rsb whose last operand is #0
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-orn.ll b/test/CodeGen/Thumb2/thumb2-orn.ll
new file mode 100644
index 0000000..1add347
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-orn.ll
@@ -0,0 +1,25 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 4
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = xor i32 %b, 4294967295 ; 4294967295 is 0xffffffff, so this xor is a bitwise NOT of %b
+    %tmp1 = or i32 %a, %tmp ; %a | ~%b with the inverted value as the right operand; the test expects 4 register-form orn matches over f1..f4
+    ret i32 %tmp1
+}
+
+define i32 @f2(i32 %a, i32 %b) {
+    %tmp = xor i32 %b, 4294967295 ; xor with all-ones (0xffffffff): bitwise NOT of %b
+    %tmp1 = or i32 %tmp, %a ; ~%b | %a: variant with the inverted value as the left operand of the or
+    ret i32 %tmp1
+}
+
+define i32 @f3(i32 %a, i32 %b) {
+    %tmp = xor i32 4294967295, %b ; variant with the all-ones constant written as the first xor operand; still a bitwise NOT of %b
+    %tmp1 = or i32 %a, %tmp ; %a | ~%b with the inverted value as the right operand
+    ret i32 %tmp1
+}
+
+define i32 @f4(i32 %a, i32 %b) {
+    %tmp = xor i32 4294967295, %b ; all-ones constant as the first xor operand: bitwise NOT of %b
+    %tmp1 = or i32 %tmp, %a ; ~%b | %a: both the xor and the or have their operands in swapped order
+    ret i32 %tmp1
+}
diff --git a/test/CodeGen/Thumb2/thumb2-orn2.ll b/test/CodeGen/Thumb2/thumb2-orn2.ll
new file mode 100644
index 0000000..7758edd
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-orn2.ll
@@ -0,0 +1,29 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orn\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1114112} | count 4
+
+; 0x000000bb = 187
+define i32 @f1(i32 %a) {
+    %tmp1 = xor i32 4294967295, 187
+    %tmp2 = or i32 %a, %tmp1
+    ret i32 %tmp2
+}
+
+; 0x00aa00aa = 11141290
+define i32 @f2(i32 %a) {
+    %tmp1 = xor i32 4294967295, 11141290 
+    %tmp2 = or i32 %a, %tmp1
+    ret i32 %tmp2
+}
+
+; 0xcc00cc00 = 3422604288
+define i32 @f3(i32 %a) {
+    %tmp1 = xor i32 4294967295, 3422604288
+    %tmp2 = or i32 %a, %tmp1
+    ret i32 %tmp2
+}
+
+; 0x00110000 = 1114112
+define i32 @f5(i32 %a) {
+    %tmp1 = xor i32 4294967295, 1114112
+    %tmp2 = or i32 %a, %tmp1
+    ret i32 %tmp2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-orr.ll b/test/CodeGen/Thumb2/thumb2-orr.ll
new file mode 100644
index 0000000..9222946
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-orr.ll
@@ -0,0 +1,6 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp2 = or i32 %a, %b
+    ret i32 %tmp2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-orr2.ll b/test/CodeGen/Thumb2/thumb2-orr2.ll
new file mode 100644
index 0000000..6f2b62c
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-orr2.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {orr\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep {#187\\|#11141290\\|#3422604288\\|#1145324612\\|#1114112} | count 5
+
+; 0x000000bb = 187
+define i32 @f1(i32 %a) {
+    %tmp2 = or i32 %a, 187
+    ret i32 %tmp2
+}
+
+; 0x00aa00aa = 11141290
+define i32 @f2(i32 %a) {
+    %tmp2 = or i32 %a, 11141290 
+    ret i32 %tmp2
+}
+
+; 0xcc00cc00 = 3422604288
+define i32 @f3(i32 %a) {
+    %tmp2 = or i32 %a, 3422604288
+    ret i32 %tmp2
+}
+
+; 0x44444444 = 1145324612
+define i32 @f4(i32 %a) {
+    %tmp2 = or i32 %a, 1145324612
+    ret i32 %tmp2
+}
+
+; 0x00110000 = 1114112
+define i32 @f5(i32 %a) {
+    %tmp2 = or i32 %a, 1114112
+    ret i32 %tmp2
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rev.ll b/test/CodeGen/Thumb2/thumb2-rev.ll
new file mode 100644
index 0000000..4009da3
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-rev.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2,+v7a | grep {rev\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+
+define i32 @f1(i32 %a) {
+    %tmp = tail call i32 @llvm.bswap.i32(i32 %a)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.bswap.i32(i32) nounwind readnone
diff --git a/test/CodeGen/Thumb2/thumb2-ror.ll b/test/CodeGen/Thumb2/thumb2-ror.ll
new file mode 100644
index 0000000..305ab99
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ror.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ror\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*#\[0-9\]*} | grep 22 | count 1
+
+define i32 @f1(i32 %a) {
+    %l8 = shl i32 %a, 10 ; low 22 bits of %a moved to the top
+    %r8 = lshr i32 %a, 22 ; high 10 bits of %a moved to the bottom
+    %tmp = or i32 %l8, %r8 ; (a << 10) | (a >> 22) with 10 + 22 = 32: a rotate right by 22, the immediate the test greps for
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-ror2.ll b/test/CodeGen/Thumb2/thumb2-ror2.ll
new file mode 100644
index 0000000..dd19b0a
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-ror2.ll
@@ -0,0 +1,9 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {ror\\W*r\[0-9\]*,\\W*r\[0-9\]*,\\W*r\[0-9\]*} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %db = sub i32 32, %b ; complementary shift amount: 32 - b
+    %l8 = shl i32 %a, %b
+    %r8 = lshr i32 %a, %db
+    %tmp = or i32 %l8, %r8 ; (a << b) | (a >> (32 - b)): rotate by a variable amount; the test expects one register-register ror
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rsb.ll b/test/CodeGen/Thumb2/thumb2-rsb.ll
new file mode 100644
index 0000000..934e377
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-rsb.ll
@@ -0,0 +1,9 @@
+; XFAIL: *
+; this will match as "sub" until we get register shifting
+
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*r\[0-9\]*} | count 1
+
+define i32 @f1(i32 %a, i32 %b) {
+    %tmp = sub i32 %b, %a
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/Thumb2/thumb2-rsb2.ll b/test/CodeGen/Thumb2/thumb2-rsb2.ll
new file mode 100644
index 0000000..957d1d0
--- /dev/null
+++ b/test/CodeGen/Thumb2/thumb2-rsb2.ll
@@ -0,0 +1,31 @@
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep {rsb\\W*r\[0-9\],\\W*r\[0-9\],\\W*#\[0-9\]*} | grep {#171\\|#1179666\\|#872428544\\|#1448498774\\|#66846720} | count 5
+
+; 171 = 0x000000ab
+define i32 @f1(i32 %a) {
+    %tmp = sub i32 171, %a
+    ret i32 %tmp
+}
+
+; 1179666 = 0x00120012
+define i32 @f2(i32 %a) {
+    %tmp = sub i32 1179666, %a
+    ret i32 %tmp
+}
+
+; 872428544 = 0x34003400
+define i32 @f3(i32 %a) {
+    %tmp = sub i32 872428544, %a
+    ret i32 %tmp
+}
+
+; 1448498774 = 0x56565656
+define i32 @f4(i32 %a) {
+    %tmp = sub i32 1448498774, %a
+    ret i32 %tmp
+}
+
+; 66846720 = 0x03fc0000
+define i32 @f5(i32 %a) {
+    %tmp = sub i32 66846720, %a
+    ret i32 %tmp
+}
diff --git a/test/CodeGen/ARM/thumb2-shifter.ll b/test/CodeGen/Thumb2/thumb2-shifter.ll
index f9ec506..9bd6e43 100644
--- a/test/CodeGen/ARM/thumb2-shifter.ll
+++ b/test/CodeGen/Thumb2/thumb2-shifter.ll
@@ -2,7 +2,7 @@
 ; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep lsr
 ; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep asr
 ; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep ror
-; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | grep mov
+; RUN: llvm-as < %s | llc -march=thumb -mattr=+thumb2 | not grep mov
 
 define i32 @t2ADDrs_lsl(i32 %X, i32 %Y) {
         %A = shl i32 %Y, 16
diff --git a/test/CodeGen/X86/constpool.ll b/test/CodeGen/X86/constpool.ll
new file mode 100644
index 0000000..60d51e5
--- /dev/null
+++ b/test/CodeGen/X86/constpool.ll
@@ -0,0 +1,16 @@
+; RUN: llvm-as < %s | llc 
+; RUN: llvm-as < %s | llc -fast-isel
+; RUN: llvm-as < %s | llc -march=x86-64
+; RUN: llvm-as < %s | llc -fast-isel -march=x86-64
+; PR4466
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin9.7"
+
+define i32 @main() nounwind { ; none of this test's llc invocations is piped to grep, so only successful compilation is checked
+entry: ; the constant 0x7FF0000000000000 below is the IEEE-754 double bit pattern of +infinity
+	%0 = fcmp oeq float undef, 0x7FF0000000000000		; <i1> [#uses=1]
+	%1 = zext i1 %0 to i32		; <i32> [#uses=1]
+	store i32 %1, i32* undef, align 4
+	ret i32 undef
+}
diff --git a/test/CodeGen/X86/inline-asm-modifier-n.ll b/test/CodeGen/X86/inline-asm-modifier-n.ll
new file mode 100644
index 0000000..97eac38
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-modifier-n.ll
@@ -0,0 +1,8 @@
+; RUN: llvm-as < %s | llc -march=x86 | grep { 37}
+; rdar://7008959
+
+define void @bork() nounwind {
+entry: ; the asm operand below is -37 while the test greps for " 37": the ${0:n} modifier is expected to print the negated constant
+	tail call void asm sideeffect "BORK ${0:n}", "i,~{dirflag},~{fpsr},~{flags}"(i32 -37) nounwind
+	ret void
+}
diff --git a/test/CodeGen/X86/inline-asm-tied.ll b/test/CodeGen/X86/inline-asm-tied.ll
new file mode 100644
index 0000000..6df2c48
--- /dev/null
+++ b/test/CodeGen/X86/inline-asm-tied.ll
@@ -0,0 +1,19 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin9 -O0 | grep {movl	%edx, 4(%esp)} | count 2
+; rdar://6992609
+
+target triple = "i386-apple-darwin9.0"
+@llvm.used = appending global [1 x i8*] [i8* bitcast (i64 (i64)* @_OSSwapInt64 to i8*)], section "llvm.metadata"		; <[1 x i8*]*> [#uses=0]
+
+define i64 @_OSSwapInt64(i64 %_data) nounwind {
+entry:
+	%retval = alloca i64		; <i64*> [#uses=2]
+	%_data.addr = alloca i64		; <i64*> [#uses=4]
+	store i64 %_data, i64* %_data.addr
+	%tmp = load i64* %_data.addr		; <i64> [#uses=1]
+	%0 = call i64 asm "bswap   %eax\0A\09bswap   %edx\0A\09xchgl   %eax, %edx", "=A,0,~{dirflag},~{fpsr},~{flags}"(i64 %tmp) nounwind		; <i64> [#uses=1]
+	store i64 %0, i64* %_data.addr
+	%tmp1 = load i64* %_data.addr		; <i64> [#uses=1]
+	store i64 %tmp1, i64* %retval
+	%1 = load i64* %retval		; <i64> [#uses=1]
+	ret i64 %1
+}
diff --git a/test/CodeGen/X86/pic-1.ll b/test/CodeGen/X86/pic-1.ll
index ab07718..7bd59dc 100644
--- a/test/CodeGen/X86/pic-1.ll
+++ b/test/CodeGen/X86/pic-1.ll
@@ -9,7 +9,7 @@
 @dst = external global i32 
 @src = external global i32 
 
-define void @foo() {
+define void @foo() nounwind {
 entry:
     store i32* @dst, i32** @ptr
     %tmp.s = load i32* @src
diff --git a/test/CodeGen/X86/pic-jtbl.ll b/test/CodeGen/X86/pic-jtbl.ll
index e23f7c1..6096592 100644
--- a/test/CodeGen/X86/pic-jtbl.ll
+++ b/test/CodeGen/X86/pic-jtbl.ll
@@ -6,7 +6,10 @@
 ; RUN: grep GOTOFF %t | count 14
 ; RUN: grep JTI %t | count 2
 
-define void @bar(i32 %n.u) {
+; X86-64:
+; RUN: llvm-as < %s | llc -mtriple=x86_64-pc-linux-gnu -relocation-model=pic > %t
+; RUN: grep {LJTI1_0(%rip)} %t
+define void @bar(i32 %n.u) nounwind {
 entry:
     switch i32 %n.u, label %bb12 [i32 1, label %bb i32 2, label %bb6 i32 4, label %bb7 i32 5, label %bb8 i32 6, label %bb10 i32 7, label %bb1 i32 8, label %bb3 i32 9, label %bb4 i32 10, label %bb9 i32 11, label %bb2 i32 12, label %bb5 i32 13, label %bb11 ]
 bb:
diff --git a/test/CodeGen/X86/pic_jumptable.ll b/test/CodeGen/X86/pic_jumptable.ll
index 229865b..04245d1 100644
--- a/test/CodeGen/X86/pic_jumptable.ll
+++ b/test/CodeGen/X86/pic_jumptable.ll
@@ -6,7 +6,7 @@
 
 declare void @_Z3bari(i32)
 
-define linkonce void @_Z3fooILi1EEvi(i32 %Y) {
+define linkonce void @_Z3fooILi1EEvi(i32 %Y) nounwind {
 entry:
 	%Y_addr = alloca i32		; <i32*> [#uses=2]
 	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll
index 62382c6..ca6204c 100644
--- a/test/CodeGen/X86/pr3495.ll
+++ b/test/CodeGen/X86/pr3495.ll
@@ -1,6 +1,6 @@
 ; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2
 ; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies}
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39
 ; PR3495
 ; The loop reversal kicks in once here, resulting in one fewer instruction.
 
diff --git a/test/CodeGen/X86/remat-constant.ll b/test/CodeGen/X86/remat-constant.ll
index 4c983b0..8dfed5e 100644
--- a/test/CodeGen/X86/remat-constant.ll
+++ b/test/CodeGen/X86/remat-constant.ll
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static -aggressive-remat | grep xmm | count 2
+; RUN: llvm-as < %s | llc -mtriple=x86_64-linux -relocation-model=static | grep xmm | count 2
 
 declare void @bar() nounwind
 
diff --git a/test/CodeGen/X86/scev-interchange.ll b/test/CodeGen/X86/scev-interchange.ll
new file mode 100644
index 0000000..b253dd9
--- /dev/null
+++ b/test/CodeGen/X86/scev-interchange.ll
@@ -0,0 +1,386 @@
+; RUN: llvm-as < %s | llc -march=x86-64
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+	%struct..0__pthread_mutex_s = type { i32, i32, i32, i32, i32, i32, %struct.__pthread_list_t }
+	%"struct.DataOutBase::GmvFlags" = type { i32 }
+	%"struct.FE_DGPNonparametric<3>" = type { [1156 x i8], i32, %"struct.PolynomialSpace<1>" }
+	%"struct.FE_Q<3>" = type { %"struct.FE_DGPNonparametric<3>", %"struct.std::vector<int,std::allocator<int> >" }
+	%"struct.FiniteElementData<1>" = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
+	%struct.Line = type { [2 x i32] }
+	%"struct.PolynomialSpace<1>" = type { %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >", i32, %"struct.std::vector<int,std::allocator<int> >", %"struct.std::vector<int,std::allocator<int> >" }
+	%"struct.Polynomials::Polynomial<double>" = type { %struct.Subscriptor, %"struct.std::vector<double,std::allocator<double> >" }
+	%struct.Subscriptor = type { i32 (...)**, i32, %"struct.std::type_info"* }
+	%"struct.TableBase<2,double>" = type { %struct.Subscriptor, double*, i32, %"struct.TableIndices<2>" }
+	%"struct.TableIndices<2>" = type { %struct.Line }
+	%struct.__pthread_list_t = type { %struct.__pthread_list_t*, %struct.__pthread_list_t* }
+	%struct.pthread_attr_t = type { i64, [48 x i8] }
+	%struct.pthread_mutex_t = type { %struct..0__pthread_mutex_s }
+	%"struct.std::_Bit_const_iterator" = type { %"struct.std::_Bit_iterator_base" }
+	%"struct.std::_Bit_iterator_base" = type { i64*, i32 }
+	%"struct.std::_Bvector_base<std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" }
+	%"struct.std::_Bvector_base<std::allocator<bool> >::_Bvector_impl" = type { %"struct.std::_Bit_const_iterator", %"struct.std::_Bit_const_iterator", i64* }
+	%"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" = type { %"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >::_Vector_impl" }
+	%"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >::_Vector_impl" = type { %"struct.Polynomials::Polynomial<double>"*, %"struct.Polynomials::Polynomial<double>"*, %"struct.Polynomials::Polynomial<double>"* }
+	%"struct.std::_Vector_base<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" }
+	%"struct.std::_Vector_base<double,std::allocator<double> >::_Vector_impl" = type { double*, double*, double* }
+	%"struct.std::_Vector_base<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" }
+	%"struct.std::_Vector_base<int,std::allocator<int> >::_Vector_impl" = type { i32*, i32*, i32* }
+	%"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >::_Vector_impl" }
+	%"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >::_Vector_impl" = type { %"struct.std::vector<bool,std::allocator<bool> >"*, %"struct.std::vector<bool,std::allocator<bool> >"*, %"struct.std::vector<bool,std::allocator<bool> >"* }
+	%"struct.std::type_info" = type { i32 (...)**, i8* }
+	%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" = type { %"struct.std::_Vector_base<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >" }
+	%"struct.std::vector<bool,std::allocator<bool> >" = type { %"struct.std::_Bvector_base<std::allocator<bool> >" }
+	%"struct.std::vector<double,std::allocator<double> >" = type { %"struct.std::_Vector_base<double,std::allocator<double> >" }
+	%"struct.std::vector<int,std::allocator<int> >" = type { %"struct.std::_Vector_base<int,std::allocator<int> >" }
+	%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" = type { %"struct.std::_Vector_base<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >" }
+
+@_ZL20__gthrw_pthread_oncePiPFvvE = alias weak i32 (i32*, void ()*)* @pthread_once		; <i32 (i32*, void ()*)*> [#uses=0]
+@_ZL27__gthrw_pthread_getspecificj = alias weak i8* (i32)* @pthread_getspecific		; <i8* (i32)*> [#uses=0]
+@_ZL27__gthrw_pthread_setspecificjPKv = alias weak i32 (i32, i8*)* @pthread_setspecific		; <i32 (i32, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_ = alias weak i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)* @pthread_create		; <i32 (i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)*> [#uses=0]
+@_ZL22__gthrw_pthread_cancelm = alias weak i32 (i64)* @pthread_cancel		; <i32 (i64)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_lockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_lock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL29__gthrw_pthread_mutex_trylockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_trylock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL28__gthrw_pthread_mutex_unlockP15pthread_mutex_t = alias weak i32 (%struct.pthread_mutex_t*)* @pthread_mutex_unlock		; <i32 (%struct.pthread_mutex_t*)*> [#uses=0]
+@_ZL26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t = alias weak i32 (%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)* @pthread_mutex_init		; <i32 (%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_createPjPFvPvE = alias weak i32 (i32*, void (i8*)*)* @pthread_key_create		; <i32 (i32*, void (i8*)*)*> [#uses=0]
+@_ZL26__gthrw_pthread_key_deletej = alias weak i32 (i32)* @pthread_key_delete		; <i32 (i32)*> [#uses=0]
+@_ZL30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t = alias weak i32 (%"struct.DataOutBase::GmvFlags"*)* @pthread_mutexattr_init		; <i32 (%"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti = alias weak i32 (%"struct.DataOutBase::GmvFlags"*, i32)* @pthread_mutexattr_settype		; <i32 (%"struct.DataOutBase::GmvFlags"*, i32)*> [#uses=0]
+@_ZL33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t = alias weak i32 (%"struct.DataOutBase::GmvFlags"*)* @pthread_mutexattr_destroy		; <i32 (%"struct.DataOutBase::GmvFlags"*)*> [#uses=0]
+
+declare void @_Unwind_Resume(i8*)
+
+declare i8* @_Znwm(i64)
+
+declare fastcc void @_ZNSt6vectorIjSaIjEEaSERKS1_(%"struct.std::vector<int,std::allocator<int> >"*, %"struct.std::vector<int,std::allocator<int> >"*)
+
+declare fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* nocapture, i32, i32)
+
+declare fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* nocapture, i64, i8* nocapture)
+
+declare fastcc void @_ZNSt6vectorIS_IbSaIbEESaIS1_EEC2EmRKS1_RKS2_(%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* nocapture, i64, %"struct.std::vector<bool,std::allocator<bool> >"* nocapture)
+
+declare fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* nocapture)
+
+declare fastcc void @_ZN24TensorProductPolynomialsILi3EEC2IN11Polynomials10PolynomialIdEEEERKSt6vectorIT_SaIS6_EE(%"struct.PolynomialSpace<1>"* nocapture, %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* nocapture)
+
+declare fastcc void @_ZN7FE_PolyI24TensorProductPolynomialsILi3EELi3EEC2EjRKS1_RK17FiniteElementDataILi3EERKSt6vectorIbSaIbEERKS9_ISB_SaISB_EE(%"struct.FE_DGPNonparametric<3>"*, i32, %"struct.PolynomialSpace<1>"* nocapture, %"struct.FiniteElementData<1>"* nocapture, %"struct.std::vector<bool,std::allocator<bool> >"* nocapture, %"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* nocapture)
+
+declare fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, %"struct.std::vector<int,std::allocator<int> >"* nocapture)
+
+declare fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias nocapture sret, i32)
+
+define fastcc void @_ZN4FE_QILi3EEC1Ej(%"struct.FE_Q<3>"* %this, i32 %degree) {
+entry:
+	invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 1, i8* undef)
+			to label %invcont.i unwind label %lpad.i
+
+invcont.i:		; preds = %entry
+	invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree)
+			to label %invcont1.i unwind label %lpad120.i
+
+invcont1.i:		; preds = %invcont.i
+	invoke fastcc void @_ZNSt6vectorIS_IbSaIbEESaIS1_EEC2EmRKS1_RKS2_(%"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* undef, i64 undef, %"struct.std::vector<bool,std::allocator<bool> >"* undef)
+			to label %invcont3.i unwind label %lpad124.i
+
+invcont3.i:		; preds = %invcont1.i
+	invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree)
+			to label %invcont4.i unwind label %lpad128.i
+
+invcont4.i:		; preds = %invcont3.i
+	invoke fastcc void @_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_(%"struct.std::vector<bool,std::allocator<bool> >"* undef, i64 undef, i8* undef)
+			to label %invcont6.i unwind label %lpad132.i
+
+invcont6.i:		; preds = %invcont4.i
+	invoke fastcc void @_ZN4FE_QILi3EE14get_dpo_vectorEj(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, i32 %degree)
+			to label %invcont7.i unwind label %lpad136.i
+
+invcont7.i:		; preds = %invcont6.i
+	invoke fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias sret undef, i32 %degree)
+			to label %invcont9.i unwind label %lpad140.i
+
+invcont9.i:		; preds = %invcont7.i
+	invoke fastcc void @_ZN24TensorProductPolynomialsILi3EEC2IN11Polynomials10PolynomialIdEEEERKSt6vectorIT_SaIS6_EE(%"struct.PolynomialSpace<1>"* undef, %"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef)
+			to label %invcont10.i unwind label %lpad144.i
+
+invcont10.i:		; preds = %invcont9.i
+	invoke fastcc void @_ZN7FE_PolyI24TensorProductPolynomialsILi3EELi3EEC2EjRKS1_RK17FiniteElementDataILi3EERKSt6vectorIbSaIbEERKS9_ISB_SaISB_EE(%"struct.FE_DGPNonparametric<3>"* undef, i32 %degree, %"struct.PolynomialSpace<1>"* undef, %"struct.FiniteElementData<1>"* undef, %"struct.std::vector<bool,std::allocator<bool> >"* undef, %"struct.std::vector<std::vector<bool, std::allocator<bool> >,std::allocator<std::vector<bool, std::allocator<bool> > > >"* undef)
+			to label %bb14.i unwind label %lpad148.i
+
+bb14.i:		; preds = %invcont10.i
+	br i1 false, label %bb3.i164.i, label %bb.i.i.i.i160.i
+
+bb.i.i.i.i160.i:		; preds = %bb14.i
+	unreachable
+
+bb3.i164.i:		; preds = %bb14.i
+	br i1 undef, label %bb10.i168.i, label %bb.i.i.i20.i166.i
+
+bb.i.i.i20.i166.i:		; preds = %bb3.i164.i
+	unreachable
+
+bb10.i168.i:		; preds = %bb3.i164.i
+	invoke fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef)
+			to label %bb21.i unwind label %lpad144.i
+
+bb21.i:		; preds = %bb10.i168.i
+	invoke fastcc void @_ZNSt6vectorIN11Polynomials10PolynomialIdEESaIS2_EED1Ev(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* undef)
+			to label %bb28.i unwind label %lpad140.i
+
+bb28.i:		; preds = %bb21.i
+	br i1 undef, label %bb35.i, label %bb.i.i.i175.i
+
+bb.i.i.i175.i:		; preds = %bb28.i
+	br label %bb35.i
+
+bb35.i:		; preds = %bb.i.i.i175.i, %bb28.i
+	br i1 undef, label %bb42.i, label %bb.i.i.i205.i
+
+bb.i.i.i205.i:		; preds = %bb35.i
+	unreachable
+
+bb42.i:		; preds = %bb35.i
+	br i1 undef, label %bb47.i, label %bb.i.i.i213.i
+
+bb.i.i.i213.i:		; preds = %bb42.i
+	unreachable
+
+bb47.i:		; preds = %bb42.i
+	br i1 undef, label %bb59.i, label %bb.i.i.i247.i
+
+bb.i.i.i247.i:		; preds = %bb47.i
+	unreachable
+
+bb59.i:		; preds = %bb47.i
+	br i1 undef, label %bb66.i, label %bb.i.i.i255.i
+
+bb.i.i.i255.i:		; preds = %bb59.i
+	unreachable
+
+bb66.i:		; preds = %bb59.i
+	br i1 undef, label %bb71.i, label %bb.i.i.i262.i
+
+bb.i.i.i262.i:		; preds = %bb66.i
+	br label %bb71.i
+
+bb71.i:		; preds = %bb.i.i.i262.i, %bb66.i
+	%tmp11.i.i29.i.i.i.i.i.i = invoke i8* @_Znwm(i64 12)
+			to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i unwind label %lpad.i.i.i.i.i.i		; <i8*> [#uses=0]
+
+lpad.i.i.i.i.i.i:		; preds = %bb71.i
+	unreachable
+
+_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i:		; preds = %bb71.i
+	br i1 undef, label %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i, label %bb.i.i.i.i94.i
+
+bb.i.i.i.i94.i:		; preds = %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i
+	unreachable
+
+_ZNSt6vectorIjSaIjEED1Ev.exit.i.i:		; preds = %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i.i
+	%tmp11.i.i29.i.i.i.i5.i.i = invoke i8* @_Znwm(i64 undef)
+			to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i unwind label %lpad.i.i.i.i8.i.i		; <i8*> [#uses=0]
+
+lpad.i.i.i.i8.i.i:		; preds = %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i
+	invoke void @_Unwind_Resume(i8* undef)
+			to label %.noexc.i9.i.i unwind label %lpad.i19.i.i
+
+.noexc.i9.i.i:		; preds = %lpad.i.i.i.i8.i.i
+	unreachable
+
+_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i:		; preds = %_ZNSt6vectorIjSaIjEED1Ev.exit.i.i
+	br i1 undef, label %bb50.i.i.i, label %bb.i.i.i.i.i.i.i.i.i.i
+
+bb.i.i.i.i.i.i.i.i.i.i:		; preds = %bb.i.i.i.i.i.i.i.i.i.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i
+	br i1 undef, label %bb50.i.i.i, label %bb.i.i.i.i.i.i.i.i.i.i
+
+bb50.i.i.i:		; preds = %bb.i.i.i.i.i.i.i.i.i.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i12.i.i
+	invoke fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, %"struct.std::vector<int,std::allocator<int> >"* undef)
+			to label %bb83.i unwind label %lpad188.i
+
+lpad.i19.i.i:		; preds = %lpad.i.i.i.i8.i.i
+	unreachable
+
+bb83.i:		; preds = %bb50.i.i.i
+	br i1 undef, label %invcont84.i, label %bb.i.i.i221.i
+
+bb.i.i.i221.i:		; preds = %bb83.i
+	unreachable
+
+invcont84.i:		; preds = %bb83.i
+	%tmp11.i.i29.i.i.i.i.i = invoke i8* @_Znwm(i64 undef)
+			to label %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i unwind label %lpad.i.i.i.i315.i		; <i8*> [#uses=0]
+
+lpad.i.i.i.i315.i:		; preds = %invcont84.i
+	invoke void @_Unwind_Resume(i8* undef)
+			to label %.noexc.i316.i unwind label %lpad.i352.i
+
+.noexc.i316.i:		; preds = %lpad.i.i.i.i315.i
+	unreachable
+
+_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i:		; preds = %invcont84.i
+	br i1 undef, label %bb50.i.i, label %bb.i.i.i.i.i.i.i.i320.i
+
+bb.i.i.i.i.i.i.i.i320.i:		; preds = %bb.i.i.i.i.i.i.i.i320.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i
+	br i1 undef, label %bb50.i.i, label %bb.i.i.i.i.i.i.i.i320.i
+
+bb50.i.i:		; preds = %bb.i.i.i.i.i.i.i.i320.i, %_ZNSt12_Vector_baseIjSaIjEEC2EmRKS0_.exit.i.i.i.i
+	invoke fastcc void @_ZN11FE_Q_Helper12_GLOBAL__N_116invert_numberingERKSt6vectorIjSaIjEE(%"struct.std::vector<int,std::allocator<int> >"* noalias sret undef, %"struct.std::vector<int,std::allocator<int> >"* undef)
+			to label %invcont86.i unwind label %lpad200.i
+
+lpad.i352.i:		; preds = %lpad.i.i.i.i315.i
+	unreachable
+
+invcont86.i:		; preds = %bb50.i.i
+	invoke fastcc void @_ZNSt6vectorIjSaIjEEaSERKS1_(%"struct.std::vector<int,std::allocator<int> >"* undef, %"struct.std::vector<int,std::allocator<int> >"* undef)
+			to label %.noexc380.i unwind label %lpad204.i
+
+.noexc380.i:		; preds = %invcont86.i
+	br i1 undef, label %bb100.i, label %bb.i.i.i198.i
+
+bb.i.i.i198.i:		; preds = %.noexc380.i
+	unreachable
+
+bb100.i:		; preds = %.noexc380.i
+	br i1 undef, label %invcont101.i, label %bb.i.i.i190.i
+
+bb.i.i.i190.i:		; preds = %bb100.i
+	unreachable
+
+invcont101.i:		; preds = %bb100.i
+	invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef)
+			to label %_ZN10FullMatrixIdEC1Ejj.exit.i.i unwind label %lpad.i.i.i.i.i
+
+lpad.i.i.i.i.i:		; preds = %invcont101.i
+	unreachable
+
+_ZN10FullMatrixIdEC1Ejj.exit.i.i:		; preds = %invcont101.i
+	invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef)
+			to label %_ZN10FullMatrixIdEC1Ejj.exit28.i.i unwind label %lpad.i.i.i27.i.i
+
+lpad.i.i.i27.i.i:		; preds = %_ZN10FullMatrixIdEC1Ejj.exit.i.i
+	invoke void @_Unwind_Resume(i8* undef)
+			to label %.noexc.i.i unwind label %lpad.i.i
+
+.noexc.i.i:		; preds = %lpad.i.i.i27.i.i
+	unreachable
+
+_ZN10FullMatrixIdEC1Ejj.exit28.i.i:		; preds = %_ZN10FullMatrixIdEC1Ejj.exit.i.i
+	br i1 undef, label %bb58.i.i, label %bb.i.i.i304.i.i
+
+bb.i.i.i304.i.i:		; preds = %_ZN10FullMatrixIdEC1Ejj.exit28.i.i
+	unreachable
+
+bb58.i.i:		; preds = %_ZN10FullMatrixIdEC1Ejj.exit28.i.i
+	br i1 false, label %bb.i191.i, label %bb.i.i.i297.i.i
+
+bb.i.i.i297.i.i:		; preds = %bb58.i.i
+	unreachable
+
+lpad.i.i:		; preds = %lpad.i.i.i27.i.i
+	unreachable
+
+bb.i191.i:		; preds = %.noexc232.i, %bb58.i.i
+	invoke fastcc void @_ZN9TableBaseILi2EdE6reinitERK12TableIndicesILi2EE(%"struct.TableBase<2,double>"* undef, i32 undef, i32 undef)
+			to label %.noexc232.i unwind label %lpad196.i
+
+.noexc232.i:		; preds = %bb.i191.i
+	br i1 undef, label %bb29.loopexit.i.i, label %bb.i191.i
+
+bb7.i215.i:		; preds = %bb9.i216.i
+	br i1 undef, label %bb16.preheader.i.i, label %bb8.i.i
+
+bb8.i.i:		; preds = %bb7.i215.i
+	%tmp60.i.i = add i32 %0, 1		; <i32> [#uses=1]
+	br label %bb9.i216.i
+
+bb9.i216.i:		; preds = %bb29.loopexit.i.i, %bb8.i.i
+	%0 = phi i32 [ 0, %bb29.loopexit.i.i ], [ %tmp60.i.i, %bb8.i.i ]		; <i32> [#uses=2]
+	br i1 undef, label %bb7.i215.i, label %bb16.preheader.i.i
+
+bb15.i.i:		; preds = %bb16.preheader.i.i, %bb15.i.i
+	%j1.0212.i.i = phi i32 [ %1, %bb15.i.i ], [ 0, %bb16.preheader.i.i ]		; <i32> [#uses=2]
+	%tmp6.i.i195.i.i = load i32* undef, align 4		; <i32> [#uses=1]
+	%tmp231.i.i = mul i32 %0, %tmp6.i.i195.i.i		; <i32> [#uses=1]
+	%tmp13.i197.i.i = add i32 %j1.0212.i.i, %tmp231.i.i		; <i32> [#uses=0]
+	%1 = add i32 %j1.0212.i.i, 1		; <i32> [#uses=1]
+	br i1 undef, label %bb15.i.i, label %bb17.i.i
+
+bb17.i.i:		; preds = %bb16.preheader.i.i, %bb15.i.i
+	br label %bb16.preheader.i.i
+
+bb16.preheader.i.i:		; preds = %bb17.i.i, %bb9.i216.i, %bb7.i215.i
+	br i1 undef, label %bb17.i.i, label %bb15.i.i
+
+bb29.loopexit.i.i:		; preds = %.noexc232.i
+	br label %bb9.i216.i
+
+lpad.i:		; preds = %entry
+	unreachable
+
+lpad120.i:		; preds = %invcont.i
+	unreachable
+
+lpad124.i:		; preds = %invcont1.i
+	unreachable
+
+lpad128.i:		; preds = %invcont3.i
+	unreachable
+
+lpad132.i:		; preds = %invcont4.i
+	unreachable
+
+lpad136.i:		; preds = %invcont6.i
+	unreachable
+
+lpad140.i:		; preds = %bb21.i, %invcont7.i
+	unreachable
+
+lpad144.i:		; preds = %bb10.i168.i, %invcont9.i
+	unreachable
+
+lpad148.i:		; preds = %invcont10.i
+	unreachable
+
+lpad188.i:		; preds = %bb50.i.i.i
+	unreachable
+
+lpad196.i:		; preds = %bb.i191.i
+	unreachable
+
+lpad200.i:		; preds = %bb50.i.i
+	unreachable
+
+lpad204.i:		; preds = %invcont86.i
+	unreachable
+}
+
+declare fastcc void @_ZN11Polynomials19LagrangeEquidistant23generate_complete_basisEj(%"struct.std::vector<Polynomials::Polynomial<double>,std::allocator<Polynomials::Polynomial<double> > >"* noalias nocapture sret, i32)
+
+declare i32 @pthread_once(i32*, void ()*)
+
+declare i8* @pthread_getspecific(i32)
+
+declare i32 @pthread_setspecific(i32, i8*)
+
+declare i32 @pthread_create(i64*, %struct.pthread_attr_t*, i8* (i8*)*, i8*)
+
+declare i32 @pthread_cancel(i64)
+
+declare i32 @pthread_mutex_lock(%struct.pthread_mutex_t*)
+
+declare i32 @pthread_mutex_trylock(%struct.pthread_mutex_t*)
+
+declare i32 @pthread_mutex_unlock(%struct.pthread_mutex_t*)
+
+declare i32 @pthread_mutex_init(%struct.pthread_mutex_t*, %"struct.DataOutBase::GmvFlags"*)
+
+declare i32 @pthread_key_create(i32*, void (i8*)*)
+
+declare i32 @pthread_key_delete(i32)
+
+declare i32 @pthread_mutexattr_init(%"struct.DataOutBase::GmvFlags"*)
+
+declare i32 @pthread_mutexattr_settype(%"struct.DataOutBase::GmvFlags"*, i32)
+
+declare i32 @pthread_mutexattr_destroy(%"struct.DataOutBase::GmvFlags"*)
diff --git a/test/CodeGen/X86/tls3.ll b/test/CodeGen/X86/tls3.ll
index e8d1a34..0618499 100644
--- a/test/CodeGen/X86/tls3.ll
+++ b/test/CodeGen/X86/tls3.ll
@@ -7,7 +7,7 @@
 
 @i = external thread_local global i32		; <i32*> [#uses=2]
 
-define i32 @f() {
+define i32 @f() nounwind {
 entry:
 	%tmp1 = load i32* @i		; <i32> [#uses=1]
 	ret i32 %tmp1
diff --git a/test/CodeGen/X86/x86-64-pic-6.ll b/test/CodeGen/X86/x86-64-pic-6.ll
index b8a91f1..965a550 100644
--- a/test/CodeGen/X86/x86-64-pic-6.ll
+++ b/test/CodeGen/X86/x86-64-pic-6.ll
@@ -5,7 +5,7 @@
 
 @a = internal global i32 0
 
-define i32 @get_a() {
+define i32 @get_a() nounwind {
 entry:
 	%tmp1 = load i32* @a, align 4
 	ret i32 %tmp1
diff --git a/test/CodeGen/X86/x86-64-pic-7.ll b/test/CodeGen/X86/x86-64-pic-7.ll
index 6339790..95b7197f 100644
--- a/test/CodeGen/X86/x86-64-pic-7.ll
+++ b/test/CodeGen/X86/x86-64-pic-7.ll
@@ -2,7 +2,7 @@
 ; RUN:   llc -mtriple=x86_64-pc-linux -relocation-model=pic -o %t1 -f
 ; RUN: grep {movq	f@GOTPCREL(%rip),} %t1
 
-define void ()* @g() {
+define void ()* @g() nounwind {
 entry:
 	ret void ()* @f
 }
diff --git a/test/CodeGen/X86/x86-64-pic-9.ll b/test/CodeGen/X86/x86-64-pic-9.ll
index eacfcc1..175ec4e 100644
--- a/test/CodeGen/X86/x86-64-pic-9.ll
+++ b/test/CodeGen/X86/x86-64-pic-9.ll
@@ -3,12 +3,12 @@
 ; RUN: grep {leaq	f(%rip),} %t1
 ; RUN: not grep GOTPCREL %t1
 
-define void ()* @g() {
+define void ()* @g() nounwind {
 entry:
 	ret void ()* @f
 }
 
-define internal void @f() {
+define internal void @f() nounwind {
 entry:
 	ret void
 }
diff --git a/test/DebugInfo/printdbginfo.ll b/test/DebugInfo/printdbginfo.ll
index bd667a7..b3a871d 100644
--- a/test/DebugInfo/printdbginfo.ll
+++ b/test/DebugInfo/printdbginfo.ll
@@ -11,58 +11,58 @@
 	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
 	%struct.Bar = type { %struct.Foo, i32 }
 	%struct.Foo = type { i32 }
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([14 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([8 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([14 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
+@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([8 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
+@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
 @.str = internal constant [8 x i8] c"tst.cpp\00", section "llvm.metadata"		; <[8 x i8]*> [#uses=1]
 @.str1 = internal constant [13 x i8] c"/home/edwin/\00", section "llvm.metadata"		; <[13 x i8]*> [#uses=1]
 @.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5623) (LLVM build)\00", section "llvm.metadata"		; <[52 x i8]*> [#uses=1]
 @.str3 = internal constant [4 x i8] c"bar\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
 @.str4 = internal constant [9 x i8] c"Bar::bar\00", section "llvm.metadata"		; <[9 x i8]*> [#uses=1]
 @.str5 = internal constant [14 x i8] c"_ZN3Bar3barEv\00", section "llvm.metadata"		; <[14 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str6, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
+@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str6, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
 @.str6 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 393473, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([5 x i8]* @.str7, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
+@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([5 x i8]* @.str7, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
 @.str7 = internal constant [5 x i8] c"this\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 393231, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 64, i64 64, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.compositetype = internal constant %llvm.dbg.compositetype.type { i32 393235, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 5, i64 64, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array36 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
+@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 64, i64 64, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.compositetype = internal constant %llvm.dbg.compositetype.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 5, i64 64, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([5 x { }*]* @llvm.dbg.array36 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
 @.str8 = internal constant [4 x i8] c"Bar\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype9 = internal constant %llvm.dbg.derivedtype.type { i32 393244, { }* null, i8* null, { }* null, i32 0, i64 0, i64 0, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype10 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.compositetype10 = internal constant %llvm.dbg.compositetype.type { i32 393235, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str11, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, i64 32, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array22 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
+@llvm.dbg.derivedtype9 = internal constant %llvm.dbg.derivedtype.type { i32 458780, { }* null, i8* null, { }* null, i32 0, i64 0, i64 0, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype10 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.compositetype10 = internal constant %llvm.dbg.compositetype.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str11, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, i64 32, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array22 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
 @.str11 = internal constant [4 x i8] c"Foo\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype12 = internal constant %llvm.dbg.derivedtype.type { i32 393229, { }* null, i8* getelementptr ([7 x i8]* @.str13, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.derivedtype12 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* null, i8* getelementptr ([7 x i8]* @.str13, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
 @.str13 = internal constant [7 x i8] c"FooVar\00", section "llvm.metadata"		; <[7 x i8]*> [#uses=1]
-@llvm.dbg.subprogram14 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str11, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str15, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype16 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.subprogram14 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str11, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str15, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype16 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
 @.str15 = internal constant [9 x i8] c"Foo::Foo\00", section "llvm.metadata"		; <[9 x i8]*> [#uses=1]
-@llvm.dbg.compositetype16 = internal constant %llvm.dbg.compositetype.type { i32 393237, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
-@llvm.dbg.derivedtype17 = internal constant %llvm.dbg.derivedtype.type { i32 393231, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 64, i64 64, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype10 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype18 = internal constant %llvm.dbg.derivedtype.type { i32 393232, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 64, i64 64, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype10 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.compositetype16 = internal constant %llvm.dbg.compositetype.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
+@llvm.dbg.derivedtype17 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 64, i64 64, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype10 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.derivedtype18 = internal constant %llvm.dbg.derivedtype.type { i32 458768, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 64, i64 64, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype10 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
 @llvm.dbg.array = internal constant [3 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype17 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype18 to { }*) ], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.subprogram19 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str11, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str15, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype20 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.compositetype20 = internal constant %llvm.dbg.compositetype.type { i32 393237, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array21 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
+@llvm.dbg.subprogram19 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str11, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str15, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype20 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.compositetype20 = internal constant %llvm.dbg.compositetype.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array21 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
 @llvm.dbg.array21 = internal constant [2 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype17 to { }*) ], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
 @llvm.dbg.array22 = internal constant [3 x { }*] [ { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype12 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram14 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram19 to { }*) ], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.derivedtype23 = internal constant %llvm.dbg.derivedtype.type { i32 393229, { }* null, i8* getelementptr ([7 x i8]* @.str24, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 6, i64 32, i64 32, i64 32, i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.derivedtype23 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* null, i8* getelementptr ([7 x i8]* @.str24, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 6, i64 32, i64 32, i64 32, i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
 @.str24 = internal constant [7 x i8] c"BarVar\00", section "llvm.metadata"		; <[7 x i8]*> [#uses=1]
-@llvm.dbg.subprogram25 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str8, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str26, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype27 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.subprogram25 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str8, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str26, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype27 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
 @.str26 = internal constant [9 x i8] c"Bar::Bar\00", section "llvm.metadata"		; <[9 x i8]*> [#uses=1]
-@llvm.dbg.compositetype27 = internal constant %llvm.dbg.compositetype.type { i32 393237, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array29 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
-@llvm.dbg.derivedtype28 = internal constant %llvm.dbg.derivedtype.type { i32 393232, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 64, i64 64, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
+@llvm.dbg.compositetype27 = internal constant %llvm.dbg.compositetype.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array29 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
+@llvm.dbg.derivedtype28 = internal constant %llvm.dbg.derivedtype.type { i32 458768, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 64, i64 64, i64 0, i32 0, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
 @llvm.dbg.array29 = internal constant [3 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype28 to { }*) ], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.subprogram30 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str8, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str26, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype31 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.compositetype31 = internal constant %llvm.dbg.compositetype.type { i32 393237, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array32 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
+@llvm.dbg.subprogram30 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str8, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str26, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype31 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.compositetype31 = internal constant %llvm.dbg.compositetype.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array32 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
 @llvm.dbg.array32 = internal constant [2 x { }*] [ { }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) ], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.subprogram33 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([14 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype34 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.compositetype34 = internal constant %llvm.dbg.compositetype.type { i32 393237, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array35 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
+@llvm.dbg.subprogram33 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([14 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 12, { }* bitcast (%llvm.dbg.compositetype.type* @llvm.dbg.compositetype34 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.compositetype34 = internal constant %llvm.dbg.compositetype.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 8, i64 8, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array35 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.compositetype.type*> [#uses=1]
 @llvm.dbg.array35 = internal constant [2 x { }*] [ { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) ], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
 @llvm.dbg.array36 = internal constant [5 x { }*] [ { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype9 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype23 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram25 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram30 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram33 to { }*) ], section "llvm.metadata"		; <[5 x { }*]*> [#uses=1]
-@llvm.dbg.variable37 = internal constant %llvm.dbg.variable.type { i32 393472, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([4 x i8]* @.str38, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 15, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
+@llvm.dbg.variable37 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([4 x i8]* @.str38, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 15, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
 @.str38 = internal constant [4 x i8] c"tmp\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram39 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str40, i32 0, i32 0), i8* getelementptr ([7 x i8]* @.str40, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str41, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 21, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
+@llvm.dbg.subprogram39 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str40, i32 0, i32 0), i8* getelementptr ([7 x i8]* @.str40, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str41, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 21, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
 @.str40 = internal constant [7 x i8] c"foobar\00", section "llvm.metadata"		; <[7 x i8]*> [#uses=1]
 @.str41 = internal constant [11 x i8] c"_Z6foobarv\00", section "llvm.metadata"		; <[11 x i8]*> [#uses=1]
-@llvm.dbg.variable42 = internal constant %llvm.dbg.variable.type { i32 393472, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram39 to { }*), i8* getelementptr ([4 x i8]* @.str38, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 23, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
+@llvm.dbg.variable42 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram39 to { }*), i8* getelementptr ([4 x i8]* @.str38, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 23, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=0]
 
 define i32 @_ZN3Bar3barEv(%struct.Bar* %this1) nounwind {
 entry:
diff --git a/test/Feature/mdnode.ll b/test/Feature/mdnode.ll
new file mode 100644
index 0000000..d63b46e
--- /dev/null
+++ b/test/Feature/mdnode.ll
@@ -0,0 +1,4 @@
+; RUN: llvm-as < %s | llc -f -o /dev/null
+@llvm.foo =  constant metadata !{i17 123, null, metadata !"foobar"}
+@llvm.bar =  constant metadata !"barbar"
+
diff --git a/test/MC/AsmParser/assignment.s b/test/MC/AsmParser/assignment.s
new file mode 100644
index 0000000..8e6ff34
--- /dev/null
+++ b/test/MC/AsmParser/assignment.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc %s > %t
+
+# RUN: grep -A 2 TEST0 %t > %t2
+# RUN: grep "a = 0" %t2
+TEST0:  
+        a = 0
+        
+\ No newline at end of file
diff --git a/test/MC/AsmParser/dg.exp b/test/MC/AsmParser/dg.exp
new file mode 100644
index 0000000..ebd8418
--- /dev/null
+++ b/test/MC/AsmParser/dg.exp
@@ -0,0 +1,3 @@
+load_lib llvm.exp
+
+RunLLVMTests [lsort [glob -nocomplain $srcdir/$subdir/*.{s}]]
diff --git a/test/MC/AsmParser/directive_ascii.s b/test/MC/AsmParser/directive_ascii.s
new file mode 100644
index 0000000..95e194a
--- /dev/null
+++ b/test/MC/AsmParser/directive_ascii.s
@@ -0,0 +1,25 @@
+# RUN: llvm-mc %s > %t
+
+# RUN: grep -A 1 TEST0 %t > %t2
+# RUN: not grep ".byte" %t2
+TEST0:  
+        .ascii
+
+# RUN: grep -A 1 TEST1 %t > %t2
+# RUN: not grep "byte" %t2
+TEST1:  
+        .asciz
+
+# RUN: grep -A 2 TEST2 %t > %t2
+# RUN: grep ".byte 65" %t2 | count 1
+TEST2:  
+        .ascii "A"
+
+# RUN: grep -A 5 TEST3 %t > %t2
+# RUN: grep ".byte 66" %t2 | count 1
+# RUN: grep ".byte 67" %t2 | count 1
+# RUN: grep ".byte 0" %t2 | count 2
+TEST3:  
+        .asciz "B", "C"
+
+       
+\ No newline at end of file
diff --git a/test/MC/AsmParser/directive_fill.s b/test/MC/AsmParser/directive_fill.s
new file mode 100644
index 0000000..ec8bdf2
--- /dev/null
+++ b/test/MC/AsmParser/directive_fill.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc %s > %t
+
+# RUN: grep -A 2 TEST0 %t > %t2
+# RUN: grep ".byte 10" %t2 | count 1
+TEST0:  
+        .fill 1, 1, 10
+
+# RUN: grep -A 3 TEST1 %t > %t2
+# RUN: grep ".short 3" %t2 | count 2
+TEST1:  
+        .fill 2, 2, 3
diff --git a/test/MC/AsmParser/directive_org.s b/test/MC/AsmParser/directive_org.s
new file mode 100644
index 0000000..ac50f63
--- /dev/null
+++ b/test/MC/AsmParser/directive_org.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc %s > %t
+
+# RUN: grep -A 2 TEST0 %t > %t2
+# RUN: grep ".org 1, 0" %t2 | count 1
+TEST0:  
+        .org 1
+
+# RUN: grep -A 2 TEST1 %t > %t2
+# RUN: grep ".org 1, 3" %t2 | count 1
+TEST1:  
+        .org 1, 3
diff --git a/test/MC/AsmParser/directive_set.s b/test/MC/AsmParser/directive_set.s
new file mode 100644
index 0000000..51119a6
--- /dev/null
+++ b/test/MC/AsmParser/directive_set.s
@@ -0,0 +1,7 @@
+# RUN: llvm-mc %s > %t
+
+# RUN: grep -A 2 TEST0 %t > %t2
+# RUN: grep ".set a, 0" %t2
+TEST0:  
+        .set a, 0
+        
+\ No newline at end of file
diff --git a/test/MC/AsmParser/directive_space.s b/test/MC/AsmParser/directive_space.s
new file mode 100644
index 0000000..6159775
--- /dev/null
+++ b/test/MC/AsmParser/directive_space.s
@@ -0,0 +1,11 @@
+# RUN: llvm-mc %s > %t
+
+# RUN: grep -A 2 TEST0 %t > %t2
+# RUN: grep ".byte 0" %t2 | count 1
+TEST0:  
+        .space 1
+
+# RUN: grep -A 3 TEST1 %t > %t2
+# RUN: grep ".byte 3" %t2 | count 2
+TEST1:  
+        .space 2, 3
diff --git a/test/MC/AsmParser/directive_values.s b/test/MC/AsmParser/directive_values.s
new file mode 100644
index 0000000..39ba068
--- /dev/null
+++ b/test/MC/AsmParser/directive_values.s
@@ -0,0 +1,21 @@
+# RUN: llvm-mc %s > %t
+
+# RUN: grep -A 2 TEST0 %t > %t2
+# RUN: grep ".byte 0" %t2 | count 1
+TEST0:  
+        .byte 0
+
+# RUN: grep -A 2 TEST1 %t > %t2
+# RUN: grep ".short 3" %t2 | count 1
+TEST1:  
+        .short 3
+
+# RUN: grep -A 2 TEST2 %t > %t2
+# RUN: grep ".long 8" %t2 | count 1
+TEST2:  
+        .long 8
+
+# RUN: grep -A 2 TEST3 %t > %t2
+# RUN: grep ".quad 9" %t2 | count 1
+TEST3:  
+        .quad 9
diff --git a/test/Makefile b/test/Makefile
index 82422b5..e02daa0 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -22,6 +22,10 @@ include Makefile.tests
 # DejaGNU testing support
 #===------------------------------------------------------------------------===#
 
+ifneq ($(GREP_OPTIONS),)
+$(warning GREP_OPTIONS environment variable may interfere with test results)
+endif
+
 ifdef VERBOSE
 RUNTESTFLAGS := $(VERBOSE)
 endif
diff --git a/test/Transforms/IndVarSimplify/loop-invariant-step.ll b/test/Transforms/IndVarSimplify/loop-invariant-step.ll
new file mode 100644
index 0000000..40156ea
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/loop-invariant-step.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-as < %s | opt -loop-index-split -instcombine -indvars -disable-output
+; PR4455
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+declare i8* @fast_memcpy(i8*, i8*, i64)
+
+define void @dvdsub_decode() nounwind {
+entry:
+	br label %LoopA
+
+LoopA:		; preds = %LoopA, %entry
+	%x1.0.i17 = phi i32 [ %t0, %LoopA ], [ 0, %entry ]		; <i32> [#uses=2]
+	%t0 = add i32 %x1.0.i17, 1		; <i32> [#uses=1]
+	br i1 undef, label %LoopA, label %middle
+
+middle:		; preds = %LoopA
+	%t1 = sub i32 0, %x1.0.i17		; <i32> [#uses=1]
+	%t2 = add i32 %t1, 1		; <i32> [#uses=1]
+	br label %LoopB
+
+LoopB:		; preds = %LoopB, %middle
+	%y.029.i = phi i32 [ 0, %middle ], [ %t7, %LoopB ]		; <i32> [#uses=2]
+	%t3 = mul i32 %y.029.i, %t2		; <i32> [#uses=1]
+	%t4 = sext i32 %t3 to i64		; <i64> [#uses=1]
+	%t5 = getelementptr i8* null, i64 %t4		; <i8*> [#uses=1]
+	%t6 = call i8* @fast_memcpy(i8* %t5, i8* undef, i64 undef) nounwind		; <i8*> [#uses=0]
+	%t7 = add i32 %y.029.i, 1		; <i32> [#uses=1]
+	br i1 undef, label %LoopB, label %exit
+
+exit:
+	ret void
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate7.ll b/test/Transforms/IndVarSimplify/loop_evaluate7.ll
new file mode 100644
index 0000000..6e31c55
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/loop_evaluate7.ll
@@ -0,0 +1,61 @@
+; RUN: llvm-as < %s | opt -indvars
+; PR4436
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define i8* @string_expandtabs(i32 %n, i8* %m) nounwind {
+entry:
+	br i1 undef, label %bb33, label %bb1
+
+bb1:		; preds = %entry
+	br i1 undef, label %overflow1, label %bb15
+
+bb15:		; preds = %bb1
+	br i1 undef, label %bb33, label %bb17
+
+bb17:		; preds = %bb15
+	br label %bb30
+
+bb19:		; preds = %bb30
+	br i1 undef, label %bb20, label %bb29
+
+bb20:		; preds = %bb19
+	%0 = load i32* undef, align 4		; <i32> [#uses=1]
+	%1 = sub i32 %0, %n		; <i32> [#uses=1]
+	br label %bb23
+
+bb21:		; preds = %bb23
+	%2 = icmp ult i8* %q.0, %m		; <i1> [#uses=1]
+	br i1 %2, label %bb22, label %overflow2
+
+bb22:		; preds = %bb21
+	%3 = getelementptr i8* %q.0, i32 1		; <i8*> [#uses=1]
+	br label %bb23
+
+bb23:		; preds = %bb22, %bb20
+	%i.2 = phi i32 [ %1, %bb20 ], [ %4, %bb22 ]		; <i32> [#uses=1]
+	%q.0 = phi i8* [ undef, %bb20 ], [ %3, %bb22 ]		; <i8*> [#uses=3]
+	%4 = add i32 %i.2, -1		; <i32> [#uses=2]
+	%5 = icmp eq i32 %4, -1		; <i1> [#uses=1]
+	br i1 %5, label %bb29, label %bb21
+
+bb29:		; preds = %bb23, %bb19
+	%q.1 = phi i8* [ undef, %bb19 ], [ %q.0, %bb23 ]		; <i8*> [#uses=0]
+	br label %bb30
+
+bb30:		; preds = %bb29, %bb17
+	br i1 undef, label %bb19, label %bb33
+
+overflow2:		; preds = %bb21
+	br i1 undef, label %bb32, label %overflow1
+
+bb32:		; preds = %overflow2
+	br label %overflow1
+
+overflow1:		; preds = %bb32, %overflow2, %bb1
+	ret i8* null
+
+bb33:		; preds = %bb30, %bb15, %entry
+	ret i8* undef
+}
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate8.ll b/test/Transforms/IndVarSimplify/loop_evaluate8.ll
new file mode 100644
index 0000000..fa2f9e5
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/loop_evaluate8.ll
@@ -0,0 +1,63 @@
+; RUN: llvm-as < %s | opt -indvars | llvm-dis | not grep select
+
+; This loop has backedge-taken-count zero. Indvars shouldn't expand any
+; instructions to compute a trip count.
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+target triple = "i386-pc-linux-gnu"
+
+define i8* @string_expandtabs() nounwind {
+entry:
+	br i1 undef, label %bb33, label %bb1
+
+bb1:		; preds = %entry
+	br i1 undef, label %overflow1, label %bb15
+
+bb15:		; preds = %bb1
+	br i1 undef, label %bb33, label %bb17
+
+bb17:		; preds = %bb15
+	br label %bb30
+
+bb19:		; preds = %bb30
+	br i1 undef, label %bb20, label %bb29
+
+bb20:		; preds = %bb19
+	%0 = load i32* undef, align 4		; <i32> [#uses=1]
+	%1 = sub i32 %0, undef		; <i32> [#uses=1]
+	br label %bb23
+
+bb21:		; preds = %bb23
+	%2 = icmp ult i8* %q.0, undef		; <i1> [#uses=1]
+	br i1 %2, label %bb22, label %overflow2
+
+bb22:		; preds = %bb21
+	%3 = getelementptr i8* %q.0, i32 1		; <i8*> [#uses=1]
+	br label %bb23
+
+bb23:		; preds = %bb22, %bb20
+	%i.2 = phi i32 [ %1, %bb20 ], [ %4, %bb22 ]		; <i32> [#uses=1]
+	%q.0 = phi i8* [ undef, %bb20 ], [ %3, %bb22 ]		; <i8*> [#uses=3]
+	%4 = add i32 %i.2, -1		; <i32> [#uses=2]
+	%5 = icmp eq i32 %4, -1		; <i1> [#uses=1]
+	br i1 %5, label %bb29, label %bb21
+
+bb29:		; preds = %bb23, %bb19
+	%q.1 = phi i8* [ undef, %bb19 ], [ %q.0, %bb23 ]		; <i8*> [#uses=0]
+	br label %bb30
+
+bb30:		; preds = %bb29, %bb17
+	br i1 undef, label %bb19, label %bb33
+
+overflow2:		; preds = %bb21
+	br i1 undef, label %bb32, label %overflow1
+
+bb32:		; preds = %overflow2
+	br label %overflow1
+
+overflow1:		; preds = %bb32, %overflow2, %bb1
+	ret i8* null
+
+bb33:		; preds = %bb30, %bb15, %entry
+	ret i8* undef
+}
diff --git a/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
new file mode 100644
index 0000000..7119cbb
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/phi-uses-value-multiple-times.ll
@@ -0,0 +1,33 @@
+; RUN: llvm-as < %s | opt -indvars
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+@ue = external global i64
+
+define i32 @foo() nounwind {
+entry:
+	br label %bb38.i
+
+bb14.i27:
+	%t0 = load i64* @ue, align 8
+	%t1 = sub i64 %t0, %i.0.i35
+	%t2 = add i64 %t1, 1
+	br i1 undef, label %bb15.i28, label %bb19.i31
+
+bb15.i28:
+	br label %bb19.i31
+
+bb19.i31:
+	%y.0.i = phi i64 [ %t2, %bb15.i28 ], [ %t2, %bb14.i27 ]
+	br label %bb35.i
+
+bb35.i:
+	br i1 undef, label %bb37.i, label %bb14.i27
+
+bb37.i:
+	%t3 = add i64 %i.0.i35, 1
+	br label %bb38.i
+
+bb38.i:
+	%i.0.i35 = phi i64 [ 1, %entry ], [ %t3, %bb37.i ]
+	br label %bb35.i
+}
diff --git a/test/Transforms/LCSSA/invoke-dest.ll b/test/Transforms/LCSSA/invoke-dest.ll
new file mode 100644
index 0000000..5c6c7a0
--- /dev/null
+++ b/test/Transforms/LCSSA/invoke-dest.ll
@@ -0,0 +1,143 @@
+; RUN: llvm-as < %s | opt -lcssa
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
+
+@.str12 = external constant [3 x i8], align 1		; <[3 x i8]*> [#uses=1]
+@.str17175 = external constant [4 x i8], align 1		; <[4 x i8]*> [#uses=1]
+@.str21179 = external constant [12 x i8], align 1		; <[12 x i8]*> [#uses=1]
+@.str25183 = external constant [10 x i8], align 1		; <[10 x i8]*> [#uses=1]
+@.str32190 = external constant [92 x i8], align 1		; <[92 x i8]*> [#uses=1]
+@.str41 = external constant [25 x i8], align 1		; <[25 x i8]*> [#uses=1]
+
+define void @_ZN8EtherBus10initializeEv() {
+entry:
+	br i1 undef, label %_ZN7cObjectnwEj.exit, label %bb.i
+
+bb.i:		; preds = %entry
+	br label %_ZN7cObjectnwEj.exit
+
+_ZN7cObjectnwEj.exit:		; preds = %bb.i, %entry
+	invoke void @_ZN7cObjectC2EPKc(i8* undef, i8* getelementptr ([12 x i8]* @.str21179, i32 0, i32 0))
+			to label %bb1 unwind label %lpad
+
+bb1:		; preds = %_ZN7cObjectnwEj.exit
+	br i1 undef, label %_ZNK5cGate4sizeEv.exit, label %bb.i110
+
+bb.i110:		; preds = %bb1
+	br label %_ZNK5cGate4sizeEv.exit
+
+_ZNK5cGate4sizeEv.exit:		; preds = %bb.i110, %bb1
+	br i1 undef, label %_ZNK5cGate4sizeEv.exit122, label %bb.i120
+
+bb.i120:		; preds = %_ZNK5cGate4sizeEv.exit
+	br label %_ZNK5cGate4sizeEv.exit122
+
+_ZNK5cGate4sizeEv.exit122:		; preds = %bb.i120, %_ZNK5cGate4sizeEv.exit
+	br i1 undef, label %bb8, label %bb2
+
+bb2:		; preds = %_ZNK5cGate4sizeEv.exit122
+	unreachable
+
+bb8:		; preds = %_ZNK5cGate4sizeEv.exit122
+	%tmp = invoke i8* @_ZN7cModule3parEPKc(i8* undef, i8* getelementptr ([10 x i8]* @.str25183, i32 0, i32 0))
+			to label %invcont9 unwind label %lpad119		; <i8*> [#uses=1]
+
+invcont9:		; preds = %bb8
+	%tmp1 = invoke i8* @_ZN4cPar11stringValueEv(i8* %tmp)
+			to label %invcont10 unwind label %lpad119		; <i8*> [#uses=1]
+
+invcont10:		; preds = %invcont9
+	invoke void @_ZN8EtherBus8tokenizeEPKcRSt6vectorIdSaIdEE(i8* null, i8* %tmp1, i8* undef)
+			to label %invcont11 unwind label %lpad119
+
+invcont11:		; preds = %invcont10
+	br i1 undef, label %bb12, label %bb18
+
+bb12:		; preds = %invcont11
+	invoke void (i8*, i8*, ...)* @_ZN6cEnvir6printfEPKcz(i8* null, i8* getelementptr ([3 x i8]* @.str12, i32 0, i32 0), i32 undef)
+			to label %bb.i.i159 unwind label %lpad119
+
+bb.i.i159:		; preds = %bb12
+	unreachable
+
+bb18:		; preds = %invcont11
+	br i1 undef, label %bb32, label %bb34
+
+bb32:		; preds = %bb18
+	br i1 undef, label %bb.i.i123, label %bb34
+
+bb.i.i123:		; preds = %bb32
+	br label %bb34
+
+bb34:		; preds = %bb.i.i123, %bb32, %bb18
+	%tmp2 = invoke i8* @_Znaj(i32 undef)
+			to label %invcont35 unwind label %lpad119		; <i8*> [#uses=0]
+
+invcont35:		; preds = %bb34
+	br i1 undef, label %bb49, label %bb61
+
+bb49:		; preds = %invcont35
+	invoke void (i8*, i8*, ...)* @_ZNK13cSimpleModule5errorEPKcz(i8* undef, i8* getelementptr ([92 x i8]* @.str32190, i32 0, i32 0))
+			to label %bb51 unwind label %lpad119
+
+bb51:		; preds = %bb49
+	unreachable
+
+bb61:		; preds = %invcont35
+	br label %bb106
+
+.noexc:		; preds = %bb106
+	invoke void @_ZN7cObjectC2EPKc(i8* undef, i8* getelementptr ([25 x i8]* @.str41, i32 0, i32 0))
+			to label %bb102 unwind label %lpad123
+
+bb102:		; preds = %.noexc
+	invoke void undef(i8* undef, i8 zeroext 1)
+			to label %invcont103 unwind label %lpad119
+
+invcont103:		; preds = %bb102
+	invoke void undef(i8* undef, double 1.000000e+07)
+			to label %invcont104 unwind label %lpad119
+
+invcont104:		; preds = %invcont103
+	%tmp3 = invoke i32 @_ZN13cSimpleModule11sendDelayedEP8cMessagedPKci(i8* undef, i8* undef, double 0.000000e+00, i8* getelementptr ([4 x i8]* @.str17175, i32 0, i32 0), i32 undef)
+			to label %invcont105 unwind label %lpad119		; <i32> [#uses=0]
+
+invcont105:		; preds = %invcont104
+	br label %bb106
+
+bb106:		; preds = %invcont105, %bb61
+	%tmp4 = invoke i8* @_Znaj(i32 124)
+			to label %.noexc unwind label %lpad119		; <i8*> [#uses=1]
+
+lpad:		; preds = %_ZN7cObjectnwEj.exit
+	br label %Unwind
+
+lpad119:		; preds = %bb106, %invcont104, %invcont103, %bb102, %bb49, %bb34, %bb12, %invcont10, %invcont9, %bb8
+	unreachable
+
+lpad123:		; preds = %.noexc
+	%tmp5 = icmp eq i8* %tmp4, null		; <i1> [#uses=1]
+	br i1 %tmp5, label %Unwind, label %bb.i2
+
+bb.i2:		; preds = %lpad123
+	br label %Unwind
+
+Unwind:		; preds = %bb.i2, %lpad123, %lpad
+	unreachable
+}
+
+declare void @_ZN8EtherBus8tokenizeEPKcRSt6vectorIdSaIdEE(i8* nocapture, i8*, i8*)
+
+declare i8* @_Znaj(i32)
+
+declare void @_ZN6cEnvir6printfEPKcz(i8* nocapture, i8* nocapture, ...)
+
+declare void @_ZNK13cSimpleModule5errorEPKcz(i8* nocapture, i8* nocapture, ...) noreturn
+
+declare i8* @_ZN7cModule3parEPKc(i8*, i8*)
+
+declare i32 @_ZN13cSimpleModule11sendDelayedEP8cMessagedPKci(i8*, i8*, double, i8*, i32)
+
+declare void @_ZN7cObjectC2EPKc(i8*, i8*)
+
+declare i8* @_ZN4cPar11stringValueEv(i8*)
diff --git a/tools/gold/Makefile b/tools/gold/Makefile
index f282a35..65e99bf 100644
--- a/tools/gold/Makefile
+++ b/tools/gold/Makefile
@@ -18,7 +18,6 @@ include $(LEVEL)/Makefile.config
 LINK_LIBS_IN_SHARED=1
 SHARED_LIBRARY = 1
 BUILD_ARCHIVE = 0
-DONT_BUILD_RELINKED = 1
 
 LINK_COMPONENTS :=
 LIBS += -llto
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index afd3c5a..2553674 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -18,9 +18,10 @@
 #include "llvm/Type.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/CodeGen/LinkAllCodegenComponents.h"
-#include "llvm/ExecutionEngine/JIT.h"
-#include "llvm/ExecutionEngine/Interpreter.h"
 #include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/Interpreter.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/ExecutionEngine/JITEventListener.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -149,6 +150,8 @@ int main(int argc, char **argv, char * const *envp) {
     exit(1);
   }
 
+  EE->RegisterJITEventListener(createMacOSJITEventListener());
+
   if (NoLazyCompilation)
     EE->DisableLazyCompilation();
 
diff --git a/tools/llvm-mc/AsmLexer.cpp b/tools/llvm-mc/AsmLexer.cpp
index dbd3c06..db86825 100644
--- a/tools/llvm-mc/AsmLexer.cpp
+++ b/tools/llvm-mc/AsmLexer.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "AsmLexer.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Config/config.h"  // for strtoull.
@@ -20,11 +21,21 @@
 #include <cstdlib>
 using namespace llvm;
 
+static StringSet<> &getSS(void *TheSS) {
+  return *(StringSet<>*)TheSS;  // Undo the void* erasure used by AsmLexer.
+}
+
 AsmLexer::AsmLexer(SourceMgr &SM) : SrcMgr(SM) {
   CurBuffer = 0;
   CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
   CurPtr = CurBuf->getBufferStart();
   TokStart = 0;
+  // Heap-allocate the set that uniques token strings; ~AsmLexer frees it.
+  TheStringSet = new StringSet<>();
+}
+
+AsmLexer::~AsmLexer() {
+  delete &getSS(TheStringSet);
 }
 
 SMLoc AsmLexer::getLoc() const {
@@ -75,7 +86,9 @@ asmtok::TokKind AsmLexer::LexIdentifier() {
   while (isalnum(*CurPtr) || *CurPtr == '_' || *CurPtr == '$' ||
          *CurPtr == '.' || *CurPtr == '@')
     ++CurPtr;
-  CurStrVal.assign(TokStart, CurPtr);
+  // Unique string.
+  CurStrVal =
+    getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
   return asmtok::Identifier;
 }
 
@@ -86,7 +99,10 @@ asmtok::TokKind AsmLexer::LexPercent() {
   
   while (isalnum(*CurPtr))
     ++CurPtr;
-  CurStrVal.assign(TokStart, CurPtr);   // Include %
+  
+  // Unique string.
+  CurStrVal =
+    getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
   return asmtok::Register;
 }
 
@@ -208,7 +224,9 @@ asmtok::TokKind AsmLexer::LexQuote() {
     CurChar = getNextChar();
   }
   
-  CurStrVal.assign(TokStart, CurPtr);   // include quotes.
+  // Unique string, include quotes for now.
+  CurStrVal =
+    getSS(TheStringSet).GetOrCreateValue(TokStart, CurPtr, 0).getKeyData();
   return asmtok::String;
 }
 
@@ -244,6 +262,7 @@ asmtok::TokKind AsmLexer::LexToken() {
   case '*': return asmtok::Star;
   case ',': return asmtok::Comma;
   case '$': return asmtok::Dollar;
+  case '=': return asmtok::Equal;
   case '|': return asmtok::Pipe;
   case '^': return asmtok::Caret;
   case '&': return asmtok::Amp;
diff --git a/tools/llvm-mc/AsmLexer.h b/tools/llvm-mc/AsmLexer.h
index 23c5f85..19a1b38 100644
--- a/tools/llvm-mc/AsmLexer.h
+++ b/tools/llvm-mc/AsmLexer.h
@@ -42,7 +42,7 @@ namespace asmtok {
     Plus, Minus, Tilde,
     Slash,    // '/'
     LParen, RParen,
-    Star, Comma, Dollar,
+    Star, Comma, Dollar, Equal,
     
     Pipe, Caret, Amp, Exclaim,
     Percent, LessLess, GreaterGreater
@@ -55,20 +55,24 @@ class AsmLexer {
   
   const char *CurPtr;
   const MemoryBuffer *CurBuf;
+  // A llvm::StringSet<>, which provides uniqued and null-terminated strings.
+  void *TheStringSet;
   
   // Information about the current token.
   const char *TokStart;
   asmtok::TokKind CurKind;
-  std::string CurStrVal;  // This is valid for Identifier.
+  const char *CurStrVal;  // This is valid for Identifier.
   int64_t CurIntVal;
   
   /// CurBuffer - This is the current buffer index we're lexing from as managed
   /// by the SourceMgr object.
   int CurBuffer;
   
+  void operator=(const AsmLexer&); // DO NOT IMPLEMENT
+  AsmLexer(const AsmLexer&);       // DO NOT IMPLEMENT
 public:
   AsmLexer(SourceMgr &SrcMgr);
-  ~AsmLexer() {}
+  ~AsmLexer();
   
   asmtok::TokKind Lex() {
     return CurKind = LexToken();
@@ -78,7 +82,7 @@ public:
   bool is(asmtok::TokKind K) const { return CurKind == K; }
   bool isNot(asmtok::TokKind K) const { return CurKind != K; }
   
-  const std::string &getCurStrVal() const {
+  const char *getCurStrVal() const {
     assert((CurKind == asmtok::Identifier || CurKind == asmtok::Register ||
             CurKind == asmtok::String) &&
            "This token doesn't have a string value");
diff --git a/tools/llvm-mc/AsmParser.cpp b/tools/llvm-mc/AsmParser.cpp
index 04c1d03..2b697a6 100644
--- a/tools/llvm-mc/AsmParser.cpp
+++ b/tools/llvm-mc/AsmParser.cpp
@@ -12,7 +12,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "AsmParser.h"
+#include "llvm/MC/MCContext.h"
 #include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
 #include "llvm/Support/SourceMgr.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
@@ -177,29 +179,167 @@ bool AsmParser::ParseStatement() {
   
   // If we have an identifier, handle it as the key symbol.
   SMLoc IDLoc = Lexer.getLoc();
-  std::string IDVal = Lexer.getCurStrVal();
+  const char *IDVal = Lexer.getCurStrVal();
   
   // Consume the identifier, see what is after it.
-  if (Lexer.Lex() == asmtok::Colon) {
+  switch (Lexer.Lex()) {
+  case asmtok::Colon:
     // identifier ':'   -> Label.
     Lexer.Lex();
+    
+    // Since we saw a label, create a symbol and emit it.
+    // FIXME: If the label starts with L it is an assembler temporary label.
+    // Why does the client of this api need to know this?
+    Out.EmitLabel(Ctx.GetOrCreateSymbol(IDVal));
+    
     return ParseStatement();
+
+  case asmtok::Equal:
+    // identifier '=' ... -> assignment statement
+    Lexer.Lex();
+
+    return ParseAssignment(IDVal, false);
+
+  default: // Normal instruction or directive.
+    break;
   }
   
   // Otherwise, we have a normal instruction or directive.  
   if (IDVal[0] == '.') {
+    // FIXME: This should be driven based on a hash lookup and callback.
+    if (!strcmp(IDVal, ".section"))
+      return ParseDirectiveDarwinSection();
+    if (!strcmp(IDVal, ".text"))
+      // FIXME: This changes behavior based on the -static flag to the
+      // assembler.
+      return ParseDirectiveSectionSwitch("__TEXT,__text",
+                                         "regular,pure_instructions");
+    if (!strcmp(IDVal, ".const"))
+      return ParseDirectiveSectionSwitch("__TEXT,__const");
+    if (!strcmp(IDVal, ".static_const"))
+      return ParseDirectiveSectionSwitch("__TEXT,__static_const");
+    if (!strcmp(IDVal, ".cstring"))
+      return ParseDirectiveSectionSwitch("__TEXT,__cstring", 
+                                         "cstring_literals");
+    if (!strcmp(IDVal, ".literal4"))
+      return ParseDirectiveSectionSwitch("__TEXT,__literal4", "4byte_literals");
+    if (!strcmp(IDVal, ".literal8"))
+      return ParseDirectiveSectionSwitch("__TEXT,__literal8", "8byte_literals");
+    if (!strcmp(IDVal, ".literal16"))
+      return ParseDirectiveSectionSwitch("__TEXT,__literal16",
+                                         "16byte_literals");
+    if (!strcmp(IDVal, ".constructor"))
+      return ParseDirectiveSectionSwitch("__TEXT,__constructor");
+    if (!strcmp(IDVal, ".destructor"))
+      return ParseDirectiveSectionSwitch("__TEXT,__destructor");
+    if (!strcmp(IDVal, ".fvmlib_init0"))
+      return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init0");
+    if (!strcmp(IDVal, ".fvmlib_init1"))
+      return ParseDirectiveSectionSwitch("__TEXT,__fvmlib_init1");
+    if (!strcmp(IDVal, ".symbol_stub")) // FIXME: Different on PPC.
+      return ParseDirectiveSectionSwitch("__IMPORT,__jump_table,symbol_stubs",
+                                    "self_modifying_code+pure_instructions,5");
+    // FIXME: .picsymbol_stub on PPC.
+    if (!strcmp(IDVal, ".data"))
+      return ParseDirectiveSectionSwitch("__DATA,__data");
+    if (!strcmp(IDVal, ".static_data"))
+      return ParseDirectiveSectionSwitch("__DATA,__static_data");
+    if (!strcmp(IDVal, ".non_lazy_symbol_pointer"))
+      return ParseDirectiveSectionSwitch("__DATA,__nl_symbol_pointer",
+                                         "non_lazy_symbol_pointers");
+    if (!strcmp(IDVal, ".lazy_symbol_pointer"))
+      return ParseDirectiveSectionSwitch("__DATA,__la_symbol_pointer",
+                                         "lazy_symbol_pointers");
+    if (!strcmp(IDVal, ".dyld"))
+      return ParseDirectiveSectionSwitch("__DATA,__dyld");
+    if (!strcmp(IDVal, ".mod_init_func"))
+      return ParseDirectiveSectionSwitch("__DATA,__mod_init_func",
+                                         "mod_init_funcs");
+    if (!strcmp(IDVal, ".mod_term_func"))
+      return ParseDirectiveSectionSwitch("__DATA,__mod_term_func",
+                                         "mod_term_funcs");
+    if (!strcmp(IDVal, ".const_data"))
+      return ParseDirectiveSectionSwitch("__DATA,__const", "regular");
+    
+    
+    // FIXME: Verify attributes on sections.
+    if (!strcmp(IDVal, ".objc_class"))
+      return ParseDirectiveSectionSwitch("__OBJC,__class");
+    if (!strcmp(IDVal, ".objc_meta_class"))
+      return ParseDirectiveSectionSwitch("__OBJC,__meta_class");
+    if (!strcmp(IDVal, ".objc_cat_cls_meth"))
+      return ParseDirectiveSectionSwitch("__OBJC,__cat_cls_meth");
+    if (!strcmp(IDVal, ".objc_cat_inst_meth"))
+      return ParseDirectiveSectionSwitch("__OBJC,__cat_inst_meth");
+    if (!strcmp(IDVal, ".objc_protocol"))
+      return ParseDirectiveSectionSwitch("__OBJC,__protocol");
+    if (!strcmp(IDVal, ".objc_string_object"))
+      return ParseDirectiveSectionSwitch("__OBJC,__string_object");
+    if (!strcmp(IDVal, ".objc_cls_meth"))
+      return ParseDirectiveSectionSwitch("__OBJC,__cls_meth");
+    if (!strcmp(IDVal, ".objc_inst_meth"))
+      return ParseDirectiveSectionSwitch("__OBJC,__inst_meth");
+    if (!strcmp(IDVal, ".objc_cls_refs"))
+      return ParseDirectiveSectionSwitch("__OBJC,__cls_refs");
+    if (!strcmp(IDVal, ".objc_message_refs"))
+      return ParseDirectiveSectionSwitch("__OBJC,__message_refs");
+    if (!strcmp(IDVal, ".objc_symbols"))
+      return ParseDirectiveSectionSwitch("__OBJC,__symbols");
+    if (!strcmp(IDVal, ".objc_category"))
+      return ParseDirectiveSectionSwitch("__OBJC,__category");
+    if (!strcmp(IDVal, ".objc_class_vars"))
+      return ParseDirectiveSectionSwitch("__OBJC,__class_vars");
+    if (!strcmp(IDVal, ".objc_instance_vars"))
+      return ParseDirectiveSectionSwitch("__OBJC,__instance_vars");
+    if (!strcmp(IDVal, ".objc_module_info"))
+      return ParseDirectiveSectionSwitch("__OBJC,__module_info");
+    if (!strcmp(IDVal, ".objc_class_names"))
+      return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
+    if (!strcmp(IDVal, ".objc_meth_var_types"))
+      return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
+    if (!strcmp(IDVal, ".objc_meth_var_names"))
+      return ParseDirectiveSectionSwitch("__TEXT,__cstring","cstring_literals");
+    if (!strcmp(IDVal, ".objc_selector_strs"))
+      return ParseDirectiveSectionSwitch("__OBJC,__selector_strs");
+    
+    // Assembler features
+    if (!strcmp(IDVal, ".set"))
+      return ParseDirectiveSet();
+
+    // Data directives
+
+    if (!strcmp(IDVal, ".ascii"))
+      return ParseDirectiveAscii(false);
+    if (!strcmp(IDVal, ".asciz"))
+      return ParseDirectiveAscii(true);
+
+    // FIXME: Target hooks for size? Also for "word", "hword".
+    if (!strcmp(IDVal, ".byte"))
+      return ParseDirectiveValue(1);
+    if (!strcmp(IDVal, ".short"))
+      return ParseDirectiveValue(2);
+    if (!strcmp(IDVal, ".long"))
+      return ParseDirectiveValue(4);
+    if (!strcmp(IDVal, ".quad"))
+      return ParseDirectiveValue(8);
+    if (!strcmp(IDVal, ".fill"))
+      return ParseDirectiveFill();
+    if (!strcmp(IDVal, ".org"))
+      return ParseDirectiveOrg();
+    if (!strcmp(IDVal, ".space"))
+      return ParseDirectiveSpace();
+
     Lexer.PrintMessage(IDLoc, "warning: ignoring directive for now");
     EatToEndOfStatement();
     return false;
   }
 
-
   MCInst Inst;
   if (ParseX86InstOperands(Inst))
     return true;
   
   if (Lexer.isNot(asmtok::EndOfStatement))
-    return TokError("unexpected token in operand list");
+    return TokError("unexpected token in argument list");
 
   // Eat the end of statement marker.
   Lexer.Lex();
@@ -211,3 +351,239 @@ bool AsmParser::ParseStatement() {
   // Skip to end of line for now.
   return false;
 }
+
+bool AsmParser::ParseAssignment(const char *Name, bool IsDotSet) {
+  int64_t Value;  // Right-hand side; bound to Name via the streamer below.
+  if (ParseExpression(Value))
+    return true;
+  // Nothing else may follow the expression on this statement.
+  if (Lexer.isNot(asmtok::EndOfStatement))
+    return TokError("unexpected token in assignment");
+
+  // Eat the end of statement marker.
+  Lexer.Lex();
+
+  // Get the symbol for this name.
+  // FIXME: Handle '.'.
+  MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name);
+  Out.EmitAssignment(Sym, MCValue::get(Value), IsDotSet);
+
+  return false;
+}
+
+/// ParseDirectiveSet:
+///   ::= .set identifier ',' expression
+bool AsmParser::ParseDirectiveSet() {
+  if (Lexer.isNot(asmtok::Identifier))
+    return TokError("expected identifier after '.set' directive");
+
+  const char *Name = Lexer.getCurStrVal();
+  // Consume the identifier; a comma must separate it from the expression.
+  if (Lexer.Lex() != asmtok::Comma)
+    return TokError("unexpected token in '.set'");
+  Lexer.Lex();
+  // Shares the assignment path with 'name = expr', flagged as a .set.
+  return ParseAssignment(Name, true);
+}
+
+/// ParseDirectiveDarwinSection:
+///   ::= .section identifier (',' identifier)*
+/// FIXME: This should actually parse out the segment, section, attributes and
+/// sizeof_stub fields.
+bool AsmParser::ParseDirectiveDarwinSection() {
+  if (Lexer.isNot(asmtok::Identifier))
+    return TokError("expected identifier after '.section' directive");
+  // Accumulate every piece into one comma-separated specifier string.
+  std::string Section = Lexer.getCurStrVal();
+  Lexer.Lex();
+  
+  // Accept a comma separated list of modifiers.
+  while (Lexer.is(asmtok::Comma)) {
+    Lexer.Lex();
+    
+    if (Lexer.isNot(asmtok::Identifier))
+      return TokError("expected identifier in '.section' directive");
+    Section += ',';
+    Section += Lexer.getCurStrVal();
+    Lexer.Lex();
+  }
+  
+  if (Lexer.isNot(asmtok::EndOfStatement))
+    return TokError("unexpected token in '.section' directive");
+  Lexer.Lex();
+
+  Out.SwitchSection(Ctx.GetSection(Section.c_str()));
+  return false;
+}
+
+bool AsmParser::ParseDirectiveSectionSwitch(const char *Section,
+                                            const char *Directives) {
+  if (Lexer.isNot(asmtok::EndOfStatement))
+    return TokError("unexpected token in section switching directive");
+  Lexer.Lex();
+  // Build "segment,section[,attributes]"; Directives may be null or empty.
+  std::string SectionStr = Section;
+  if (Directives && Directives[0]) {
+    SectionStr += ",";
+    SectionStr += Directives;
+  }
+  // Pass the full specifier so the attributes are not silently dropped.
+  Out.SwitchSection(Ctx.GetSection(SectionStr.c_str()));
+  return false;
+}
+
+/// ParseDirectiveAscii:
+///   ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
+bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
+  if (Lexer.isNot(asmtok::EndOfStatement)) {
+    for (;;) {
+      if (Lexer.isNot(asmtok::String))
+        return TokError("expected string in '.ascii' or '.asciz' directive");
+      // The token text keeps both quotes; emit only what lies between them.
+      // FIXME: This shouldn't use a const char* + strlen, the string could have
+      // embedded nulls.
+      // FIXME: Should have accessor for getting string contents.
+      const char *Str = Lexer.getCurStrVal();
+      Out.EmitBytes(Str + 1, strlen(Str) - 2);
+      if (ZeroTerminated)
+        Out.EmitBytes("\0", 1);
+      
+      Lexer.Lex();
+      
+      if (Lexer.is(asmtok::EndOfStatement))
+        break;
+
+      if (Lexer.isNot(asmtok::Comma))
+        return TokError("unexpected token in '.ascii' or '.asciz' directive");
+      Lexer.Lex();
+    }
+  }
+
+  Lexer.Lex();
+  return false;
+}
+
+/// ParseDirectiveValue
+///  ::= (.byte | .short | ... ) [ expression (, expression)* ]
+bool AsmParser::ParseDirectiveValue(unsigned Size) {
+  if (Lexer.isNot(asmtok::EndOfStatement)) {
+    for (;;) {
+      int64_t Expr;
+      if (ParseExpression(Expr))
+        return true;
+      // Size comes from the directive: .byte 1, .short 2, .long 4, .quad 8.
+      Out.EmitValue(MCValue::get(Expr), Size);
+
+      if (Lexer.is(asmtok::EndOfStatement))
+        break;
+      
+      // FIXME: Improve diagnostic.
+      if (Lexer.isNot(asmtok::Comma))
+        return TokError("unexpected token in directive");
+      Lexer.Lex();
+    }
+  }
+  // Consume the end-of-statement token that ended the (possibly empty) list.
+  Lexer.Lex();
+  return false;
+}
+
+/// ParseDirectiveSpace
+///  ::= .space expression [ , expression ]
+bool AsmParser::ParseDirectiveSpace() {
+  int64_t NumBytes;
+  if (ParseExpression(NumBytes))
+    return true;
+  // The fill value stays 0 when no second expression is given.
+  int64_t FillExpr = 0;
+  bool HasFillExpr = false;
+  if (Lexer.isNot(asmtok::EndOfStatement)) {
+    if (Lexer.isNot(asmtok::Comma))
+      return TokError("unexpected token in '.space' directive");
+    Lexer.Lex();
+    
+    if (ParseExpression(FillExpr))
+      return true;
+    // NOTE(review): HasFillExpr is written here but never read afterwards.
+    HasFillExpr = true;
+
+    if (Lexer.isNot(asmtok::EndOfStatement))
+      return TokError("unexpected token in '.space' directive");
+  }
+
+  Lexer.Lex();
+  // Zero or negative counts are rejected, so the loop bound below is positive.
+  if (NumBytes <= 0)
+    return TokError("invalid number of bytes in '.space' directive");
+
+  // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
+  for (uint64_t i = 0, e = NumBytes; i != e; ++i)
+    Out.EmitValue(MCValue::get(FillExpr), 1);
+
+  return false;
+}
+
+/// ParseDirectiveFill - repeat count, value size (1, 2 or 4), fill value.
+///  ::= .fill expression , expression , expression
+bool AsmParser::ParseDirectiveFill() {
+  int64_t NumValues;
+  if (ParseExpression(NumValues))
+    return true;
+
+  if (Lexer.isNot(asmtok::Comma))
+    return TokError("unexpected token in '.fill' directive");
+  Lexer.Lex();
+  // Second operand: size passed to EmitValue for each repetition.
+  int64_t FillSize;
+  if (ParseExpression(FillSize))
+    return true;
+
+  if (Lexer.isNot(asmtok::Comma))
+    return TokError("unexpected token in '.fill' directive");
+  Lexer.Lex();
+  // Third operand: the value to repeat.
+  int64_t FillExpr;
+  if (ParseExpression(FillExpr))
+    return true;
+
+  if (Lexer.isNot(asmtok::EndOfStatement))
+    return TokError("unexpected token in '.fill' directive");
+  
+  Lexer.Lex();
+
+  if (FillSize != 1 && FillSize != 2 && FillSize != 4)
+    return TokError("invalid '.fill' size, expected 1, 2, or 4");
+  // Signed loop: a negative count emits nothing instead of wrapping to ~2^64.
+  for (int64_t i = 0; i < NumValues; ++i)
+    Out.EmitValue(MCValue::get(FillExpr), FillSize);
+
+  return false;
+}
+
+/// ParseDirectiveOrg
+///  ::= .org expression [ , expression ]
+bool AsmParser::ParseDirectiveOrg() {
+  int64_t Offset;
+  if (ParseExpression(Offset))
+    return true;
+
+  // Parse optional fill expression.
+  int64_t FillExpr = 0;
+  if (Lexer.isNot(asmtok::EndOfStatement)) {
+    if (Lexer.isNot(asmtok::Comma))
+      return TokError("unexpected token in '.org' directive");
+    Lexer.Lex();
+    
+    if (ParseExpression(FillExpr))
+      return true;
+
+    if (Lexer.isNot(asmtok::EndOfStatement))
+      return TokError("unexpected token in '.org' directive");
+  }
+
+  Lexer.Lex();
+  // Offset goes out as an MCValue; the fill value as a plain integer.
+  Out.EmitValueToOffset(MCValue::get(Offset), FillExpr);
+
+  return false;
+}
diff --git a/tools/llvm-mc/AsmParser.h b/tools/llvm-mc/AsmParser.h
index c133345..da256c2 100644
--- a/tools/llvm-mc/AsmParser.h
+++ b/tools/llvm-mc/AsmParser.h
@@ -17,14 +17,20 @@
 #include "AsmLexer.h"
 
 namespace llvm {
+class MCContext;
 class MCInst;
+class MCStreamer;
   
 class AsmParser {
   AsmLexer Lexer;
+  MCContext &Ctx;
+  MCStreamer &Out;
+  
   struct X86Operand;
   
 public:
-  AsmParser(SourceMgr &SM) : Lexer(SM) {}
+  AsmParser(SourceMgr &SM, MCContext &ctx, MCStreamer &OutStr)
+    : Lexer(SM), Ctx(ctx), Out(OutStr) {}
   ~AsmParser() {}
   
   bool Run();
@@ -37,6 +43,7 @@ private:
   
   void EatToEndOfStatement();
   
+  bool ParseAssignment(const char *Name, bool IsDotSet);
   bool ParseExpression(int64_t &Res);
   bool ParsePrimaryExpr(int64_t &Res);
   bool ParseBinOpRHS(unsigned Precedence, int64_t &Res);
@@ -46,6 +53,18 @@ private:
   bool ParseX86InstOperands(MCInst &Inst);
   bool ParseX86Operand(X86Operand &Op);
   bool ParseX86MemOperand(X86Operand &Op);
+  
+  // Directive Parsing.
+  bool ParseDirectiveDarwinSection(); // Darwin specific ".section".
+  bool ParseDirectiveSectionSwitch(const char *Section,
+                                   const char *Directives = 0);
+  bool ParseDirectiveAscii(bool ZeroTerminated); // ".ascii", ".asciiz"
+  bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ...
+  bool ParseDirectiveFill(); // ".fill"
+  bool ParseDirectiveSpace(); // ".space"
+  bool ParseDirectiveSet(); // ".set"
+  bool ParseDirectiveOrg(); // ".org"
+  
 };
 
 } // end namespace llvm
diff --git a/tools/llvm-mc/CMakeLists.txt b/tools/llvm-mc/CMakeLists.txt
index 369d522..2dd878d 100644
--- a/tools/llvm-mc/CMakeLists.txt
+++ b/tools/llvm-mc/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(LLVM_LINK_COMPONENTS support)
+set(LLVM_LINK_COMPONENTS support MC)
 
 add_llvm_tool(llvm-mc
   llvm-mc.cpp
diff --git a/tools/llvm-mc/Makefile b/tools/llvm-mc/Makefile
index 7b4d944..3c327da 100644
--- a/tools/llvm-mc/Makefile
+++ b/tools/llvm-mc/Makefile
@@ -9,7 +9,7 @@
 
 LEVEL = ../..
 TOOLNAME = llvm-mc
-LINK_COMPONENTS := support
+LINK_COMPONENTS := support MC
 
 # This tool has no plugins, optimize startup time.
 TOOL_NO_EXPORTS = 1
diff --git a/tools/llvm-mc/llvm-mc.cpp b/tools/llvm-mc/llvm-mc.cpp
index 52205c4..4100cb1 100644
--- a/tools/llvm-mc/llvm-mc.cpp
+++ b/tools/llvm-mc/llvm-mc.cpp
@@ -12,6 +12,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/ADT/OwningPtr.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -136,7 +139,13 @@ static int AssembleInput(const char *ProgName) {
   // it later.
   SrcMgr.setIncludeDirs(IncludeDirs);
   
-  AsmParser Parser(SrcMgr);
+  MCContext Ctx;
+  OwningPtr<MCStreamer> Str(createAsmStreamer(Ctx, outs()));
+
+  // FIXME: Target hook & command line option for initial section.
+  Str.get()->SwitchSection(Ctx.GetSection("__TEXT,__text,regular,pure_instructions"));
+
+  AsmParser Parser(SrcMgr, Ctx, *Str.get());
   return Parser.Run();
 }  
 
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index 7befe8f..329f9ea 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -92,6 +92,12 @@ configuration libraries:
 
 * ``-v`` - Enable verbose mode, i.e. print out all executed commands.
 
+* ``--save-temps`` - Write temporary files to the current directory and do not
+  delete them on exit. This option can also take an argument: the
+  ``--save-temps=obj`` switch will write files into the directory specified with
+  the ``-o`` option. The ``--save-temps=cwd`` and ``--save-temps`` switches are
+  both synonyms for the default behaviour.
+
 * ``--check-graph`` - Check the compilation for common errors like mismatched
   output/input language names, multiple default edges and cycles. Because of
   plugins, these checks can't be performed at compile-time. Exit with code zero
@@ -108,9 +114,6 @@ configuration libraries:
   used to set the output file name. Hidden option, useful for debugging LLVMC
   plugins.
 
-* ``--save-temps`` - Write temporary files to the current directory
-  and do not delete them on exit. Hidden option, useful for debugging.
-
 * ``--help``, ``--help-hidden``, ``--version`` - These options have
   their standard meaning.
 
diff --git a/tools/llvmc/driver/Makefile b/tools/llvmc/driver/Makefile
index 5f5ec53..2f3104b 100644
--- a/tools/llvmc/driver/Makefile
+++ b/tools/llvmc/driver/Makefile
@@ -8,15 +8,6 @@
 ##===----------------------------------------------------------------------===##
 
 LEVEL = ../../..
-
-TOOLNAME = $(LLVMC_BASED_DRIVER_NAME)
-LLVMLIBS = CompilerDriver.a
-
-ifneq ($(LLVMC_BUILTIN_PLUGINS),)
-USEDLIBS += $(patsubst %,plugin_llvmc_%.a,$(LLVMC_BUILTIN_PLUGINS))
-endif
-
-LINK_COMPONENTS = support system
-REQUIRES_EH := 1
+LLVMC_BASED_DRIVER = $(LLVMC_BASED_DRIVER_NAME)
 
 include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/Skeleton/Makefile b/tools/llvmc/example/Skeleton/Makefile
index 2e4cbb9..f489abf 100644
--- a/tools/llvmc/example/Skeleton/Makefile
+++ b/tools/llvmc/example/Skeleton/Makefile
@@ -8,7 +8,7 @@
 ##===----------------------------------------------------------------------===##
 
 # Change this so that $(BASE_LEVEL)/Makefile.common refers to
-# $LLVM_DIR/Makefile.common.
+# $LLVM_DIR/Makefile.common or $YOUR_LLVM_BASED_PROJECT/Makefile.common.
 export LLVMC_BASE_LEVEL = ../../../..
 
 # Change this to the name of your LLVMC-based driver.
diff --git a/tools/llvmc/example/Skeleton/driver/Makefile b/tools/llvmc/example/Skeleton/driver/Makefile
index bf6d7a5..93e795b 100644
--- a/tools/llvmc/example/Skeleton/driver/Makefile
+++ b/tools/llvmc/example/Skeleton/driver/Makefile
@@ -8,15 +8,6 @@
 ##===----------------------------------------------------------------------===##
 
 LEVEL = $(LLVMC_BASE_LEVEL)/..
-
-TOOLNAME = $(LLVMC_BASED_DRIVER_NAME)
-LLVMLIBS = CompilerDriver
-
-ifneq ($(LLVMC_BUILTIN_PLUGINS),)
-USEDLIBS += $(patsubst %,plugin_llvmc_%,$(LLVMC_BUILTIN_PLUGINS))
-endif
-
-LINK_COMPONENTS = support system
-REQUIRES_EH := 1
+LLVMC_BASED_DRIVER = $(LLVMC_BASED_DRIVER_NAME)
 
 include $(LEVEL)/Makefile.common
diff --git a/tools/llvmc/example/mcc16/Makefile b/tools/llvmc/example/mcc16/Makefile
index efc9d2d..e94bca2 100644
--- a/tools/llvmc/example/mcc16/Makefile
+++ b/tools/llvmc/example/mcc16/Makefile
@@ -7,14 +7,8 @@
 #
 ##===----------------------------------------------------------------------===##
 
-# Change this so that $(BASE_LEVEL)/Makefile.common refers to
-# $LLVM_DIR/Makefile.common.
 export LLVMC_BASE_LEVEL = ../../../..
-
-# Change this to the name of your LLVMC-based driver.
 export LLVMC_BASED_DRIVER_NAME = mcc16
-
-# List your plugin names here
 export LLVMC_BUILTIN_PLUGINS = PIC16Base
 
 LEVEL = $(LLVMC_BASE_LEVEL)
diff --git a/tools/llvmc/example/mcc16/driver/Makefile b/tools/llvmc/example/mcc16/driver/Makefile
index ed9ebfd..670d8bd 100644
--- a/tools/llvmc/example/mcc16/driver/Makefile
+++ b/tools/llvmc/example/mcc16/driver/Makefile
@@ -8,15 +8,6 @@
 ##===----------------------------------------------------------------------===##
 
 LEVEL = $(LLVMC_BASE_LEVEL)/..
-
-TOOLNAME = $(LLVMC_BASED_DRIVER_NAME)
-LLVMLIBS = CompilerDriver
-
-ifneq ($(LLVMC_BUILTIN_PLUGINS),)
-USEDLIBS += $(patsubst %,plugin_llvmc_%,$(LLVMC_BUILTIN_PLUGINS))
-endif
-
-LINK_COMPONENTS = support system
-REQUIRES_EH := 1
+LLVMC_BASED_DRIVER = $(LLVMC_BASED_DRIVER_NAME)
 
 include $(LEVEL)/Makefile.common
diff --git a/tools/lto/LTOCodeGenerator.cpp b/tools/lto/LTOCodeGenerator.cpp
index 8db573e..52624eb 100644
--- a/tools/lto/LTOCodeGenerator.cpp
+++ b/tools/lto/LTOCodeGenerator.cpp
@@ -111,7 +111,7 @@ bool LTOCodeGenerator::setDebugInfo(lto_debug_model debug, std::string& errMsg)
 
 
 bool LTOCodeGenerator::setCodePICModel(lto_codegen_model model, 
-                                                        std::string& errMsg)
+                                       std::string& errMsg)
 {
     switch (model) {
         case LTO_CODEGEN_PIC_MODEL_STATIC:
diff --git a/tools/lto/Makefile b/tools/lto/Makefile
index f0f6da7..de885d9 100644
--- a/tools/lto/Makefile
+++ b/tools/lto/Makefile
@@ -17,7 +17,6 @@ include $(LEVEL)/Makefile.config
 
 LINK_LIBS_IN_SHARED = 1
 SHARED_LIBRARY = 1
-DONT_BUILD_RELINKED = 1
 
 LINK_COMPONENTS := $(TARGETS_TO_BUILD) ipo scalaropts linker bitreader bitwriter
 
diff --git a/tools/lto/lto.cpp b/tools/lto/lto.cpp
index 7eb39ef..a0f67b4 100644
--- a/tools/lto/lto.cpp
+++ b/tools/lto/lto.cpp
@@ -198,7 +198,7 @@ bool lto_codegen_set_debug_model(lto_code_gen_t cg, lto_debug_model debug)
 //
 bool lto_codegen_set_pic_model(lto_code_gen_t cg, lto_codegen_model model)
 {
-    return cg->setCodePICModel(model, sLastErrorString);
+  return cg->setCodePICModel(model, sLastErrorString);
 }
 
 //
@@ -206,7 +206,7 @@ bool lto_codegen_set_pic_model(lto_code_gen_t cg, lto_codegen_model model)
 //
 void lto_codegen_set_gcc_path(lto_code_gen_t cg, const char* path)
 {
-    cg->setGccPath(path);
+  cg->setGccPath(path);
 }
 
 //
@@ -224,7 +224,7 @@ void lto_codegen_set_assembler_path(lto_code_gen_t cg, const char* path)
 //
 void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol)
 {
-    cg->addMustPreserveSymbol(symbol);
+  cg->addMustPreserveSymbol(symbol);
 }
 
 
@@ -235,7 +235,7 @@ void lto_codegen_add_must_preserve_symbol(lto_code_gen_t cg, const char* symbol)
 //
 bool lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path)
 {
-   return cg->writeMergedModules(path, sLastErrorString);
+  return cg->writeMergedModules(path, sLastErrorString);
 }
 
 
@@ -250,7 +250,7 @@ bool lto_codegen_write_merged_modules(lto_code_gen_t cg, const char* path)
 extern const void*
 lto_codegen_compile(lto_code_gen_t cg, size_t* length)
 {
-    return cg->compile(length, sLastErrorString);
+  return cg->compile(length, sLastErrorString);
 }
 
 
@@ -262,6 +262,3 @@ lto_codegen_debug_options(lto_code_gen_t cg, const char * opt)
 {
   cg->setCodeGenDebugOptions(opt);
 }
-
-
-
diff --git a/tools/opt/GraphPrinters.cpp b/tools/opt/GraphPrinters.cpp
index a52baf7..5d581e4 100644
--- a/tools/opt/GraphPrinters.cpp
+++ b/tools/opt/GraphPrinters.cpp
@@ -49,7 +49,8 @@ namespace llvm {
       return "Call Graph";
     }
 
-    static std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) {
+    static std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph,
+                                    bool ShortNames) {
       if (Node->getFunction())
         return ((Value*)Node->getFunction())->getName();
       else
diff --git a/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
new file mode 100644
index 0000000..3c9beeb
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/JITEventListenerTest.cpp
@@ -0,0 +1,241 @@
+//===- JITEventListenerTest.cpp - Unit tests for JITEventListeners --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITEventListener.h"
+
+#include "llvm/Instructions.h"
+#include "llvm/Module.h"
+#include "llvm/ModuleProvider.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/CodeGen/MachineCodeInfo.h"
+#include "llvm/ExecutionEngine/JIT.h"
+#include "llvm/Support/TypeBuilder.h"
+#include "llvm/Target/TargetSelect.h"
+#include "gtest/gtest.h"
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+struct FunctionEmittedEvent {
+  // Indices are local to the RecordingJITEventListener, since the
+  // JITEventListener interface makes no guarantees about the order of
+  // calls between Listeners.
+  unsigned Index;
+  const Function *F;  // Address of the Function the notification named.
+  void *Code;         // Code pointer passed to NotifyFunctionEmitted.
+  size_t Size;        // Size passed to NotifyFunctionEmitted.
+  JITEvent_EmittedFunctionDetails Details;  // Copy of the details argument.
+};
+struct FunctionFreedEvent {
+  unsigned Index;     // Taken from the same per-listener counter as emissions.
+  const Function *F;  // Address of the Function the notification named.
+  void *Code;         // OldPtr passed to NotifyFreeingMachineCode.
+};
+
+struct RecordingJITEventListener : public JITEventListener {
+  // Notifications received so far; each vector is in arrival order.
+  std::vector<FunctionEmittedEvent> EmittedEvents;
+  std::vector<FunctionFreedEvent> FreedEvents;
+  // Index for the next event, shared by both kinds; unsigned like the field.
+  unsigned NextIndex;
+  RecordingJITEventListener() : NextIndex(0) {}
+
+  virtual void NotifyFunctionEmitted(const Function &F,
+                                     void *Code, size_t Size,
+                                     const EmittedFunctionDetails &Details) {
+    FunctionEmittedEvent Event = {NextIndex++, &F, Code, Size, Details};
+    EmittedEvents.push_back(Event);
+  }
+
+  virtual void NotifyFreeingMachineCode(const Function &F, void *OldPtr) {
+    FunctionFreedEvent Event = {NextIndex++, &F, OldPtr};
+    FreedEvents.push_back(Event);
+  }
+};
+
+class JITEventListenerTest : public testing::Test {  // Module + JIT fixture.
+ protected:
+  JITEventListenerTest()
+      : M(new Module("module")),
+        EE(ExecutionEngine::createJIT(new ExistingModuleProvider(M))) {
+  }
+  // M must stay declared before EE: EE's initializer above reads M.
+  Module *M;
+  const OwningPtr<ExecutionEngine> EE;
+};
+
+// Adds to M an external function "id" whose entry block returns its argument.
+Function *buildFunction(Module *M) {
+  Function *Id = Function::Create(TypeBuilder<int32_t(int32_t), false>::get(),
+                                  GlobalValue::ExternalLinkage, "id", M);
+  Value *Param = Id->arg_begin();
+  BasicBlock *Entry = BasicBlock::Create("entry", Id);
+  ReturnInst::Create(Param, Entry);
+  return Id;
+}
+
+// Tests that a single JITEventListener follows JIT events accurately.
+TEST_F(JITEventListenerTest, Simple) {
+  RecordingJITEventListener Listener;
+  EE->RegisterJITEventListener(&Listener);
+  Function *F1 = buildFunction(M);
+  Function *F2 = buildFunction(M);
+
+  void *F1_addr = EE->getPointerToFunction(F1);
+  void *F2_addr = EE->getPointerToFunction(F2);
+  EE->getPointerToFunction(F1);  // Should do nothing.
+  EE->freeMachineCodeForFunction(F1);
+  EE->freeMachineCodeForFunction(F2);
+
+  ASSERT_EQ(2U, Listener.EmittedEvents.size());
+  ASSERT_EQ(2U, Listener.FreedEvents.size());
+
+  EXPECT_EQ(0U, Listener.EmittedEvents[0].Index);
+  EXPECT_EQ(F1, Listener.EmittedEvents[0].F);
+  EXPECT_EQ(F1_addr, Listener.EmittedEvents[0].Code);
+  EXPECT_LT(0U, Listener.EmittedEvents[0].Size)
+      << "We don't know how big the function will be, but it had better"
+      << " contain some bytes.";
+
+  EXPECT_EQ(1U, Listener.EmittedEvents[1].Index);
+  EXPECT_EQ(F2, Listener.EmittedEvents[1].F);
+  EXPECT_EQ(F2_addr, Listener.EmittedEvents[1].Code);
+  EXPECT_LT(0U, Listener.EmittedEvents[1].Size)
+      << "We don't know how big the function will be, but it had better"
+      << " contain some bytes.";
+
+  EXPECT_EQ(2U, Listener.FreedEvents[0].Index);
+  EXPECT_EQ(F1, Listener.FreedEvents[0].F);
+  EXPECT_EQ(F1_addr, Listener.FreedEvents[0].Code);
+
+  EXPECT_EQ(3U, Listener.FreedEvents[1].Index);
+  EXPECT_EQ(F2, Listener.FreedEvents[1].F);
+  EXPECT_EQ(F2_addr, Listener.FreedEvents[1].Code);
+
+  F1->eraseFromParent();
+  F2->eraseFromParent();
+}
+
+// Tests that each listener sees exactly the events fired while registered.
+TEST_F(JITEventListenerTest, MultipleListenersDontInterfere) {
+  RecordingJITEventListener Listener1;
+  RecordingJITEventListener Listener2;
+  RecordingJITEventListener Listener3;
+  Function *F1 = buildFunction(M);
+  Function *F2 = buildFunction(M);
+
+  EE->RegisterJITEventListener(&Listener1);
+  EE->RegisterJITEventListener(&Listener2);
+  void *F1_addr = EE->getPointerToFunction(F1);
+  EE->RegisterJITEventListener(&Listener3);
+  EE->UnregisterJITEventListener(&Listener1);
+  void *F2_addr = EE->getPointerToFunction(F2);
+  EE->UnregisterJITEventListener(&Listener2);
+  EE->UnregisterJITEventListener(&Listener3);
+  EE->freeMachineCodeForFunction(F1);
+  EE->RegisterJITEventListener(&Listener2);
+  EE->RegisterJITEventListener(&Listener3);
+  EE->RegisterJITEventListener(&Listener1);
+  EE->freeMachineCodeForFunction(F2);
+  EE->UnregisterJITEventListener(&Listener1);
+  EE->UnregisterJITEventListener(&Listener2);
+  EE->UnregisterJITEventListener(&Listener3);
+
+  // Listener 1.
+  ASSERT_EQ(1U, Listener1.EmittedEvents.size());
+  ASSERT_EQ(1U, Listener1.FreedEvents.size());
+
+  EXPECT_EQ(0U, Listener1.EmittedEvents[0].Index);
+  EXPECT_EQ(F1, Listener1.EmittedEvents[0].F);
+  EXPECT_EQ(F1_addr, Listener1.EmittedEvents[0].Code);
+  EXPECT_LT(0U, Listener1.EmittedEvents[0].Size)
+      << "We don't know how big the function will be, but it had better"
+      << " contain some bytes.";
+
+  EXPECT_EQ(1U, Listener1.FreedEvents[0].Index);
+  EXPECT_EQ(F2, Listener1.FreedEvents[0].F);
+  EXPECT_EQ(F2_addr, Listener1.FreedEvents[0].Code);
+
+  // Listener 2.
+  ASSERT_EQ(2U, Listener2.EmittedEvents.size());
+  ASSERT_EQ(1U, Listener2.FreedEvents.size());
+
+  EXPECT_EQ(0U, Listener2.EmittedEvents[0].Index);
+  EXPECT_EQ(F1, Listener2.EmittedEvents[0].F);
+  EXPECT_EQ(F1_addr, Listener2.EmittedEvents[0].Code);
+  EXPECT_LT(0U, Listener2.EmittedEvents[0].Size)
+      << "We don't know how big the function will be, but it had better"
+      << " contain some bytes.";
+
+  EXPECT_EQ(1U, Listener2.EmittedEvents[1].Index);
+  EXPECT_EQ(F2, Listener2.EmittedEvents[1].F);
+  EXPECT_EQ(F2_addr, Listener2.EmittedEvents[1].Code);
+  EXPECT_LT(0U, Listener2.EmittedEvents[1].Size)
+      << "We don't know how big the function will be, but it had better"
+      << " contain some bytes.";
+
+  EXPECT_EQ(2U, Listener2.FreedEvents[0].Index);
+  EXPECT_EQ(F2, Listener2.FreedEvents[0].F);
+  EXPECT_EQ(F2_addr, Listener2.FreedEvents[0].Code);
+
+  // Listener 3.
+  ASSERT_EQ(1U, Listener3.EmittedEvents.size());
+  ASSERT_EQ(1U, Listener3.FreedEvents.size());
+
+  EXPECT_EQ(0U, Listener3.EmittedEvents[0].Index);
+  EXPECT_EQ(F2, Listener3.EmittedEvents[0].F);
+  EXPECT_EQ(F2_addr, Listener3.EmittedEvents[0].Code);
+  EXPECT_LT(0U, Listener3.EmittedEvents[0].Size)
+      << "We don't know how big the function will be, but it had better"
+      << " contain some bytes.";
+
+  EXPECT_EQ(1U, Listener3.FreedEvents[0].Index);
+  EXPECT_EQ(F2, Listener3.FreedEvents[0].F);
+  EXPECT_EQ(F2_addr, Listener3.FreedEvents[0].Code);
+
+  F1->eraseFromParent();
+  F2->eraseFromParent();
+}
+
+TEST_F(JITEventListenerTest, MatchesMachineCodeInfo) {
+  RecordingJITEventListener Listener;
+  MachineCodeInfo MCI;
+  Function *F = buildFunction(M);
+  // JIT F with an explicit MachineCodeInfo to compare against the event.
+  EE->RegisterJITEventListener(&Listener);
+  EE->runJITOnFunction(F, &MCI);
+  void *F_addr = EE->getPointerToFunction(F);
+  EE->freeMachineCodeForFunction(F);
+  // Expect one emission even though getPointerToFunction ran afterwards.
+  ASSERT_EQ(1U, Listener.EmittedEvents.size());
+  ASSERT_EQ(1U, Listener.FreedEvents.size());
+  // The emitted event must agree with both the returned pointer and MCI.
+  EXPECT_EQ(0U, Listener.EmittedEvents[0].Index);
+  EXPECT_EQ(F, Listener.EmittedEvents[0].F);
+  EXPECT_EQ(F_addr, Listener.EmittedEvents[0].Code);
+  EXPECT_EQ(MCI.address(), Listener.EmittedEvents[0].Code);
+  EXPECT_EQ(MCI.size(), Listener.EmittedEvents[0].Size);
+  // The free notification takes the next index and the same code pointer.
+  EXPECT_EQ(1U, Listener.FreedEvents[0].Index);
+  EXPECT_EQ(F, Listener.FreedEvents[0].F);
+  EXPECT_EQ(F_addr, Listener.FreedEvents[0].Code);
+}
+
+class JITEnvironment : public testing::Environment {
+  virtual void SetUp() {
+    // Required for ExecutionEngine::createJIT to create a JIT.
+    InitializeNativeTarget();
+  }
+};
+testing::Environment* const jit_env =
+  testing::AddGlobalTestEnvironment(new JITEnvironment);
+
+}  // anonymous namespace
diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile
new file mode 100644
index 0000000..0069c76
--- /dev/null
+++ b/unittests/ExecutionEngine/JIT/Makefile
@@ -0,0 +1,15 @@
+##===- unittests/ExecutionEngine/JIT/Makefile --------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../..
+TESTNAME = JIT
+LINK_COMPONENTS := core support jit native
+
+include $(LEVEL)/Makefile.config
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/ExecutionEngine/Makefile b/unittests/ExecutionEngine/Makefile
new file mode 100644
index 0000000..e837a7d
--- /dev/null
+++ b/unittests/ExecutionEngine/Makefile
@@ -0,0 +1,19 @@
+##===- unittests/ExecutionEngine/Makefile ------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+
+include $(LEVEL)/Makefile.config
+
+PARALLEL_DIRS = JIT
+
+include $(LEVEL)/Makefile.common
+
+clean::
+	$(Verb) $(RM) -f *Tests
diff --git a/unittests/MC/AsmStreamerTest.cpp b/unittests/MC/AsmStreamerTest.cpp
new file mode 100644
index 0000000..d4b93ea
--- /dev/null
+++ b/unittests/MC/AsmStreamerTest.cpp
@@ -0,0 +1,103 @@
+//===- AsmStreamerTest.cpp - AsmStreamer unit tests -----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+namespace {
+
+// Helper class: owns an asm streamer whose output is captured in a string.
+class StringAsmStreamer {
+  std::string Str;
+  raw_string_ostream OS;
+  MCContext Context;
+  MCStreamer *Streamer;
+  // Keep this member order: OS wraps Str; Streamer is built from Context, OS.
+public:
+  StringAsmStreamer() : OS(Str), Streamer(createAsmStreamer(Context, OS)) {}
+  ~StringAsmStreamer() { 
+    delete Streamer;
+  }
+
+  MCContext &getContext() { return Context; }
+  MCStreamer &getStreamer() { return *Streamer; }
+  // Calls Finish() on the streamer, then returns the captured text.
+  const std::string &getString() {
+    Streamer->Finish();
+    return Str;
+  }
+};
+
+TEST(AsmStreamer, EmptyOutput) {
+  StringAsmStreamer S;
+  EXPECT_EQ(S.getString(), "");
+}
+
+TEST(AsmStreamer, Sections) {
+  StringAsmStreamer S;
+  MCSection *Sec0 = S.getContext().GetSection("foo");
+  S.getStreamer().SwitchSection(Sec0);
+  EXPECT_EQ(S.getString(), ".section foo\n");
+}
+
+TEST(AsmStreamer, Values) {
+  StringAsmStreamer S;
+  MCSection *Sec0 = S.getContext().GetSection("foo");
+  MCSymbol *A = S.getContext().CreateSymbol("a");
+  MCSymbol *B = S.getContext().CreateSymbol("b");
+  S.getStreamer().SwitchSection(Sec0);
+  S.getStreamer().EmitLabel(A);
+  S.getStreamer().EmitLabel(B);
+  S.getStreamer().EmitValue(MCValue::get(A, B, 10), 1);
+  S.getStreamer().EmitValue(MCValue::get(A, B, 10), 2);
+  S.getStreamer().EmitValue(MCValue::get(A, B, 10), 4);
+  S.getStreamer().EmitValue(MCValue::get(A, B, 10), 8);
+  EXPECT_EQ(S.getString(), ".section foo\n\
+a:\n\
+b:\n\
+.byte a - b + 10\n\
+.short a - b + 10\n\
+.long a - b + 10\n\
+.quad a - b + 10\n\
+");
+}
+
+TEST(AsmStreamer, Align) {
+  StringAsmStreamer S;
+  MCSection *Sec0 = S.getContext().GetSection("foo");
+  S.getStreamer().SwitchSection(Sec0);
+  S.getStreamer().EmitValueToAlignment(4);
+  S.getStreamer().EmitValueToAlignment(4, /*Value=*/12, /*ValueSize=*/2);
+  S.getStreamer().EmitValueToAlignment(8, /*Value=*/12, /*ValueSize=*/4, 
+                                       /*MaxBytesToEmit=*/24);
+  EXPECT_EQ(S.getString(), ".section foo\n\
+.p2align 2, 0\n\
+.p2alignw 2, 12\n\
+.p2alignl 3, 12, 24\n\
+");
+}
+
+TEST(AsmStreamer, Org) {
+  StringAsmStreamer S;
+  MCSection *Sec0 = S.getContext().GetSection("foo");
+  S.getStreamer().SwitchSection(Sec0);
+  MCSymbol *A = S.getContext().CreateSymbol("a");
+  S.getStreamer().EmitLabel(A);
+  S.getStreamer().EmitValueToOffset(MCValue::get(A, 0, 4), 32);
+  EXPECT_EQ(S.getString(), ".section foo\n\
+a:\n\
+.org a + 4, 32\n\
+");
+}
+
+}
diff --git a/unittests/MC/Makefile b/unittests/MC/Makefile
new file mode 100644
index 0000000..410d386
--- /dev/null
+++ b/unittests/MC/Makefile
@@ -0,0 +1,15 @@
+##===- unittests/MC/Makefile -------------------------------*- Makefile -*-===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../..
+TESTNAME = MC
+LINK_COMPONENTS := core support mc
+
+include $(LEVEL)/Makefile.config
+include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
diff --git a/unittests/Makefile b/unittests/Makefile
index 1ff54111..1eb69ab 100644
--- a/unittests/Makefile
+++ b/unittests/Makefile
@@ -16,7 +16,7 @@ BUILD_ARCHIVE = 1
 CPP.Flags += -I$(LLVM_SRC_ROOT)/utils/unittest/googletest/include/
 CPP.Flags += -Wno-variadic-macros
 
-PARALLEL_DIRS = ADT Support VMCore
+PARALLEL_DIRS = ADT ExecutionEngine Support VMCore MC
 
 include $(LEVEL)/Makefile.common
 
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
index 00d4038..c005bcb 100755
--- a/utils/NewNightlyTest.pl
+++ b/utils/NewNightlyTest.pl
@@ -109,6 +109,13 @@ $BuildDir      = "$HOME/buildtest" unless $BuildDir;
 my $WebDir     = $ENV{'WEBDIR'};
 $WebDir        = "$HOME/cvs/testresults-X86" unless $WebDir;
 
+my $LLVMSrcDir   = $ENV{'LLVMSRCDIR'};
+$LLVMSrcDir    = "$BuildDir/llvm" unless $LLVMSrcDir;
+my $LLVMObjDir   = $ENV{'LLVMOBJDIR'};
+$LLVMObjDir    = "$BuildDir/llvm" unless $LLVMObjDir;
+my $LLVMTestDir   = $ENV{'LLVMTESTDIR'};
+$LLVMTestDir    = "$BuildDir/llvm/projects/llvm-test" unless $LLVMTestDir;
+
 ##############################################################
 #
 # Calculate the date prefix...
@@ -168,6 +175,7 @@ while (scalar(@ARGV) and ($_ = $ARGV[0], /^[-+]/)) {
   if (/^-disable-lto$/)    { $PROGTESTOPTS .= " DISABLE_LTO=1"; next; }
   if (/^-test-opts$/)      { $PROGTESTOPTS .= " $ARGV[0]"; shift; next; }
   if (/^-verbose$/)        { $VERBOSE = 1; next; }
+  if (/^-teelogs$/)        { $TEELOGS = 1; next; }
   if (/^-debug$/)          { $DEBUG = 1; next; }
   if (/^-nice$/)           { $NICE = "nice "; next; }
   if (/^-f2c$/)            { $CONFIGUREARGS .= " --with-f2c=$ARGV[0]";
@@ -293,6 +301,44 @@ sub GetDir {
   return @Result;
 }
 
+# RunLoggedCommand - Run $Command through the shell with both stdout and
+# stderr written to $Log, replacing any previous contents.  With -teelogs
+# the output is piped through tee instead of redirected straight to the file.
+# When -verbose is set, $Title and the exact shell command are printed first.
+sub RunLoggedCommand {
+  my ($Command, $Log, $Title) = @_;
+
+  # The file redirection must precede "2>&1": written the other way round,
+  # stderr stays on the terminal and "time -p" output never reaches the log.
+  my $Redirect = $TEELOGS ? "2>&1 | tee $Log" : "> $Log 2>&1";
+
+  if ($VERBOSE) {
+    print "$Title\n";
+    print "$Command $Redirect\n";
+  }
+
+  system "$Command $Redirect";
+}
+
+# RunAppendingLoggedCommand - Like RunLoggedCommand, but the output of
+# $Command (stdout and stderr) is appended to $Log instead of replacing it.
+# With -teelogs the output is piped through "tee -a".  When -verbose is set,
+# $Title and the exact shell command are printed first.
+sub RunAppendingLoggedCommand {
+  my ($Command, $Log, $Title) = @_;
+
+  # The file redirection must precede "2>&1": written the other way round,
+  # stderr stays on the terminal and "time -p" output never reaches the log.
+  my $Redirect = $TEELOGS ? "2>&1 | tee -a $Log" : ">> $Log 2>&1";
+
+  if ($VERBOSE) {
+    print "$Title\n";
+    print "$Command $Redirect\n";
+  }
+
+  system "$Command $Redirect";
+}
+
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 #
 # DiffFiles - Diff the current version of the file against the last version of
@@ -458,7 +504,11 @@ sub SendData{
     WriteFile "$Prefix-sentdata.txt", $sentdata;
 
     if (!($SUBMITAUX eq "")) {
-      system "$SUBMITAUX \"$Prefix-sentdata.txt\"";
+        system "$SUBMITAUX \"$Prefix-sentdata.txt\"";
+    }
+
+    if (!$SUBMIT) { 
+        return "Skipped standard submit.\n";
     }
 
     # Create the content to send to the server.
@@ -528,7 +578,6 @@ if (!$NOCHECKOUT) {
     mkdir $BuildDir or die "Could not create checkout directory $BuildDir!";
   }
 }
-ChangeDir( $BuildDir, "checkout directory" );
 
 
 ##############################################################
@@ -537,38 +586,29 @@ ChangeDir( $BuildDir, "checkout directory" );
 #
 ##############################################################
 if (!$NOCHECKOUT) {
-  if ( $VERBOSE ) { print "CHECKOUT STAGE:\n"; }
+  ChangeDir( $BuildDir, "checkout directory" );
   if ($USESVN) {
       my $SVNCMD = "$NICE svn co --non-interactive $SVNURL";
       my $SVNCMD2 = "$NICE svn co --non-interactive $TestSVNURL";
-      if ($VERBOSE) {
-        print "( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
-              "$SVNCMD2/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
-      }
-      system "( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
-            "$SVNCMD2/test-suite/trunk llvm-test ) > $COLog 2>&1\n";
+      RunLoggedCommand("( time -p $SVNCMD/llvm/trunk llvm; cd llvm/projects ; " .
+                       "$SVNCMD2/test-suite/trunk llvm-test )", $COLog,
+                       "CHECKOUT LLVM");
       if ($WITHCLANG) {
         my $SVNCMD = "$NICE svn co --non-interactive $SVNURL/cfe/trunk";
-        if ($VERBOSE) {
-          print "( time -p cd llvm/tools ; $SVNCMD clang ) > $COLog 2>&1\n"; 
-        }
-          system "( time -p cd llvm/tools ; $SVNCMD clang ) > $COLog 2>&1\n";
+        RunLoggedCommand("( time -p cd llvm/tools ; $SVNCMD clang )", $COLog,
+                         "CHECKOUT CLANG");
       }
   } else {
     my $CVSOPT = "";
     $CVSOPT = "-z3" # Use compression if going over ssh.
       if $CVSRootDir =~ /^:ext:/;
     my $CVSCMD = "$NICE cvs $CVSOPT -d $CVSRootDir co -P $CVSCOOPT";
-    if ($VERBOSE) {
-      print "( time -p $CVSCMD llvm; cd llvm/projects ; " .
-            "$CVSCMD llvm-test ) > $COLog 2>&1\n";
-    }
-    system "( time -p $CVSCMD llvm; cd llvm/projects ; " .
-          "$CVSCMD llvm-test ) > $COLog 2>&1\n";
+    RunLoggedCommand("( time -p $CVSCMD llvm; cd llvm/projects ; " .
+                     "$CVSCMD llvm-test )", $COLog,
+                     "CHECKOUT LLVM-TEST");
   }
 }
-ChangeDir( $BuildDir , "Checkout directory") ;
-ChangeDir( "llvm" , "llvm source directory") ;
+ChangeDir( $LLVMSrcDir , "llvm source directory") ;
 
 ##############################################################
 #
@@ -721,21 +761,11 @@ my $UserUpdateList = join "\n", sort keys %UsersUpdated;
 ##############################################################
 if (!$NOCHECKOUT && !$NOBUILD) {
   my $EXTRAFLAGS = "--enable-spec --with-objroot=.";
-  if ( $VERBOSE ) {
-    print "CONFIGURE STAGE:\n";
-    print "(time -p $NICE ./configure $CONFIGUREARGS $EXTRAFLAGS) " .
-          "> $BuildLog 2>&1\n";
-  }
-  system "(time -p $NICE ./configure $CONFIGUREARGS $EXTRAFLAGS) " .
-         "> $BuildLog 2>&1";
-  if ( $VERBOSE ) {
-    print "BUILD STAGE:\n";
-    print "(time -p $NICE $MAKECMD clean) >> $BuildLog 2>&1\n";
-    print "(time -p $NICE $MAKECMD $MAKEOPTS) >> $BuildLog 2>&1\n";
-  }
+  RunLoggedCommand("(time -p $NICE ./configure $CONFIGUREARGS $EXTRAFLAGS) ",
+                   $BuildLog, "CONFIGURE");
   # Build the entire tree, capturing the output into $BuildLog
-  system "(time -p $NICE $MAKECMD clean) >> $BuildLog 2>&1";
-  system "(time -p $NICE $MAKECMD $MAKEOPTS) >> $BuildLog 2>&1";
+  RunAppendingLoggedCommand("(time -p $NICE $MAKECMD clean)", $BuildLog, "BUILD CLEAN");
+  RunAppendingLoggedCommand("(time -p $NICE $MAKECMD $MAKEOPTS)", $BuildLog, "BUILD");
 }
 
 ##############################################################
@@ -751,7 +781,7 @@ if (!$NOCHECKOUT && !$NOBUILD) {
 
 # Get the number of lines of source code. Must be here after the build is done
 # because countloc.sh uses the llvm-config script which must be built.
-my $LOC = `utils/countloc.sh -topdir $BuildDir/llvm`;
+my $LOC = `utils/countloc.sh -topdir $LLVMSrcDir`;
 
 # Get the time taken by the configure script
 my $ConfigTimeU = GetRegexNum "^user", 0, "([0-9.]+)", "$BuildLog";
@@ -787,7 +817,7 @@ my $o_file_sizes="";
 if (!$BuildError) {
   print "Organizing size of .o and .a files\n"
     if ( $VERBOSE );
-  ChangeDir( "$BuildDir/llvm", "Build Directory" );
+  ChangeDir( "$LLVMObjDir", "Build Directory" );
 
   my @dirs = ('utils', 'lib', 'tools');
   if($BUILDTYPE eq "release"){
@@ -815,14 +845,9 @@ if (!$BuildError) {
 my $DejangnuTestResults=""; # String containing the results of the dejagnu
 my $dejagnu_output = "$DejagnuTestsLog";
 if (!$NODEJAGNU) {
-  if($VERBOSE) {
-    print "DEJAGNU FEATURE/REGRESSION TEST STAGE:\n";
-    print "(time -p $MAKECMD $MAKEOPTS check) > $dejagnu_output 2>&1\n";
-  }
-
   #Run the feature and regression tests, results are put into testrun.sum
   #Full log in testrun.log
-  system "(time -p $MAKECMD $MAKEOPTS check) > $dejagnu_output 2>&1";
+  RunLoggedCommand("(time -p $MAKECMD $MAKEOPTS check)", $dejagnu_output, "DEJAGNU");
 
   #Copy the testrun.log and testrun.sum to our webdir
   CopyFile("test/testrun.log", $DejagnuLog);
@@ -857,7 +882,7 @@ if (!$NODEJAGNU) {
     if ($Warning =~ m/Entering directory \`([^\`]+)\'/) {
       $CurDir = $1;                 # Keep track of directory warning is in...
       # Remove buildir prefix if included
-      if ($CurDir =~ m#$BuildDir/llvm/(.*)#) { $CurDir = $1; }
+      if ($CurDir =~ m#$LLVMSrcDir/(.*)#) { $CurDir = $1; }
     } else {
       push @Warnings, "$CurDir/$Warning";     # Add directory to warning...
     }
@@ -886,9 +911,10 @@ if (!$NODEJAGNU) {
 # "External")
 #
 ##############################################################
+
 sub TestDirectory {
   my $SubDir = shift;
-  ChangeDir( "$BuildDir/llvm/projects/llvm-test/$SubDir",
+  ChangeDir( "$LLVMTestDir/$SubDir",
              "Programs Test Subdirectory" ) || return ("", "");
 
   my $ProgramTestLog = "$Prefix-$SubDir-ProgramTest.txt";
@@ -899,8 +925,8 @@ sub TestDirectory {
       print "$MAKECMD -k $MAKEOPTS $PROGTESTOPTS report.nightly.csv ".
             "TEST=nightly > $ProgramTestLog 2>&1\n";
     }
-    system "$MAKECMD -k $MAKEOPTS $PROGTESTOPTS report.nightly.csv ".
-           "TEST=nightly > $ProgramTestLog 2>&1";
+    RunLoggedCommand("$MAKECMD -k $MAKEOPTS $PROGTESTOPTS report.nightly.csv ".
+                     "TEST=nightly", $ProgramTestLog, "TEST DIRECTORY $SubDir");
     $llcbeta_options=`$MAKECMD print-llcbeta-option`;
   }
 
@@ -933,21 +959,12 @@ sub TestDirectory {
 #
 ##############################################################
 if (!$BuildError) {
-  if ( $VERBOSE ) {
-     print "SingleSource TEST STAGE\n";
-  }
   ($SingleSourceProgramsTable, $llcbeta_options) =
     TestDirectory("SingleSource");
   WriteFile "$Prefix-SingleSource-Performance.txt", $SingleSourceProgramsTable;
-  if ( $VERBOSE ) {
-    print "MultiSource TEST STAGE\n";
-  }
   ($MultiSourceProgramsTable, $llcbeta_options) = TestDirectory("MultiSource");
   WriteFile "$Prefix-MultiSource-Performance.txt", $MultiSourceProgramsTable;
   if ( ! $NOEXTERNALS ) {
-    if ( $VERBOSE ) {
-      print "External TEST STAGE\n";
-    }
     ($ExternalProgramsTable, $llcbeta_options) = TestDirectory("External");
     WriteFile "$Prefix-External-Performance.txt", $ExternalProgramsTable;
     system "cat $Prefix-SingleSource-Tests.txt " .
@@ -1116,7 +1133,7 @@ my %hash_of_data = (
   'target_triple' => $targetTriple
 );
 
-if ($SUBMIT) {
+if ($SUBMIT || !($SUBMITAUX eq "")) {
   my $response = SendData $SUBMITSERVER,$SUBMITSCRIPT,\%hash_of_data;
   if( $VERBOSE) { print "============================\n$response"; }
 } else {
diff --git a/utils/TableGen/CodeGenDAGPatterns.cpp b/utils/TableGen/CodeGenDAGPatterns.cpp
index 839059d..6a7d305 100644
--- a/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -2390,6 +2390,10 @@ void CodeGenDAGPatterns::GenerateVariants() {
       // Scan to see if an instruction or explicit pattern already matches this.
       bool AlreadyExists = false;
       for (unsigned p = 0, e = PatternsToMatch.size(); p != e; ++p) {
+        // Skip if the top-level predicates do not match.
+        if (PatternsToMatch[i].getPredicates() !=
+            PatternsToMatch[p].getPredicates())
+          continue;
         // Check to see if this variant already exists.
         if (Variant->isIsomorphicTo(PatternsToMatch[p].getSrcPattern(), DepVars)) {
           DOUT << "  *** ALREADY EXISTS, ignoring variant.\n";
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index d7e8550..9fcfd05 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -1132,12 +1132,14 @@ void TokenizeCmdline(const std::string& CmdLine, StrVector& Out) {
   enum TokenizerState
   { Normal, SpecialCommand, InsideSpecialCommand, InsideQuotationMarks }
   cur_st  = Normal;
+
+  if (CmdLine.find_first_not_of(Delimiters) == std::string::npos)
+    return; // Empty or delimiter-only input: leave Out empty.
   Out.push_back("");
 
   std::string::size_type B = CmdLine.find_first_not_of(Delimiters),
     E = CmdLine.size();
-  if (B == std::string::npos)
-    throw "Empty command-line string!";
+
   for (; B != E; ++B) {
     char cur_ch = CmdLine[B];
 
@@ -1278,7 +1280,7 @@ void EmitCmdLineVecFill(const Init* CmdLine, const std::string& ToolName,
   TokenizeCmdline(InitPtrToString(CmdLine), StrVec);
 
   if (StrVec.empty())
-    throw "Tool " + ToolName + " has empty command line!";
+    throw "Tool '" + ToolName + "' has empty command line!";
 
   StrVector::const_iterator I = StrVec.begin(), E = StrVec.end();
 
@@ -1652,11 +1654,11 @@ void EmitToolClassDefinition (const ToolDescription& D,
 
 }
 
-/// EmitOptionDefintions - Iterate over a list of option descriptions
+/// EmitOptionDefinitions - Iterate over a list of option descriptions
 /// and emit registration code.
-void EmitOptionDefintions (const OptionDescriptions& descs,
-                           bool HasSink, bool HasExterns,
-                           std::ostream& O)
+void EmitOptionDefinitions (const OptionDescriptions& descs,
+                            bool HasSink, bool HasExterns,
+                            std::ostream& O)
 {
   std::vector<OptionDescription> Aliases;
 
@@ -1681,7 +1683,7 @@ void EmitOptionDefintions (const OptionDescriptions& descs,
       continue;
     }
 
-    O << "(\"" << val.Name << '\"';
+    O << "(\"" << val.Name << "\"\n";
 
     if (val.Type == OptionType::Prefix || val.Type == OptionType::PrefixList)
       O << ", cl::Prefix";
@@ -1712,7 +1714,7 @@ void EmitOptionDefintions (const OptionDescriptions& descs,
     if (!val.Help.empty())
       O << ", cl::desc(\"" << val.Help << "\")";
 
-    O << ");\n";
+    O << ");\n\n";
   }
 
   // Emit the aliases (they should go after all the 'proper' options).
@@ -1984,6 +1986,7 @@ void EmitRegisterPlugin(int Priority, std::ostream& O) {
 /// additional declarations.
 void EmitIncludes(std::ostream& O) {
   O << "#include \"llvm/CompilerDriver/CompilationGraph.h\"\n"
+    << "#include \"llvm/CompilerDriver/ForceLinkageMacros.h\"\n"
     << "#include \"llvm/CompilerDriver/Plugin.h\"\n"
     << "#include \"llvm/CompilerDriver/Tool.h\"\n\n"
 
@@ -2081,7 +2084,7 @@ void EmitPluginCode(const PluginData& Data, std::ostream& O) {
   EmitIncludes(O);
 
   // Emit global option registration code.
-  EmitOptionDefintions(Data.OptDescs, Data.HasSink, Data.HasExterns, O);
+  EmitOptionDefinitions(Data.OptDescs, Data.HasSink, Data.HasExterns, O);
 
   // Emit hook declarations.
   EmitHookDeclarations(Data.ToolDescs, O);
@@ -2106,7 +2109,13 @@ void EmitPluginCode(const PluginData& Data, std::ostream& O) {
   // Emit code for plugin registration.
   EmitRegisterPlugin(Data.Priority, O);
 
-  O << "} // End anonymous namespace.\n";
+  O << "} // End anonymous namespace.\n\n";
+
+  // Emit the force-linkage declaration for this plugin.
+  O << "namespace llvmc {\n";
+  O << "LLVMC_FORCE_LINKAGE_DECL(LLVMC_PLUGIN_NAME) {}\n";
+  O << "}\n";
+
   // EOF
 }