371 files changed, 11219 insertions, 5629 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3ad4669..9bce039 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -191,7 +191,13 @@ set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib )
 add_llvm_definitions( -D__STDC_LIMIT_MACROS )
 add_llvm_definitions( -D__STDC_CONSTANT_MACROS )
 
-option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON)
+# MSVC has a gazillion warnings with this.
+if( MSVC )
+  option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." OFF)
+else( MSVC )
+  option(LLVM_ENABLE_WARNINGS "Enable compiler warnings." ON)
+endif()
+
 option(LLVM_ENABLE_PEDANTIC "Compile with pedantic enabled." ON)
 option(LLVM_ENABLE_WERROR "Fail and stop if a warning is triggered." OFF)
 
diff --git a/Makefile b/Makefile
index 1ef89e4..2183a5b 100644
--- a/Makefile
+++ b/Makefile
@@ -66,8 +66,7 @@ ifeq ($(MAKECMDGOALS),tools-only)
 endif
 
 ifeq ($(MAKECMDGOALS),install-clang)
-  DIRS := tools/clang/tools/driver tools/clang/tools/clang-cc \
-	tools/clang/lib/Headers tools/clang/docs
+  DIRS := tools/clang/tools/driver tools/clang/lib/Headers tools/clang/docs
   OPTIONAL_DIRS :=
   NO_INSTALL = 1
 endif
diff --git a/Makefile.config.in b/Makefile.config.in
index 44296a4..2cc69dc 100644
--- a/Makefile.config.in
+++ b/Makefile.config.in
@@ -313,7 +313,7 @@ endif
 # Location of the plugin header file for gold.
 BINUTILS_INCDIR := @BINUTILS_INCDIR@
 
-C_INCLUDE_DIRS := @C_INCLUDE_DISR@
+C_INCLUDE_DIRS := @C_INCLUDE_DIRS@
 CXX_INCLUDE_ROOT := @CXX_INCLUDE_ROOT@
 CXX_INCLUDE_ARCH := @CXX_INCLUDE_ARCH@
 CXX_INCLUDE_32BIT_DIR = @CXX_INCLUDE_32BIT_DIR@
diff --git a/autoconf/configure.ac b/autoconf/configure.ac
index 9519698..9ebaadc 100644
--- a/autoconf/configure.ac
+++ b/autoconf/configure.ac
@@ -672,7 +672,7 @@ case "$withval" in
   *) AC_MSG_ERROR([Invalid path for --with-ocaml-libdir. Provide full path]) ;;
 esac
 
-AC_ARG_WITH(c-include-dir,
+AC_ARG_WITH(c-include-dirs,
   AS_HELP_STRING([--with-c-include-dirs],
     [Colon separated list of directories clang will search for headers]),,
     withval="")
diff --git a/cmake/modules/CheckAtomic.cmake b/cmake/modules/CheckAtomic.cmake
index 27bbaba..f40ff4d 100644
--- a/cmake/modules/CheckAtomic.cmake
+++ b/cmake/modules/CheckAtomic.cmake
@@ -1,14 +1,25 @@
 # atomic builtins are required for threading support.
 
 INCLUDE(CheckCXXSourceCompiles)
-	
+
 CHECK_CXX_SOURCE_COMPILES("
+#ifdef _MSC_VER
+#include <windows.h>
+#endif
 int main() {
+#ifdef _MSC_VER
+        volatile LONG val = 1;
+        MemoryBarrier();
+        InterlockedCompareExchange(&val, 0, 1);
+        InterlockedIncrement(&val);
+        InterlockedDecrement(&val);
+#else
         volatile unsigned long val = 1;
         __sync_synchronize();
         __sync_val_compare_and_swap(&val, 1, 0);
         __sync_add_and_fetch(&val, 1);
         __sync_sub_and_fetch(&val, 1);
+#endif
         return 0;
       }
 " LLVM_MULTITHREADED)
diff --git a/configure b/configure
index 4ef693f..3e0ca0a 100755
--- a/configure
+++ b/configure
@@ -5286,9 +5286,9 @@ echo "$as_me: error: Invalid path for --with-ocaml-libdir. Provide full path" >&
 esac
 
 
-# Check whether --with-c-include-dir was given.
-if test "${with_c_include_dir+set}" = set; then
-  withval=$with_c_include_dir;
+# Check whether --with-c-include-dirs was given.
+if test "${with_c_include_dirs+set}" = set; then
+  withval=$with_c_include_dirs;
 else
   withval=""
 fi
diff --git a/docs/CompilerDriver.html b/docs/CompilerDriver.html
index ded3cf4..3af190d 100644
--- a/docs/CompilerDriver.html
+++ b/docs/CompilerDriver.html
@@ -17,28 +17,28 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->
 <div class="contents topic" id="contents">
 <p class="topic-title first">Contents</p>
 <ul class="simple">
-<li><a class="reference internal" href="#introduction" id="id4">Introduction</a></li>
-<li><a class="reference internal" href="#compiling-with-llvmc" id="id5">Compiling with LLVMC</a></li>
-<li><a class="reference internal" href="#predefined-options" id="id6">Predefined options</a></li>
-<li><a class="reference internal" href="#compiling-llvmc-plugins" id="id7">Compiling LLVMC plugins</a></li>
-<li><a class="reference internal" href="#compiling-standalone-llvmc-based-drivers" id="id8">Compiling standalone LLVMC-based drivers</a></li>
-<li><a class="reference internal" href="#customizing-llvmc-the-compilation-graph" id="id9">Customizing LLVMC: the compilation graph</a></li>
-<li><a class="reference internal" href="#describing-options" id="id10">Describing options</a><ul>
-<li><a class="reference internal" href="#external-options" id="id11">External options</a></li>
+<li><a class="reference internal" href="#introduction" id="id8">Introduction</a></li>
+<li><a class="reference internal" href="#compiling-with-llvmc" id="id9">Compiling with LLVMC</a></li>
+<li><a class="reference internal" href="#predefined-options" id="id10">Predefined options</a></li>
+<li><a class="reference internal" href="#compiling-llvmc-plugins" id="id11">Compiling LLVMC plugins</a></li>
+<li><a class="reference internal" href="#compiling-standalone-llvmc-based-drivers" id="id12">Compiling standalone LLVMC-based drivers</a></li>
+<li><a class="reference internal" href="#customizing-llvmc-the-compilation-graph" id="id13">Customizing LLVMC: the compilation graph</a></li>
+<li><a class="reference internal" href="#describing-options" id="id14">Describing options</a><ul>
+<li><a class="reference internal" href="#external-options" id="id15">External options</a></li>
 </ul>
 </li>
-<li><a class="reference internal" href="#conditional-evaluation" id="id12">Conditional evaluation</a></li>
-<li><a class="reference internal" href="#writing-a-tool-description" id="id13">Writing a tool description</a><ul>
-<li><a class="reference internal" href="#actions" id="id14">Actions</a></li>
+<li><a class="reference internal" href="#conditional-evaluation" id="id16">Conditional evaluation</a></li>
+<li><a class="reference internal" href="#writing-a-tool-description" id="id17">Writing a tool description</a><ul>
+<li><a class="reference internal" href="#id5" id="id18">Actions</a></li>
 </ul>
 </li>
-<li><a class="reference internal" href="#language-map" id="id15">Language map</a></li>
-<li><a class="reference internal" href="#option-preprocessor" id="id16">Option preprocessor</a></li>
-<li><a class="reference internal" href="#more-advanced-topics" id="id17">More advanced topics</a><ul>
-<li><a class="reference internal" href="#hooks-and-environment-variables" id="id18">Hooks and environment variables</a></li>
-<li><a class="reference internal" href="#how-plugins-are-loaded" id="id19">How plugins are loaded</a></li>
-<li><a class="reference internal" href="#debugging" id="id20">Debugging</a></li>
-<li><a class="reference internal" href="#conditioning-on-the-executable-name" id="id21">Conditioning on the executable name</a></li>
+<li><a class="reference internal" href="#language-map" id="id19">Language map</a></li>
+<li><a class="reference internal" href="#option-preprocessor" id="id20">Option preprocessor</a></li>
+<li><a class="reference internal" href="#more-advanced-topics" id="id21">More advanced topics</a><ul>
+<li><a class="reference internal" href="#hooks-and-environment-variables" id="id22">Hooks and environment variables</a></li>
+<li><a class="reference internal" href="#how-plugins-are-loaded" id="id23">How plugins are loaded</a></li>
+<li><a class="reference internal" href="#debugging" id="id24">Debugging</a></li>
+<li><a class="reference internal" href="#conditioning-on-the-executable-name" id="id25">Conditioning on the executable name</a></li>
 </ul>
 </li>
 </ul>
@@ -46,7 +46,7 @@ The ReST source lives in the directory 'tools/llvmc/doc'. -->
 <div class="doc_author">
 <p>Written by <a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a></p>
 </div><div class="section" id="introduction">
-<h1><a class="toc-backref" href="#id4">Introduction</a></h1>
+<h1><a class="toc-backref" href="#id8">Introduction</a></h1>
 <p>LLVMC is a generic compiler driver, designed to be customizable and
 extensible. It plays the same role for LLVM as the <tt class="docutils literal"><span class="pre">gcc</span></tt> program
 does for GCC - LLVMC's job is essentially to transform a set of input
@@ -63,7 +63,7 @@ example, as a build tool for game resources.</p>
 need to be familiar with it to customize LLVMC.</p>
 </div>
 <div class="section" id="compiling-with-llvmc">
-<h1><a class="toc-backref" href="#id5">Compiling with LLVMC</a></h1>
+<h1><a class="toc-backref" href="#id9">Compiling with LLVMC</a></h1>
 <p>LLVMC tries hard to be as compatible with <tt class="docutils literal"><span class="pre">gcc</span></tt> as possible,
 although there are some small differences. Most of the time, however,
 you shouldn't be able to notice them:</p>
@@ -100,7 +100,7 @@ hello
 possible to choose the <tt class="docutils literal"><span class="pre">clang</span></tt> compiler with the <tt class="docutils literal"><span class="pre">-clang</span></tt> option.</p>
 </div>
 <div class="section" id="predefined-options">
-<h1><a class="toc-backref" href="#id6">Predefined options</a></h1>
+<h1><a class="toc-backref" href="#id10">Predefined options</a></h1>
 <p>LLVMC has some built-in options that can't be overridden in the
 configuration libraries:</p>
 <ul class="simple">
@@ -137,7 +137,7 @@ their standard meaning.</li>
 </ul>
 </div>
 <div class="section" id="compiling-llvmc-plugins">
-<h1><a class="toc-backref" href="#id7">Compiling LLVMC plugins</a></h1>
+<h1><a class="toc-backref" href="#id11">Compiling LLVMC plugins</a></h1>
 <p>It's easiest to start working on your own LLVMC plugin by copying the
 skeleton project which lives under <tt class="docutils literal"><span class="pre">$LLVMC_DIR/plugins/Simple</span></tt>:</p>
 <pre class="literal-block">
@@ -176,7 +176,7 @@ $ llvmc -load $LLVM_DIR/Release/lib/plugin_llvmc_Simple.so
 </pre>
 </div>
 <div class="section" id="compiling-standalone-llvmc-based-drivers">
-<h1><a class="toc-backref" href="#id8">Compiling standalone LLVMC-based drivers</a></h1>
+<h1><a class="toc-backref" href="#id12">Compiling standalone LLVMC-based drivers</a></h1>
 <p>By default, the <tt class="docutils literal"><span class="pre">llvmc</span></tt> executable consists of a driver core plus several
 statically linked plugins (<tt class="docutils literal"><span class="pre">Base</span></tt> and <tt class="docutils literal"><span class="pre">Clang</span></tt> at the moment). You can
 produce a standalone LLVMC-based driver executable by linking the core with your
@@ -215,7 +215,7 @@ $ make LLVMC_BUILTIN_PLUGINS=&quot;&quot;
 </pre>
 </div>
 <div class="section" id="customizing-llvmc-the-compilation-graph">
-<h1><a class="toc-backref" href="#id9">Customizing LLVMC: the compilation graph</a></h1>
+<h1><a class="toc-backref" href="#id13">Customizing LLVMC: the compilation graph</a></h1>
 <p>Each TableGen configuration file should include the common
 definitions:</p>
 <pre class="literal-block">
@@ -283,7 +283,7 @@ debugging), run <tt class="docutils literal"><span class="pre">llvmc</span> <spa
 <tt class="docutils literal"><span class="pre">gsview</span></tt> installed for this to work properly.</p>
 </div>
 <div class="section" id="describing-options">
-<h1><a class="toc-backref" href="#id10">Describing options</a></h1>
+<h1><a class="toc-backref" href="#id14">Describing options</a></h1>
 <p>Command-line options that the plugin supports are defined by using an
 <tt class="docutils literal"><span class="pre">OptionList</span></tt>:</p>
 <pre class="literal-block">
@@ -342,6 +342,11 @@ the <tt class="docutils literal"><span class="pre">--help</span></tt> output (bu
 output).</li>
 <li><tt class="docutils literal"><span class="pre">really_hidden</span></tt> - the option will not be mentioned in any help
 output.</li>
+<li><tt class="docutils literal"><span class="pre">comma_separated</span></tt> - Indicates that any commas specified for an option's
+value should be used to split the value up into multiple values for the
+option. This property is valid only for list options. In conjunction with
+<tt class="docutils literal"><span class="pre">forward_value</span></tt> can be used to implement option forwarding in style of
+gcc's <tt class="docutils literal"><span class="pre">-Wa,</span></tt>.</li>
 <li><tt class="docutils literal"><span class="pre">multi_val</span> <span class="pre">n</span></tt> - this option takes <em>n</em> arguments (can be useful in some
 special cases). Usage example: <tt class="docutils literal"><span class="pre">(parameter_list_option</span> <span class="pre">&quot;foo&quot;,</span> <span class="pre">(multi_val</span>
 <span class="pre">3))</span></tt>; the command-line syntax is '-foo a b c'. Only list options can have
@@ -352,13 +357,13 @@ parameter), or a boolean (if it is a switch; boolean constants are called
 <tt class="docutils literal"><span class="pre">true</span></tt> and <tt class="docutils literal"><span class="pre">false</span></tt>). List options can't have this attribute. Usage
 examples: <tt class="docutils literal"><span class="pre">(switch_option</span> <span class="pre">&quot;foo&quot;,</span> <span class="pre">(init</span> <span class="pre">true))</span></tt>; <tt class="docutils literal"><span class="pre">(prefix_option</span> <span class="pre">&quot;bar&quot;,</span>
 <span class="pre">(init</span> <span class="pre">&quot;baz&quot;))</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">extern</span></tt> - this option is defined in some other plugin, see below.</li>
+<li><tt class="docutils literal"><span class="pre">extern</span></tt> - this option is defined in some other plugin, see <a class="reference internal" href="#extern">below</a>.</li>
 </ul>
 </blockquote>
 </li>
 </ul>
 <div class="section" id="external-options">
-<h2><a class="toc-backref" href="#id11">External options</a></h2>
+<span id="extern"></span><h2><a class="toc-backref" href="#id15">External options</a></h2>
 <p>Sometimes, when linking several plugins together, one plugin needs to
 access options defined in some other plugin. Because of the way
 options are implemented, such options must be marked as
@@ -374,7 +379,7 @@ ignored. See also the section on plugin <a class="reference internal" href="#pri
 </div>
 </div>
 <div class="section" id="conditional-evaluation">
-<span id="case"></span><h1><a class="toc-backref" href="#id12">Conditional evaluation</a></h1>
+<span id="case"></span><h1><a class="toc-backref" href="#id16">Conditional evaluation</a></h1>
 <p>The 'case' construct is the main means by which programmability is
 achieved in LLVMC. It can be used to calculate edge weights, program
 actions and modify the shell commands to be executed. The 'case'
@@ -433,7 +438,7 @@ a given value.
 Example: <tt class="docutils literal"><span class="pre">(parameter_equals</span> <span class="pre">&quot;W&quot;,</span> <span class="pre">&quot;all&quot;)</span></tt>.</li>
 <li><tt class="docutils literal"><span class="pre">element_in_list</span></tt> - Returns true if a command-line parameter
 list contains a given value.
-Example: <tt class="docutils literal"><span class="pre">(parameter_in_list</span> <span class="pre">&quot;l&quot;,</span> <span class="pre">&quot;pthread&quot;)</span></tt>.</li>
+Example: <tt class="docutils literal"><span class="pre">(element_in_list</span> <span class="pre">&quot;l&quot;,</span> <span class="pre">&quot;pthread&quot;)</span></tt>.</li>
 <li><tt class="docutils literal"><span class="pre">input_languages_contain</span></tt> - Returns true if a given language
 belongs to the current input language set.
 Example: <tt class="docutils literal"><span class="pre">(input_languages_contain</span> <span class="pre">&quot;c++&quot;)</span></tt>.</li>
@@ -475,7 +480,7 @@ argument. Example: <tt class="docutils literal"><span class="pre">(not</span> <s
 </ul>
 </div>
 <div class="section" id="writing-a-tool-description">
-<h1><a class="toc-backref" href="#id13">Writing a tool description</a></h1>
+<h1><a class="toc-backref" href="#id17">Writing a tool description</a></h1>
 <p>As was said earlier, nodes in the compilation graph represent tools,
 which are described separately. A tool definition looks like this
 (taken from the <tt class="docutils literal"><span class="pre">include/llvm/CompilerDriver/Tools.td</span></tt> file):</p>
@@ -512,12 +517,12 @@ list of input files and joins them together. Used for linkers.</li>
 tools are passed to this tool.</li>
 <li><tt class="docutils literal"><span class="pre">actions</span></tt> - A single big <tt class="docutils literal"><span class="pre">case</span></tt> expression that specifies how
 this tool reacts on command-line options (described in more detail
-below).</li>
+<a class="reference internal" href="#actions">below</a>).</li>
 </ul>
 </li>
 </ul>
-<div class="section" id="actions">
-<h2><a class="toc-backref" href="#id14">Actions</a></h2>
+<div class="section" id="id5">
+<span id="actions"></span><h2><a class="toc-backref" href="#id18">Actions</a></h2>
 <p>A tool often needs to react to command-line options, and this is
 precisely what the <tt class="docutils literal"><span class="pre">actions</span></tt> property is for. The next example
 illustrates this feature:</p>
@@ -550,28 +555,31 @@ like a linker.</p>
 <li><p class="first">Possible actions:</p>
 <blockquote>
 <ul class="simple">
-<li><tt class="docutils literal"><span class="pre">append_cmd</span></tt> - append a string to the tool invocation
-command.
-Example: <tt class="docutils literal"><span class="pre">(case</span> <span class="pre">(switch_on</span> <span class="pre">&quot;pthread&quot;),</span> <span class="pre">(append_cmd</span>
-<span class="pre">&quot;-lpthread&quot;))</span></tt></li>
-<li><tt class="docutils literal"><span class="pre">error</span></tt> - exit with error.
+<li><tt class="docutils literal"><span class="pre">append_cmd</span></tt> - Append a string to the tool invocation command.
+Example: <tt class="docutils literal"><span class="pre">(case</span> <span class="pre">(switch_on</span> <span class="pre">&quot;pthread&quot;),</span> <span class="pre">(append_cmd</span> <span class="pre">&quot;-lpthread&quot;))</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">error</span></tt> - Exit with error.
 Example: <tt class="docutils literal"><span class="pre">(error</span> <span class="pre">&quot;Mixing</span> <span class="pre">-c</span> <span class="pre">and</span> <span class="pre">-S</span> <span class="pre">is</span> <span class="pre">not</span> <span class="pre">allowed!&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">warning</span></tt> - print a warning.
+<li><tt class="docutils literal"><span class="pre">warning</span></tt> - Print a warning.
 Example: <tt class="docutils literal"><span class="pre">(warning</span> <span class="pre">&quot;Specifying</span> <span class="pre">both</span> <span class="pre">-O1</span> <span class="pre">and</span> <span class="pre">-O2</span> <span class="pre">is</span> <span class="pre">meaningless!&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">forward</span></tt> - forward an option unchanged.  Example: <tt class="docutils literal"><span class="pre">(forward</span> <span class="pre">&quot;Wall&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">forward_as</span></tt> - Change the name of an option, but forward the
-argument unchanged.
+<li><tt class="docutils literal"><span class="pre">forward</span></tt> - Forward the option unchanged.
+Example: <tt class="docutils literal"><span class="pre">(forward</span> <span class="pre">&quot;Wall&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">forward_as</span></tt> - Change the option's name, but forward the argument
+unchanged.
 Example: <tt class="docutils literal"><span class="pre">(forward_as</span> <span class="pre">&quot;O0&quot;,</span> <span class="pre">&quot;--disable-optimization&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">output_suffix</span></tt> - modify the output suffix of this
-tool.
+<li><tt class="docutils literal"><span class="pre">forward_value</span></tt> - Forward only option's value. Cannot be used with switch
+options (since they don't have values), but works fine with lists.
+Example: <tt class="docutils literal"><span class="pre">(forward_value</span> <span class="pre">&quot;Wa,&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">forward_transformed_value</span></tt> - As above, but applies a hook to the
+option's value before forwarding (see <a class="reference internal" href="#hooks">below</a>). When
+<tt class="docutils literal"><span class="pre">forward_transformed_value</span></tt> is applied to a list
+option, the hook must have signature
+<tt class="docutils literal"><span class="pre">std::string</span> <span class="pre">hooks::HookName</span> <span class="pre">(const</span> <span class="pre">std::vector&lt;std::string&gt;&amp;)</span></tt>.
+Example: <tt class="docutils literal"><span class="pre">(forward_transformed_value</span> <span class="pre">&quot;m&quot;,</span> <span class="pre">&quot;ConvertToMAttr&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">output_suffix</span></tt> - Modify the output suffix of this tool.
 Example: <tt class="docutils literal"><span class="pre">(output_suffix</span> <span class="pre">&quot;i&quot;)</span></tt>.</li>
-<li><tt class="docutils literal"><span class="pre">stop_compilation</span></tt> - stop compilation after this tool processes
-its input. Used without arguments.</li>
-<li><tt class="docutils literal"><span class="pre">unpack_values</span></tt> - used for for splitting and forwarding
-comma-separated lists of options, e.g. <tt class="docutils literal"><span class="pre">-Wa,-foo=bar,-baz</span></tt> is
-converted to <tt class="docutils literal"><span class="pre">-foo=bar</span> <span class="pre">-baz</span></tt> and appended to the tool invocation
-command.
-Example: <tt class="docutils literal"><span class="pre">(unpack_values</span> <span class="pre">&quot;Wa,&quot;)</span></tt>.</li>
+<li><tt class="docutils literal"><span class="pre">stop_compilation</span></tt> - Stop compilation after this tool processes its
+input. Used without arguments.
+Example: <tt class="docutils literal"><span class="pre">(stop_compilation)</span></tt>.</li>
 </ul>
 </blockquote>
 </li>
@@ -579,7 +587,7 @@ Example: <tt class="docutils literal"><span class="pre">(unpack_values</span> <s
 </div>
 </div>
 <div class="section" id="language-map">
-<h1><a class="toc-backref" href="#id15">Language map</a></h1>
+<h1><a class="toc-backref" href="#id19">Language map</a></h1>
 <p>If you are adding support for a new language to LLVMC, you'll need to
 modify the language map, which defines mappings from file extensions
 to language names. It is used to choose the proper toolchain(s) for a
@@ -602,7 +610,7 @@ multiple output languages, for nodes &quot;inside&quot; the graph the input and
 output languages should match. This is enforced at compile-time.</p>
 </div>
 <div class="section" id="option-preprocessor">
-<h1><a class="toc-backref" href="#id16">Option preprocessor</a></h1>
+<h1><a class="toc-backref" href="#id20">Option preprocessor</a></h1>
 <p>It is sometimes useful to run error-checking code before processing the
 compilation graph. For example, if optimization options &quot;-O1&quot; and &quot;-O2&quot; are
 implemented as switches, we might want to output a warning if the user invokes
@@ -629,9 +637,9 @@ in <tt class="docutils literal"><span class="pre">OptionPreprocessor</span></tt>
 convenience, <tt class="docutils literal"><span class="pre">unset_option</span></tt> also works on lists.</p>
 </div>
 <div class="section" id="more-advanced-topics">
-<h1><a class="toc-backref" href="#id17">More advanced topics</a></h1>
+<h1><a class="toc-backref" href="#id21">More advanced topics</a></h1>
 <div class="section" id="hooks-and-environment-variables">
-<span id="hooks"></span><h2><a class="toc-backref" href="#id18">Hooks and environment variables</a></h2>
+<span id="hooks"></span><h2><a class="toc-backref" href="#id22">Hooks and environment variables</a></h2>
 <p>Normally, LLVMC executes programs from the system <tt class="docutils literal"><span class="pre">PATH</span></tt>. Sometimes,
 this is not sufficient: for example, we may want to specify tool paths
 or names in the configuration file. This can be easily achieved via
@@ -664,7 +672,7 @@ the <tt class="docutils literal"><span class="pre">case</span></tt> expression (
 </pre>
 </div>
 <div class="section" id="how-plugins-are-loaded">
-<span id="priorities"></span><h2><a class="toc-backref" href="#id19">How plugins are loaded</a></h2>
+<span id="priorities"></span><h2><a class="toc-backref" href="#id23">How plugins are loaded</a></h2>
 <p>It is possible for LLVMC plugins to depend on each other. For example,
 one can create edges between nodes defined in some other plugin. To
 make this work, however, that plugin should be loaded first. To
@@ -680,7 +688,7 @@ with 0. Therefore, the plugin with the highest priority value will be
 loaded last.</p>
 </div>
 <div class="section" id="debugging">
-<h2><a class="toc-backref" href="#id20">Debugging</a></h2>
+<h2><a class="toc-backref" href="#id24">Debugging</a></h2>
 <p>When writing LLVMC plugins, it can be useful to get a visual view of
 the resulting compilation graph. This can be achieved via the command
 line option <tt class="docutils literal"><span class="pre">--view-graph</span></tt>. This command assumes that <a class="reference external" href="http://www.graphviz.org/">Graphviz</a> and
@@ -696,7 +704,7 @@ perform any compilation tasks and returns the number of encountered
 errors as its status code.</p>
 </div>
 <div class="section" id="conditioning-on-the-executable-name">
-<h2><a class="toc-backref" href="#id21">Conditioning on the executable name</a></h2>
+<h2><a class="toc-backref" href="#id25">Conditioning on the executable name</a></h2>
 <p>For now, the executable name (the value passed to the driver in <tt class="docutils literal"><span class="pre">argv[0]</span></tt>) is
 accessible only in the C++ code (i.e. hooks). Use the following code:</p>
 <pre class="literal-block">
@@ -704,12 +712,16 @@ namespace llvmc {
 extern const char* ProgramName;
 }
 
+namespace hooks {
+
 std::string MyHook() {
 //...
 if (strcmp(ProgramName, &quot;mydriver&quot;) == 0) {
    //...
 
 }
+
+} // end namespace hooks
 </pre>
 <p>In general, you're encouraged not to make the behaviour dependent on the
 executable file name, and use command-line switches instead. See for example how
@@ -727,7 +739,7 @@ the <tt class="docutils literal"><span class="pre">Base</span></tt> plugin behav
 <a href="mailto:foldr@codedgers.com">Mikhail Glushenkov</a><br />
 <a href="http://llvm.org">LLVM Compiler Infrastructure</a><br />
 
-Last modified: $Date: 2009-10-26 02:35:46 +0100 (Mon, 26 Oct 2009) $
+Last modified: $Date: 2009-12-07 19:26:24 +0100 (Mon, 07 Dec 2009) $
 </address></div>
 </div>
 </div>
diff --git a/docs/GettingStarted.html b/docs/GettingStarted.html
index ca6563b..4b2b56f 100644
--- a/docs/GettingStarted.html
+++ b/docs/GettingStarted.html
@@ -252,7 +252,8 @@ software you will need.</p>
 </tr>
 <tr>
   <td>Cygwin/Win32</td>
-  <td>x86<sup><a href="#pf_1">1</a>,<a href="#pf_8">8</a></sup></td>
+  <td>x86<sup><a href="#pf_1">1</a>,<a href="#pf_8">8</a>,
+     <a href="#pf_11">11</a></sup></td>
   <td>GCC 3.4.X, binutils 2.15</td>
 </tr>
 <tr>
@@ -331,6 +332,9 @@ up</a></li>
     before any Windows-based versions such as Strawberry Perl and
     ActivePerl, as these have Windows-specifics that will cause the
     build to fail.</a></li>
+<li><a name="pf_11">In general, LLVM modules requiring dynamic linking can
+    not be built on Windows. However, you can build LLVM tools using
+    <i>"make tools-only"</i>.</li>
 </ol>
 </div>
 
@@ -1636,7 +1640,7 @@ out:</p>
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.x10sys.com/rspencer/">Reid Spencer</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-11-04 07:15:28 +0100 (Wed, 04 Nov 2009) $
+  Last modified: $Date: 2009-12-09 18:26:02 +0100 (Wed, 09 Dec 2009) $
 </address>
 </body>
 </html>
diff --git a/docs/LangRef.html b/docs/LangRef.html
index 894ad49..8b17e0e 100644
--- a/docs/LangRef.html
+++ b/docs/LangRef.html
@@ -5,7 +5,7 @@
   <title>LLVM Assembly Language Reference Manual</title>
   <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
   <meta name="author" content="Chris Lattner">
-  <meta name="description" 
+  <meta name="description"
   content="LLVM Assembly Language Reference Manual.">
   <link rel="stylesheet" href="llvm.css" type="text/css">
 </head>
@@ -54,7 +54,7 @@
   <li><a href="#typesystem">Type System</a>
     <ol>
       <li><a href="#t_classifications">Type Classifications</a></li>
-      <li><a href="#t_primitive">Primitive Types</a>    
+      <li><a href="#t_primitive">Primitive Types</a>
         <ol>
           <li><a href="#t_integer">Integer Type</a></li>
           <li><a href="#t_floating">Floating Point Types</a></li>
@@ -576,7 +576,7 @@ define i32 @main() {                                        <i>; i32()* </i>
       Symbols with "<tt>common</tt>" linkage are merged in the same way as
       <tt>weak symbols</tt>, and they may not be deleted if unreferenced.
       <tt>common</tt> symbols may not have an explicit section,
-      must have a zero initializer, and may not be marked '<a 
+      must have a zero initializer, and may not be marked '<a
       href="#globalvars"><tt>constant</tt></a>'.  Functions and aliases may not
       have common linkage.</dd>
 
@@ -843,7 +843,7 @@ define i32 @main() {                                        <i>; i32()* </i>
 
 <p>LLVM function declarations consist of the "<tt>declare</tt>" keyword, an
    optional <a href="#linkage">linkage type</a>, an optional
-   <a href="#visibility">visibility style</a>, an optional 
+   <a href="#visibility">visibility style</a>, an optional
    <a href="#callingconv">calling convention</a>, a return type, an optional
    <a href="#paramattrs">parameter attribute</a> for the return type, a function
    name, a possibly empty list of arguments, an optional alignment, and an
@@ -1192,7 +1192,7 @@ target datalayout = "<i>layout specification</i>"
       location.</dd>
 
   <dt><tt>p:<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and 
+  <dd>This specifies the <i>size</i> of a pointer and its <i>abi</i> and
       <i>preferred</i> alignments. All sizes are in bits. Specifying
       the <i>pref</i> alignment is optional. If omitted, the
       preceding <tt>:</tt> should be omitted too.</dd>
@@ -1202,11 +1202,11 @@ target datalayout = "<i>layout specification</i>"
       <i>size</i>. The value of <i>size</i> must be in the range [1,2^23).</dd>
 
   <dt><tt>v<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the alignment for a vector type of a given bit 
+  <dd>This specifies the alignment for a vector type of a given bit
       <i>size</i>.</dd>
 
   <dt><tt>f<i>size</i>:<i>abi</i>:<i>pref</i></tt></dt>
-  <dd>This specifies the alignment for a floating point type of a given bit 
+  <dd>This specifies the alignment for a floating point type of a given bit
       <i>size</i>. The value of <i>size</i> must be either 32 (float) or 64
       (double).</dd>
 
@@ -1222,7 +1222,7 @@ target datalayout = "<i>layout specification</i>"
   <dd>This specifies a set of native integer widths for the target CPU
       in bits.  For example, it might contain "n32" for 32-bit PowerPC,
       "n32:64" for PowerPC 64, or "n8:16:32:64" for X86-64.  Elements of
-      this set are considered to support most general arithmetic 
+      this set are considered to support most general arithmetic
       operations efficiently.</dd>
 </dl>
 
@@ -1616,16 +1616,16 @@ Classifications</a> </div>
   </tr><tr class="layout">
     <td class="left"><tt>float&nbsp;(i16&nbsp;signext,&nbsp;i32&nbsp;*)&nbsp;*
     </tt></td>
-    <td class="left"><a href="#t_pointer">Pointer</a> to a function that takes 
-      an <tt>i16</tt> that should be sign extended and a 
-      <a href="#t_pointer">pointer</a> to <tt>i32</tt>, returning 
+    <td class="left"><a href="#t_pointer">Pointer</a> to a function that takes
+      an <tt>i16</tt> that should be sign extended and a
+      <a href="#t_pointer">pointer</a> to <tt>i32</tt>, returning
       <tt>float</tt>.
     </td>
   </tr><tr class="layout">
     <td class="left"><tt>i32 (i8*, ...)</tt></td>
-    <td class="left">A vararg function that takes at least one 
-      <a href="#t_pointer">pointer</a> to <tt>i8 </tt> (char in C), 
-      which returns an integer.  This is the signature for <tt>printf</tt> in 
+    <td class="left">A vararg function that takes at least one
+      <a href="#t_pointer">pointer</a> to <tt>i8 </tt> (char in C),
+      which returns an integer.  This is the signature for <tt>printf</tt> in
       LLVM.
     </td>
   </tr><tr class="layout">
@@ -2054,9 +2054,9 @@ Unsafe:
 For example, if "%X" has a zero bit, then the output of the 'and' operation will
 always be a zero, no matter what the corresponding bit from the undef is.  As
 such, it is unsafe to optimize or assume that the result of the and is undef.
-However, it is safe to assume that all bits of the undef could be 0, and 
-optimize the and to 0.  Likewise, it is safe to assume that all the bits of 
-the undef operand to the or could be set, allowing the or to be folded to 
+However, it is safe to assume that all bits of the undef could be 0, and
+optimize the and to 0.  Likewise, it is safe to assume that all the bits of
+the undef operand to the or could be set, allowing the or to be folded to
 -1.</p>
 
 <div class="doc_code">
@@ -2086,7 +2086,7 @@ the optimizer is allowed to assume that the undef operand could be the same as
 <div class="doc_code">
 <pre>
   %A = xor undef, undef
-  
+
   %B = undef
   %C = xor %B, %B
 
@@ -2137,7 +2137,7 @@ does not execute at all.  This allows us to delete the divide and all code after
 it: since the undefined operation "can't happen", the optimizer can assume that
 it occurs in dead code.
 </p>
- 
+
 <div class="doc_code">
 <pre>
 a:  store undef -> %X
@@ -2149,7 +2149,7 @@ b: unreachable
 </div>
 
 <p>These examples reiterate the fdiv example: a store "of" an undefined value
-can be assumed to not have any effect: we can assume that the value is 
+can be assumed to not have any effect: we can assume that the value is
 overwritten with bits that happen to match what was already there.  However, a
 store "to" an undefined location could clobber arbitrary memory, therefore, it
 has undefined behavior.</p>
@@ -2166,7 +2166,7 @@ has undefined behavior.</p>
 <p>The '<tt>blockaddress</tt>' constant computes the address of the specified
    basic block in the specified function, and always has an i8* type.  Taking
    the address of the entry block is illegal.</p>
-     
+
 <p>This value only has defined behavior when used as an operand to the
    '<a href="#i_indirectbr"><tt>indirectbr</tt></a>' instruction or for comparisons
    against null.  Pointer equality tests between labels addresses is undefined
@@ -2175,7 +2175,7 @@ has undefined behavior.</p>
    pointer sized value as long as the bits are not inspected.  This allows
    <tt>ptrtoint</tt> and arithmetic to be performed on these values so long as
    the original value is reconstituted before the <tt>indirectbr</tt>.</p>
-   
+
 <p>Finally, some targets may provide defined semantics when
    using the value as the operand to an inline assembly, but that is target
    specific.
@@ -2703,7 +2703,7 @@ IfUnequal:
    rest of the arguments indicate the full set of possible destinations that the
    address may point to.  Blocks are allowed to occur multiple times in the
    destination list, though this isn't particularly useful.</p>
-   
+
 <p>This destination list is required so that dataflow analysis has an accurate
    understanding of the CFG.</p>
 
@@ -3060,7 +3060,7 @@ Instruction</a> </div>
 <p>The two arguments to the '<tt>mul</tt>' instruction must
    be <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
    integer values.  Both arguments must have identical types.</p>
- 
+
 <h5>Semantics:</h5>
 <p>The value produced is the integer product of the two operands.</p>
 
@@ -3132,7 +3132,7 @@ Instruction</a> </div>
 <p>The '<tt>udiv</tt>' instruction returns the quotient of its two operands.</p>
 
 <h5>Arguments:</h5>
-<p>The two arguments to the '<tt>udiv</tt>' instruction must be 
+<p>The two arguments to the '<tt>udiv</tt>' instruction must be
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    values.  Both arguments must have identical types.</p>
 
@@ -3167,7 +3167,7 @@ Instruction</a> </div>
 <p>The '<tt>sdiv</tt>' instruction returns the quotient of its two operands.</p>
 
 <h5>Arguments:</h5>
-<p>The two arguments to the '<tt>sdiv</tt>' instruction must be 
+<p>The two arguments to the '<tt>sdiv</tt>' instruction must be
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    values.  Both arguments must have identical types.</p>
 
@@ -3238,7 +3238,7 @@ Instruction</a> </div>
    division of its two arguments.</p>
 
 <h5>Arguments:</h5>
-<p>The two arguments to the '<tt>urem</tt>' instruction must be 
+<p>The two arguments to the '<tt>urem</tt>' instruction must be
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    values.  Both arguments must have identical types.</p>
 
@@ -3278,7 +3278,7 @@ Instruction</a> </div>
    elements must be integers.</p>
 
 <h5>Arguments:</h5>
-<p>The two arguments to the '<tt>srem</tt>' instruction must be 
+<p>The two arguments to the '<tt>srem</tt>' instruction must be
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    values.  Both arguments must have identical types.</p>
 
@@ -3373,7 +3373,7 @@ Instruction</a> </div>
 <p>Both arguments to the '<tt>shl</tt>' instruction must be the
     same <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of
     integer type.  '<tt>op2</tt>' is treated as an unsigned value.</p>
- 
+
 <h5>Semantics:</h5>
 <p>The value produced is <tt>op1</tt> * 2<sup><tt>op2</tt></sup> mod
    2<sup>n</sup>, where <tt>n</tt> is the width of the result.  If <tt>op2</tt>
@@ -3409,7 +3409,7 @@ Instruction</a> </div>
    operand shifted to the right a specified number of bits with zero fill.</p>
 
 <h5>Arguments:</h5>
-<p>Both arguments to the '<tt>lshr</tt>' instruction must be the same 
+<p>Both arguments to the '<tt>lshr</tt>' instruction must be the same
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    type. '<tt>op2</tt>' is treated as an unsigned value.</p>
 
@@ -3449,7 +3449,7 @@ Instruction</a> </div>
    extension.</p>
 
 <h5>Arguments:</h5>
-<p>Both arguments to the '<tt>ashr</tt>' instruction must be the same 
+<p>Both arguments to the '<tt>ashr</tt>' instruction must be the same
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    type.  '<tt>op2</tt>' is treated as an unsigned value.</p>
 
@@ -3489,7 +3489,7 @@ Instruction</a> </div>
    operands.</p>
 
 <h5>Arguments:</h5>
-<p>The two arguments to the '<tt>and</tt>' instruction must be 
+<p>The two arguments to the '<tt>and</tt>' instruction must be
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    values.  Both arguments must have identical types.</p>
 
@@ -3548,7 +3548,7 @@ Instruction</a> </div>
    two operands.</p>
 
 <h5>Arguments:</h5>
-<p>The two arguments to the '<tt>or</tt>' instruction must be 
+<p>The two arguments to the '<tt>or</tt>' instruction must be
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    values.  Both arguments must have identical types.</p>
 
@@ -3611,7 +3611,7 @@ Instruction</a> </div>
    complement" operation, which is the "~" operator in C.</p>
 
 <h5>Arguments:</h5>
-<p>The two arguments to the '<tt>xor</tt>' instruction must be 
+<p>The two arguments to the '<tt>xor</tt>' instruction must be
    <a href="#t_integer">integer</a> or <a href="#t_vector">vector</a> of integer
    values.  Both arguments must have identical types.</p>
 
@@ -3659,7 +3659,7 @@ Instruction</a> </div>
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"> 
+<div class="doc_subsection">
   <a name="vectorops">Vector Operations</a>
 </div>
 
@@ -3782,20 +3782,20 @@ Instruction</a> </div>
 
 <h5>Example:</h5>
 <pre>
-  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2, 
+  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
                           &lt;4 x i32&gt; &lt;i32 0, i32 4, i32 1, i32 5&gt;  <i>; yields &lt;4 x i32&gt;</i>
-  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef, 
+  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; undef,
                           &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt;  <i>; yields &lt;4 x i32&gt;</i> - Identity shuffle.
-  &lt;result&gt; = shufflevector &lt;8 x i32&gt; %v1, &lt;8 x i32&gt; undef, 
+  &lt;result&gt; = shufflevector &lt;8 x i32&gt; %v1, &lt;8 x i32&gt; undef,
                           &lt;4 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3&gt;  <i>; yields &lt;4 x i32&gt;</i>
-  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2, 
+  &lt;result&gt; = shufflevector &lt;4 x i32&gt; %v1, &lt;4 x i32&gt; %v2,
                           &lt;8 x i32&gt; &lt;i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 &gt;  <i>; yields &lt;8 x i32&gt;</i>
 </pre>
 
 </div>
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"> 
+<div class="doc_subsection">
   <a name="aggregateops">Aggregate Operations</a>
 </div>
 
@@ -3880,7 +3880,7 @@ Instruction</a> </div>
 
 
 <!-- ======================================================================= -->
-<div class="doc_subsection"> 
+<div class="doc_subsection">
   <a name="memoryops">Memory Access and Addressing Operations</a>
 </div>
 
@@ -4243,15 +4243,15 @@ entry:
 </pre>
 
 <h5>Overview:</h5>
-<p>The '<tt>zext</tt>' instruction zero extends its operand to type 
+<p>The '<tt>zext</tt>' instruction zero extends its operand to type
    <tt>ty2</tt>.</p>
 
 
 <h5>Arguments:</h5>
-<p>The '<tt>zext</tt>' instruction takes a value to cast, which must be of 
+<p>The '<tt>zext</tt>' instruction takes a value to cast, which must be of
    <a href="#t_integer">integer</a> type, and a type to cast it to, which must
    also be of <a href="#t_integer">integer</a> type. The bit size of the
-   <tt>value</tt> must be smaller than the bit size of the destination type, 
+   <tt>value</tt> must be smaller than the bit size of the destination type,
    <tt>ty2</tt>.</p>
 
 <h5>Semantics:</h5>
@@ -4283,10 +4283,10 @@ entry:
 <p>The '<tt>sext</tt>' sign extends <tt>value</tt> to the type <tt>ty2</tt>.</p>
 
 <h5>Arguments:</h5>
-<p>The '<tt>sext</tt>' instruction takes a value to cast, which must be of 
+<p>The '<tt>sext</tt>' instruction takes a value to cast, which must be of
    <a href="#t_integer">integer</a> type, and a type to cast it to, which must
    also be of <a href="#t_integer">integer</a> type.  The bit size of the
-   <tt>value</tt> must be smaller than the bit size of the destination type, 
+   <tt>value</tt> must be smaller than the bit size of the destination type,
    <tt>ty2</tt>.</p>
 
 <h5>Semantics:</h5>
@@ -4324,12 +4324,12 @@ entry:
 <p>The '<tt>fptrunc</tt>' instruction takes a <a href="#t_floating">floating
    point</a> value to cast and a <a href="#t_floating">floating point</a> type
    to cast it to. The size of <tt>value</tt> must be larger than the size of
-   <tt>ty2</tt>. This implies that <tt>fptrunc</tt> cannot be used to make a 
+   <tt>ty2</tt>. This implies that <tt>fptrunc</tt> cannot be used to make a
    <i>no-op cast</i>.</p>
 
 <h5>Semantics:</h5>
 <p>The '<tt>fptrunc</tt>' instruction truncates a <tt>value</tt> from a larger
-   <a href="#t_floating">floating point</a> type to a smaller 
+   <a href="#t_floating">floating point</a> type to a smaller
    <a href="#t_floating">floating point</a> type.  If the value cannot fit
    within the destination type, <tt>ty2</tt>, then the results are
    undefined.</p>
@@ -4358,7 +4358,7 @@ entry:
    floating point value.</p>
 
 <h5>Arguments:</h5>
-<p>The '<tt>fpext</tt>' instruction takes a 
+<p>The '<tt>fpext</tt>' instruction takes a
    <a href="#t_floating">floating point</a> <tt>value</tt> to cast, and
    a <a href="#t_floating">floating point</a> type to cast it to. The source
    type must be smaller than the destination type.</p>
@@ -4401,7 +4401,7 @@ entry:
    vector integer type with the same number of elements as <tt>ty</tt></p>
 
 <h5>Semantics:</h5>
-<p>The '<tt>fptoui</tt>' instruction converts its 
+<p>The '<tt>fptoui</tt>' instruction converts its
    <a href="#t_floating">floating point</a> operand into the nearest (rounding
    towards zero) unsigned integer value. If the value cannot fit
    in <tt>ty2</tt>, the results are undefined.</p>
@@ -4427,7 +4427,7 @@ entry:
 </pre>
 
 <h5>Overview:</h5>
-<p>The '<tt>fptosi</tt>' instruction converts 
+<p>The '<tt>fptosi</tt>' instruction converts
    <a href="#t_floating">floating point</a> <tt>value</tt> to
    type <tt>ty2</tt>.</p>
 
@@ -4439,7 +4439,7 @@ entry:
    vector integer type with the same number of elements as <tt>ty</tt></p>
 
 <h5>Semantics:</h5>
-<p>The '<tt>fptosi</tt>' instruction converts its 
+<p>The '<tt>fptosi</tt>' instruction converts its
    <a href="#t_floating">floating point</a> operand into the nearest (rounding
    towards zero) signed integer value. If the value cannot fit in <tt>ty2</tt>,
    the results are undefined.</p>
@@ -4636,7 +4636,7 @@ entry:
 <pre>
   %X = bitcast i8 255 to i8              <i>; yields i8 :-1</i>
   %Y = bitcast i32* %x to sint*          <i>; yields sint*:%x</i>
-  %Z = bitcast &lt;2 x int&gt; %V to i64;      <i>; yields i64: %V</i>   
+  %Z = bitcast &lt;2 x int&gt; %V to i64;      <i>; yields i64: %V</i>
 </pre>
 
 </div>
@@ -4696,11 +4696,11 @@ entry:
    result, as follows:</p>
 
 <ol>
-  <li><tt>eq</tt>: yields <tt>true</tt> if the operands are equal, 
+  <li><tt>eq</tt>: yields <tt>true</tt> if the operands are equal,
       <tt>false</tt> otherwise. No sign interpretation is necessary or
       performed.</li>
 
-  <li><tt>ne</tt>: yields <tt>true</tt> if the operands are unequal, 
+  <li><tt>ne</tt>: yields <tt>true</tt> if the operands are unequal,
       <tt>false</tt> otherwise. No sign interpretation is necessary or
       performed.</li>
 
@@ -4817,42 +4817,42 @@ entry:
 <ol>
   <li><tt>false</tt>: always yields <tt>false</tt>, regardless of operands.</li>
 
-  <li><tt>oeq</tt>: yields <tt>true</tt> if both operands are not a QNAN and 
+  <li><tt>oeq</tt>: yields <tt>true</tt> if both operands are not a QNAN and
       <tt>op1</tt> is equal to <tt>op2</tt>.</li>
 
   <li><tt>ogt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
       <tt>op1</tt> is greather than <tt>op2</tt>.</li>
 
-  <li><tt>oge</tt>: yields <tt>true</tt> if both operands are not a QNAN and 
+  <li><tt>oge</tt>: yields <tt>true</tt> if both operands are not a QNAN and
       <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
 
-  <li><tt>olt</tt>: yields <tt>true</tt> if both operands are not a QNAN and 
+  <li><tt>olt</tt>: yields <tt>true</tt> if both operands are not a QNAN and
       <tt>op1</tt> is less than <tt>op2</tt>.</li>
 
-  <li><tt>ole</tt>: yields <tt>true</tt> if both operands are not a QNAN and 
+  <li><tt>ole</tt>: yields <tt>true</tt> if both operands are not a QNAN and
       <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
 
-  <li><tt>one</tt>: yields <tt>true</tt> if both operands are not a QNAN and 
+  <li><tt>one</tt>: yields <tt>true</tt> if both operands are not a QNAN and
       <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
 
   <li><tt>ord</tt>: yields <tt>true</tt> if both operands are not a QNAN.</li>
 
-  <li><tt>ueq</tt>: yields <tt>true</tt> if either operand is a QNAN or 
+  <li><tt>ueq</tt>: yields <tt>true</tt> if either operand is a QNAN or
       <tt>op1</tt> is equal to <tt>op2</tt>.</li>
 
-  <li><tt>ugt</tt>: yields <tt>true</tt> if either operand is a QNAN or 
+  <li><tt>ugt</tt>: yields <tt>true</tt> if either operand is a QNAN or
       <tt>op1</tt> is greater than <tt>op2</tt>.</li>
 
-  <li><tt>uge</tt>: yields <tt>true</tt> if either operand is a QNAN or 
+  <li><tt>uge</tt>: yields <tt>true</tt> if either operand is a QNAN or
       <tt>op1</tt> is greater than or equal to <tt>op2</tt>.</li>
 
-  <li><tt>ult</tt>: yields <tt>true</tt> if either operand is a QNAN or 
+  <li><tt>ult</tt>: yields <tt>true</tt> if either operand is a QNAN or
       <tt>op1</tt> is less than <tt>op2</tt>.</li>
 
-  <li><tt>ule</tt>: yields <tt>true</tt> if either operand is a QNAN or 
+  <li><tt>ule</tt>: yields <tt>true</tt> if either operand is a QNAN or
       <tt>op1</tt> is less than or equal to <tt>op2</tt>.</li>
 
-  <li><tt>une</tt>: yields <tt>true</tt> if either operand is a QNAN or 
+  <li><tt>une</tt>: yields <tt>true</tt> if either operand is a QNAN or
       <tt>op1</tt> is not equal to <tt>op2</tt>.</li>
 
   <li><tt>uno</tt>: yields <tt>true</tt> if either operand is a QNAN.</li>
@@ -5144,7 +5144,7 @@ freestanding environments and non-C-based langauges.</p>
    suffix is required. Because the argument's type is matched against the return
    type, it does not require its own name suffix.</p>
 
-<p>To learn how to add an intrinsic function, please see the 
+<p>To learn how to add an intrinsic function, please see the
    <a href="ExtendingLLVM.html">Extending LLVM Guide</a>.</p>
 
 </div>
@@ -6579,11 +6579,11 @@ LLVM</a>.</p>
 <ul>
   <li><tt>ll</tt>: All loads before the barrier must complete before any load
       after the barrier begins.</li>
-  <li><tt>ls</tt>: All loads before the barrier must complete before any 
+  <li><tt>ls</tt>: All loads before the barrier must complete before any
       store after the barrier begins.</li>
-  <li><tt>ss</tt>: All stores before the barrier must complete before any 
+  <li><tt>ss</tt>: All stores before the barrier must complete before any
       store after the barrier begins.</li>
-  <li><tt>sl</tt>: All stores before the barrier must complete before any 
+  <li><tt>sl</tt>: All stores before the barrier must complete before any
       load after the barrier begins.</li>
 </ul>
 
@@ -6796,7 +6796,7 @@ LLVM</a>.</p>
 </pre>
 
 <h5>Overview:</h5>
-<p>This intrinsic subtracts <tt>delta</tt> to the value stored in memory at 
+<p>This intrinsic subtracts <tt>delta</tt> to the value stored in memory at
    <tt>ptr</tt>. It yields the original value at <tt>ptr</tt>.</p>
 
 <h5>Arguments:</h5>
@@ -6952,7 +6952,7 @@ LLVM</a>.</p>
 </pre>
 
 <h5>Overview:</h5>
-<p>These intrinsics takes the signed or unsigned minimum or maximum of 
+<p>These intrinsics takes the signed or unsigned minimum or maximum of
    <tt>delta</tt> and the value stored in memory at <tt>ptr</tt>. It yields the
    original value at <tt>ptr</tt>.</p>
 
@@ -7262,24 +7262,44 @@ LLVM</a>.</p>
 </pre>
 
 <h5>Overview:</h5>
-<p>The <tt>llvm.objectsize</tt> intrinsic returns the constant number of bytes
-   from <tt>ptr</tt> to the end of the object <tt>ptr</tt> points to if it
-   can deduce this at compile time.  If there are any side-effects in evaluating
-   the argument or it cannot deduce which objects <tt>ptr</tt> points to at compile
-   time the intrinsic returns <tt>(size_t) -1</tt> for <tt>type</tt> 0
-   or 1 and <tt>(size_t) 0</tt> for <tt>type</tt> 2 or 3.</p>
+<p>The <tt>llvm.objectsize</tt> intrinsic is designed to provide information
+   to the optimizers to either discover at compile time either a) when an
+   operation like memcpy will either overflow a buffer that corresponds to
+   an object, or b) to determine that a runtime check for overflow isn't
+   necessary. An object in this context means an allocation of a
+   specific <a href="#typesystem">type</a>.</p>
 
 <h5>Arguments:</h5>
 <p>The <tt>llvm.objectsize</tt> intrinsic takes two arguments.  The first
-   argument is a pointer to the object <tt>ptr</tt> and an integer <tt>type</tt>.
-   <tt>type</tt> is an integer ranging from 0 to 3. The lsb corresponds to
-   a return value based on whole objects, the second bit whether or not we
-   return the maximum or minimum remaining bytes computed.</p>
+   argument is a pointer to the object <tt>ptr</tt>. The second argument
+   is an integer <tt>type</tt> which ranges from 0 to 3. The first bit in
+   the type corresponds to a return value based on whole objects,
+   and the second bit whether or not we return the maximum or minimum
+   remaining bytes computed.</p>
+<table class="layout">
+  <tr class="layout">
+    <td class="left"><tt>00</tt></td>
+    <td class="left">whole object, maximum number of bytes</td>
+  </tr>
+  <tr class="layout">
+    <td class="left"><tt>01</tt></td>
+    <td class="left">partial object, maximum number of bytes</td>
+  </tr>
+  <tr class="layout">
+    <td class="left"><tt>10</tt></td>
+    <td class="left">whole object, minimum number of bytes</td>
+  </tr>
+  <tr class="layout">
+    <td class="left"><tt>11</tt></td>
+    <td class="left">partial object, minimum number of bytes</td>
+  </tr>
+</table>
 
 <h5>Semantics:</h5>
 <p>The <tt>llvm.objectsize</tt> intrinsic is lowered to either a constant
-   representing the size of the object concerned or <tt>(size_t) -1</tt> if
-   it cannot be determined at compile time.</p>
+   representing the size of the object concerned or <tt>i32/i64 -1 or 0</tt>
+   (depending on the <tt>type</tt> argument if the size cannot be determined
+   at compile time.</p>
 
 </div>
 
@@ -7293,7 +7313,7 @@ LLVM</a>.</p>
 
   <a href="mailto:sabre@nondot.org">Chris Lattner</a><br>
   <a href="http://llvm.org">The LLVM Compiler Infrastructure</a><br>
-  Last modified: $Date: 2009-11-30 09:03:53 +0100 (Mon, 30 Nov 2009) $
+  Last modified: $Date: 2009-12-05 03:46:03 +0100 (Sat, 05 Dec 2009) $
 </address>
 
 </body>
diff --git a/docs/Makefile b/docs/Makefile
index 310c4bd..5bfa6c3 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -100,7 +100,12 @@ install-ocamldoc: ocamldoc
 	  $(FIND) . -type f -exec \
 	    $(DataInstall) {} $(PROJ_docsdir)/ocamldoc/html \;
 
-ocamldoc: regen-ocamldoc $(PROJ_OBJ_DIR)/ocamldoc.tar.gz
+ocamldoc: regen-ocamldoc
+	$(Echo) Packaging ocamldoc documentation
+	$(Verb) $(RM) -rf $(PROJ_OBJ_DIR)/ocamldoc.tar*
+	$(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/ocamldoc.tar ocamldoc
+	$(Verb) $(GZIP) $(PROJ_OBJ_DIR)/ocamldoc.tar
+	$(Verb) $(CP) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(PROJ_OBJ_DIR)/ocamldoc/html/
 
 regen-ocamldoc:
 	$(Echo) Building ocamldoc documentation
@@ -113,13 +118,6 @@ regen-ocamldoc:
 		$(OCAMLDOC) -d $(PROJ_OBJ_DIR)/ocamldoc/html -sort -colorize-code -html \
 		`$(FIND) $(LEVEL)/bindings/ocaml -name "*.odoc" -exec echo -load '{}' ';'`
 
-$(PROJ_OBJ_DIR)/ocamldoc.tar.gz:
-	$(Echo) Packaging ocamldoc documentation
-	$(Verb) $(RM) -rf $@ $(PROJ_OBJ_DIR)/ocamldoc.tar
-	$(Verb) $(TAR) cf $(PROJ_OBJ_DIR)/ocamldoc.tar ocamldoc
-	$(Verb) $(GZIP) $(PROJ_OBJ_DIR)/ocamldoc.tar
-	$(Verb) $(CP) $(PROJ_OBJ_DIR)/ocamldoc.tar.gz $(PROJ_OBJ_DIR)/ocamldoc/html/
-
 uninstall-local::
 	$(Echo) Uninstalling Documentation
 	$(Verb) $(RM) -rf $(PROJ_docsdir)
diff --git a/include/llvm/ADT/DeltaAlgorithm.h b/include/llvm/ADT/DeltaAlgorithm.h
new file mode 100644
index 0000000..1facfa0
--- /dev/null
+++ b/include/llvm/ADT/DeltaAlgorithm.h
@@ -0,0 +1,91 @@
+//===--- DeltaAlgorithm.h - A Set Minimization Algorithm -------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DELTAALGORITHM_H
+#define LLVM_ADT_DELTAALGORITHM_H
+
+#include <vector>
+#include <set>
+
+namespace llvm {
+
+/// DeltaAlgorithm - Implements the delta debugging algorithm (A. Zeller '99)
+/// for minimizing arbitrary sets using a predicate function.
+///
+/// The result of the algorithm is a subset of the input change set which is
+/// guaranteed to satisfy the predicate, assuming that the input set did. For
+/// well formed predicates, the result set is guaranteed to be such that
+/// removing any single element would falsify the predicate.
+///
+/// For best results the predicate function *should* (but need not) satisfy
+/// certain properties, in particular:
+///  (1) The predicate should return false on an empty set and true on the full
+///  set.
+///  (2) If the predicate returns true for a set of changes, it should return
+///  true for all supersets of that set.
+///
+/// It is not an error to provide a predicate that does not satisfy these
+/// requirements, and the algorithm will generally produce reasonable
+/// results. However, it may run substantially more tests than with a good
+/// predicate.
+class DeltaAlgorithm {
+public:
+  typedef unsigned change_ty;
+  // FIXME: Use a decent data structure.
+  typedef std::set<change_ty> changeset_ty;
+  typedef std::vector<changeset_ty> changesetlist_ty;
+
+private:
+  /// Cache of failed test results. Successful test results are never cached
+  /// since we always reduce following a success.
+  std::set<changeset_ty> FailedTestsCache;
+
+  /// GetTestResult - Get the test result for the \arg Changes from the
+  /// cache, executing the test if necessary.
+  ///
+  /// \param Changes - The change set to test.
+  /// \return - The test result.
+  bool GetTestResult(const changeset_ty &Changes);
+
+  /// Split - Partition a set of changes \arg Sinto one or two subsets.
+  void Split(const changeset_ty &S, changesetlist_ty &Res);
+
+  /// Delta - Minimize a set of \arg Changes which has been partioned into
+  /// smaller sets, by attempting to remove individual subsets.
+  changeset_ty Delta(const changeset_ty &Changes,
+                     const changesetlist_ty &Sets);
+
+  /// Search - Search for a subset (or subsets) in \arg Sets which can be
+  /// removed from \arg Changes while still satisfying the predicate.
+  ///
+  /// \param Res - On success, a subset of Changes which satisfies the
+  /// predicate.
+  /// \return - True on success.
+  bool Search(const changeset_ty &Changes, const changesetlist_ty &Sets,
+              changeset_ty &Res);
+              
+protected:
+  /// UpdatedSearchState - Callback used when the search state changes.
+  virtual void UpdatedSearchState(const changeset_ty &Changes,
+                                  const changesetlist_ty &Sets) {}
+
+  /// ExecuteOneTest - Execute a single test predicate on the change set \arg S.
+  virtual bool ExecuteOneTest(const changeset_ty &S) = 0;
+
+public:
+  virtual ~DeltaAlgorithm();
+
+  /// Run - Minimize the set \arg Changes by executing \see ExecuteOneTest() on
+  /// subsets of changes and returning the smallest set which still satisfies
+  /// the test predicate.
+  changeset_ty Run(const changeset_ty &Changes);
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/ADT/DenseMap.h b/include/llvm/ADT/DenseMap.h
index 8329947..8b62f2d 100644
--- a/include/llvm/ADT/DenseMap.h
+++ b/include/llvm/ADT/DenseMap.h
@@ -217,7 +217,8 @@ public:
 
 private:
   void CopyFrom(const DenseMap& other) {
-    if (NumBuckets != 0 && (!KeyInfoT::isPod() || !ValueInfoT::isPod())) {
+    if (NumBuckets != 0 &&
+        (!isPodLike<KeyInfoT>::value || !isPodLike<ValueInfoT>::value)) {
       const KeyT EmptyKey = getEmptyKey(), TombstoneKey = getTombstoneKey();
       for (BucketT *P = Buckets, *E = Buckets+NumBuckets; P != E; ++P) {
         if (!KeyInfoT::isEqual(P->first, EmptyKey) &&
@@ -239,7 +240,7 @@ private:
     Buckets = static_cast<BucketT*>(operator new(sizeof(BucketT) *
                                                  other.NumBuckets));
 
-    if (KeyInfoT::isPod() && ValueInfoT::isPod())
+    if (isPodLike<KeyInfoT>::value && isPodLike<ValueInfoT>::value)
       memcpy(Buckets, other.Buckets, other.NumBuckets * sizeof(BucketT));
     else
       for (size_t i = 0; i < other.NumBuckets; ++i) {
diff --git a/include/llvm/ADT/DenseMapInfo.h b/include/llvm/ADT/DenseMapInfo.h
index 2f241c5..6b494ef 100644
--- a/include/llvm/ADT/DenseMapInfo.h
+++ b/include/llvm/ADT/DenseMapInfo.h
@@ -15,7 +15,7 @@
 #define LLVM_ADT_DENSEMAPINFO_H
 
 #include "llvm/Support/PointerLikeTypeTraits.h"
-#include <utility>
+#include "llvm/Support/type_traits.h"
 
 namespace llvm {
 
@@ -25,7 +25,6 @@ struct DenseMapInfo {
   //static inline T getTombstoneKey();
   //static unsigned getHashValue(const T &Val);
   //static bool isEqual(const T &LHS, const T &RHS);
-  //static bool isPod()
 };
 
 // Provide DenseMapInfo for all pointers.
@@ -46,7 +45,6 @@ struct DenseMapInfo<T*> {
            (unsigned((uintptr_t)PtrVal) >> 9);
   }
   static bool isEqual(const T *LHS, const T *RHS) { return LHS == RHS; }
-  static bool isPod() { return true; }
 };
 
 // Provide DenseMapInfo for chars.
@@ -54,7 +52,6 @@ template<> struct DenseMapInfo<char> {
   static inline char getEmptyKey() { return ~0; }
   static inline char getTombstoneKey() { return ~0 - 1; }
   static unsigned getHashValue(const char& Val) { return Val * 37; }
-  static bool isPod() { return true; }
   static bool isEqual(const char &LHS, const char &RHS) {
     return LHS == RHS;
   }
@@ -65,7 +62,6 @@ template<> struct DenseMapInfo<unsigned> {
   static inline unsigned getEmptyKey() { return ~0; }
   static inline unsigned getTombstoneKey() { return ~0U - 1; }
   static unsigned getHashValue(const unsigned& Val) { return Val * 37; }
-  static bool isPod() { return true; }
   static bool isEqual(const unsigned& LHS, const unsigned& RHS) {
     return LHS == RHS;
   }
@@ -78,7 +74,6 @@ template<> struct DenseMapInfo<unsigned long> {
   static unsigned getHashValue(const unsigned long& Val) {
     return (unsigned)(Val * 37UL);
   }
-  static bool isPod() { return true; }
   static bool isEqual(const unsigned long& LHS, const unsigned long& RHS) {
     return LHS == RHS;
   }
@@ -91,7 +86,6 @@ template<> struct DenseMapInfo<unsigned long long> {
   static unsigned getHashValue(const unsigned long long& Val) {
     return (unsigned)(Val * 37ULL);
   }
-  static bool isPod() { return true; }
   static bool isEqual(const unsigned long long& LHS,
                       const unsigned long long& RHS) {
     return LHS == RHS;
@@ -127,7 +121,6 @@ struct DenseMapInfo<std::pair<T, U> > {
     return (unsigned)key;
   }
   static bool isEqual(const Pair& LHS, const Pair& RHS) { return LHS == RHS; }
-  static bool isPod() { return FirstInfo::isPod() && SecondInfo::isPod(); }
 };
 
 } // end namespace llvm
diff --git a/include/llvm/ADT/DenseSet.h b/include/llvm/ADT/DenseSet.h
index ce7344b..89f55ca 100644
--- a/include/llvm/ADT/DenseSet.h
+++ b/include/llvm/ADT/DenseSet.h
@@ -60,7 +60,7 @@ public:
     ValueT& operator*() { return I->first; }
     ValueT* operator->() { return &I->first; }
 
-    Iterator& operator++() { ++I; return *this; };
+    Iterator& operator++() { ++I; return *this; }
     bool operator==(const Iterator& X) const { return I == X.I; }
     bool operator!=(const Iterator& X) const { return I != X.I; }
   };
@@ -73,7 +73,7 @@ public:
     const ValueT& operator*() { return I->first; }
     const ValueT* operator->() { return &I->first; }
 
-    ConstIterator& operator++() { ++I; return *this; };
+    ConstIterator& operator++() { ++I; return *this; }
     bool operator==(const ConstIterator& X) const { return I == X.I; }
     bool operator!=(const ConstIterator& X) const { return I != X.I; }
   };
diff --git a/include/llvm/ADT/ImmutableList.h b/include/llvm/ADT/ImmutableList.h
index 5f8cb57..7757c08 100644
--- a/include/llvm/ADT/ImmutableList.h
+++ b/include/llvm/ADT/ImmutableList.h
@@ -211,9 +211,12 @@ template<typename T> struct DenseMapInfo<ImmutableList<T> > {
   static bool isEqual(ImmutableList<T> X1, ImmutableList<T> X2) {
     return X1 == X2;
   }
-  static bool isPod() { return true; }
 };
 
+template <typename T> struct isPodLike;
+template <typename T>
+struct isPodLike<ImmutableList<T> > { static const bool value = true; };
+
 } // end llvm namespace
 
 #endif
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 73ba3c7..64f4a7c 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -106,6 +106,12 @@ public:
   bool operator>=(const PointerIntPair &RHS) const {return Value >= RHS.Value;}
 };
 
+template <typename T> struct isPodLike;
+template<typename PointerTy, unsigned IntBits, typename IntType>
+struct isPodLike<PointerIntPair<PointerTy, IntBits, IntType> > {
+   static const bool value = true;
+};
+  
 // Provide specialization of DenseMapInfo for PointerIntPair.
 template<typename PointerTy, unsigned IntBits, typename IntType>
 struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType> > {
@@ -125,7 +131,6 @@ struct DenseMapInfo<PointerIntPair<PointerTy, IntBits, IntType> > {
     return unsigned(IV) ^ unsigned(IV >> 9);
   }
   static bool isEqual(const Ty &LHS, const Ty &RHS) { return LHS == RHS; }
-  static bool isPod() { return true; }
 };
 
 // Teach SmallPtrSet that PointerIntPair is "basically a pointer".
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index f3b4533..b16649e 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -46,20 +46,17 @@ namespace std {
 
 namespace llvm {
 
-/// SmallVectorImpl - This class consists of common code factored out of the
-/// SmallVector class to reduce code duplication based on the SmallVector 'N'
-/// template parameter.
-template <typename T>
-class SmallVectorImpl {
+/// SmallVectorBase - This is all the non-templated stuff common to all
+/// SmallVectors.
+class SmallVectorBase {
 protected:
-  T *Begin, *End, *Capacity;
+  void *BeginX, *EndX, *CapacityX;
 
   // Allocate raw space for N elements of type T.  If T has a ctor or dtor, we
   // don't want it to be automatically run, so we need to represent the space as
   // something else.  An array of char would work great, but might not be
   // aligned sufficiently.  Instead, we either use GCC extensions, or some
   // number of union instances for the space, which guarantee maximal alignment.
-protected:
 #ifdef __GNUC__
   typedef char U;
   U FirstEl __attribute__((aligned));
@@ -72,46 +69,65 @@ protected:
   } FirstEl;
 #endif
   // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
+  
+protected:
+  SmallVectorBase(size_t Size)
+    : BeginX(&FirstEl), EndX(&FirstEl), CapacityX((char*)&FirstEl+Size) {}
+  
+  /// isSmall - Return true if this is a smallvector which has not had dynamic
+  /// memory allocated for it.
+  bool isSmall() const {
+    return BeginX == static_cast<const void*>(&FirstEl);
+  }
+  
+  
+public:
+  bool empty() const { return BeginX == EndX; }
+};
+  
+/// SmallVectorImpl - This class consists of common code factored out of the
+/// SmallVector class to reduce code duplication based on the SmallVector 'N'
+/// template parameter.
+template <typename T>
+class SmallVectorImpl : public SmallVectorBase {
+  void setEnd(T *P) { EndX = P; }
 public:
   // Default ctor - Initialize to empty.
-  explicit SmallVectorImpl(unsigned N)
-    : Begin(reinterpret_cast<T*>(&FirstEl)),
-      End(reinterpret_cast<T*>(&FirstEl)),
-      Capacity(reinterpret_cast<T*>(&FirstEl)+N) {
+  explicit SmallVectorImpl(unsigned N) : SmallVectorBase(N*sizeof(T)) {
   }
 
   ~SmallVectorImpl() {
     // Destroy the constructed elements in the vector.
-    destroy_range(Begin, End);
+    destroy_range(begin(), end());
 
     // If this wasn't grown from the inline copy, deallocate the old space.
     if (!isSmall())
-      operator delete(Begin);
+      operator delete(begin());
   }
 
   typedef size_t size_type;
   typedef ptrdiff_t difference_type;
   typedef T value_type;
-  typedef T* iterator;
-  typedef const T* const_iterator;
+  typedef T *iterator;
+  typedef const T *const_iterator;
 
-  typedef std::reverse_iterator<const_iterator>  const_reverse_iterator;
-  typedef std::reverse_iterator<iterator>  reverse_iterator;
+  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
+  typedef std::reverse_iterator<iterator> reverse_iterator;
 
-  typedef T& reference;
-  typedef const T& const_reference;
-  typedef T* pointer;
-  typedef const T* const_pointer;
-
-  bool empty() const { return Begin == End; }
-  size_type size() const { return End-Begin; }
-  size_type max_size() const { return size_type(-1) / sizeof(T); }
+  typedef T &reference;
+  typedef const T &const_reference;
+  typedef T *pointer;
+  typedef const T *const_pointer;
 
   // forward iterator creation methods.
-  iterator begin() { return Begin; }
-  const_iterator begin() const { return Begin; }
-  iterator end() { return End; }
-  const_iterator end() const { return End; }
+  iterator begin() { return (iterator)BeginX; }
+  const_iterator begin() const { return (const_iterator)BeginX; }
+  iterator end() { return (iterator)EndX; }
+  const_iterator end() const { return (const_iterator)EndX; }
+private:
+  iterator capacity_ptr() { return (iterator)CapacityX; }
+  const_iterator capacity_ptr() const { return (const_iterator)CapacityX; }
+public:
 
   // reverse iterator creation methods.
   reverse_iterator rbegin()            { return reverse_iterator(end()); }
@@ -119,14 +135,25 @@ public:
   reverse_iterator rend()              { return reverse_iterator(begin()); }
   const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
 
-
+  size_type size() const { return end()-begin(); }
+  size_type max_size() const { return size_type(-1) / sizeof(T); }
+  
+  /// capacity - Return the total number of elements in the currently allocated
+  /// buffer.
+  size_t capacity() const { return capacity_ptr() - begin(); }
+  
+  /// data - Return a pointer to the vector's buffer, even if empty().
+  pointer data() { return pointer(begin()); }
+  /// data - Return a pointer to the vector's buffer, even if empty().
+  const_pointer data() const { return const_pointer(begin()); }
+  
   reference operator[](unsigned idx) {
-    assert(Begin + idx < End);
-    return Begin[idx];
+    assert(begin() + idx < end());
+    return begin()[idx];
   }
   const_reference operator[](unsigned idx) const {
-    assert(Begin + idx < End);
-    return Begin[idx];
+    assert(begin() + idx < end());
+    return begin()[idx];
   }
 
   reference front() {
@@ -144,10 +171,10 @@ public:
   }
 
   void push_back(const_reference Elt) {
-    if (End < Capacity) {
+    if (EndX < CapacityX) {
   Retry:
-      new (End) T(Elt);
-      ++End;
+      new (end()) T(Elt);
+      setEnd(end()+1);
       return;
     }
     grow();
@@ -155,8 +182,8 @@ public:
   }
 
   void pop_back() {
-    --End;
-    End->~T();
+    setEnd(end()-1);
+    end()->~T();
   }
 
   T pop_back_val() {
@@ -166,36 +193,36 @@ public:
   }
 
   void clear() {
-    destroy_range(Begin, End);
-    End = Begin;
+    destroy_range(begin(), end());
+    EndX = BeginX;
   }
 
   void resize(unsigned N) {
     if (N < size()) {
-      destroy_range(Begin+N, End);
-      End = Begin+N;
+      destroy_range(begin()+N, end());
+      setEnd(begin()+N);
     } else if (N > size()) {
-      if (unsigned(Capacity-Begin) < N)
+      if (capacity() < N)
         grow(N);
-      construct_range(End, Begin+N, T());
-      End = Begin+N;
+      construct_range(end(), begin()+N, T());
+      setEnd(begin()+N);
     }
   }
 
   void resize(unsigned N, const T &NV) {
     if (N < size()) {
-      destroy_range(Begin+N, End);
-      End = Begin+N;
+      destroy_range(begin()+N, end());
+      setEnd(begin()+N);
     } else if (N > size()) {
-      if (unsigned(Capacity-Begin) < N)
+      if (capacity() < N)
         grow(N);
-      construct_range(End, Begin+N, NV);
-      End = Begin+N;
+      construct_range(end(), begin()+N, NV);
+      setEnd(begin()+N);
     }
   }
 
   void reserve(unsigned N) {
-    if (unsigned(Capacity-Begin) < N)
+    if (capacity() < N)
       grow(N);
   }
 
@@ -207,38 +234,38 @@ public:
   void append(in_iter in_start, in_iter in_end) {
     size_type NumInputs = std::distance(in_start, in_end);
     // Grow allocated space if needed.
-    if (NumInputs > size_type(Capacity-End))
+    if (NumInputs > size_type(capacity_ptr()-end()))
       grow(size()+NumInputs);
 
     // Copy the new elements over.
-    std::uninitialized_copy(in_start, in_end, End);
-    End += NumInputs;
+    std::uninitialized_copy(in_start, in_end, end());
+    setEnd(end() + NumInputs);
   }
 
   /// append - Add the specified range to the end of the SmallVector.
   ///
   void append(size_type NumInputs, const T &Elt) {
     // Grow allocated space if needed.
-    if (NumInputs > size_type(Capacity-End))
+    if (NumInputs > size_type(capacity_ptr()-end()))
       grow(size()+NumInputs);
 
     // Copy the new elements over.
-    std::uninitialized_fill_n(End, NumInputs, Elt);
-    End += NumInputs;
+    std::uninitialized_fill_n(end(), NumInputs, Elt);
+    setEnd(end() + NumInputs);
   }
 
   void assign(unsigned NumElts, const T &Elt) {
     clear();
-    if (unsigned(Capacity-Begin) < NumElts)
+    if (capacity() < NumElts)
       grow(NumElts);
-    End = Begin+NumElts;
-    construct_range(Begin, End, Elt);
+    setEnd(begin()+NumElts);
+    construct_range(begin(), end(), Elt);
   }
 
   iterator erase(iterator I) {
     iterator N = I;
     // Shift all elts down one.
-    std::copy(I+1, End, I);
+    std::copy(I+1, end(), I);
     // Drop the last elt.
     pop_back();
     return(N);
@@ -247,36 +274,36 @@ public:
   iterator erase(iterator S, iterator E) {
     iterator N = S;
     // Shift all elts down.
-    iterator I = std::copy(E, End, S);
+    iterator I = std::copy(E, end(), S);
     // Drop the last elts.
-    destroy_range(I, End);
-    End = I;
+    destroy_range(I, end());
+    setEnd(I);
     return(N);
   }
 
   iterator insert(iterator I, const T &Elt) {
-    if (I == End) {  // Important special case for empty vector.
+    if (I == end()) {  // Important special case for empty vector.
       push_back(Elt);
       return end()-1;
     }
 
-    if (End < Capacity) {
+    if (EndX < CapacityX) {
   Retry:
-      new (End) T(back());
-      ++End;
+      new (end()) T(back());
+      setEnd(end()+1);
       // Push everything else over.
-      std::copy_backward(I, End-1, End);
+      std::copy_backward(I, end()-1, end());
       *I = Elt;
       return I;
     }
-    size_t EltNo = I-Begin;
+    size_t EltNo = I-begin();
     grow();
-    I = Begin+EltNo;
+    I = begin()+EltNo;
     goto Retry;
   }
 
   iterator insert(iterator I, size_type NumToInsert, const T &Elt) {
-    if (I == End) {  // Important special case for empty vector.
+    if (I == end()) {  // Important special case for empty vector.
       append(NumToInsert, Elt);
       return end()-1;
     }
@@ -295,8 +322,8 @@ public:
     // insertion.  Since we already reserved space, we know that this won't
     // reallocate the vector.
     if (size_t(end()-I) >= NumToInsert) {
-      T *OldEnd = End;
-      append(End-NumToInsert, End);
+      T *OldEnd = end();
+      append(end()-NumToInsert, end());
 
       // Copy the existing elements that get replaced.
       std::copy_backward(I, OldEnd-NumToInsert, OldEnd);
@@ -309,10 +336,10 @@ public:
     // not inserting at the end.
 
     // Copy over the elements that we're about to overwrite.
-    T *OldEnd = End;
-    End += NumToInsert;
+    T *OldEnd = end();
+    setEnd(end() + NumToInsert);
     size_t NumOverwritten = OldEnd-I;
-    std::uninitialized_copy(I, OldEnd, End-NumOverwritten);
+    std::uninitialized_copy(I, OldEnd, end()-NumOverwritten);
 
     // Replace the overwritten part.
     std::fill_n(I, NumOverwritten, Elt);
@@ -324,7 +351,7 @@ public:
 
   template<typename ItTy>
   iterator insert(iterator I, ItTy From, ItTy To) {
-    if (I == End) {  // Important special case for empty vector.
+    if (I == end()) {  // Important special case for empty vector.
       append(From, To);
       return end()-1;
     }
@@ -344,8 +371,8 @@ public:
     // insertion.  Since we already reserved space, we know that this won't
     // reallocate the vector.
     if (size_t(end()-I) >= NumToInsert) {
-      T *OldEnd = End;
-      append(End-NumToInsert, End);
+      T *OldEnd = end();
+      append(end()-NumToInsert, end());
 
       // Copy the existing elements that get replaced.
       std::copy_backward(I, OldEnd-NumToInsert, OldEnd);
@@ -358,10 +385,10 @@ public:
     // not inserting at the end.
 
     // Copy over the elements that we're about to overwrite.
-    T *OldEnd = End;
-    End += NumToInsert;
+    T *OldEnd = end();
+    setEnd(end() + NumToInsert);
     size_t NumOverwritten = OldEnd-I;
-    std::uninitialized_copy(I, OldEnd, End-NumOverwritten);
+    std::uninitialized_copy(I, OldEnd, end()-NumOverwritten);
 
     // Replace the overwritten part.
     std::copy(From, From+NumOverwritten, I);
@@ -371,25 +398,11 @@ public:
     return I;
   }
 
-  /// data - Return a pointer to the vector's buffer, even if empty().
-  pointer data() {
-    return pointer(Begin);
-  }
-
-  /// data - Return a pointer to the vector's buffer, even if empty().
-  const_pointer data() const {
-    return const_pointer(Begin);
-  }
-
   const SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
 
   bool operator==(const SmallVectorImpl &RHS) const {
     if (size() != RHS.size()) return false;
-    for (T *This = Begin, *That = RHS.Begin, *E = Begin+size();
-         This != E; ++This, ++That)
-      if (*This != *That)
-        return false;
-    return true;
+    return std::equal(begin(), end(), RHS.begin());
   }
   bool operator!=(const SmallVectorImpl &RHS) const { return !(*this == RHS); }
 
@@ -398,10 +411,6 @@ public:
                                         RHS.begin(), RHS.end());
   }
 
-  /// capacity - Return the total number of elements in the currently allocated
-  /// buffer.
-  size_t capacity() const { return Capacity - Begin; }
-
   /// set_size - Set the array size to \arg N, which the current array must have
   /// enough capacity for.
   ///
@@ -413,17 +422,10 @@ public:
   /// which will only be overwritten.
   void set_size(unsigned N) {
     assert(N <= capacity());
-    End = Begin + N;
+    setEnd(begin() + N);
   }
 
 private:
-  /// isSmall - Return true if this is a smallvector which has not had dynamic
-  /// memory allocated for it.
-  bool isSmall() const {
-    return static_cast<const void*>(Begin) ==
-           static_cast<const void*>(&FirstEl);
-  }
-
   /// grow - double the size of the allocated memory, guaranteeing space for at
   /// least one more element or MinSize if specified.
   void grow(size_type MinSize = 0);
@@ -434,6 +436,9 @@ private:
   }
 
   void destroy_range(T *S, T *E) {
+    // No need to do a destroy loop for POD's.
+    if (isPodLike<T>::value) return;
+    
     while (S != E) {
       --E;
       E->~T();
@@ -444,7 +449,7 @@ private:
 // Define this out-of-line to dissuade the C++ compiler from inlining it.
 template <typename T>
 void SmallVectorImpl<T>::grow(size_t MinSize) {
-  size_t CurCapacity = Capacity-Begin;
+  size_t CurCapacity = capacity();
   size_t CurSize = size();
   size_t NewCapacity = 2*CurCapacity;
   if (NewCapacity < MinSize)
@@ -452,22 +457,22 @@ void SmallVectorImpl<T>::grow(size_t MinSize) {
   T *NewElts = static_cast<T*>(operator new(NewCapacity*sizeof(T)));
 
   // Copy the elements over.
-  if (is_class<T>::value)
-    std::uninitialized_copy(Begin, End, NewElts);
+  if (isPodLike<T>::value)
+    // Use memcpy for PODs: std::uninitialized_copy optimizes to memmove.
+    memcpy(NewElts, begin(), CurSize * sizeof(T));
   else
-    // Use memcpy for PODs (std::uninitialized_copy optimizes to memmove).
-    memcpy(NewElts, Begin, CurSize * sizeof(T));
+    std::uninitialized_copy(begin(), end(), NewElts);
 
   // Destroy the original elements.
-  destroy_range(Begin, End);
+  destroy_range(begin(), end());
 
   // If this wasn't grown from the inline copy, deallocate the old space.
   if (!isSmall())
-    operator delete(Begin);
+    operator delete(begin());
 
-  Begin = NewElts;
-  End = NewElts+CurSize;
-  Capacity = Begin+NewCapacity;
+  setEnd(NewElts+CurSize);
+  BeginX = NewElts;
+  CapacityX = begin()+NewCapacity;
 }
 
 template <typename T>
@@ -476,35 +481,35 @@ void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
 
   // We can only avoid copying elements if neither vector is small.
   if (!isSmall() && !RHS.isSmall()) {
-    std::swap(Begin, RHS.Begin);
-    std::swap(End, RHS.End);
-    std::swap(Capacity, RHS.Capacity);
+    std::swap(BeginX, RHS.BeginX);
+    std::swap(EndX, RHS.EndX);
+    std::swap(CapacityX, RHS.CapacityX);
     return;
   }
-  if (RHS.size() > size_type(Capacity-Begin))
+  if (RHS.size() > capacity())
     grow(RHS.size());
-  if (size() > size_type(RHS.Capacity-RHS.begin()))
+  if (size() > RHS.capacity())
     RHS.grow(size());
 
   // Swap the shared elements.
   size_t NumShared = size();
   if (NumShared > RHS.size()) NumShared = RHS.size();
   for (unsigned i = 0; i != static_cast<unsigned>(NumShared); ++i)
-    std::swap(Begin[i], RHS[i]);
+    std::swap((*this)[i], RHS[i]);
 
   // Copy over the extra elts.
   if (size() > RHS.size()) {
     size_t EltDiff = size() - RHS.size();
-    std::uninitialized_copy(Begin+NumShared, End, RHS.End);
-    RHS.End += EltDiff;
-    destroy_range(Begin+NumShared, End);
-    End = Begin+NumShared;
+    std::uninitialized_copy(begin()+NumShared, end(), RHS.end());
+    RHS.setEnd(RHS.end()+EltDiff);
+    destroy_range(begin()+NumShared, end());
+    setEnd(begin()+NumShared);
   } else if (RHS.size() > size()) {
     size_t EltDiff = RHS.size() - size();
-    std::uninitialized_copy(RHS.Begin+NumShared, RHS.End, End);
-    End += EltDiff;
-    destroy_range(RHS.Begin+NumShared, RHS.End);
-    RHS.End = RHS.Begin+NumShared;
+    std::uninitialized_copy(RHS.begin()+NumShared, RHS.end(), end());
+    setEnd(end() + EltDiff);
+    destroy_range(RHS.begin()+NumShared, RHS.end());
+    RHS.setEnd(RHS.begin()+NumShared);
   }
 }
 
@@ -516,42 +521,42 @@ SmallVectorImpl<T>::operator=(const SmallVectorImpl<T> &RHS) {
 
   // If we already have sufficient space, assign the common elements, then
   // destroy any excess.
-  unsigned RHSSize = unsigned(RHS.size());
-  unsigned CurSize = unsigned(size());
+  size_t RHSSize = RHS.size();
+  size_t CurSize = size();
   if (CurSize >= RHSSize) {
     // Assign common elements.
     iterator NewEnd;
     if (RHSSize)
-      NewEnd = std::copy(RHS.Begin, RHS.Begin+RHSSize, Begin);
+      NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, begin());
     else
-      NewEnd = Begin;
+      NewEnd = begin();
 
     // Destroy excess elements.
-    destroy_range(NewEnd, End);
+    destroy_range(NewEnd, end());
 
     // Trim.
-    End = NewEnd;
+    setEnd(NewEnd);
     return *this;
   }
 
   // If we have to grow to have enough elements, destroy the current elements.
   // This allows us to avoid copying them during the grow.
-  if (unsigned(Capacity-Begin) < RHSSize) {
+  if (capacity() < RHSSize) {
     // Destroy current elements.
-    destroy_range(Begin, End);
-    End = Begin;
+    destroy_range(begin(), end());
+    setEnd(begin());
     CurSize = 0;
     grow(RHSSize);
   } else if (CurSize) {
     // Otherwise, use assignment for the already-constructed elements.
-    std::copy(RHS.Begin, RHS.Begin+CurSize, Begin);
+    std::copy(RHS.begin(), RHS.begin()+CurSize, begin());
   }
 
   // Copy construct the new elements in place.
-  std::uninitialized_copy(RHS.Begin+CurSize, RHS.End, Begin+CurSize);
+  std::uninitialized_copy(RHS.begin()+CurSize, RHS.end(), begin()+CurSize);
 
   // Set end.
-  End = Begin+RHSSize;
+  setEnd(begin()+RHSSize);
   return *this;
 }
 
diff --git a/include/llvm/ADT/StringSwitch.h b/include/llvm/ADT/StringSwitch.h
index 6562d57..7dd5647 100644
--- a/include/llvm/ADT/StringSwitch.h
+++ b/include/llvm/ADT/StringSwitch.h
@@ -7,7 +7,7 @@
 //===----------------------------------------------------------------------===/
 //
 //  This file implements the StringSwitch template, which mimics a switch()
-//  statements whose cases are string literals.
+//  statement whose cases are string literals.
 //
 //===----------------------------------------------------------------------===/
 #ifndef LLVM_ADT_STRINGSWITCH_H
@@ -18,7 +18,7 @@
 #include <cstring>
 
 namespace llvm {
-  
+
 /// \brief A switch()-like statement whose cases are string literals.
 ///
 /// The StringSwitch class is a simple form of a switch() statement that
@@ -35,48 +35,44 @@ namespace llvm {
 ///   .Case("green", Green)
 ///   .Case("blue", Blue)
 ///   .Case("indigo", Indigo)
-///   .Case("violet", Violet)
+///   .Cases("violet", "purple", Violet)
 ///   .Default(UnknownColor);
 /// \endcode
-template<typename T>
+template<typename T, typename R = T>
 class StringSwitch {
   /// \brief The string we are matching.
   StringRef Str;
-  
-  /// \brief The result of this switch statement, once known.
-  T Result;
-  
-  /// \brief Set true when the result of this switch is already known; in this
-  /// case, Result is valid.
-  bool ResultKnown;
-  
+
+  /// \brief The pointer to the result of this switch statement, once known,
+  /// null before that.
+  const T *Result;
+
 public:
-  explicit StringSwitch(StringRef Str) 
-  : Str(Str), ResultKnown(false) { }
-  
+  explicit StringSwitch(StringRef Str)
+  : Str(Str), Result(0) { }
+
   template<unsigned N>
   StringSwitch& Case(const char (&S)[N], const T& Value) {
-    if (!ResultKnown && N-1 == Str.size() && 
+    if (!Result && N-1 == Str.size() &&
         (std::memcmp(S, Str.data(), N-1) == 0)) {
-      Result = Value;
-      ResultKnown = true;
+      Result = &Value;
     }
-    
+
     return *this;
   }
-  
+
   template<unsigned N0, unsigned N1>
   StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
                       const T& Value) {
     return Case(S0, Value).Case(S1, Value);
   }
-  
+
   template<unsigned N0, unsigned N1, unsigned N2>
   StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
                       const char (&S2)[N2], const T& Value) {
     return Case(S0, Value).Case(S1, Value).Case(S2, Value);
   }
-  
+
   template<unsigned N0, unsigned N1, unsigned N2, unsigned N3>
   StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
                       const char (&S2)[N2], const char (&S3)[N3],
@@ -87,21 +83,21 @@ public:
   template<unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4>
   StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
                       const char (&S2)[N2], const char (&S3)[N3],
-                       const char (&S4)[N4], const T& Value) {
+                      const char (&S4)[N4], const T& Value) {
     return Case(S0, Value).Case(S1, Value).Case(S2, Value).Case(S3, Value)
       .Case(S4, Value);
   }
-  
-  T Default(const T& Value) {
-    if (ResultKnown)
-      return Result;
-    
+
+  R Default(const T& Value) const {
+    if (Result)
+      return *Result;
+
     return Value;
   }
-  
-  operator T() {
-    assert(ResultKnown && "Fell off the end of a string-switch");
-    return Result;
+
+  operator R() const {
+    assert(Result && "Fell off the end of a string-switch");
+    return *Result;
   }
 };
 
diff --git a/include/llvm/ADT/ValueMap.h b/include/llvm/ADT/ValueMap.h
index b043c38..6f57fe8 100644
--- a/include/llvm/ADT/ValueMap.h
+++ b/include/llvm/ADT/ValueMap.h
@@ -250,6 +250,12 @@ public:
   }
 };
 
+  
+template<typename KeyT, typename ValueT, typename Config, typename ValueInfoT>
+struct isPodLike<ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> > {
+  static const bool value = true;
+};
+
 template<typename KeyT, typename ValueT, typename Config, typename ValueInfoT>
 struct DenseMapInfo<ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> > {
   typedef ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> VH;
@@ -267,7 +273,6 @@ struct DenseMapInfo<ValueMapCallbackVH<KeyT, ValueT, Config, ValueInfoT> > {
   static bool isEqual(const VH &LHS, const VH &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return false; }
 };
 
 
diff --git a/include/llvm/ADT/ilist.h b/include/llvm/ADT/ilist.h
index b3824a2..e4d26dd 100644
--- a/include/llvm/ADT/ilist.h
+++ b/include/llvm/ADT/ilist.h
@@ -643,7 +643,7 @@ struct ilist : public iplist<NodeTy> {
 
   // Main implementation here - Insert for a node passed by value...
   iterator insert(iterator where, const NodeTy &val) {
-    return insert(where, createNode(val));
+    return insert(where, this->createNode(val));
   }
 
 
diff --git a/include/llvm/Analysis/AliasSetTracker.h b/include/llvm/Analysis/AliasSetTracker.h
index 42a377e..09f12ad 100644
--- a/include/llvm/Analysis/AliasSetTracker.h
+++ b/include/llvm/Analysis/AliasSetTracker.h
@@ -259,11 +259,9 @@ class AliasSetTracker {
     ASTCallbackVH(Value *V, AliasSetTracker *AST = 0);
     ASTCallbackVH &operator=(Value *V);
   };
-  /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap that ASTCallbackVH
-  /// is not a POD (it needs its destructor called).
-  struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {
-    static bool isPod() { return false; }
-  };
+  /// ASTCallbackVHDenseMapInfo - Traits to tell DenseMap that tell us how to
+  /// compare and hash the value handle.
+  struct ASTCallbackVHDenseMapInfo : public DenseMapInfo<Value *> {};
 
   AliasAnalysis &AA;
   ilist<AliasSet> AliasSets;
diff --git a/include/llvm/Analysis/DebugInfo.h b/include/llvm/Analysis/DebugInfo.h
index 866ed8a..232804e 100644
--- a/include/llvm/Analysis/DebugInfo.h
+++ b/include/llvm/Analysis/DebugInfo.h
@@ -197,7 +197,8 @@ namespace llvm {
       FlagProtected        = 1 << 1,
       FlagFwdDecl          = 1 << 2,
       FlagAppleBlock       = 1 << 3,
-      FlagBlockByrefStruct = 1 << 4
+      FlagBlockByrefStruct = 1 << 4,
+      FlagVirtual          = 1 << 5
     };
 
   protected:
@@ -242,6 +243,9 @@ namespace llvm {
     bool isBlockByrefStruct() const {
       return (getFlags() & FlagBlockByrefStruct) != 0;
     }
+    bool isVirtual() const {
+      return (getFlags() & FlagVirtual) != 0;
+    }
 
     /// dump - print type.
     void dump() const;
@@ -366,6 +370,24 @@ namespace llvm {
     /// compile unit, like 'static' in C.
     unsigned isLocalToUnit() const     { return getUnsignedField(9); }
     unsigned isDefinition() const      { return getUnsignedField(10); }
+
+    unsigned getVirtuality() const {
+      if (DbgNode->getNumElements() < 14)
+        return 0;
+      return getUnsignedField(11);
+    }
+
+    unsigned getVirtualIndex() const { 
+      if (DbgNode->getNumElements() < 14)
+        return 0;
+      return getUnsignedField(12);
+    }
+
+    DICompositeType getContainingType() const {
+      assert (DbgNode->getNumElements() >= 14 && "Invalid type!");
+      return getFieldAs<DICompositeType>(13);
+    }
+
     StringRef getFilename() const    { return getCompileUnit().getFilename();}
     StringRef getDirectory() const   { return getCompileUnit().getDirectory();}
 
@@ -470,6 +492,7 @@ namespace llvm {
 
     const Type *EmptyStructPtr; // "{}*".
     Function *DeclareFn;     // llvm.dbg.declare
+    Function *ValueFn;       // llvm.dbg.value
 
     DIFactory(const DIFactory &);     // DO NOT IMPLEMENT
     void operator=(const DIFactory&); // DO NOT IMPLEMENT
@@ -565,7 +588,14 @@ namespace llvm {
                                   StringRef LinkageName,
                                   DICompileUnit CompileUnit, unsigned LineNo,
                                   DIType Type, bool isLocalToUnit,
-                                  bool isDefinition);
+                                  bool isDefinition,
+                                  unsigned VK = 0,
+                                  unsigned VIndex = 0,
+                                  DIType = DIType());
+
+    /// CreateSubprogramDefinition - Create new subprogram descriptor for the
+    /// given declaration. 
+    DISubprogram CreateSubprogramDefinition(DISubprogram &SPDeclaration);
 
     /// CreateGlobalVariable - Create a new descriptor for the specified global.
     DIGlobalVariable
@@ -610,6 +640,13 @@ namespace llvm {
     Instruction *InsertDeclare(llvm::Value *Storage, DIVariable D,
                                Instruction *InsertBefore);
 
+    /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+    Instruction *InsertDbgValueIntrinsic(llvm::Value *V, llvm::Value *Offset,
+                                         DIVariable D, BasicBlock *InsertAtEnd);
+
+    /// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+    Instruction *InsertDbgValueIntrinsic(llvm::Value *V, llvm::Value *Offset,
+                                       DIVariable D, Instruction *InsertBefore);
   private:
     Constant *GetTagConstant(unsigned TAG);
   };
diff --git a/include/llvm/Analysis/IVUsers.h b/include/llvm/Analysis/IVUsers.h
index 22fbb35..fcd9caa 100644
--- a/include/llvm/Analysis/IVUsers.h
+++ b/include/llvm/Analysis/IVUsers.h
@@ -175,11 +175,11 @@ class IVUsers : public LoopPass {
   ScalarEvolution *SE;
   SmallPtrSet<Instruction*,16> Processed;
 
-public:
   /// IVUses - A list of all tracked IV uses of induction variable expressions
   /// we are interested in.
   ilist<IVUsersOfOneStride> IVUses;
 
+public:
   /// IVUsesByStride - A mapping from the strides in StrideOrder to the
   /// uses in IVUses.
   std::map<const SCEV *, IVUsersOfOneStride*> IVUsesByStride;
diff --git a/include/llvm/Analysis/LoopDependenceAnalysis.h b/include/llvm/Analysis/LoopDependenceAnalysis.h
index 1d386ba..a1a5637 100644
--- a/include/llvm/Analysis/LoopDependenceAnalysis.h
+++ b/include/llvm/Analysis/LoopDependenceAnalysis.h
@@ -67,17 +67,17 @@ class LoopDependenceAnalysis : public LoopPass {
   /// created. The third argument is set to the pair found or created.
   bool findOrInsertDependencePair(Value*, Value*, DependencePair*&);
 
-  /// getLoops - Collect all loops of the loop-nest L a given SCEV is variant
-  /// in.
+  /// getLoops - Collect all loops of the loop nest L in which
+  /// a given SCEV is variant.
   void getLoops(const SCEV*, DenseSet<const Loop*>*) const;
 
   /// isLoopInvariant - True if a given SCEV is invariant in all loops of the
-  /// loop-nest starting at the innermost loop L.
+  /// loop nest starting at the innermost loop L.
   bool isLoopInvariant(const SCEV*) const;
 
-  /// isAffine - An SCEV is affine with respect to the loop-nest starting at
+  /// isAffine - An SCEV is affine with respect to the loop nest starting at
   /// the innermost loop L if it is of the form A+B*X where A, B are invariant
-  /// in the loop-nest and X is a induction variable in the loop-nest.
+  /// in the loop nest and X is a induction variable in the loop nest.
   bool isAffine(const SCEV*) const;
 
   /// TODO: doc
@@ -93,8 +93,8 @@ public:
   static char ID; // Class identification, replacement for typeinfo
   LoopDependenceAnalysis() : LoopPass(&ID) {}
 
-  /// isDependencePair - Check wether two values can possibly give rise to a
-  /// data dependence: that is the case if both are instructions accessing
+  /// isDependencePair - Check whether two values can possibly give rise to
+  /// a data dependence: that is the case if both are instructions accessing
   /// memory and at least one of those accesses is a write.
   bool isDependencePair(const Value*, const Value*) const;
 
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index 9969d99..2294e53 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -568,7 +568,7 @@ public:
 
   /// getUniqueExitBlocks - Return all unique successor blocks of this loop. 
   /// These are the blocks _outside of the current loop_ which are branched to.
-  /// This assumes that loop is in canonical form.
+  /// This assumes that loop exits are in canonical form.
   ///
   void getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const;
 
@@ -976,13 +976,6 @@ public:
   void removeBlock(BasicBlock *BB) {
     LI.removeBlock(BB);
   }
-
-  static bool isNotAlreadyContainedIn(const Loop *SubLoop,
-                                      const Loop *ParentLoop) {
-    return
-      LoopInfoBase<BasicBlock, Loop>::isNotAlreadyContainedIn(SubLoop,
-                                                              ParentLoop);
-  }
 };
 
 
diff --git a/include/llvm/Analysis/LoopPass.h b/include/llvm/Analysis/LoopPass.h
index 2eb329f..2dceccb 100644
--- a/include/llvm/Analysis/LoopPass.h
+++ b/include/llvm/Analysis/LoopPass.h
@@ -52,7 +52,7 @@ public:
   // LPPassManger as expected.
   void preparePassManager(PMStack &PMS);
 
-  /// Assign pass manager to manager this pass
+  /// Assign pass manager to manage this pass
   virtual void assignPassManager(PMStack &PMS,
                                  PassManagerType PMT = PMT_LoopPassManager);
 
@@ -73,7 +73,7 @@ public:
   /// cloneBasicBlockAnalysis - Clone analysis info associated with basic block.
   virtual void cloneBasicBlockAnalysis(BasicBlock *F, BasicBlock *T, Loop *L) {}
 
-  /// deletekAnalysisValue - Delete analysis info associated with value V.
+  /// deleteAnalysisValue - Delete analysis info associated with value V.
   virtual void deleteAnalysisValue(Value *V, Loop *L) {}
 };
 
diff --git a/include/llvm/Analysis/MemoryDependenceAnalysis.h b/include/llvm/Analysis/MemoryDependenceAnalysis.h
index 6b300fd..c04631b 100644
--- a/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -16,6 +16,7 @@
 
 #include "llvm/BasicBlock.h"
 #include "llvm/Pass.h"
+#include "llvm/Support/ValueHandle.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/OwningPtr.h"
@@ -31,6 +32,7 @@ namespace llvm {
   class MemoryDependenceAnalysis;
   class PredIteratorCache;
   class DominatorTree;
+  class PHITransAddr;
   
   /// MemDepResult - A memory dependence query can return one of three different
   /// answers, described below.
@@ -60,9 +62,9 @@ namespace llvm {
       ///      this case, the load is loading an undef value or a store is the
       ///      first store to (that part of) the allocation.
       ///   3. Dependence queries on calls return Def only when they are
-      ///      readonly calls with identical callees and no intervening
-      ///      clobbers.  No validation is done that the operands to the calls
-      ///      are the same.
+      ///      readonly calls or memory use intrinsics with identical callees
+      ///      and no intervening clobbers.  No validation is done that the
+      ///      operands to the calls are the same.
       Def,
       
       /// NonLocal - This marker indicates that the query has no dependency in
@@ -130,6 +132,45 @@ namespace llvm {
     }
   };
 
+  /// NonLocalDepEntry - This is an entry in the NonLocalDepInfo cache, and an
+  /// entry in the results set for a non-local query.  For each BasicBlock (the
+  /// BB entry) it keeps a MemDepResult and the (potentially phi translated)
+  /// address that was live in the block.
+  class NonLocalDepEntry {
+    BasicBlock *BB;
+    MemDepResult Result;
+    WeakVH Address;
+  public:
+    NonLocalDepEntry(BasicBlock *bb, MemDepResult result, Value *address)
+      : BB(bb), Result(result), Address(address) {}
+
+    // This is used for searches.
+    NonLocalDepEntry(BasicBlock *bb) : BB(bb) {}
+
+    // BB is the sort key, it can't be changed.
+    BasicBlock *getBB() const { return BB; }
+    
+    void setResult(const MemDepResult &R, Value *Addr) {
+      Result = R;
+      Address = Addr;
+    }
+
+    const MemDepResult &getResult() const { return Result; }
+    
+    /// getAddress - Return the address of this pointer in this block.  This can
+    /// be different than the address queried for the non-local result because
+    /// of phi translation.  This returns null if the address was not available
+    /// in a block (i.e. because phi translation failed) or if this is a cached
+    /// result and that address was deleted.
+    ///
+    /// The address is always null for a non-local 'call' dependence.
+    Value *getAddress() const { return Address; }
+
+    bool operator<(const NonLocalDepEntry &RHS) const {
+      return BB < RHS.BB;
+    }
+  };
+  
   /// MemoryDependenceAnalysis - This is an analysis that determines, for a
   /// given memory operation, what preceding memory operations it depends on.
   /// It builds on alias analysis information, and tries to provide a lazy,
@@ -151,7 +192,6 @@ namespace llvm {
     LocalDepMapType LocalDeps;
 
   public:
-    typedef std::pair<BasicBlock*, MemDepResult> NonLocalDepEntry;
     typedef std::vector<NonLocalDepEntry> NonLocalDepInfo;
   private:
     /// ValueIsLoadPair - This is a pair<Value*, bool> where the bool is true if
@@ -245,29 +285,6 @@ namespace llvm {
                                       BasicBlock *BB,
                                      SmallVectorImpl<NonLocalDepEntry> &Result);
     
-    /// GetPHITranslatedValue - Find an available version of the specified value
-    /// PHI translated across the specified edge.  If MemDep isn't able to
-    /// satisfy this request, it returns null.
-    Value *GetPHITranslatedValue(Value *V,
-                                 BasicBlock *CurBB, BasicBlock *PredBB,
-                                 const TargetData *TD) const;
-
-    /// GetAvailablePHITranslatedValue - Return the value computed by
-    /// PHITranslatePointer if it dominates PredBB, otherwise return null.
-    Value *GetAvailablePHITranslatedValue(Value *V,
-                                          BasicBlock *CurBB, BasicBlock *PredBB,
-                                          const TargetData *TD,
-                                          const DominatorTree &DT) const;
-    
-    /// InsertPHITranslatedPointer - Insert a computation of the PHI translated
-    /// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
-    /// block.  All newly created instructions are added to the NewInsts list.
-    Value *InsertPHITranslatedPointer(Value *V,
-                                      BasicBlock *CurBB, BasicBlock *PredBB,
-                                      const TargetData *TD,
-                                      const DominatorTree &DT,
-                                 SmallVectorImpl<Instruction*> &NewInsts) const;
-    
     /// removeInstruction - Remove an instruction from the dependence analysis,
     /// updating the dependence of instructions that previously depended on it.
     void removeInstruction(Instruction *InstToRemove);
@@ -288,7 +305,7 @@ namespace llvm {
     MemDepResult getCallSiteDependencyFrom(CallSite C, bool isReadOnlyCall,
                                            BasicBlock::iterator ScanIt,
                                            BasicBlock *BB);
-    bool getNonLocalPointerDepFromBB(Value *Pointer, uint64_t Size,
+    bool getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t Size,
                                      bool isLoad, BasicBlock *BB,
                                      SmallVectorImpl<NonLocalDepEntry> &Result,
                                      DenseMap<BasicBlock*, Value*> &Visited,
diff --git a/include/llvm/Analysis/PHITransAddr.h b/include/llvm/Analysis/PHITransAddr.h
new file mode 100644
index 0000000..b612316
--- /dev/null
+++ b/include/llvm/Analysis/PHITransAddr.h
@@ -0,0 +1,121 @@
+//===- PHITransAddr.h - PHI Translation for Addresses -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the PHITransAddr class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_PHITRANSADDR_H
+#define LLVM_ANALYSIS_PHITRANSADDR_H
+
+#include "llvm/Instruction.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+  class DominatorTree;
+  class TargetData;
+  
+/// PHITransAddr - An address value which tracks and handles phi translation.
+/// As we walk "up" the CFG through predecessors, we need to ensure that the
+/// address we're tracking is kept up to date.  For example, if we're analyzing
+/// an address of "&A[i]" and walk through the definition of 'i' which is a PHI
+/// node, we *must* phi translate i to get "&A[j]" or else we will analyze an
+/// incorrect pointer in the predecessor block.
+///
+/// This is designed to be a relatively small object that lives on the stack and
+/// is copyable.
+///
+class PHITransAddr {
+  /// Addr - The actual address we're analyzing.
+  Value *Addr;
+  
+  /// TD - The target data we are playing with if known, otherwise null.
+  const TargetData *TD;
+  
+  /// InstInputs - The inputs for our symbolic address.
+  SmallVector<Instruction*, 4> InstInputs;
+public:
+  PHITransAddr(Value *addr, const TargetData *td) : Addr(addr), TD(td) {
+    // If the address is an instruction, the whole thing is considered an input.
+    if (Instruction *I = dyn_cast<Instruction>(Addr))
+      InstInputs.push_back(I);
+  }
+  
+  Value *getAddr() const { return Addr; }
+  
+  /// NeedsPHITranslationFromBlock - Return true if moving from the specified
+  /// BasicBlock to its predecessors requires PHI translation.
+  bool NeedsPHITranslationFromBlock(BasicBlock *BB) const {
+    // We do need translation if one of our input instructions is defined in
+    // this block.
+    for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+      if (InstInputs[i]->getParent() == BB)
+        return true;
+    return false;
+  }
+  
+  /// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+  /// if we have some hope of doing it.  This should be used as a filter to
+  /// avoid calling PHITranslateValue in hopeless situations.
+  bool IsPotentiallyPHITranslatable() const;
+  
+  /// PHITranslateValue - PHI translate the current address up the CFG from
+  /// CurBB to Pred, updating our state the reflect any needed changes.  This
+  /// returns true on failure and sets Addr to null.
+  bool PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB);
+  
+  /// PHITranslateWithInsertion - PHI translate this value into the specified
+  /// predecessor block, inserting a computation of the value if it is
+  /// unavailable.
+  ///
+  /// All newly created instructions are added to the NewInsts list.  This
+  /// returns null on failure.
+  ///
+  Value *PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+                                   const DominatorTree &DT,
+                                   SmallVectorImpl<Instruction*> &NewInsts);
+  
+  void dump() const;
+  
+  /// Verify - Check internal consistency of this data structure.  If the
+  /// structure is valid, it returns true.  If invalid, it prints errors and
+  /// returns false.
+  bool Verify() const;
+private:
+  Value *PHITranslateSubExpr(Value *V, BasicBlock *CurBB, BasicBlock *PredBB);
+  
+  
+  /// GetAvailablePHITranslatedSubExpr - Return the value computed by
+  /// PHITranslateSubExpr if it dominates PredBB, otherwise return null.
+  Value *GetAvailablePHITranslatedSubExpr(Value *V,
+                                          BasicBlock *CurBB, BasicBlock *PredBB,
+                                          const DominatorTree &DT) const;
+  
+  /// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated
+  /// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+  /// block.  All newly created instructions are added to the NewInsts list.
+  /// This returns null on failure.
+  ///
+  Value *InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+                                    BasicBlock *PredBB, const DominatorTree &DT,
+                                    SmallVectorImpl<Instruction*> &NewInsts);
+  
+  /// AddAsInput - If the specified value is an instruction, add it as an input.
+  Value *AddAsInput(Value *V) {
+    // If V is an instruction, it is now an input.
+    if (Instruction *VI = dyn_cast<Instruction>(V))
+      InstInputs.push_back(VI);
+    return V;
+  }
+  
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/include/llvm/Analysis/Passes.h b/include/llvm/Analysis/Passes.h
index b222321..2f39c6a 100644
--- a/include/llvm/Analysis/Passes.h
+++ b/include/llvm/Analysis/Passes.h
@@ -92,6 +92,7 @@ namespace llvm {
   // file.
   //
   ModulePass *createProfileLoaderPass();
+  extern const PassInfo *ProfileLoaderPassID;
 
   //===--------------------------------------------------------------------===//
   //
diff --git a/include/llvm/Analysis/ProfileInfo.h b/include/llvm/Analysis/ProfileInfo.h
index 2a80f3d..80ba6d8 100644
--- a/include/llvm/Analysis/ProfileInfo.h
+++ b/include/llvm/Analysis/ProfileInfo.h
@@ -21,116 +21,228 @@
 #ifndef LLVM_ANALYSIS_PROFILEINFO_H
 #define LLVM_ANALYSIS_PROFILEINFO_H
 
-#include "llvm/BasicBlock.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
 #include <cassert>
 #include <string>
 #include <map>
+#include <set>
 
 namespace llvm {
-  class Function;
   class Pass;
   class raw_ostream;
 
+  class BasicBlock;
+  class Function;
+  class MachineBasicBlock;
+  class MachineFunction;
+
+  // Helper for dumping edges to errs().
+  raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E);
+  raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E);
+
+  raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB);
+  raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB);
+
+  raw_ostream& operator<<(raw_ostream &O, const Function *F);
+  raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF);
+
   /// ProfileInfo Class - This class holds and maintains profiling
   /// information for some unit of code.
-  class ProfileInfo {
+  template<class FType, class BType>
+  class ProfileInfoT {
   public:
     // Types for handling profiling information.
-    typedef std::pair<const BasicBlock*, const BasicBlock*> Edge;
+    typedef std::pair<const BType*, const BType*> Edge;
     typedef std::pair<Edge, double> EdgeWeight;
     typedef std::map<Edge, double> EdgeWeights;
-    typedef std::map<const BasicBlock*, double> BlockCounts;
+    typedef std::map<const BType*, double> BlockCounts;
+    typedef std::map<const BType*, const BType*> Path;
 
   protected:
     // EdgeInformation - Count the number of times a transition between two
     // blocks is executed. As a special case, we also hold an edge from the
     // null BasicBlock to the entry block to indicate how many times the
     // function was entered.
-    std::map<const Function*, EdgeWeights> EdgeInformation;
+    std::map<const FType*, EdgeWeights> EdgeInformation;
 
     // BlockInformation - Count the number of times a block is executed.
-    std::map<const Function*, BlockCounts> BlockInformation;
+    std::map<const FType*, BlockCounts> BlockInformation;
 
     // FunctionInformation - Count the number of times a function is executed.
-    std::map<const Function*, double> FunctionInformation;
+    std::map<const FType*, double> FunctionInformation;
+
+    ProfileInfoT<MachineFunction, MachineBasicBlock> *MachineProfile;
   public:
     static char ID; // Class identification, replacement for typeinfo
-    virtual ~ProfileInfo();  // We want to be subclassed
+    ProfileInfoT();
+    ~ProfileInfoT();  // We want to be subclassed
 
     // MissingValue - The value that is returned for execution counts in case
     // no value is available.
     static const double MissingValue;
 
     // getFunction() - Returns the Function for an Edge, checking for validity.
-    static const Function* getFunction(Edge e) {
+    static const FType* getFunction(Edge e) {
       if (e.first) {
         return e.first->getParent();
       } else if (e.second) {
         return e.second->getParent();
       }
       assert(0 && "Invalid ProfileInfo::Edge");
-      return (const Function*)0;
+      return (const FType*)0;
     }
 
     // getEdge() - Creates an Edge from two BasicBlocks.
-    static Edge getEdge(const BasicBlock *Src, const BasicBlock *Dest) {
+    static Edge getEdge(const BType *Src, const BType *Dest) {
       return std::make_pair(Src, Dest);
     }
 
     //===------------------------------------------------------------------===//
     /// Profile Information Queries
     ///
-    double getExecutionCount(const Function *F);
+    double getExecutionCount(const FType *F);
+
+    double getExecutionCount(const BType *BB);
 
-    double getExecutionCount(const BasicBlock *BB);
+    void setExecutionCount(const BType *BB, double w);
+
+    void addExecutionCount(const BType *BB, double w);
 
     double getEdgeWeight(Edge e) const {
-      std::map<const Function*, EdgeWeights>::const_iterator J =
+      typename std::map<const FType*, EdgeWeights>::const_iterator J =
         EdgeInformation.find(getFunction(e));
       if (J == EdgeInformation.end()) return MissingValue;
 
-      EdgeWeights::const_iterator I = J->second.find(e);
+      typename EdgeWeights::const_iterator I = J->second.find(e);
       if (I == J->second.end()) return MissingValue;
 
       return I->second;
     }
 
-    EdgeWeights &getEdgeWeights (const Function *F) {
+    void setEdgeWeight(Edge e, double w) {
+      DEBUG_WITH_TYPE("profile-info",
+            errs() << "Creating Edge " << e
+                   << " (weight: " << format("%.20g",w) << ")\n");
+      EdgeInformation[getFunction(e)][e] = w;
+    }
+
+    void addEdgeWeight(Edge e, double w);
+
+    EdgeWeights &getEdgeWeights (const FType *F) {
       return EdgeInformation[F];
     }
 
     //===------------------------------------------------------------------===//
     /// Analysis Update Methods
     ///
-    void removeBlock(const BasicBlock *BB) {
-      std::map<const Function*, BlockCounts>::iterator J =
-        BlockInformation.find(BB->getParent());
-      if (J == BlockInformation.end()) return;
-
-      J->second.erase(BB);
+    void removeBlock(const BType *BB);
+
+    void removeEdge(Edge e);
+
+    void replaceEdge(const Edge &, const Edge &);
+
+    enum GetPathMode {
+      GetPathToExit = 1,
+      GetPathToValue = 2,
+      GetPathToDest = 4,
+      GetPathWithNewEdges = 8
+    };
+
+    const BType *GetPath(const BType *Src, const BType *Dest,
+                              Path &P, unsigned Mode);
+
+    void divertFlow(const Edge &, const Edge &);
+
+    void splitEdge(const BType *FirstBB, const BType *SecondBB,
+                   const BType *NewBB, bool MergeIdenticalEdges = false);
+
+    void splitBlock(const BType *Old, const BType* New);
+
+    void splitBlock(const BType *BB, const BType* NewBB,
+                    BType *const *Preds, unsigned NumPreds);
+
+    void replaceAllUses(const BType *RmBB, const BType *DestBB);
+
+    void transfer(const FType *Old, const FType *New);
+
+    void repair(const FType *F);
+
+    void dump(FType *F = 0, bool real = true) {
+      errs() << "**** This is ProfileInfo " << this << " speaking:\n";
+      if (!real) {
+        typename std::set<const FType*> Functions;
+
+        errs() << "Functions: \n";
+        if (F) {
+          errs() << F << "@" << format("%p", F) << ": " << format("%.20g",getExecutionCount(F)) << "\n";
+          Functions.insert(F);
+        } else {
+          for (typename std::map<const FType*, double>::iterator fi = FunctionInformation.begin(),
+               fe = FunctionInformation.end(); fi != fe; ++fi) {
+            errs() << fi->first << "@" << format("%p",fi->first) << ": " << format("%.20g",fi->second) << "\n";
+            Functions.insert(fi->first);
+          }
+        }
+
+        for (typename std::set<const FType*>::iterator FI = Functions.begin(), FE = Functions.end();
+             FI != FE; ++FI) {
+          const FType *F = *FI;
+          typename std::map<const FType*, BlockCounts>::iterator bwi = BlockInformation.find(F);
+          errs() << "BasicBlocks for Function " << F << ":\n";
+          for (typename BlockCounts::const_iterator bi = bwi->second.begin(), be = bwi->second.end(); bi != be; ++bi) {
+            errs() << bi->first << "@" << format("%p", bi->first) << ": " << format("%.20g",bi->second) << "\n";
+          }
+        }
+
+        for (typename std::set<const FType*>::iterator FI = Functions.begin(), FE = Functions.end();
+             FI != FE; ++FI) {
+          typename std::map<const FType*, EdgeWeights>::iterator ei = EdgeInformation.find(*FI);
+          errs() << "Edges for Function " << ei->first << ":\n";
+          for (typename EdgeWeights::iterator ewi = ei->second.begin(), ewe = ei->second.end(); 
+               ewi != ewe; ++ewi) {
+            errs() << ewi->first << ": " << format("%.20g",ewi->second) << "\n";
+          }
+        }
+      } else {
+        assert(F && "No function given, this is not supported!");
+        errs() << "Functions: \n";
+        errs() << F << "@" << format("%p", F) << ": " << format("%.20g",getExecutionCount(F)) << "\n";
+
+        errs() << "BasicBlocks for Function " << F << ":\n";
+        for (typename FType::const_iterator BI = F->begin(), BE = F->end();
+             BI != BE; ++BI) {
+          const BType *BB = &(*BI);
+          errs() << BB << "@" << format("%p", BB) << ": " << format("%.20g",getExecutionCount(BB)) << "\n";
+        }
+      }
+      errs() << "**** ProfileInfo " << this << ", over and out.\n";
     }
 
-    void removeEdge(Edge e) {
-      std::map<const Function*, EdgeWeights>::iterator J =
-        EdgeInformation.find(getFunction(e));
-      if (J == EdgeInformation.end()) return;
+    bool CalculateMissingEdge(const BType *BB, Edge &removed, bool assumeEmptyExit = false);
 
-      J->second.erase(e);
-    }
+    bool EstimateMissingEdges(const BType *BB);
 
-    void splitEdge(const BasicBlock *FirstBB, const BasicBlock *SecondBB,
-                   const BasicBlock *NewBB, bool MergeIdenticalEdges = false);
+    ProfileInfoT<MachineFunction, MachineBasicBlock> *MI() {
+      if (MachineProfile == 0)
+        MachineProfile = new ProfileInfoT<MachineFunction, MachineBasicBlock>();
+      return MachineProfile;
+    }
 
-    void replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB);
+    bool hasMI() const {
+      return (MachineProfile != 0);
+    }
   };
 
+  typedef ProfileInfoT<Function, BasicBlock> ProfileInfo;
+  typedef ProfileInfoT<MachineFunction, MachineBasicBlock> MachineProfileInfo;
+
   /// createProfileLoaderPass - This function returns a Pass that loads the
   /// profiling information for the module from the specified filename, making
   /// it available to the optimizers.
   Pass *createProfileLoaderPass(const std::string &Filename);
 
-  raw_ostream& operator<<(raw_ostream &O, ProfileInfo::Edge E);
-
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/Argument.h b/include/llvm/Argument.h
index 3a846c2..ca54f48 100644
--- a/include/llvm/Argument.h
+++ b/include/llvm/Argument.h
@@ -51,6 +51,10 @@ public:
   /// in its containing function.
   bool hasByValAttr() const;
 
+  /// hasNestAttr - Return true if this argument has the nest attribute on
+  /// it in its containing function.
+  bool hasNestAttr() const;
+
   /// hasNoAliasAttr - Return true if this argument has the noalias attribute on
   /// it in its containing function.
   bool hasNoAliasAttr() const;
diff --git a/include/llvm/Bitcode/Deserialize.h b/include/llvm/Bitcode/Deserialize.h
index 90a5141..3266038 100644
--- a/include/llvm/Bitcode/Deserialize.h
+++ b/include/llvm/Bitcode/Deserialize.h
@@ -25,53 +25,52 @@
 
 namespace llvm {
 
+struct BPNode {
+  BPNode* Next;
+  uintptr_t& PtrRef;
+  
+  BPNode(BPNode* n, uintptr_t& pref)
+  : Next(n), PtrRef(pref) {
+    PtrRef = 0;
+  }
+};
+
+struct BPEntry {
+  union { BPNode* Head; void* Ptr; };
+  BPEntry() : Head(NULL) {}
+  void SetPtr(BPNode*& FreeList, void* P);
+};
+
+class BPKey {
+  unsigned Raw;
+public:
+  BPKey(SerializedPtrID PtrId) : Raw(PtrId << 1) { assert (PtrId > 0); }
+  BPKey(unsigned code, unsigned) : Raw(code) {}
+  
+  void MarkFinal() { Raw |= 0x1; }
+  bool hasFinalPtr() const { return Raw & 0x1 ? true : false; }
+  SerializedPtrID getID() const { return Raw >> 1; }
+  
+  static inline BPKey getEmptyKey() { return BPKey(0,0); }
+  static inline BPKey getTombstoneKey() { return BPKey(1,0); }
+  static inline unsigned getHashValue(const BPKey& K) { return K.Raw & ~0x1; }
+  
+  static bool isEqual(const BPKey& K1, const BPKey& K2) {
+    return (K1.Raw ^ K2.Raw) & ~0x1 ? false : true;
+  }
+};
+  
+template <>
+struct isPodLike<BPKey> { static const bool value = true; };
+template <>
+struct isPodLike<BPEntry> { static const bool value = true; };
+  
 class Deserializer {
 
   //===----------------------------------------------------------===//
   // Internal type definitions.
   //===----------------------------------------------------------===//
 
-  struct BPNode {
-    BPNode* Next;
-    uintptr_t& PtrRef;
-
-    BPNode(BPNode* n, uintptr_t& pref)
-      : Next(n), PtrRef(pref) {
-        PtrRef = 0;
-      }
-  };
-
-  struct BPEntry {
-    union { BPNode* Head; void* Ptr; };
-
-    BPEntry() : Head(NULL) {}
-
-    static inline bool isPod() { return true; }
-
-    void SetPtr(BPNode*& FreeList, void* P);
-  };
-
-  class BPKey {
-    unsigned Raw;
-
-  public:
-    BPKey(SerializedPtrID PtrId) : Raw(PtrId << 1) { assert (PtrId > 0); }
-    BPKey(unsigned code, unsigned) : Raw(code) {}
-
-    void MarkFinal() { Raw |= 0x1; }
-    bool hasFinalPtr() const { return Raw & 0x1 ? true : false; }
-    SerializedPtrID getID() const { return Raw >> 1; }
-
-    static inline BPKey getEmptyKey() { return BPKey(0,0); }
-    static inline BPKey getTombstoneKey() { return BPKey(1,0); }
-    static inline unsigned getHashValue(const BPKey& K) { return K.Raw & ~0x1; }
-
-    static bool isEqual(const BPKey& K1, const BPKey& K2) {
-      return (K1.Raw ^ K2.Raw) & ~0x1 ? false : true;
-    }
-
-    static bool isPod() { return true; }
-  };
 
   typedef llvm::DenseMap<BPKey,BPEntry,BPKey,BPEntry> MapTy;
 
diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index 318ea28..c54527c 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -68,7 +68,10 @@ namespace CallingConv {
     ARM_AAPCS = 67,
 
     /// ARM_AAPCS_VFP - Same as ARM_AAPCS, but uses hard floating point ABI.
-    ARM_AAPCS_VFP = 68
+    ARM_AAPCS_VFP = 68,
+
+    /// MSP430_INTR - Calling convention used for MSP430 interrupt routines.
+    MSP430_INTR = 69
   };
 } // End CallingConv namespace
 
diff --git a/include/llvm/CodeGen/BreakCriticalMachineEdge.h b/include/llvm/CodeGen/BreakCriticalMachineEdge.h
deleted file mode 100644
index 4861297..0000000
--- a/include/llvm/CodeGen/BreakCriticalMachineEdge.h
+++ /dev/null
@@ -1,108 +0,0 @@
-//===--------- BreakCriticalMachineEdge.h - Break critical edges ---------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===---------------------------------------------------------------------===//
-//
-// Helper function to break a critical machine edge.
-//
-//===---------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_BREAKCRITICALMACHINEEDGE_H
-#define LLVM_CODEGEN_BREAKCRITICALMACHINEEDGE_H
-
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-namespace llvm {
-
-MachineBasicBlock* SplitCriticalMachineEdge(MachineBasicBlock* src,
-                                            MachineBasicBlock* dst) {
-  MachineFunction &MF = *src->getParent();
-  const BasicBlock* srcBB = src->getBasicBlock();
-
-  MachineBasicBlock* crit_mbb = MF.CreateMachineBasicBlock(srcBB);
-
-  // modify the llvm control flow graph
-  src->removeSuccessor(dst);
-  src->addSuccessor(crit_mbb);
-  crit_mbb->addSuccessor(dst);
-
-  // insert the new block into the machine function.
-  MF.push_back(crit_mbb);
-
-  // insert a unconditional branch linking the new block to dst
-  const TargetMachine& TM = MF.getTarget();
-  const TargetInstrInfo* TII = TM.getInstrInfo();
-  std::vector<MachineOperand> emptyConditions;
-  TII->InsertBranch(*crit_mbb, dst, (MachineBasicBlock*)0, 
-                    emptyConditions);
-
-  // modify every branch in src that points to dst to point to the new
-  // machine basic block instead:
-  MachineBasicBlock::iterator mii = src->end();
-  bool found_branch = false;
-  while (mii != src->begin()) {
-    mii--;
-    // if there are no more branches, finish the loop
-    if (!mii->getDesc().isTerminator()) {
-      break;
-    }
-
-    // Scan the operands of this branch, replacing any uses of dst with
-    // crit_mbb.
-    for (unsigned i = 0, e = mii->getNumOperands(); i != e; ++i) {
-      MachineOperand & mo = mii->getOperand(i);
-      if (mo.isMBB() && mo.getMBB() == dst) {
-        found_branch = true;
-        mo.setMBB(crit_mbb);
-      }
-    }
-  }
-
-  // TODO: This is tentative. It may be necessary to fix this code. Maybe
-  // I am inserting too many gotos, but I am trusting that the asm printer
-  // will optimize the unnecessary gotos.
-  if(!found_branch) {
-    TII->InsertBranch(*src, crit_mbb, (MachineBasicBlock*)0, 
-                      emptyConditions);
-  }
-
-  /// Change all the phi functions in dst, so that the incoming block be
-  /// crit_mbb, instead of src
-  for(mii = dst->begin(); mii != dst->end(); mii++) {
-    /// the first instructions are always phi functions.
-    if(mii->getOpcode() != TargetInstrInfo::PHI)
-      break;
-
-    // Find the operands corresponding to the source block
-    std::vector<unsigned> toRemove;
-    unsigned reg = 0;
-    for (unsigned u = 0; u != mii->getNumOperands(); ++u)
-      if (mii->getOperand(u).isMBB() &&
-          mii->getOperand(u).getMBB() == src) {
-        reg = mii->getOperand(u-1).getReg();
-        toRemove.push_back(u-1);
-      }
-    // Remove all uses of this MBB
-    for (std::vector<unsigned>::reverse_iterator I = toRemove.rbegin(),
-         E = toRemove.rend(); I != E; ++I) {
-      mii->RemoveOperand(*I+1);
-      mii->RemoveOperand(*I);
-    }
-
-    // Add a single use corresponding to the new MBB
-    mii->addOperand(MachineOperand::CreateReg(reg, false));
-    mii->addOperand(MachineOperand::CreateMBB(crit_mbb));
-  }
-
-  return crit_mbb;
-}
-
-}
-
-#endif
diff --git a/include/llvm/CodeGen/CalcSpillWeights.h b/include/llvm/CodeGen/CalcSpillWeights.h
new file mode 100644
index 0000000..2fc03bd
--- /dev/null
+++ b/include/llvm/CodeGen/CalcSpillWeights.h
@@ -0,0 +1,39 @@
+//===---------------- lib/CodeGen/CalcSpillWeights.h ------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_CODEGEN_CALCSPILLWEIGHTS_H
+#define LLVM_CODEGEN_CALCSPILLWEIGHTS_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+  class LiveInterval;
+
+  /// CalculateSpillWeights - Compute spill weights for all virtual register
+  /// live intervals.
+  class CalculateSpillWeights : public MachineFunctionPass {
+  public:
+    static char ID;
+
+    CalculateSpillWeights() : MachineFunctionPass(&ID) {}
+
+    virtual void getAnalysisUsage(AnalysisUsage &au) const;
+
+    virtual bool runOnMachineFunction(MachineFunction &fn);    
+
+  private:
+    /// Returns true if the given live interval is zero length.
+    bool isZeroLengthInterval(LiveInterval *li) const;
+  };
+
+}
+
+#endif // LLVM_CODEGEN_CALCSPILLWEIGHTS_H
diff --git a/include/llvm/CodeGen/DAGISelHeader.h b/include/llvm/CodeGen/DAGISelHeader.h
index 624f18a..7233f3f 100644
--- a/include/llvm/CodeGen/DAGISelHeader.h
+++ b/include/llvm/CodeGen/DAGISelHeader.h
@@ -93,7 +93,7 @@ void SelectRoot(SelectionDAG &DAG) {
   // a reference to the root node, preventing it from being deleted,
   // and tracking any changes of the root.
   HandleSDNode Dummy(CurDAG->getRoot());
-  ISelPosition = next(SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()));
+  ISelPosition = llvm::next(SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode()));
 
   // The AllNodes list is now topological-sorted. Visit the
   // nodes by starting at the end of the list (the root of the
@@ -110,8 +110,7 @@ void SelectRoot(SelectionDAG &DAG) {
     DAG.setSubgraphColor(Node, "red");
 #endif
     SDNode *ResNode = Select(SDValue(Node, 0));
-    // If node should not be replaced, 
-    // continue with the next one.
+    // If node should not be replaced, continue with the next one.
     if (ResNode == Node)
       continue;
     // Replace node.
diff --git a/include/llvm/CodeGen/FastISel.h b/include/llvm/CodeGen/FastISel.h
index 1efd1e0..806952a 100644
--- a/include/llvm/CodeGen/FastISel.h
+++ b/include/llvm/CodeGen/FastISel.h
@@ -98,14 +98,6 @@ public:
   ///
   bool SelectOperator(User *I, unsigned Opcode);
 
-  /// TargetSelectInstruction - This method is called by target-independent
-  /// code when the normal FastISel process fails to select an instruction.
-  /// This gives targets a chance to emit code for anything that doesn't
-  /// fit into FastISel's framework. It returns true if it was successful.
-  ///
-  virtual bool
-  TargetSelectInstruction(Instruction *I) = 0;
-
   /// getRegForValue - Create a virtual register and arrange for it to
   /// be assigned the value for the given LLVM value.
   unsigned getRegForValue(Value *V);
@@ -134,6 +126,14 @@ protected:
 #endif
            );
 
+  /// TargetSelectInstruction - This method is called by target-independent
+  /// code when the normal FastISel process fails to select an instruction.
+  /// This gives targets a chance to emit code for anything that doesn't
+  /// fit into FastISel's framework. It returns true if it was successful.
+  ///
+  virtual bool
+  TargetSelectInstruction(Instruction *I) = 0;
+
   /// FastEmit_r - This method is called by target-independent code
   /// to request that an instruction with the given type and opcode
   /// be emitted.
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 4d2d0ee..5608c99 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -19,6 +19,7 @@
 #include "llvm/CodeGen/SchedulerRegistry.h"
 #include "llvm/CodeGen/GCs.h"
 #include "llvm/Target/TargetMachine.h"
+#include <cstdlib>
 
 namespace {
   struct ForceCodegenLinking {
diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h
index 7a02d0f..d7ff8da 100644
--- a/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -112,10 +112,13 @@ namespace llvm {
       return (unsigned)(IntervalPercentage * indexes_->getFunctionSize());
     }
 
-    /// conflictsWithPhysRegDef - Returns true if the specified register
-    /// is defined during the duration of the specified interval.
-    bool conflictsWithPhysRegDef(const LiveInterval &li, VirtRegMap &vrm,
-                                 unsigned reg);
+    /// conflictsWithPhysReg - Returns true if the specified register is used or
+    /// defined during the duration of the specified interval. Copies to and
+    /// from li.reg are allowed. This method is only able to analyze simple
+    /// ranges that stay within a single basic block. Anything else is
+    /// considered a conflict.
+    bool conflictsWithPhysReg(const LiveInterval &li, VirtRegMap &vrm,
+                              unsigned reg);
 
     /// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except
     /// it can check use as well.
@@ -186,6 +189,10 @@ namespace llvm {
       return indexes_->getMBBFromIndex(index);
     }
 
+    SlotIndex getMBBTerminatorGap(const MachineBasicBlock *mbb) {
+      return indexes_->getTerminatorGap(mbb);
+    }
+
     SlotIndex InsertMachineInstrInMaps(MachineInstr *MI) {
       return indexes_->insertMachineInstrInMaps(MI);
     }
diff --git a/include/llvm/CodeGen/LiveVariables.h b/include/llvm/CodeGen/LiveVariables.h
index 39a4b89..a7bf600 100644
--- a/include/llvm/CodeGen/LiveVariables.h
+++ b/include/llvm/CodeGen/LiveVariables.h
@@ -283,6 +283,11 @@ public:
     return getVarInfo(Reg).isLiveIn(MBB, Reg, *MRI);
   }
 
+  /// isLiveOut - Determine if Reg is live out from MBB, when not considering
+  /// PHI nodes. This means that Reg is either killed by a successor block or
+  /// passed through one.
+  bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB);
+
   /// addNewBlock - Add a new basic block BB between DomBB and SuccBB. All
   /// variables that are live out of DomBB and live into SuccBB will be marked
   /// as passing live through BB. This method assumes that the machine code is
diff --git a/include/llvm/CodeGen/MachineFrameInfo.h b/include/llvm/CodeGen/MachineFrameInfo.h
index bed82af..968e4ea 100644
--- a/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/include/llvm/CodeGen/MachineFrameInfo.h
@@ -327,7 +327,20 @@ public:
   /// setMaxAlignment - Set the preferred alignment.
   ///
   void setMaxAlignment(unsigned Align) { MaxAlignment = Align; }
-  
+
+  /// calculateMaxStackAlignment() - If there is a local object which requires
+  /// greater alignment than the current max alignment, adjust accordingly.
+  void calculateMaxStackAlignment() {
+    for (int i = getObjectIndexBegin(),
+         e = getObjectIndexEnd(); i != e; ++i) {
+      if (isDeadObjectIndex(i))
+        continue;
+
+      unsigned Align = getObjectAlignment(i);
+      MaxAlignment = std::max(MaxAlignment, Align);
+    }
+  }
+
   /// hasCalls - Return true if the current function has no function calls.
   /// This is only valid during or after prolog/epilog code emission.
   ///
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h
index c620449..87b67d6 100644
--- a/include/llvm/CodeGen/MachineInstr.h
+++ b/include/llvm/CodeGen/MachineInstr.h
@@ -320,6 +320,11 @@ public:
   /// loads the instruction does are invariant (if it does multiple loads).
   bool isInvariantLoad(AliasAnalysis *AA) const;
 
+  /// isConstantValuePHI - If the specified instruction is a PHI that always
+  /// merges together the same virtual register, return the register, otherwise
+  /// return 0.
+  unsigned isConstantValuePHI() const;
+
   //
   // Debugging support
   //
diff --git a/include/llvm/CodeGen/MachineSSAUpdater.h b/include/llvm/CodeGen/MachineSSAUpdater.h
new file mode 100644
index 0000000..ab663fe
--- /dev/null
+++ b/include/llvm/CodeGen/MachineSSAUpdater.h
@@ -0,0 +1,115 @@
+//===-- MachineSSAUpdater.h - Unstructured SSA Update Tool ------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MachineSSAUpdater class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINESSAUPDATER_H
+#define LLVM_CODEGEN_MACHINESSAUPDATER_H
+
+namespace llvm {
+  class MachineBasicBlock;
+  class MachineFunction;
+  class MachineInstr;
+  class MachineOperand;
+  class MachineRegisterInfo;
+  class TargetInstrInfo;
+  class TargetRegisterClass;
+  template<typename T> class SmallVectorImpl;
+
+/// MachineSSAUpdater - This class updates SSA form for a set of virtual
+/// registers defined in multiple blocks.  This is used when code duplication
+/// or another unstructured transformation wants to rewrite a set of uses of one
+/// vreg with uses of a set of vregs.
+class MachineSSAUpdater {
+  /// AvailableVals - This keeps track of which value to use on a per-block
+  /// basis.  When we insert PHI nodes, we keep track of them here.
+  //typedef DenseMap<MachineBasicBlock*, unsigned > AvailableValsTy;
+  void *AV;
+
+  /// IncomingPredInfo - We use this as scratch space when doing our recursive
+  /// walk.  This should only be used in GetValueInBlockInternal, normally it
+  /// should be empty.
+  //std::vector<std::pair<MachineBasicBlock*, unsigned > > IncomingPredInfo;
+  void *IPI;
+
+  /// VR - Current virtual register whose uses are being updated.
+  unsigned VR;
+
+  /// VRC - Register class of the current virtual register.
+  const TargetRegisterClass *VRC;
+
+  /// InsertedPHIs - If this is non-null, the MachineSSAUpdater adds all PHI
+  /// nodes that it creates to the vector.
+  SmallVectorImpl<MachineInstr*> *InsertedPHIs;
+
+  const TargetInstrInfo *TII;
+  MachineRegisterInfo *MRI;
+public:
+  /// MachineSSAUpdater constructor.  If InsertedPHIs is specified, it will be
+  /// filled in with all PHI Nodes created by rewriting.
+  explicit MachineSSAUpdater(MachineFunction &MF,
+                             SmallVectorImpl<MachineInstr*> *InsertedPHIs = 0);
+  ~MachineSSAUpdater();
+
+  /// Initialize - Reset this object to get ready for a new set of SSA
+  /// updates.
+  void Initialize(unsigned V);
+
+  /// AddAvailableValue - Indicate that a rewritten value is available at the
+  /// end of the specified block with the specified value.
+  void AddAvailableValue(MachineBasicBlock *BB, unsigned V);
+
+  /// HasValueForBlock - Return true if the MachineSSAUpdater already has a
+  /// value for the specified block.
+  bool HasValueForBlock(MachineBasicBlock *BB) const;
+
+  /// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+  /// live at the end of the specified block.
+  unsigned GetValueAtEndOfBlock(MachineBasicBlock *BB);
+
+  /// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+  /// is live in the middle of the specified block.
+  ///
+  /// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+  /// important case: if there is a definition of the rewritten value after the
+  /// 'use' in BB.  Consider code like this:
+  ///
+  ///      X1 = ...
+  ///   SomeBB:
+  ///      use(X)
+  ///      X2 = ...
+  ///      br Cond, SomeBB, OutBB
+  ///
+  /// In this case, there are two values (X1 and X2) added to the AvailableVals
+  /// set by the client of the rewriter, and those values are both live out of
+  /// their respective blocks.  However, the use of X happens in the *middle* of
+  /// a block.  Because of this, we need to insert a new PHI node in SomeBB to
+  /// merge the appropriate values, and this value isn't live out of the block.
+  ///
+  unsigned GetValueInMiddleOfBlock(MachineBasicBlock *BB);
+
+  /// RewriteUse - Rewrite a use of the symbolic value.  This handles PHI nodes,
+  /// which use their value in the corresponding predecessor.  Note that this
+  /// will not work if the use is supposed to be rewritten to a value defined in
+  /// the same block as the use, but above it.  Any 'AddAvailableValue's added
+  /// for the use's block will be considered to be below it.
+  void RewriteUse(MachineOperand &U);
+
+private:
+  void ReplaceRegWith(unsigned OldReg, unsigned NewReg);
+  unsigned GetValueAtEndOfBlockInternal(MachineBasicBlock *BB);
+  void operator=(const MachineSSAUpdater&); // DO NOT IMPLEMENT
+  MachineSSAUpdater(const MachineSSAUpdater&);     // DO NOT IMPLEMENT
+};
+
+} // End llvm namespace
+
+#endif
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 8e89702..99f8c34 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -131,7 +131,7 @@ namespace llvm {
 
   /// TailDuplicate Pass - Duplicate blocks with unconditional branches
   /// into tails of their predecessors.
-  FunctionPass *createTailDuplicatePass();
+  FunctionPass *createTailDuplicatePass(bool PreRegAlloc = false);
 
   /// IfConverter Pass - This pass performs machine code if conversion.
   FunctionPass *createIfConverterPass();
@@ -191,6 +191,10 @@ namespace llvm {
   /// the GCC-style builtin setjmp/longjmp (sjlj) to handling EH control flow.
   FunctionPass *createSjLjEHPass(const TargetLowering *tli);
 
+  /// createMaxStackAlignmentCalculatorPass() - Determine the maximum required
+  /// alignment for a function.
+  FunctionPass* createMaxStackAlignmentCalculatorPass();
+
 } // End llvm namespace
 
 #endif
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index e586807..c09c634 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -110,6 +110,46 @@ class SelectionDAG {
   /// SelectionDAG.
   BumpPtrAllocator Allocator;
 
+  /// NodeOrdering - Assigns a "line number" value to each SDNode that
+  /// corresponds to the "line number" of the original LLVM instruction. This
+  /// used for turning off scheduling, because we'll forgo the normal scheduling
+  /// algorithm and output the instructions according to this ordering.
+  class NodeOrdering {
+    /// LineNo - The line of the instruction the node corresponds to. A value of
+    /// `0' means it's not assigned.
+    unsigned LineNo;
+    std::map<const SDNode*, unsigned> Order;
+
+    void operator=(const NodeOrdering&); // Do not implement.
+    NodeOrdering(const NodeOrdering&);   // Do not implement.
+  public:
+    NodeOrdering() : LineNo(0) {}
+
+    void add(const SDNode *Node) {
+      assert(LineNo && "Invalid line number!");
+      Order[Node] = LineNo;
+    }
+    void remove(const SDNode *Node) {
+      std::map<const SDNode*, unsigned>::iterator Itr = Order.find(Node);
+      if (Itr != Order.end())
+        Order.erase(Itr);
+    }
+    void clear() {
+      Order.clear();
+      LineNo = 1;
+    }
+    unsigned getLineNo(const SDNode *Node) {
+      unsigned LN = Order[Node];
+      assert(LN && "Node isn't in ordering map!");
+      return LN;
+    }
+    void newInst() {
+      ++LineNo;
+    }
+
+    void dump() const;
+  } *Ordering;
+
   /// VerifyNode - Sanity check the given node.  Aborts if it is invalid.
   void VerifyNode(SDNode *N);
 
@@ -120,6 +160,9 @@ class SelectionDAG {
                               DenseSet<SDNode *> &visited,
                               int level, bool &printed);
 
+  void operator=(const SelectionDAG&); // Do not implement.
+  SelectionDAG(const SelectionDAG&);   // Do not implement.
+
 public:
   SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli);
   ~SelectionDAG();
@@ -199,6 +242,13 @@ public:
     return Root = N;
   }
 
+  /// NewInst - Tell the ordering object that we're processing a new
+  /// instruction.
+  void NewInst() {
+    if (Ordering)
+      Ordering->newInst();
+  }
+
   /// Combine - This iterates over the nodes in the SelectionDAG, folding
   /// certain types of nodes together, or eliminating superfluous nodes.  The
   /// Level argument controls whether Combine is allowed to produce nodes and
@@ -220,7 +270,7 @@ public:
   ///
   /// Note that this is an involved process that may invalidate pointers into
   /// the graph.
-  void Legalize(bool TypesNeedLegalizing, CodeGenOpt::Level OptLevel);
+  void Legalize(CodeGenOpt::Level OptLevel);
 
   /// LegalizeVectors - This transforms the SelectionDAG into a SelectionDAG
   /// that only uses vector math operations supported by the target.  This is
@@ -890,6 +940,16 @@ public:
   /// vector op and fill the end of the resulting vector with UNDEFS.
   SDValue UnrollVectorOp(SDNode *N, unsigned ResNE = 0);
 
+  /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
+  /// location that is 'Dist' units away from the location that the 'Base' load 
+  /// is loading from.
+  bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+                         unsigned Bytes, int Dist) const;
+
+  /// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+  /// it cannot be inferred.
+  unsigned InferPtrAlignment(SDValue Ptr) const;
+
 private:
   bool RemoveNodeFromCSEMaps(SDNode *N);
   void AddModifiedNodeToCSEMaps(SDNode *N, DAGUpdateListener *UpdateListener);
diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h
index 4130d2c..bfd3492 100644
--- a/include/llvm/CodeGen/SelectionDAGISel.h
+++ b/include/llvm/CodeGen/SelectionDAGISel.h
@@ -113,7 +113,6 @@ protected:
   // Calls to these functions are generated by tblgen.
   SDNode *Select_INLINEASM(SDValue N);
   SDNode *Select_UNDEF(const SDValue &N);
-  SDNode *Select_DBG_LABEL(const SDValue &N);
   SDNode *Select_EH_LABEL(const SDValue &N);
   void CannotYetSelect(SDValue N);
   void CannotYetSelectIntrinsic(SDValue N);
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 950fd32..571db47 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -891,8 +891,9 @@ template<> struct DenseMapInfo<SDValue> {
   static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return true; }
 };
+template <> struct isPodLike<SDValue> { static const bool value = true; };
+
 
 /// simplify_type specializations - Allow casting operators to work directly on
 /// SDValues as if they were SDNode*'s.
@@ -1095,7 +1096,7 @@ public:
   /// hasOneUse - Return true if there is exactly one use of this node.
   ///
   bool hasOneUse() const {
-    return !use_empty() && next(use_begin()) == use_end();
+    return !use_empty() && llvm::next(use_begin()) == use_end();
   }
 
   /// use_size - Return the number of uses of this node. This method takes
@@ -2397,6 +2398,11 @@ public:
   SDNodeIterator operator++(int) { // Postincrement
     SDNodeIterator tmp = *this; ++*this; return tmp;
   }
+  size_t operator-(SDNodeIterator Other) const {
+    assert(Node == Other.Node &&
+           "Cannot compare iterators of two different nodes!");
+    return Operand - Other.Operand;
+  }
 
   static SDNodeIterator begin(SDNode *N) { return SDNodeIterator(N, 0); }
   static SDNodeIterator end  (SDNode *N) {
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 65d85fc..9a85ee1 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -343,8 +343,10 @@ namespace llvm {
     static inline bool isEqual(const SlotIndex &LHS, const SlotIndex &RHS) {
       return (LHS == RHS);
     }
-    static inline bool isPod() { return false; }
   };
+  
+  template <> struct isPodLike<SlotIndex> { static const bool value = true; };
+
 
   inline raw_ostream& operator<<(raw_ostream &os, SlotIndex li) {
     li.print(os);
diff --git a/include/llvm/CodeGen/ValueTypes.h b/include/llvm/CodeGen/ValueTypes.h
index 45ef9b9..06e07f3 100644
--- a/include/llvm/CodeGen/ValueTypes.h
+++ b/include/llvm/CodeGen/ValueTypes.h
@@ -47,35 +47,36 @@ namespace llvm {
       f80            =   9,   // This is a 80 bit floating point value
       f128           =  10,   // This is a 128 bit floating point value
       ppcf128        =  11,   // This is a PPC 128-bit floating point value
-      Flag           =  12,   // This is a condition code or machine flag.
-
-      isVoid         =  13,   // This has no value
-
-      v2i8           =  14,   //  2 x i8
-      v4i8           =  15,   //  4 x i8
-      v8i8           =  16,   //  8 x i8
-      v16i8          =  17,   // 16 x i8
-      v32i8          =  18,   // 32 x i8
-      v2i16          =  19,   //  2 x i16
-      v4i16          =  20,   //  4 x i16
-      v8i16          =  21,   //  8 x i16
-      v16i16         =  22,   // 16 x i16
-      v2i32          =  23,   //  2 x i32
-      v4i32          =  24,   //  4 x i32
-      v8i32          =  25,   //  8 x i32
-      v1i64          =  26,   //  1 x i64
-      v2i64          =  27,   //  2 x i64
-      v4i64          =  28,   //  4 x i64
-
-      v2f32          =  29,   //  2 x f32
-      v4f32          =  30,   //  4 x f32
-      v8f32          =  31,   //  8 x f32
-      v2f64          =  32,   //  2 x f64
-      v4f64          =  33,   //  4 x f64
+
+      v2i8           =  12,   //  2 x i8
+      v4i8           =  13,   //  4 x i8
+      v8i8           =  14,   //  8 x i8
+      v16i8          =  15,   // 16 x i8
+      v32i8          =  16,   // 32 x i8
+      v2i16          =  17,   //  2 x i16
+      v4i16          =  18,   //  4 x i16
+      v8i16          =  19,   //  8 x i16
+      v16i16         =  20,   // 16 x i16
+      v2i32          =  21,   //  2 x i32
+      v4i32          =  22,   //  4 x i32
+      v8i32          =  23,   //  8 x i32
+      v1i64          =  24,   //  1 x i64
+      v2i64          =  25,   //  2 x i64
+      v4i64          =  26,   //  4 x i64
+
+      v2f32          =  27,   //  2 x f32
+      v4f32          =  28,   //  4 x f32
+      v8f32          =  29,   //  8 x f32
+      v2f64          =  30,   //  2 x f64
+      v4f64          =  31,   //  4 x f64
 
       FIRST_VECTOR_VALUETYPE = v2i8,
       LAST_VECTOR_VALUETYPE  = v4f64,
 
+      Flag           =  32,   // This glues nodes together during pre-RA sched
+
+      isVoid         =  33,   // This has no value
+
       LAST_VALUETYPE =  34,   // This always remains at the end of the list.
 
       // This is the current maximum for LAST_VALUETYPE.
@@ -166,6 +167,12 @@ namespace llvm {
         return *this;
       }
     }
+
+    /// getScalarType - If this is a vector type, return the element type,
+    /// otherwise return this.
+    MVT getScalarType() const {
+      return isVector() ? getVectorElementType() : *this;
+    }
     
     MVT getVectorElementType() const {
       switch (SimpleTy) {
@@ -524,6 +531,12 @@ namespace llvm {
       return V;
     }
 
+    /// getScalarType - If this is a vector type, return the element type,
+    /// otherwise return this.
+    EVT getScalarType() const {
+      return isVector() ? getVectorElementType() : *this;
+    }
+    
     /// getVectorElementType - Given a vector type, return the type of
     /// each element.
     EVT getVectorElementType() const {
diff --git a/include/llvm/CodeGen/ValueTypes.td b/include/llvm/CodeGen/ValueTypes.td
index 986555b..c8bb789 100644
--- a/include/llvm/CodeGen/ValueTypes.td
+++ b/include/llvm/CodeGen/ValueTypes.td
@@ -31,30 +31,31 @@ def f64    : ValueType<64 ,  8>;   // 64-bit floating point value
 def f80    : ValueType<80 ,  9>;   // 80-bit floating point value
 def f128   : ValueType<128, 10>;   // 128-bit floating point value
 def ppcf128: ValueType<128, 11>;   // PPC 128-bit floating point value
-def FlagVT : ValueType<0  , 12>;   // Condition code or machine flag
-def isVoid : ValueType<0  , 13>;   // Produces no value
 
-def v2i8   : ValueType<16 , 14>;   //  2 x i8  vector value
-def v4i8   : ValueType<32 , 15>;   //  4 x i8  vector value
-def v8i8   : ValueType<64 , 16>;   //  8 x i8  vector value
-def v16i8  : ValueType<128, 17>;   // 16 x i8  vector value
-def v32i8  : ValueType<256, 18>;   // 32 x i8 vector value
-def v2i16  : ValueType<32 , 19>;   //  2 x i16 vector value
-def v4i16  : ValueType<64 , 20>;   //  4 x i16 vector value
-def v8i16  : ValueType<128, 21>;   //  8 x i16 vector value
-def v16i16 : ValueType<256, 22>;   // 16 x i16 vector value
-def v2i32  : ValueType<64 , 23>;   //  2 x i32 vector value
-def v4i32  : ValueType<128, 24>;   //  4 x i32 vector value
-def v8i32  : ValueType<256, 25>;   //  8 x i32 vector value
-def v1i64  : ValueType<64 , 26>;   //  1 x i64 vector value
-def v2i64  : ValueType<128, 27>;   //  2 x i64 vector value
-def v4i64  : ValueType<256, 28>;   //  4 x f64 vector value
+def v2i8   : ValueType<16 , 12>;   //  2 x i8  vector value
+def v4i8   : ValueType<32 , 13>;   //  4 x i8  vector value
+def v8i8   : ValueType<64 , 14>;   //  8 x i8  vector value
+def v16i8  : ValueType<128, 15>;   // 16 x i8  vector value
+def v32i8  : ValueType<256, 16>;   // 32 x i8 vector value
+def v2i16  : ValueType<32 , 17>;   //  2 x i16 vector value
+def v4i16  : ValueType<64 , 18>;   //  4 x i16 vector value
+def v8i16  : ValueType<128, 19>;   //  8 x i16 vector value
+def v16i16 : ValueType<256, 20>;   // 16 x i16 vector value
+def v2i32  : ValueType<64 , 21>;   //  2 x i32 vector value
+def v4i32  : ValueType<128, 22>;   //  4 x i32 vector value
+def v8i32  : ValueType<256, 23>;   //  8 x i32 vector value
+def v1i64  : ValueType<64 , 24>;   //  1 x i64 vector value
+def v2i64  : ValueType<128, 25>;   //  2 x i64 vector value
+def v4i64  : ValueType<256, 26>;   //  4 x f64 vector value
 
-def v2f32  : ValueType<64,  29>;   //  2 x f32 vector value
-def v4f32  : ValueType<128, 30>;   //  4 x f32 vector value
-def v8f32  : ValueType<256, 31>;   //  8 x f32 vector value
-def v2f64  : ValueType<128, 32>;   //  2 x f64 vector value
-def v4f64  : ValueType<256, 33>;   //  4 x f64 vector value
+def v2f32  : ValueType<64,  27>;   //  2 x f32 vector value
+def v4f32  : ValueType<128, 28>;   //  4 x f32 vector value
+def v8f32  : ValueType<256, 29>;   //  8 x f32 vector value
+def v2f64  : ValueType<128, 30>;   //  2 x f64 vector value
+def v4f64  : ValueType<256, 31>;   //  4 x f64 vector value
+
+def FlagVT : ValueType<0  , 32>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 33>;   // Produces no value
 
 def MetadataVT: ValueType<0, 250>; // Metadata
 
diff --git a/include/llvm/CompilerDriver/Common.td b/include/llvm/CompilerDriver/Common.td
index 79edb02..8d2f63b 100644
--- a/include/llvm/CompilerDriver/Common.td
+++ b/include/llvm/CompilerDriver/Common.td
@@ -42,9 +42,10 @@ def hidden;
 def init;
 def multi_val;
 def one_or_more;
+def optional;
 def really_hidden;
 def required;
-def zero_or_one;
+def comma_separated;
 
 // The 'case' construct.
 def case;
@@ -77,6 +78,8 @@ def any_empty;
 def append_cmd;
 def forward;
 def forward_as;
+def forward_value;
+def forward_transformed_value;
 def stop_compilation;
 def unpack_values;
 def warning;
diff --git a/include/llvm/IntrinsicInst.h b/include/llvm/IntrinsicInst.h
index 1e1dca2..a516409 100644
--- a/include/llvm/IntrinsicInst.h
+++ b/include/llvm/IntrinsicInst.h
@@ -70,6 +70,7 @@ namespace llvm {
       case Intrinsic::dbg_region_start:
       case Intrinsic::dbg_region_end:
       case Intrinsic::dbg_declare:
+      case Intrinsic::dbg_value:
         return true;
       default: return false;
       }
@@ -171,6 +172,25 @@ namespace llvm {
     }
   };
 
+  /// DbgValueInst - This represents the llvm.dbg.value instruction.
+  ///
+  struct DbgValueInst : public DbgInfoIntrinsic {
+    Value *getValue()  const {
+      return cast<MDNode>(getOperand(1))->getElement(0);
+    }
+    Value *getOffset() const { return getOperand(2); }
+    MDNode *getVariable() const { return cast<MDNode>(getOperand(3)); }
+
+    // Methods for support type inquiry through isa, cast, and dyn_cast:
+    static inline bool classof(const DbgValueInst *) { return true; }
+    static inline bool classof(const IntrinsicInst *I) {
+      return I->getIntrinsicID() == Intrinsic::dbg_value;
+    }
+    static inline bool classof(const Value *V) {
+      return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+    }
+  };
+
   /// MemIntrinsic - This is the common base class for memset/memcpy/memmove.
   ///
   struct MemIntrinsic : public IntrinsicInst {
diff --git a/include/llvm/Intrinsics.td b/include/llvm/Intrinsics.td
index b3b0678..6ff87ba 100644
--- a/include/llvm/Intrinsics.td
+++ b/include/llvm/Intrinsics.td
@@ -290,6 +290,9 @@ let Properties = [IntrNoMem] in {
   def int_dbg_func_start   : Intrinsic<[llvm_void_ty], [llvm_metadata_ty]>;
   def int_dbg_declare      : Intrinsic<[llvm_void_ty],
                                        [llvm_descriptor_ty, llvm_metadata_ty]>;
+  def int_dbg_value  	   : Intrinsic<[llvm_void_ty],
+                                       [llvm_metadata_ty, llvm_i64_ty,
+                                        llvm_metadata_ty]>;
 }
 
 //===------------------ Exception Handling Intrinsics----------------------===//
diff --git a/include/llvm/LinkAllVMCore.h b/include/llvm/LinkAllVMCore.h
index 0ee18d5..2145bf8 100644
--- a/include/llvm/LinkAllVMCore.h
+++ b/include/llvm/LinkAllVMCore.h
@@ -35,6 +35,7 @@
 #include "llvm/Support/Mangler.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Support/SlowOperationInformer.h"
+#include <cstdlib>
 
 namespace {
   struct ForceVMCoreLinking {
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 909ccde..f3e4dfd 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -111,12 +111,10 @@ public:
   virtual void assignPassManager(PMStack &, 
                                  PassManagerType = PMT_Unknown) {}
   /// Check if available pass managers are suitable for this pass or not.
-  virtual void preparePassManager(PMStack &) {}
+  virtual void preparePassManager(PMStack &);
   
   ///  Return what kind of Pass Manager can manage this pass.
-  virtual PassManagerType getPotentialPassManagerType() const {
-    return PMT_Unknown; 
-  }
+  virtual PassManagerType getPotentialPassManagerType() const;
 
   // Access AnalysisResolver
   inline void setResolver(AnalysisResolver *AR) { 
@@ -132,9 +130,7 @@ public:
   /// particular analysis result to this function, it can then use the
   /// getAnalysis<AnalysisType>() function, below.
   ///
-  virtual void getAnalysisUsage(AnalysisUsage &) const {
-    // By default, no analysis results are used, all are invalidated.
-  }
+  virtual void getAnalysisUsage(AnalysisUsage &) const;
 
   /// releaseMemory() - This member can be implemented by a pass if it wants to
   /// be able to release its memory when it is no longer needed.  The default
@@ -147,11 +143,11 @@ public:
   /// Optionally implement this function to release pass memory when it is no
   /// longer used.
   ///
-  virtual void releaseMemory() {}
+  virtual void releaseMemory();
 
   /// verifyAnalysis() - This member can be implemented by a analysis pass to
   /// check state of analysis information. 
-  virtual void verifyAnalysis() const {}
+  virtual void verifyAnalysis() const;
 
   // dumpPassStructure - Implement the -debug-passes=PassStructure option
   virtual void dumpPassStructure(unsigned Offset = 0);
@@ -221,9 +217,7 @@ public:
                                  PassManagerType T = PMT_ModulePassManager);
 
   ///  Return what kind of Pass Manager can manage this pass.
-  virtual PassManagerType getPotentialPassManagerType() const {
-    return PMT_ModulePassManager;
-  }
+  virtual PassManagerType getPotentialPassManagerType() const;
 
   explicit ModulePass(intptr_t pid) : Pass(pid) {}
   explicit ModulePass(const void *pid) : Pass(pid) {}
@@ -245,7 +239,7 @@ public:
   /// and if it does, the overloaded version of initializePass may get access to
   /// these passes with getAnalysis<>.
   ///
-  virtual void initializePass() {}
+  virtual void initializePass();
 
   /// ImmutablePasses are never run.
   ///
@@ -276,7 +270,7 @@ public:
   /// doInitialization - Virtual method overridden by subclasses to do
   /// any necessary per-module initialization.
   ///
-  virtual bool doInitialization(Module &) { return false; }
+  virtual bool doInitialization(Module &);
   
   /// runOnFunction - Virtual method overriden by subclasses to do the
   /// per-function processing of the pass.
@@ -286,7 +280,7 @@ public:
   /// doFinalization - Virtual method overriden by subclasses to do any post
   /// processing needed after all passes have run.
   ///
-  virtual bool doFinalization(Module &) { return false; }
+  virtual bool doFinalization(Module &);
 
   /// runOnModule - On a module, we run this pass by initializing,
   /// ronOnFunction'ing once for every function in the module, then by
@@ -303,9 +297,7 @@ public:
                                  PassManagerType T = PMT_FunctionPassManager);
 
   ///  Return what kind of Pass Manager can manage this pass.
-  virtual PassManagerType getPotentialPassManagerType() const {
-    return PMT_FunctionPassManager;
-  }
+  virtual PassManagerType getPotentialPassManagerType() const;
 };
 
 
@@ -328,12 +320,12 @@ public:
   /// doInitialization - Virtual method overridden by subclasses to do
   /// any necessary per-module initialization.
   ///
-  virtual bool doInitialization(Module &) { return false; }
+  virtual bool doInitialization(Module &);
 
   /// doInitialization - Virtual method overridden by BasicBlockPass subclasses
   /// to do any necessary per-function initialization.
   ///
-  virtual bool doInitialization(Function &) { return false; }
+  virtual bool doInitialization(Function &);
 
   /// runOnBasicBlock - Virtual method overriden by subclasses to do the
   /// per-basicblock processing of the pass.
@@ -343,12 +335,12 @@ public:
   /// doFinalization - Virtual method overriden by BasicBlockPass subclasses to
   /// do any post processing needed after all passes have run.
   ///
-  virtual bool doFinalization(Function &) { return false; }
+  virtual bool doFinalization(Function &);
 
   /// doFinalization - Virtual method overriden by subclasses to do any post
   /// processing needed after all passes have run.
   ///
-  virtual bool doFinalization(Module &) { return false; }
+  virtual bool doFinalization(Module &);
 
 
   // To run this pass on a function, we simply call runOnBasicBlock once for
@@ -360,9 +352,7 @@ public:
                                  PassManagerType T = PMT_BasicBlockPassManager);
 
   ///  Return what kind of Pass Manager can manage this pass.
-  virtual PassManagerType getPotentialPassManagerType() const {
-    return PMT_BasicBlockPassManager; 
-  }
+  virtual PassManagerType getPotentialPassManagerType() const;
 };
 
 /// If the user specifies the -time-passes argument on an LLVM tool command line
diff --git a/include/llvm/Support/CommandLine.h b/include/llvm/Support/CommandLine.h
index 2e65fdd..7f8b10c 100644
--- a/include/llvm/Support/CommandLine.h
+++ b/include/llvm/Support/CommandLine.h
@@ -986,7 +986,7 @@ template<class DataType>
 class list_storage<DataType, bool> : public std::vector<DataType> {
 public:
   template<class T>
-  void addValue(const T &V) { push_back(V); }
+  void addValue(const T &V) { std::vector<DataType>::push_back(V); }
 };
 
 
@@ -1011,7 +1011,7 @@ class list : public Option, public list_storage<DataType, Storage> {
       typename ParserClass::parser_data_type();
     if (Parser.parse(*this, ArgName, Arg, Val))
       return true;  // Parse Error!
-    addValue(Val);
+    list_storage<DataType, Storage>::addValue(Val);
     setPosition(pos);
     Positions.push_back(pos);
     return false;
diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h
index da31f98..8861a20 100644
--- a/include/llvm/Support/Compiler.h
+++ b/include/llvm/Support/Compiler.h
@@ -70,6 +70,16 @@
 #define DISABLE_INLINE
 #endif
 
+// ALWAYS_INLINE - On compilers where we have a directive to do so, mark a
+// method "always inline" because it is performance sensitive.
+#if (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+#define ALWAYS_INLINE __attribute__((always_inline))
+#else
+// TODO: No idea how to do this with MSVC.
+#define ALWAYS_INLINE
+#endif
+
+
 #ifdef __GNUC__
 #define NORETURN __attribute__((noreturn))
 #elif defined(_MSC_VER)
diff --git a/include/llvm/Support/Debug.h b/include/llvm/Support/Debug.h
index afa828c..e8bc0ce 100644
--- a/include/llvm/Support/Debug.h
+++ b/include/llvm/Support/Debug.h
@@ -63,7 +63,8 @@ void SetCurrentDebugType(const char *Type);
 /// This will emit the debug information if -debug is present, and -debug-only
 /// is not specified, or is specified as "bitset".
 #define DEBUG_WITH_TYPE(TYPE, X)                                        \
-  do { if (DebugFlag && isCurrentDebugType(TYPE)) { X; } } while (0)
+  do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE)) { X; } \
+  } while (0)
 
 #else
 #define isCurrentDebugType(X) (false)
diff --git a/include/llvm/Support/DebugLoc.h b/include/llvm/Support/DebugLoc.h
index 362390f..6814f63 100644
--- a/include/llvm/Support/DebugLoc.h
+++ b/include/llvm/Support/DebugLoc.h
@@ -66,7 +66,7 @@ namespace llvm {
   };
 
   // Specialize DenseMapInfo for DebugLocTuple.
-  template<>  struct DenseMapInfo<DebugLocTuple> {
+  template<> struct DenseMapInfo<DebugLocTuple> {
     static inline DebugLocTuple getEmptyKey() {
       return DebugLocTuple(0, 0, ~0U, ~0U);
     }
@@ -85,9 +85,9 @@ namespace llvm {
              LHS.Line         == RHS.Line &&
              LHS.Col          == RHS.Col;
     }
-
-    static bool isPod() { return true; }
   };
+  template <> struct isPodLike<DebugLocTuple> {static const bool value = true;};
+
 
   /// DebugLocTracker - This class tracks debug location information.
   ///
diff --git a/include/llvm/Support/ErrorHandling.h b/include/llvm/Support/ErrorHandling.h
index 6067795..4d24ada 100644
--- a/include/llvm/Support/ErrorHandling.h
+++ b/include/llvm/Support/ErrorHandling.h
@@ -79,9 +79,10 @@ namespace llvm {
 /// Use this instead of assert(0), so that the compiler knows this path
 /// is not reachable even for NDEBUG builds.
 #ifndef NDEBUG
-#define llvm_unreachable(msg) llvm_unreachable_internal(msg, __FILE__, __LINE__)
+#define llvm_unreachable(msg) \
+  ::llvm::llvm_unreachable_internal(msg, __FILE__, __LINE__)
 #else
-#define llvm_unreachable(msg) llvm_unreachable_internal()
+#define llvm_unreachable(msg) ::llvm::llvm_unreachable_internal()
 #endif
 
 #endif
diff --git a/include/llvm/Support/GetElementPtrTypeIterator.h b/include/llvm/Support/GetElementPtrTypeIterator.h
index f5915c9..e5e7fc7 100644
--- a/include/llvm/Support/GetElementPtrTypeIterator.h
+++ b/include/llvm/Support/GetElementPtrTypeIterator.h
@@ -84,7 +84,7 @@ namespace llvm {
 
   inline gep_type_iterator gep_type_begin(const User *GEP) {
     return gep_type_iterator::begin(GEP->getOperand(0)->getType(),
-                                      GEP->op_begin()+1);
+                                    GEP->op_begin()+1);
   }
   inline gep_type_iterator gep_type_end(const User *GEP) {
     return gep_type_iterator::end(GEP->op_end());
diff --git a/include/llvm/Support/IRBuilder.h b/include/llvm/Support/IRBuilder.h
index 2db2477..1310d70 100644
--- a/include/llvm/Support/IRBuilder.h
+++ b/include/llvm/Support/IRBuilder.h
@@ -269,6 +269,27 @@ public:
     return Insert(IndirectBrInst::Create(Addr, NumDests));
   }
 
+  InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+                           BasicBlock *UnwindDest, const Twine &Name = "") {
+    Value *Args[] = { 0 };
+    return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
+                                     Args), Name);
+  }
+  InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
+                           BasicBlock *UnwindDest, Value *Arg1,
+                           const Twine &Name = "") {
+    Value *Args[] = { Arg1 };
+    return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
+                                     Args+1), Name);
+  }
+  InvokeInst *CreateInvoke3(Value *Callee, BasicBlock *NormalDest,
+                            BasicBlock *UnwindDest, Value *Arg1,
+                            Value *Arg2, Value *Arg3,
+                            const Twine &Name = "") {
+    Value *Args[] = { Arg1, Arg2, Arg3 };
+    return Insert(InvokeInst::Create(Callee, NormalDest, UnwindDest, Args,
+                                     Args+3), Name);
+  }
   /// CreateInvoke - Create an invoke instruction.
   template<typename InputIterator>
   InvokeInst *CreateInvoke(Value *Callee, BasicBlock *NormalDest,
@@ -386,18 +407,39 @@ public:
         return Folder.CreateShl(LC, RC);
     return Insert(BinaryOperator::CreateShl(LHS, RHS), Name);
   }
+  Value *CreateShl(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      return Folder.CreateShl(LC, RHSC);
+    return Insert(BinaryOperator::CreateShl(LHS, RHSC), Name);
+  }
+
   Value *CreateLShr(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Folder.CreateLShr(LC, RC);
     return Insert(BinaryOperator::CreateLShr(LHS, RHS), Name);
   }
+  Value *CreateLShr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      return Folder.CreateLShr(LC, RHSC);
+    return Insert(BinaryOperator::CreateLShr(LHS, RHSC), Name);
+  }
+  
   Value *CreateAShr(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *LC = dyn_cast<Constant>(LHS))
       if (Constant *RC = dyn_cast<Constant>(RHS))
         return Folder.CreateAShr(LC, RC);
     return Insert(BinaryOperator::CreateAShr(LHS, RHS), Name);
   }
+  Value *CreateAShr(Value *LHS, uint64_t RHS, const Twine &Name = "") {
+    Constant *RHSC = ConstantInt::get(LHS->getType(), RHS);
+    if (Constant *LC = dyn_cast<Constant>(LHS))
+      return Folder.CreateSShr(LC, RHSC);
+    return Insert(BinaryOperator::CreateAShr(LHS, RHSC), Name);
+  }
+
   Value *CreateAnd(Value *LHS, Value *RHS, const Twine &Name = "") {
     if (Constant *RC = dyn_cast<Constant>(RHS)) {
       if (isa<ConstantInt>(RC) && cast<ConstantInt>(RC)->isAllOnesValue())
diff --git a/include/llvm/Support/ValueHandle.h b/include/llvm/Support/ValueHandle.h
index a9872a7..82c3cae 100644
--- a/include/llvm/Support/ValueHandle.h
+++ b/include/llvm/Support/ValueHandle.h
@@ -254,15 +254,18 @@ struct DenseMapInfo<AssertingVH<T> > {
   static bool isEqual(const AssertingVH<T> &LHS, const AssertingVH<T> &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() {
+};
+  
+template <typename T>
+struct isPodLike<AssertingVH<T> > {
 #ifdef NDEBUG
-    return true;
+  static const bool value = true;
 #else
-    return false;
+  static const bool value = false;
 #endif
-  }
 };
 
+
 /// TrackingVH - This is a value handle that tracks a Value (or Value subclass),
 /// even across RAUW operations.
 ///
diff --git a/include/llvm/Support/raw_ostream.h b/include/llvm/Support/raw_ostream.h
index a78e81f..2b3341d 100644
--- a/include/llvm/Support/raw_ostream.h
+++ b/include/llvm/Support/raw_ostream.h
@@ -186,14 +186,12 @@ public:
     // Inline fast path, particulary for constant strings where a sufficiently
     // smart compiler will simplify strlen.
 
-    this->operator<<(StringRef(Str));
-    return *this;
+    return this->operator<<(StringRef(Str));
   }
 
   raw_ostream &operator<<(const std::string &Str) {
     // Avoid the fast path, it would only increase code size for a marginal win.
-    write(Str.data(), Str.length());
-    return *this;
+    return write(Str.data(), Str.length());
   }
 
   raw_ostream &operator<<(unsigned long N);
@@ -202,13 +200,11 @@ public:
   raw_ostream &operator<<(long long N);
   raw_ostream &operator<<(const void *P);
   raw_ostream &operator<<(unsigned int N) {
-    this->operator<<(static_cast<unsigned long>(N));
-    return *this;
+    return this->operator<<(static_cast<unsigned long>(N));
   }
 
   raw_ostream &operator<<(int N) {
-    this->operator<<(static_cast<long>(N));
-    return *this;
+    return this->operator<<(static_cast<long>(N));
   }
 
   raw_ostream &operator<<(double N);
diff --git a/include/llvm/Support/type_traits.h b/include/llvm/Support/type_traits.h
index ce916b5..515295b 100644
--- a/include/llvm/Support/type_traits.h
+++ b/include/llvm/Support/type_traits.h
@@ -17,13 +17,15 @@
 #ifndef LLVM_SUPPORT_TYPE_TRAITS_H
 #define LLVM_SUPPORT_TYPE_TRAITS_H
 
+#include <utility>
+
 // This is actually the conforming implementation which works with abstract
 // classes.  However, enough compilers have trouble with it that most will use
 // the one in boost/type_traits/object_traits.hpp. This implementation actually
 // works with VC7.0, but other interactions seem to fail when we use it.
 
 namespace llvm {
-
+  
 namespace dont_use
 {
     // These two functions should never be used. They are helpers to
@@ -48,6 +50,23 @@ struct is_class
  public:
     enum { value = sizeof(char) == sizeof(dont_use::is_class_helper<T>(0)) };
 };
+  
+  
+/// isPodLike - This is a type trait that is used to determine whether a given
+/// type can be copied around with memcpy instead of running ctors etc.
+template <typename T>
+struct isPodLike {
+  // If we don't know anything else, we can (at least) assume that all non-class
+  // types are PODs.
+  static const bool value = !is_class<T>::value;
+};
+
+// std::pair's are pod-like if their elements are.
+template<typename T, typename U>
+struct isPodLike<std::pair<T, U> > {
+  static const bool value = isPodLike<T>::value & isPodLike<U>::value;
+};
+  
 
 /// \brief Metafunction that determines whether the two given types are 
 /// equivalent.
diff --git a/include/llvm/System/Atomic.h b/include/llvm/System/Atomic.h
index 0c05d69..fc19369 100644
--- a/include/llvm/System/Atomic.h
+++ b/include/llvm/System/Atomic.h
@@ -20,7 +20,11 @@ namespace llvm {
   namespace sys {
     void MemoryFence();
 
+#ifdef _MSC_VER
+    typedef long cas_flag;
+#else
     typedef uint32_t cas_flag;
+#endif
     cas_flag CompareAndSwap(volatile cas_flag* ptr,
                             cas_flag new_value,
                             cas_flag old_value);
diff --git a/include/llvm/System/DataTypes.h.cmake b/include/llvm/System/DataTypes.h.cmake
index 180c86c..d9ca273 100644
--- a/include/llvm/System/DataTypes.h.cmake
+++ b/include/llvm/System/DataTypes.h.cmake
@@ -118,14 +118,33 @@ typedef signed int ssize_t;
 #define INT32_MAX 2147483647
 #define INT32_MIN -2147483648
 #define UINT32_MAX 4294967295U
-#define INT8_C(C)   C
-#define UINT8_C(C)  C
-#define INT16_C(C)  C
-#define UINT16_C(C) C
-#define INT32_C(C)  C
-#define UINT32_C(C) C ## U
-#define INT64_C(C)  ((int64_t) C ## LL)
-#define UINT64_C(C) ((uint64_t) C ## ULL)
+/* Certain compatibility updates to VC++ introduce the `cstdint'
+ * header, which defines the INT*_C macros. On default installs they
+ * are absent. */
+#ifndef INT8_C
+# define INT8_C(C)   C
+#endif
+#ifndef UINT8_C
+# define UINT8_C(C)  C
+#endif
+#ifndef INT16_C
+# define INT16_C(C)  C
+#endif
+#ifndef UINT16_C
+# define UINT16_C(C) C
+#endif
+#ifndef INT32_C
+# define INT32_C(C)  C
+#endif
+#ifndef UINT32_C
+# define UINT32_C(C) C ## U
+#endif
+#ifndef INT64_C
+# define INT64_C(C)  ((int64_t) C ## LL)
+#endif
+#ifndef UINT64_C
+# define UINT64_C(C) ((uint64_t) C ## ULL)
+#endif
 #endif /* _MSC_VER */
 
 /* Set defaults for constants which we cannot find. */
diff --git a/include/llvm/System/DataTypes.h.in b/include/llvm/System/DataTypes.h.in
index d574910..1f8ce79 100644
--- a/include/llvm/System/DataTypes.h.in
+++ b/include/llvm/System/DataTypes.h.in
@@ -36,8 +36,6 @@
 #include <math.h>
 #endif
 
-#ifndef _MSC_VER
-
 /* Note that this header's correct operation depends on __STDC_LIMIT_MACROS
    being defined.  We would define it here, but in order to prevent Bad Things
    happening when system headers or C++ STL headers include stdint.h before we
@@ -89,40 +87,6 @@ typedef u_int64_t uint64_t;
 #define UINT32_MAX 4294967295U
 #endif
 
-#else /* _MSC_VER */
-/* Visual C++ doesn't provide standard integer headers, but it does provide
-   built-in data types. */
-#include <stdlib.h>
-#include <stddef.h>
-#include <sys/types.h>
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-typedef signed int int32_t;
-typedef unsigned int uint32_t;
-typedef short int16_t;
-typedef unsigned short uint16_t;
-typedef signed char int8_t;
-typedef unsigned char uint8_t;
-typedef signed int ssize_t;
-#define INT8_MAX 127
-#define INT8_MIN -128
-#define UINT8_MAX 255
-#define INT16_MAX 32767
-#define INT16_MIN -32768
-#define UINT16_MAX 65535
-#define INT32_MAX 2147483647
-#define INT32_MIN -2147483648
-#define UINT32_MAX 4294967295U
-#define INT8_C(C)   C
-#define UINT8_C(C)  C
-#define INT16_C(C)  C
-#define UINT16_C(C) C
-#define INT32_C(C)  C
-#define UINT32_C(C) C ## U
-#define INT64_C(C)  ((int64_t) C ## LL)
-#define UINT64_C(C) ((uint64_t) C ## ULL)
-#endif /* _MSC_VER */
-
 /* Set defaults for constants which we cannot find. */
 #if !defined(INT64_MAX)
 # define INT64_MAX 9223372036854775807LL
diff --git a/include/llvm/Target/TargetData.h b/include/llvm/Target/TargetData.h
index e1d052e..2e63188 100644
--- a/include/llvm/Target/TargetData.h
+++ b/include/llvm/Target/TargetData.h
@@ -30,7 +30,6 @@ class Type;
 class IntegerType;
 class StructType;
 class StructLayout;
-class StructLayoutMap;
 class GlobalVariable;
 class LLVMContext;
 
@@ -60,8 +59,6 @@ struct TargetAlignElem {
                              unsigned char pref_align, uint32_t bit_width);
   /// Equality predicate
   bool operator==(const TargetAlignElem &rhs) const;
-  /// output stream operator
-  std::ostream &dump(std::ostream &os) const;
 };
 
 class TargetData : public ImmutablePass {
@@ -86,7 +83,7 @@ private:
   static const TargetAlignElem InvalidAlignmentElem;
 
   // The StructType -> StructLayout map.
-  mutable StructLayoutMap *LayoutMap;
+  mutable void *LayoutMap;
 
   //! Set/initialize target alignments
   void setAlignment(AlignTypeEnum align_type, unsigned char abi_align,
@@ -153,7 +150,7 @@ public:
   /// The width is specified in bits.
   ///
   bool isLegalInteger(unsigned Width) const {
-    for (unsigned i = 0, e = LegalIntWidths.size(); i != e; ++i)
+    for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i)
       if (LegalIntWidths[i] == Width)
         return true;
     return false;
diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h
index 1ba6b2f..1bcd6fd 100644
--- a/include/llvm/Target/TargetInstrInfo.h
+++ b/include/llvm/Target/TargetInstrInfo.h
@@ -26,6 +26,7 @@ class LiveVariables;
 class CalleeSavedInfo;
 class SDNode;
 class SelectionDAG;
+class MachineMemOperand;
 
 template<class T> class SmallVectorImpl;
 
@@ -182,11 +183,13 @@ public:
 
   /// hasLoadFromStackSlot - If the specified machine instruction has
   /// a load from a stack slot, return true along with the FrameIndex
-  /// of the loaded stack slot.  If not, return false.  Unlike
+  /// of the loaded stack slot and the machine mem operand containing
+  /// the reference.  If not, return false.  Unlike
   /// isLoadFromStackSlot, this returns true for any instructions that
   /// loads from the stack.  This is just a hint, as some cases may be
   /// missed.
   virtual bool hasLoadFromStackSlot(const MachineInstr *MI,
+                                    const MachineMemOperand *&MMO,
                                     int &FrameIndex) const {
     return 0;
   }
@@ -205,17 +208,18 @@ public:
   /// stack locations as well.  This uses a heuristic so it isn't
   /// reliable for correctness.
   virtual unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
-                                      int &FrameIndex) const {
+                                            int &FrameIndex) const {
     return 0;
   }
 
   /// hasStoreToStackSlot - If the specified machine instruction has a
   /// store to a stack slot, return true along with the FrameIndex of
-  /// the loaded stack slot.  If not, return false.  Unlike
-  /// isStoreToStackSlot, this returns true for any instructions that
-  /// loads from the stack.  This is just a hint, as some cases may be
-  /// missed.
+  /// the loaded stack slot and the machine mem operand containing the
+  /// reference.  If not, return false.  Unlike isStoreToStackSlot,
+  /// this returns true for any instructions that loads from the
+  /// stack.  This is just a hint, as some cases may be missed.
   virtual bool hasStoreToStackSlot(const MachineInstr *MI,
+                                   const MachineMemOperand *&MMO,
                                    int &FrameIndex) const {
     return 0;
   }
@@ -282,11 +286,10 @@ public:
   ///    just return false, leaving TBB/FBB null.
   /// 2. If this block ends with only an unconditional branch, it sets TBB to be
   ///    the destination block.
-  /// 3. If this block ends with an conditional branch and it falls through to
-  ///    a successor block, it sets TBB to be the branch destination block and
-  ///    a list of operands that evaluate the condition. These
-  ///    operands can be passed to other TargetInstrInfo methods to create new
-  ///    branches.
+  /// 3. If this block ends with a conditional branch and it falls through to a
+  ///    successor block, it sets TBB to be the branch destination block and a
+  ///    list of operands that evaluate the condition. These operands can be
+  ///    passed to other TargetInstrInfo methods to create new branches.
   /// 4. If this block ends with a conditional branch followed by an
   ///    unconditional branch, it returns the 'true' destination in TBB, the
   ///    'false' destination in FBB, and a list of operands that evaluate the
@@ -461,14 +464,6 @@ public:
     return 0;
   }
   
-  /// BlockHasNoFallThrough - Return true if the specified block does not
-  /// fall-through into its successor block.  This is primarily used when a
-  /// branch is unanalyzable.  It is useful for things like unconditional
-  /// indirect branches (jump tables).
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-    return false;
-  }
-  
   /// ReverseBranchCondition - Reverses the branch condition of the specified
   /// condition list, returning false on success and true if it cannot be
   /// reversed.
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index ca51102..9536e04 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -857,12 +857,6 @@ public:
   virtual bool
   isGAPlusOffset(SDNode *N, GlobalValue* &GA, int64_t &Offset) const;
 
-  /// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
-  /// location that is 'Dist' units away from the location that the 'Base' load 
-  /// is loading from.
-  bool isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, unsigned Bytes,
-                         int Dist, const MachineFrameInfo *MFI) const;
-
   /// PerformDAGCombine - This method will be invoked for all target nodes and
   /// for any target-independent nodes that the target has registered with
   /// invoke it for.
@@ -978,7 +972,7 @@ protected:
   /// not work with the with specified type and indicate what to do about it.
   void setLoadExtAction(unsigned ExtType, MVT VT,
                       LegalizeAction Action) {
-    assert((unsigned)VT.SimpleTy < MVT::LAST_VALUETYPE &&
+    assert((unsigned)VT.SimpleTy*2 < 63 &&
            ExtType < array_lengthof(LoadExtActions) &&
            "Table isn't big enough!");
     LoadExtActions[ExtType] &= ~(uint64_t(3UL) << VT.SimpleTy*2);
@@ -990,7 +984,7 @@ protected:
   void setTruncStoreAction(MVT ValVT, MVT MemVT,
                            LegalizeAction Action) {
     assert((unsigned)ValVT.SimpleTy < array_lengthof(TruncStoreActions) &&
-           (unsigned)MemVT.SimpleTy < MVT::LAST_VALUETYPE &&
+           (unsigned)MemVT.SimpleTy*2 < 63 &&
            "Table isn't big enough!");
     TruncStoreActions[ValVT.SimpleTy] &= ~(uint64_t(3UL)  << MemVT.SimpleTy*2);
     TruncStoreActions[ValVT.SimpleTy] |= (uint64_t)Action << MemVT.SimpleTy*2;
diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h
index cb29c73..dec0b1d 100644
--- a/include/llvm/Target/TargetRegisterInfo.h
+++ b/include/llvm/Target/TargetRegisterInfo.h
@@ -299,7 +299,7 @@ public:
     /// FirstVirtualRegister - This is the first register number that is
     /// considered to be a 'virtual' register, which is part of the SSA
     /// namespace.  This must be the same for all targets, which means that each
-    /// target is limited to 1024 registers.
+    /// target is limited to this fixed number of registers.
     FirstVirtualRegister = 1024
   };
 
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 0a83c3d..17c9b86 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_library(LLVMAnalysis
   LoopPass.cpp
   MemoryBuiltins.cpp
   MemoryDependenceAnalysis.cpp
+  PHITransAddr.cpp
   PointerTracking.cpp
   PostDominators.cpp
   ProfileEstimatorPass.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index a276c64..10a8b11 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -25,6 +25,16 @@
 #include "llvm/Support/CallSite.h"
 using namespace llvm;
 
+/// As its comment mentions, PointerMayBeCaptured can be expensive.
+/// However, it's not easy for BasicAA to cache the result, because
+/// it's an ImmutablePass. To work around this, bound queries at a
+/// fixed number of uses.
+///
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
 /// PointerMayBeCaptured - Return true if this pointer value may be captured
 /// by the enclosing function (which is required to exist).  This routine can
 /// be expensive, so consider caching the results.  The boolean ReturnCaptures
@@ -35,11 +45,17 @@ using namespace llvm;
 bool llvm::PointerMayBeCaptured(const Value *V,
                                 bool ReturnCaptures, bool StoreCaptures) {
   assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!");
-  SmallVector<Use*, 16> Worklist;
-  SmallSet<Use*, 16> Visited;
+  SmallVector<Use*, Threshold> Worklist;
+  SmallSet<Use*, Threshold> Visited;
+  int Count = 0;
 
   for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
        UI != UE; ++UI) {
+    // If there are lots of uses, conservatively say that the value
+    // is captured to avoid taking too much compile time.
+    if (Count++ >= Threshold)
+      return true;
+
     Use *U = &UI.getUse();
     Visited.insert(U);
     Worklist.push_back(U);
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 96f738e..eaf90d0 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -432,7 +432,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
   // Instead of loading constant c string, use corresponding integer value
   // directly if string length is small enough.
   std::string Str;
-  if (TD && GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) {
+  if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
     unsigned StrLen = Str.length();
     const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
     unsigned NumBits = Ty->getPrimitiveSizeInBits();
@@ -569,9 +569,16 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
   SmallVector<Constant*, 32> NewIdxs;
   do {
     if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
-      // The only pointer indexing we'll do is on the first index of the GEP.
-      if (isa<PointerType>(ATy) && !NewIdxs.empty())
-        break;
+      if (isa<PointerType>(ATy)) {
+        // The only pointer indexing we'll do is on the first index of the GEP.
+        if (!NewIdxs.empty())
+          break;
+       
+        // Only handle pointers to sized types, not pointers to functions.
+        if (!ATy->getElementType()->isSized())
+          return 0;
+      }
+        
       // Determine which element of the array the offset points into.
       APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
       if (ElemSize == 0)
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 41d803c..1c9f500 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -866,7 +866,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
                                          DICompileUnit CompileUnit,
                                          unsigned LineNo, DIType Type,
                                          bool isLocalToUnit,
-                                         bool isDefinition) {
+                                         bool isDefinition,
+                                         unsigned VK, unsigned VIndex,
+                                         DIType ContainingType) {
 
   Value *Elts[] = {
     GetTagConstant(dwarf::DW_TAG_subprogram),
@@ -879,9 +881,38 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
     ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
     Type.getNode(),
     ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
-    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition)
+    ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+    ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+    ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+    ContainingType.getNode()
   };
-  return DISubprogram(MDNode::get(VMContext, &Elts[0], 11));
+  return DISubprogram(MDNode::get(VMContext, &Elts[0], 14));
+}
+
+/// CreateSubprogramDefinition - Create new subprogram descriptor for the
+/// given declaration. 
+DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) {
+  if (SPDeclaration.isDefinition())
+    return DISubprogram(SPDeclaration.getNode());
+
+  MDNode *DeclNode = SPDeclaration.getNode();
+  Value *Elts[] = {
+    GetTagConstant(dwarf::DW_TAG_subprogram),
+    llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+    DeclNode->getElement(2), // Context
+    DeclNode->getElement(3), // Name
+    DeclNode->getElement(4), // DisplayName
+    DeclNode->getElement(5), // LinkageName
+    DeclNode->getElement(6), // CompileUnit
+    DeclNode->getElement(7), // LineNo
+    DeclNode->getElement(8), // Type
+    DeclNode->getElement(9), // isLocalToUnit
+    ConstantInt::get(Type::getInt1Ty(VMContext), true),
+    DeclNode->getElement(11), // Virtuality
+    DeclNode->getElement(12), // VIndex
+    DeclNode->getElement(13)  // Containting Type
+  };
+  return DISubprogram(MDNode::get(VMContext, &Elts[0], 14));
 }
 
 /// CreateGlobalVariable - Create a new descriptor for the specified global.
@@ -1019,6 +1050,37 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
   return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
 }
 
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+                                                DIVariable D,
+                                                Instruction *InsertBefore) {
+  assert(V && "no value passed to dbg.value");
+  assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
+         "offset must be i64");
+  if (!ValueFn)
+    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+  Value *Elts[] = { V };
+  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+                    D.getNode() };
+  return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+}
+
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+                                                DIVariable D,
+                                                BasicBlock *InsertAtEnd) {
+  assert(V && "no value passed to dbg.value");
+  assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
+         "offset must be i64");
+  if (!ValueFn)
+    ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+  Value *Elts[] = { V };
+  Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+                    D.getNode() };
+  return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+}
 
 //===----------------------------------------------------------------------===//
 // DebugInfoFinder implementations.
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index e12db81..4d5b312 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -121,8 +121,6 @@ namespace {
 
       return *LHS == *RHS;
     }
-
-    static bool isPod() { return true; }
   };
 
   class Andersens : public ModulePass, public AliasAnalysis,
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 37747b6..627dbbb 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -53,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
       if (newLoop == L)
         return false;
       // if newLoop is an outer loop of L, this is OK.
-      if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
+      if (newLoop->contains(L->getHeader()))
         return false;
     }
     return true;
@@ -307,6 +307,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
   for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
     AddUsersIfInteresting(I);
 
+  Processed.clear();
   return false;
 }
 
@@ -369,7 +370,7 @@ void IVUsers::dump() const {
 void IVUsers::releaseMemory() {
   IVUsesByStride.clear();
   StrideOrder.clear();
-  Processed.clear();
+  IVUses.clear();
 }
 
 void IVStrideUse::deleted() {
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 4de756c..34089ee 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -316,12 +316,12 @@ bool Loop::hasDedicatedExits() const {
 
 /// getUniqueExitBlocks - Return all unique successor blocks of this loop.
 /// These are the blocks _outside of the current loop_ which are branched to.
-/// This assumes that loop is in canonical form.
+/// This assumes that loop exits are in canonical form.
 ///
 void
 Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
-  assert(isLoopSimplifyForm() &&
-         "getUniqueExitBlocks assumes the loop is in canonical form!");
+  assert(hasDedicatedExits() &&
+         "getUniqueExitBlocks assumes the loop has canonical form exits!");
 
   // Sort the blocks vector so that we can use binary search to do quick
   // lookups.
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index ae6f970..a0c7706 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/PredIteratorCache.h"
@@ -172,7 +173,7 @@ MemDepResult MemoryDependenceAnalysis::
 getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad, 
                          BasicBlock::iterator ScanIt, BasicBlock *BB) {
 
-  Value *invariantTag = 0;
+  Value *InvariantTag = 0;
 
   // Walk backwards through the basic block, looking for dependencies.
   while (ScanIt != BB->begin()) {
@@ -180,34 +181,36 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
 
     // If we're in an invariant region, no dependencies can be found before
     // we pass an invariant-begin marker.
-    if (invariantTag == Inst) {
-      invariantTag = 0;
+    if (InvariantTag == Inst) {
+      InvariantTag = 0;
       continue;
-    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+    }
+    
+    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+      // Debug intrinsics don't cause dependences.
+      if (isa<DbgInfoIntrinsic>(Inst)) continue;
+      
       // If we pass an invariant-end marker, then we've just entered an
       // invariant region and can start ignoring dependencies.
       if (II->getIntrinsicID() == Intrinsic::invariant_end) {
-        uint64_t invariantSize = ~0ULL;
-        if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(2)))
-          invariantSize = CI->getZExtValue();
-        
-        AliasAnalysis::AliasResult R =
-          AA->alias(II->getOperand(3), invariantSize, MemPtr, MemSize);
+        // FIXME: This only considers queries directly on the invariant-tagged
+        // pointer, not on query pointers that are indexed off of them.  It'd
+        // be nice to handle that at some point.
+        AliasAnalysis::AliasResult R = 
+          AA->alias(II->getOperand(3), ~0U, MemPtr, ~0U);
         if (R == AliasAnalysis::MustAlias) {
-          invariantTag = II->getOperand(1);
+          InvariantTag = II->getOperand(1);
           continue;
         }
       
       // If we reach a lifetime begin or end marker, then the query ends here
       // because the value is undefined.
-      } else if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-                 II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        uint64_t invariantSize = ~0ULL;
-        if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(1)))
-          invariantSize = CI->getZExtValue();
-
+      } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+        // FIXME: This only considers queries directly on the invariant-tagged
+        // pointer, not on query pointers that are indexed off of them.  It'd
+        // be nice to handle that at some point.
         AliasAnalysis::AliasResult R =
-          AA->alias(II->getOperand(2), invariantSize, MemPtr, MemSize);
+          AA->alias(II->getOperand(2), ~0U, MemPtr, ~0U);
         if (R == AliasAnalysis::MustAlias)
           return MemDepResult::getDef(II);
       }
@@ -215,10 +218,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
 
     // If we're querying on a load and we're in an invariant region, we're done
     // at this point. Nothing a load depends on can live in an invariant region.
-    if (isLoad && invariantTag) continue;
-
-    // Debug intrinsics don't cause dependences.
-    if (isa<DbgInfoIntrinsic>(Inst)) continue;
+    if (isLoad && InvariantTag) continue;
 
     // Values depend on loads if the pointers are must aliased.  This means that
     // a load depends on another must aliased load from the same value.
@@ -243,7 +243,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
       // There can't be stores to the value we care about inside an 
       // invariant region.
-      if (invariantTag) continue;
+      if (InvariantTag) continue;
       
       // If alias analysis can tell that this store is guaranteed to not modify
       // the query pointer, ignore it.  Use getModRefInfo to handle cases where
@@ -292,7 +292,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
     case AliasAnalysis::Mod:
       // If we're in an invariant region, we can ignore calls that ONLY
       // modify the pointer.
-      if (invariantTag) continue;
+      if (InvariantTag) continue;
       return MemDepResult::getClobber(Inst);
     case AliasAnalysis::Ref:
       // If the call is known to never store to the pointer, and if this is a
@@ -374,21 +374,22 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
       IntrinsicID = II->getIntrinsicID();
 
     switch (IntrinsicID) {
-      case Intrinsic::lifetime_start:
-      case Intrinsic::lifetime_end:
-      case Intrinsic::invariant_start:
-        MemPtr = QueryInst->getOperand(2);
-        MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue();
-        break;
-      case Intrinsic::invariant_end:
-        MemPtr = QueryInst->getOperand(3);
-        MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue();
-        break;
-      default:
-        CallSite QueryCS = CallSite::get(QueryInst);
-        bool isReadOnly = AA->onlyReadsMemory(QueryCS);
-        LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
-                                               QueryParent);
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::invariant_start:
+      MemPtr = QueryInst->getOperand(2);
+      MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue();
+      break;
+    case Intrinsic::invariant_end:
+      MemPtr = QueryInst->getOperand(3);
+      MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue();
+      break;
+    default:
+      CallSite QueryCS = CallSite::get(QueryInst);
+      bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+      LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+                                             QueryParent);
+      break;
     }
   } else {
     // Non-memory instruction.
@@ -421,7 +422,7 @@ static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
   if (Count == 0) return;
 
   for (unsigned i = 1; i != unsigned(Count); ++i)
-    assert(Cache[i-1] <= Cache[i] && "Cache isn't sorted!");
+    assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
 }
 #endif
 
@@ -462,8 +463,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
     // determine what is dirty, seeding our initial DirtyBlocks worklist.
     for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
        I != E; ++I)
-      if (I->second.isDirty())
-        DirtyBlocks.push_back(I->first);
+      if (I->getResult().isDirty())
+        DirtyBlocks.push_back(I->getBB());
     
     // Sort the cache so that we can do fast binary search lookups below.
     std::sort(Cache.begin(), Cache.end());
@@ -501,27 +502,27 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
     DEBUG(AssertSorted(Cache, NumSortedEntries));
     NonLocalDepInfo::iterator Entry = 
       std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
-                       std::make_pair(DirtyBB, MemDepResult()));
-    if (Entry != Cache.begin() && prior(Entry)->first == DirtyBB)
+                       NonLocalDepEntry(DirtyBB));
+    if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
       --Entry;
     
-    MemDepResult *ExistingResult = 0;
+    NonLocalDepEntry *ExistingResult = 0;
     if (Entry != Cache.begin()+NumSortedEntries && 
-        Entry->first == DirtyBB) {
+        Entry->getBB() == DirtyBB) {
       // If we already have an entry, and if it isn't already dirty, the block
       // is done.
-      if (!Entry->second.isDirty())
+      if (!Entry->getResult().isDirty())
         continue;
       
       // Otherwise, remember this slot so we can update the value.
-      ExistingResult = &Entry->second;
+      ExistingResult = &*Entry;
     }
     
     // If the dirty entry has a pointer, start scanning from it so we don't have
     // to rescan the entire block.
     BasicBlock::iterator ScanPos = DirtyBB->end();
     if (ExistingResult) {
-      if (Instruction *Inst = ExistingResult->getInst()) {
+      if (Instruction *Inst = ExistingResult->getResult().getInst()) {
         ScanPos = Inst;
         // We're removing QueryInst's use of Inst.
         RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
@@ -545,9 +546,9 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
     // If we had a dirty entry for the block, update it.  Otherwise, just add
     // a new entry.
     if (ExistingResult)
-      *ExistingResult = Dep;
+      ExistingResult->setResult(Dep, 0);
     else
-      Cache.push_back(std::make_pair(DirtyBB, Dep));
+      Cache.push_back(NonLocalDepEntry(DirtyBB, Dep, 0));
     
     // If the block has a dependency (i.e. it isn't completely transparent to
     // the value), remember the association!
@@ -587,17 +588,20 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
   const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
   uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
   
+  PHITransAddr Address(Pointer, TD);
+  
   // This is the set of blocks we've inspected, and the pointer we consider in
   // each block.  Because of critical edges, we currently bail out if querying
   // a block with multiple different pointers.  This can happen during PHI
   // translation.
   DenseMap<BasicBlock*, Value*> Visited;
-  if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
+  if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB,
                                    Result, Visited, true))
     return;
   Result.clear();
-  Result.push_back(std::make_pair(FromBB,
-                                  MemDepResult::getClobber(FromBB->begin())));
+  Result.push_back(NonLocalDepEntry(FromBB,
+                                    MemDepResult::getClobber(FromBB->begin()),
+                                    Pointer));
 }
 
 /// GetNonLocalInfoForBlock - Compute the memdep value for BB with
@@ -613,30 +617,30 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
   // the cache set.  If so, find it.
   NonLocalDepInfo::iterator Entry =
     std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
-                     std::make_pair(BB, MemDepResult()));
-  if (Entry != Cache->begin() && prior(Entry)->first == BB)
+                     NonLocalDepEntry(BB));
+  if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
     --Entry;
   
-  MemDepResult *ExistingResult = 0;
-  if (Entry != Cache->begin()+NumSortedEntries && Entry->first == BB)
-    ExistingResult = &Entry->second;
+  NonLocalDepEntry *ExistingResult = 0;
+  if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+    ExistingResult = &*Entry;
   
   // If we have a cached entry, and it is non-dirty, use it as the value for
   // this dependency.
-  if (ExistingResult && !ExistingResult->isDirty()) {
+  if (ExistingResult && !ExistingResult->getResult().isDirty()) {
     ++NumCacheNonLocalPtr;
-    return *ExistingResult;
+    return ExistingResult->getResult();
   }    
   
   // Otherwise, we have to scan for the value.  If we have a dirty cache
   // entry, start scanning from its position, otherwise we scan from the end
   // of the block.
   BasicBlock::iterator ScanPos = BB->end();
-  if (ExistingResult && ExistingResult->getInst()) {
-    assert(ExistingResult->getInst()->getParent() == BB &&
+  if (ExistingResult && ExistingResult->getResult().getInst()) {
+    assert(ExistingResult->getResult().getInst()->getParent() == BB &&
            "Instruction invalidated?");
     ++NumCacheDirtyNonLocalPtr;
-    ScanPos = ExistingResult->getInst();
+    ScanPos = ExistingResult->getResult().getInst();
     
     // Eliminating the dirty entry from 'Cache', so update the reverse info.
     ValueIsLoadPair CacheKey(Pointer, isLoad);
@@ -652,9 +656,9 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
   // If we had a dirty entry for the block, update it.  Otherwise, just add
   // a new entry.
   if (ExistingResult)
-    *ExistingResult = Dep;
+    ExistingResult->setResult(Dep, Pointer);
   else
-    Cache->push_back(std::make_pair(BB, Dep));
+    Cache->push_back(NonLocalDepEntry(BB, Dep, Pointer));
   
   // If the block has a dependency (i.e. it isn't completely transparent to
   // the value), remember the reverse association because we just added it
@@ -683,7 +687,7 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
     break;
   case 2: {
     // Two new entries, insert the last one into place.
-    MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+    NonLocalDepEntry Val = Cache.back();
     Cache.pop_back();
     MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
       std::upper_bound(Cache.begin(), Cache.end()-1, Val);
@@ -693,7 +697,7 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
   case 1:
     // One new entry, Just insert the new value at the appropriate position.
     if (Cache.size() != 1) {
-      MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+      NonLocalDepEntry Val = Cache.back();
       Cache.pop_back();
       MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
         std::upper_bound(Cache.begin(), Cache.end(), Val);
@@ -707,275 +711,6 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
   }
 }
 
-/// isPHITranslatable - Return true if the specified computation is derived from
-/// a PHI node in the current block and if it is simple enough for us to handle.
-static bool isPHITranslatable(Instruction *Inst) {
-  if (isa<PHINode>(Inst))
-    return true;
-  
-  // We can handle bitcast of a PHI, but the PHI needs to be in the same block
-  // as the bitcast.
-  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
-    Instruction *OpI = dyn_cast<Instruction>(BC->getOperand(0));
-    if (OpI == 0 || OpI->getParent() != Inst->getParent())
-      return true;
-    return isPHITranslatable(OpI);
-  }
-  
-  // We can translate a GEP if all of its operands defined in this block are phi
-  // translatable. 
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
-    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
-      Instruction *OpI = dyn_cast<Instruction>(GEP->getOperand(i));
-      if (OpI == 0 || OpI->getParent() != Inst->getParent())
-        continue;
-      
-      if (!isPHITranslatable(OpI))
-        return false;
-    }
-    return true;
-  }
-  
-  if (Inst->getOpcode() == Instruction::Add &&
-      isa<ConstantInt>(Inst->getOperand(1))) {
-    Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
-    if (OpI == 0 || OpI->getParent() != Inst->getParent())
-      return true;
-    return isPHITranslatable(OpI);
-  }
-
-  //   cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
-  //   if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
-  //     cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
-  
-  return false;
-}
-
-/// GetPHITranslatedValue - Given a computation that satisfied the
-/// isPHITranslatable predicate, see if we can translate the computation into
-/// the specified predecessor block.  If so, return that value.
-Value *MemoryDependenceAnalysis::
-GetPHITranslatedValue(Value *InVal, BasicBlock *CurBB, BasicBlock *Pred,
-                      const TargetData *TD) const {  
-  // If the input value is not an instruction, or if it is not defined in CurBB,
-  // then we don't need to phi translate it.
-  Instruction *Inst = dyn_cast<Instruction>(InVal);
-  if (Inst == 0 || Inst->getParent() != CurBB)
-    return InVal;
-  
-  if (PHINode *PN = dyn_cast<PHINode>(Inst))
-    return PN->getIncomingValueForBlock(Pred);
-  
-  // Handle bitcast of PHI.
-  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
-    // PHI translate the input operand.
-    Value *PHIIn = GetPHITranslatedValue(BC->getOperand(0), CurBB, Pred, TD);
-    if (PHIIn == 0) return 0;
-    
-    // Constants are trivial to phi translate.
-    if (Constant *C = dyn_cast<Constant>(PHIIn))
-      return ConstantExpr::getBitCast(C, BC->getType());
-    
-    // Otherwise we have to see if a bitcasted version of the incoming pointer
-    // is available.  If so, we can use it, otherwise we have to fail.
-    for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
-         UI != E; ++UI) {
-      if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
-        if (BCI->getType() == BC->getType())
-          return BCI;
-    }
-    return 0;
-  }
-
-  // Handle getelementptr with at least one PHI translatable operand.
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
-    SmallVector<Value*, 8> GEPOps;
-    BasicBlock *CurBB = GEP->getParent();
-    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
-      Value *GEPOp = GEP->getOperand(i);
-      // No PHI translation is needed of operands whose values are live in to
-      // the predecessor block.
-      if (!isa<Instruction>(GEPOp) ||
-          cast<Instruction>(GEPOp)->getParent() != CurBB) {
-        GEPOps.push_back(GEPOp);
-        continue;
-      }
-      
-      // If the operand is a phi node, do phi translation.
-      Value *InOp = GetPHITranslatedValue(GEPOp, CurBB, Pred, TD);
-      if (InOp == 0) return 0;
-      
-      GEPOps.push_back(InOp);
-    }
-    
-    // Simplify the GEP to handle 'gep x, 0' -> x etc.
-    if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD))
-      return V;
-
-    // Scan to see if we have this GEP available.
-    Value *APHIOp = GEPOps[0];
-    for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
-         UI != E; ++UI) {
-      if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
-        if (GEPI->getType() == GEP->getType() &&
-            GEPI->getNumOperands() == GEPOps.size() &&
-            GEPI->getParent()->getParent() == CurBB->getParent()) {
-          bool Mismatch = false;
-          for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
-            if (GEPI->getOperand(i) != GEPOps[i]) {
-              Mismatch = true;
-              break;
-            }
-          if (!Mismatch)
-            return GEPI;
-        }
-    }
-    return 0;
-  }
-  
-  // Handle add with a constant RHS.
-  if (Inst->getOpcode() == Instruction::Add &&
-      isa<ConstantInt>(Inst->getOperand(1))) {
-    // PHI translate the LHS.
-    Value *LHS;
-    Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
-    Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
-    bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
-    bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
-    
-    if (OpI == 0 || OpI->getParent() != Inst->getParent())
-      LHS = Inst->getOperand(0);
-    else {
-      LHS = GetPHITranslatedValue(Inst->getOperand(0), CurBB, Pred, TD);
-      if (LHS == 0)
-        return 0;
-    }
-    
-    // If the PHI translated LHS is an add of a constant, fold the immediates.
-    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
-      if (BOp->getOpcode() == Instruction::Add)
-        if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
-          LHS = BOp->getOperand(0);
-          RHS = ConstantExpr::getAdd(RHS, CI);
-          isNSW = isNUW = false;
-        }
-    
-    // See if the add simplifies away.
-    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD))
-      return Res;
-    
-    // Otherwise, see if we have this add available somewhere.
-    for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
-         UI != E; ++UI) {
-      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
-        if (BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
-            BO->getParent()->getParent() == CurBB->getParent())
-          return BO;
-    }
-    
-    return 0;
-  }
-  
-  return 0;
-}
-
-/// GetAvailablePHITranslatePointer - Return the value computed by
-/// PHITranslatePointer if it dominates PredBB, otherwise return null.
-Value *MemoryDependenceAnalysis::
-GetAvailablePHITranslatedValue(Value *V,
-                               BasicBlock *CurBB, BasicBlock *PredBB,
-                               const TargetData *TD,
-                               const DominatorTree &DT) const {
-  // See if PHI translation succeeds.
-  V = GetPHITranslatedValue(V, CurBB, PredBB, TD);
-  if (V == 0) return 0;
-  
-  // Make sure the value is live in the predecessor.
-  if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
-    if (!DT.dominates(Inst->getParent(), PredBB))
-      return 0;
-  return V;
-}
-
-
-/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
-/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
-/// block.  All newly created instructions are added to the NewInsts list.
-///
-Value *MemoryDependenceAnalysis::
-InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB,
-                           BasicBlock *PredBB, const TargetData *TD,
-                           const DominatorTree &DT,
-                           SmallVectorImpl<Instruction*> &NewInsts) const {
-  // See if we have a version of this value already available and dominating
-  // PredBB.  If so, there is no need to insert a new copy.
-  if (Value *Res = GetAvailablePHITranslatedValue(InVal, CurBB, PredBB, TD, DT))
-    return Res;
-  
-  // If we don't have an available version of this value, it must be an
-  // instruction.
-  Instruction *Inst = cast<Instruction>(InVal);
-  
-  // Handle bitcast of PHI translatable value.
-  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
-    Value *OpVal = InsertPHITranslatedPointer(BC->getOperand(0),
-                                              CurBB, PredBB, TD, DT, NewInsts);
-    if (OpVal == 0) return 0;
-      
-    // Otherwise insert a bitcast at the end of PredBB.
-    BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
-                                       InVal->getName()+".phi.trans.insert",
-                                       PredBB->getTerminator());
-    NewInsts.push_back(New);
-    return New;
-  }
-  
-  // Handle getelementptr with at least one PHI operand.
-  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
-    SmallVector<Value*, 8> GEPOps;
-    BasicBlock *CurBB = GEP->getParent();
-    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
-      Value *OpVal = InsertPHITranslatedPointer(GEP->getOperand(i),
-                                                CurBB, PredBB, TD, DT, NewInsts);
-      if (OpVal == 0) return 0;
-      GEPOps.push_back(OpVal);
-    }
-    
-    GetElementPtrInst *Result = 
-      GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
-                                InVal->getName()+".phi.trans.insert",
-                                PredBB->getTerminator());
-    Result->setIsInBounds(GEP->isInBounds());
-    NewInsts.push_back(Result);
-    return Result;
-  }
-  
-#if 0
-  // FIXME: This code works, but it is unclear that we actually want to insert
-  // a big chain of computation in order to make a value available in a block.
-  // This needs to be evaluated carefully to consider its cost trade offs.
-  
-  // Handle add with a constant RHS.
-  if (Inst->getOpcode() == Instruction::Add &&
-      isa<ConstantInt>(Inst->getOperand(1))) {
-    // PHI translate the LHS.
-    Value *OpVal = InsertPHITranslatedPointer(Inst->getOperand(0),
-                                              CurBB, PredBB, TD, DT, NewInsts);
-    if (OpVal == 0) return 0;
-    
-    BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
-                                           InVal->getName()+".phi.trans.insert",
-                                                    PredBB->getTerminator());
-    Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
-    Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
-    NewInsts.push_back(Res);
-    return Res;
-  }
-#endif
-  
-  return 0;
-}
-
 /// getNonLocalPointerDepFromBB - Perform a dependency query based on
 /// pointer/pointeesize starting at the end of StartBB.  Add any clobber/def
 /// results to the results vector and keep track of which blocks are visited in
@@ -989,14 +724,14 @@ InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB,
 /// not compute dependence information for some reason.  This should be treated
 /// as a clobber dependence on the first instruction in the predecessor block.
 bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
                             bool isLoad, BasicBlock *StartBB,
                             SmallVectorImpl<NonLocalDepEntry> &Result,
                             DenseMap<BasicBlock*, Value*> &Visited,
                             bool SkipFirstBlock) {
   
   // Look up the cached info for Pointer.
-  ValueIsLoadPair CacheKey(Pointer, isLoad);
+  ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
   
   std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
     &NonLocalPointerDeps[CacheKey];
@@ -1013,8 +748,9 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
     if (!Visited.empty()) {
       for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
            I != E; ++I) {
-        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first);
-        if (VI == Visited.end() || VI->second == Pointer) continue;
+        DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+        if (VI == Visited.end() || VI->second == Pointer.getAddr())
+          continue;
         
         // We have a pointer mismatch in a block.  Just return clobber, saying
         // that something was clobbered in this result.  We could also do a
@@ -1025,8 +761,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
     
     for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
          I != E; ++I) {
-      Visited.insert(std::make_pair(I->first, Pointer));
-      if (!I->second.isNonLocal())
+      Visited.insert(std::make_pair(I->getBB(), Pointer.getAddr()));
+      if (!I->getResult().isNonLocal())
         Result.push_back(*I);
     }
     ++NumCacheCompleteNonLocalPtr;
@@ -1065,30 +801,27 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
       // Get the dependency info for Pointer in BB.  If we have cached
       // information, we will use it, otherwise we compute it.
       DEBUG(AssertSorted(*Cache, NumSortedEntries));
-      MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad,
-                                                 BB, Cache, NumSortedEntries);
+      MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize,
+                                                 isLoad, BB, Cache,
+                                                 NumSortedEntries);
       
       // If we got a Def or Clobber, add this to the list of results.
       if (!Dep.isNonLocal()) {
-        Result.push_back(NonLocalDepEntry(BB, Dep));
+        Result.push_back(NonLocalDepEntry(BB, Dep, Pointer.getAddr()));
         continue;
       }
     }
     
     // If 'Pointer' is an instruction defined in this block, then we need to do
     // phi translation to change it into a value live in the predecessor block.
-    // If phi translation fails, then we can't continue dependence analysis.
-    Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
-    bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;
-    
-    // If no PHI translation is needed, just add all the predecessors of this
-    // block to scan them as well.
-    if (!NeedsPHITranslation) {
+    // If not, we just add the predecessors to the worklist and scan them with
+    // the same Pointer.
+    if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
       SkipFirstBlock = false;
       for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
         // Verify that we haven't looked at this block yet.
         std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
-          InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
+          InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
         if (InsertRes.second) {
           // First time we've looked at *PI.
           Worklist.push_back(*PI);
@@ -1098,16 +831,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
         // If we have seen this block before, but it was with a different
         // pointer then we have a phi translation failure and we have to treat
         // this as a clobber.
-        if (InsertRes.first->second != Pointer)
+        if (InsertRes.first->second != Pointer.getAddr())
           goto PredTranslationFailure;
       }
       continue;
     }
     
-    // If we do need to do phi translation, then there are a bunch of different
-    // cases, because we have to find a Value* live in the predecessor block. We
-    // know that PtrInst is defined in this block at least.
-
+    // We do need to do phi translation, if we know ahead of time we can't phi
+    // translate this value, don't even try.
+    if (!Pointer.IsPotentiallyPHITranslatable())
+      goto PredTranslationFailure;
+    
     // We may have added values to the cache list before this PHI translation.
     // If so, we haven't done anything to ensure that the cache remains sorted.
     // Sort it now (if needed) so that recursive invocations of
@@ -1117,19 +851,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
       SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
       NumSortedEntries = Cache->size();
     }
-    
-    // If this is a computation derived from a PHI node, use the suitably
-    // translated incoming values for each pred as the phi translated version.
-    if (!isPHITranslatable(PtrInst))
-      goto PredTranslationFailure;
-
     Cache = 0;
-      
+    
     for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
       BasicBlock *Pred = *PI;
-      // Get the PHI translated pointer in this predecessor.  This can fail and
-      // return null if not translatable.
-      Value *PredPtr = GetPHITranslatedValue(PtrInst, BB, Pred, TD);
+      
+      // Get the PHI translated pointer in this predecessor.  This can fail if
+      // not translatable, in which case the getAddr() returns null.
+      PHITransAddr PredPointer(Pointer);
+      PredPointer.PHITranslateValue(BB, Pred);
+
+      Value *PredPtrVal = PredPointer.getAddr();
       
       // Check to see if we have already visited this pred block with another
       // pointer.  If so, we can't do this lookup.  This failure can occur
@@ -1137,12 +869,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
       // the successor translates to a pointer value different than the
       // pointer the block was first analyzed with.
       std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
-        InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
+        InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
 
       if (!InsertRes.second) {
         // If the predecessor was visited with PredPtr, then we already did
         // the analysis and can ignore it.
-        if (InsertRes.first->second == PredPtr)
+        if (InsertRes.first->second == PredPtrVal)
           continue;
         
         // Otherwise, the block was previously analyzed with a different
@@ -1155,10 +887,11 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
       // predecessor, then we have to assume that the pointer is clobbered in
       // that predecessor.  We can still do PRE of the load, which would insert
       // a computation of the pointer in this predecessor.
-      if (PredPtr == 0) {
+      if (PredPtrVal == 0) {
         // Add the entry to the Result list.
         NonLocalDepEntry Entry(Pred,
-                               MemDepResult::getClobber(Pred->getTerminator()));
+                               MemDepResult::getClobber(Pred->getTerminator()),
+                               PredPtrVal);
         Result.push_back(Entry);
 
         // Add it to the cache for this CacheKey so that subsequent queries get
@@ -1167,27 +900,27 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
         MemoryDependenceAnalysis::NonLocalDepInfo::iterator It =
           std::upper_bound(Cache->begin(), Cache->end(), Entry);
         
-        if (It != Cache->begin() && prior(It)->first == Pred)
+        if (It != Cache->begin() && (It-1)->getBB() == Pred)
           --It;
 
-        if (It == Cache->end() || It->first != Pred) {
+        if (It == Cache->end() || It->getBB() != Pred) {
           Cache->insert(It, Entry);
           // Add it to the reverse map.
           ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
-        } else if (!It->second.isDirty()) {
+        } else if (!It->getResult().isDirty()) {
           // noop
-        } else if (It->second.getInst() == Pred->getTerminator()) {
+        } else if (It->getResult().getInst() == Pred->getTerminator()) {
           // Same instruction, clear the dirty marker.
-          It->second = Entry.second;
-        } else if (It->second.getInst() == 0) {
+          It->setResult(Entry.getResult(), PredPtrVal);
+        } else if (It->getResult().getInst() == 0) {
           // Dirty, with no instruction, just add this.
-          It->second = Entry.second;
+          It->setResult(Entry.getResult(), PredPtrVal);
           ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
         } else {
           // Otherwise, dirty with a different instruction.
-          RemoveFromReverseMap(ReverseNonLocalPtrDeps, It->second.getInst(),
-                               CacheKey);
-          It->second = Entry.second;
+          RemoveFromReverseMap(ReverseNonLocalPtrDeps,
+                               It->getResult().getInst(), CacheKey);
+          It->setResult(Entry.getResult(),PredPtrVal);
           ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
         }
         Cache = 0;
@@ -1201,7 +934,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
       
       // If we have a problem phi translating, fall through to the code below
       // to handle the failure condition.
-      if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
+      if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred,
                                       Result, Visited))
         goto PredTranslationFailure;
     }
@@ -1245,12 +978,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
     
     for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
       assert(I != Cache->rend() && "Didn't find current block??");
-      if (I->first != BB)
+      if (I->getBB() != BB)
         continue;
       
-      assert(I->second.isNonLocal() &&
+      assert(I->getResult().isNonLocal() &&
              "Should only be here with transparent block");
-      I->second = MemDepResult::getClobber(BB->begin());
+      I->setResult(MemDepResult::getClobber(BB->begin()), Pointer.getAddr());
       ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
       Result.push_back(*I);
       break;
@@ -1276,9 +1009,9 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
   NonLocalDepInfo &PInfo = It->second.second;
   
   for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
-    Instruction *Target = PInfo[i].second.getInst();
+    Instruction *Target = PInfo[i].getResult().getInst();
     if (Target == 0) continue;  // Ignore non-local dep results.
-    assert(Target->getParent() == PInfo[i].first);
+    assert(Target->getParent() == PInfo[i].getBB());
     
     // Eliminating the dirty entry from 'Cache', so update the reverse info.
     RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
@@ -1315,7 +1048,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
     NonLocalDepInfo &BlockMap = NLDI->second.first;
     for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
          DI != DE; ++DI)
-      if (Instruction *Inst = DI->second.getInst())
+      if (Instruction *Inst = DI->getResult().getInst())
         RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
     NonLocalDeps.erase(NLDI);
   }
@@ -1403,10 +1136,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
       
       for (NonLocalDepInfo::iterator DI = INLD.first.begin(), 
            DE = INLD.first.end(); DI != DE; ++DI) {
-        if (DI->second.getInst() != RemInst) continue;
+        if (DI->getResult().getInst() != RemInst) continue;
         
         // Convert to a dirty entry for the subsequent instruction.
-        DI->second = NewDirtyVal;
+        DI->setResult(NewDirtyVal, DI->getAddress());
         
         if (Instruction *NextI = NewDirtyVal.getInst())
           ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
@@ -1445,10 +1178,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
       // Update any entries for RemInst to use the instruction after it.
       for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
            DI != DE; ++DI) {
-        if (DI->second.getInst() != RemInst) continue;
+        if (DI->getResult().getInst() != RemInst) continue;
         
         // Convert to a dirty entry for the subsequent instruction.
-        DI->second = NewDirtyVal;
+        DI->setResult(NewDirtyVal, DI->getAddress());
         
         if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
           ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
@@ -1489,7 +1222,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
     const NonLocalDepInfo &Val = I->second.second;
     for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
          II != E; ++II)
-      assert(II->second.getInst() != D && "Inst occurs as NLPD value");
+      assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
   }
   
   for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
@@ -1498,7 +1231,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
     const PerInstNLInfo &INLD = I->second;
     for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
          EE = INLD.first.end(); II  != EE; ++II)
-      assert(II->second.getInst() != D && "Inst occurs in data structures");
+      assert(II->getResult().getInst() != D && "Inst occurs in data structures");
   }
   
   for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
new file mode 100644
index 0000000..07e2919
--- /dev/null
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -0,0 +1,432 @@
+//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PHITransAddr class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CanPHITrans(Instruction *Inst) {
+  if (isa<PHINode>(Inst) ||
+      isa<BitCastInst>(Inst) ||
+      isa<GetElementPtrInst>(Inst))
+    return true;
+  
+  if (Inst->getOpcode() == Instruction::Add &&
+      isa<ConstantInt>(Inst->getOperand(1)))
+    return true;
+  
+  //   cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+  //   if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+  //     cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+  return false;
+}
+
+void PHITransAddr::dump() const {
+  if (Addr == 0) {
+    errs() << "PHITransAddr: null\n";
+    return;
+  }
+  errs() << "PHITransAddr: " << *Addr << "\n";
+  for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+    errs() << "  Input #" << i << " is " << *InstInputs[i] << "\n";
+}
+
+
+static bool VerifySubExpr(Value *Expr,
+                          SmallVectorImpl<Instruction*> &InstInputs) {
+  // If this is a non-instruction value, there is nothing to do.
+  Instruction *I = dyn_cast<Instruction>(Expr);
+  if (I == 0) return true;
+  
+  // If it's an instruction, it is either in Tmp or its operands recursively
+  // are.
+  SmallVectorImpl<Instruction*>::iterator Entry =
+    std::find(InstInputs.begin(), InstInputs.end(), I);
+  if (Entry != InstInputs.end()) {
+    InstInputs.erase(Entry);
+    return true;
+  }
+  
+  // If it isn't in the InstInputs list it is a subexpr incorporated into the
+  // address.  Sanity check that it is phi translatable.
+  if (!CanPHITrans(I)) {
+    errs() << "Non phi translatable instruction found in PHITransAddr, either "
+              "something is missing from InstInputs or CanPHITrans is wrong:\n";
+    errs() << *I << '\n';
+    return false;
+  }
+  
+  // Validate the operands of the instruction.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+    if (!VerifySubExpr(I->getOperand(i), InstInputs))
+      return false;
+
+  return true;
+}
+
+/// Verify - Check internal consistency of this data structure.  If the
+/// structure is valid, it returns true.  If invalid, it prints errors and
+/// returns false.
+bool PHITransAddr::Verify() const {
+  if (Addr == 0) return true;
+  
+  SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());  
+  
+  if (!VerifySubExpr(Addr, Tmp))
+    return false;
+  
+  if (!Tmp.empty()) {
+    errs() << "PHITransAddr inconsistent, contains extra instructions:\n";
+    for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+      errs() << "  InstInput #" << i << " is " << *InstInputs[i] << "\n";
+    return false;
+  }
+  
+  // a-ok.
+  return true;
+}
+
+
+/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+/// if we have some hope of doing it.  This should be used as a filter to
+/// avoid calling PHITranslateValue in hopeless situations.
+bool PHITransAddr::IsPotentiallyPHITranslatable() const {
+  // If the input value is not an instruction, or if it is not defined in CurBB,
+  // then we don't need to phi translate it.
+  Instruction *Inst = dyn_cast<Instruction>(Addr);
+  return Inst == 0 || CanPHITrans(Inst);
+}
+
+
+static void RemoveInstInputs(Value *V, 
+                             SmallVectorImpl<Instruction*> &InstInputs) {
+  Instruction *I = dyn_cast<Instruction>(V);
+  if (I == 0) return;
+  
+  // If the instruction is in the InstInputs list, remove it.
+  SmallVectorImpl<Instruction*>::iterator Entry =
+    std::find(InstInputs.begin(), InstInputs.end(), I);
+  if (Entry != InstInputs.end()) {
+    InstInputs.erase(Entry);
+    return;
+  }
+  
+  assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+  
+  // Otherwise, it must have instruction inputs itself.  Zap them recursively.
+  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+    if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+      RemoveInstInputs(Op, InstInputs);
+  }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+                                         BasicBlock *PredBB) {
+  // If this is a non-instruction value, it can't require PHI translation.
+  Instruction *Inst = dyn_cast<Instruction>(V);
+  if (Inst == 0) return V;
+  
+  // Determine whether 'Inst' is an input to our PHI translatable expression.
+  bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+
+  // Handle inputs instructions if needed.
+  if (isInput) {
+    if (Inst->getParent() != CurBB) {
+      // If it is an input defined in a different block, then it remains an
+      // input.
+      return Inst;
+    }
+
+    // If 'Inst' is defined in this block and is an input that needs to be phi
+    // translated, we need to incorporate the value into the expression or fail.
+
+    // In either case, the instruction itself isn't an input any longer.
+    InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+    
+    // If this is a PHI, go ahead and translate it.
+    if (PHINode *PN = dyn_cast<PHINode>(Inst))
+      return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+    
+    // If this is a non-phi value, and it is analyzable, we can incorporate it
+    // into the expression by making all instruction operands be inputs.
+    if (!CanPHITrans(Inst))
+      return 0;
+   
+    // All instruction operands are now inputs (and of course, they may also be
+    // defined in this block, so they may need to be phi translated themselves.
+    for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+      if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+        InstInputs.push_back(Op);
+  }
+
+  // Ok, it must be an intermediate result (either because it started that way
+  // or because we just incorporated it into the expression).  See if its
+  // operands need to be phi translated, and if so, reconstruct it.
+  
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+    Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB);
+    if (PHIIn == 0) return 0;
+    if (PHIIn == BC->getOperand(0))
+      return BC;
+    
+    // Find an available version of this cast.
+    
+    // Constants are trivial to find.
+    if (Constant *C = dyn_cast<Constant>(PHIIn))
+      return AddAsInput(ConstantExpr::getBitCast(C, BC->getType()));
+    
+    // Otherwise we have to see if a bitcasted version of the incoming pointer
+    // is available.  If so, we can use it, otherwise we have to fail.
+    for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+         UI != E; ++UI) {
+      if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
+        if (BCI->getType() == BC->getType())
+          return BCI;
+    }
+    return 0;
+  }
+  
+  // Handle getelementptr with at least one PHI translatable operand.
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+    SmallVector<Value*, 8> GEPOps;
+    bool AnyChanged = false;
+    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+      Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB);
+      if (GEPOp == 0) return 0;
+      
+      AnyChanged |= GEPOp != GEP->getOperand(i);
+      GEPOps.push_back(GEPOp);
+    }
+    
+    if (!AnyChanged)
+      return GEP;
+    
+    // Simplify the GEP to handle 'gep x, 0' -> x etc.
+    if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) {
+      for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+        RemoveInstInputs(GEPOps[i], InstInputs);
+      
+      return AddAsInput(V);
+    }
+    
+    // Scan to see if we have this GEP available.
+    Value *APHIOp = GEPOps[0];
+    for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
+         UI != E; ++UI) {
+      if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
+        if (GEPI->getType() == GEP->getType() &&
+            GEPI->getNumOperands() == GEPOps.size() &&
+            GEPI->getParent()->getParent() == CurBB->getParent()) {
+          bool Mismatch = false;
+          for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+            if (GEPI->getOperand(i) != GEPOps[i]) {
+              Mismatch = true;
+              break;
+            }
+          if (!Mismatch)
+            return GEPI;
+        }
+    }
+    return 0;
+  }
+  
+  // Handle add with a constant RHS.
+  if (Inst->getOpcode() == Instruction::Add &&
+      isa<ConstantInt>(Inst->getOperand(1))) {
+    // PHI translate the LHS.
+    Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
+    bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
+    bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
+    
+    Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB);
+    if (LHS == 0) return 0;
+    
+    // If the PHI translated LHS is an add of a constant, fold the immediates.
+    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
+      if (BOp->getOpcode() == Instruction::Add)
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+          LHS = BOp->getOperand(0);
+          RHS = ConstantExpr::getAdd(RHS, CI);
+          isNSW = isNUW = false;
+          
+          // If the old 'LHS' was an input, add the new 'LHS' as an input.
+          if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
+            RemoveInstInputs(BOp, InstInputs);
+            AddAsInput(LHS);
+          }
+        }
+    
+    // See if the add simplifies away.
+    if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) {
+      // If we simplified the operands, the LHS is no longer an input, but Res
+      // is.
+      RemoveInstInputs(LHS, InstInputs);
+      return AddAsInput(Res);
+    }
+
+    // If we didn't modify the add, just return it.
+    if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
+      return Inst;
+    
+    // Otherwise, see if we have this add available somewhere.
+    for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
+         UI != E; ++UI) {
+      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
+        if (BO->getOpcode() == Instruction::Add &&
+            BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
+            BO->getParent()->getParent() == CurBB->getParent())
+          return BO;
+    }
+    
+    return 0;
+  }
+  
+  // Otherwise, we failed.
+  return 0;
+}
+
+
+/// PHITranslateValue - PHI translate the current address up the CFG from
+/// CurBB to Pred, updating our state the reflect any needed changes.  This
+/// returns true on failure and sets Addr to null.
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB) {
+  assert(Verify() && "Invalid PHITransAddr!");
+  Addr = PHITranslateSubExpr(Addr, CurBB, PredBB);
+  assert(Verify() && "Invalid PHITransAddr!");
+  return Addr == 0;
+}
+
+/// GetAvailablePHITranslatedSubExpr - Return the value computed by
+/// PHITranslateSubExpr if it dominates PredBB, otherwise return null.
+Value *PHITransAddr::
+GetAvailablePHITranslatedSubExpr(Value *V, BasicBlock *CurBB,BasicBlock *PredBB,
+                                 const DominatorTree &DT) const {
+  PHITransAddr Tmp(V, TD);
+  Tmp.PHITranslateValue(CurBB, PredBB);
+  
+  // See if PHI translation succeeds.
+  V = Tmp.getAddr();
+  
+  // Make sure the value is live in the predecessor.
+  if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
+    if (!DT.dominates(Inst->getParent(), PredBB))
+      return 0;
+  return V;
+}
+
+
+/// PHITranslateWithInsertion - PHI translate this value into the specified
+/// predecessor block, inserting a computation of the value if it is
+/// unavailable.
+///
+/// All newly created instructions are added to the NewInsts list.  This
+/// returns null on failure.
+///
+Value *PHITransAddr::
+PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+                          const DominatorTree &DT,
+                          SmallVectorImpl<Instruction*> &NewInsts) {
+  unsigned NISize = NewInsts.size();
+  
+  // Attempt to PHI translate with insertion.
+  Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+  
+  // If successful, return the new value.
+  if (Addr) return Addr;
+  
+  // If not, destroy any intermediate instructions inserted.
+  while (NewInsts.size() != NISize)
+    NewInsts.pop_back_val()->eraseFromParent();
+  return 0;
+}
+
+
+/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
+/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+/// block.  All newly created instructions are added to the NewInsts list.
+/// This returns null on failure.
+///
+Value *PHITransAddr::
+InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+                           BasicBlock *PredBB, const DominatorTree &DT,
+                           SmallVectorImpl<Instruction*> &NewInsts) {
+  // See if we have a version of this value already available and dominating
+  // PredBB.  If so, there is no need to insert a new instance of it.
+  if (Value *Res = GetAvailablePHITranslatedSubExpr(InVal, CurBB, PredBB, DT))
+    return Res;
+
+  // If we don't have an available version of this value, it must be an
+  // instruction.
+  Instruction *Inst = cast<Instruction>(InVal);
+  
+  // Handle bitcast of PHI translatable value.
+  if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+    Value *OpVal = InsertPHITranslatedSubExpr(BC->getOperand(0),
+                                              CurBB, PredBB, DT, NewInsts);
+    if (OpVal == 0) return 0;
+    
+    // Otherwise insert a bitcast at the end of PredBB.
+    BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
+                                       InVal->getName()+".phi.trans.insert",
+                                       PredBB->getTerminator());
+    NewInsts.push_back(New);
+    return New;
+  }
+  
+  // Handle getelementptr with at least one PHI operand.
+  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+    SmallVector<Value*, 8> GEPOps;
+    BasicBlock *CurBB = GEP->getParent();
+    for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+      Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
+                                                CurBB, PredBB, DT, NewInsts);
+      if (OpVal == 0) return 0;
+      GEPOps.push_back(OpVal);
+    }
+    
+    GetElementPtrInst *Result = 
+    GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
+                              InVal->getName()+".phi.trans.insert",
+                              PredBB->getTerminator());
+    Result->setIsInBounds(GEP->isInBounds());
+    NewInsts.push_back(Result);
+    return Result;
+  }
+  
+#if 0
+  // FIXME: This code works, but it is unclear that we actually want to insert
+  // a big chain of computation in order to make a value available in a block.
+  // This needs to be evaluated carefully to consider its cost trade offs.
+  
+  // Handle add with a constant RHS.
+  if (Inst->getOpcode() == Instruction::Add &&
+      isa<ConstantInt>(Inst->getOperand(1))) {
+    // PHI translate the LHS.
+    Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
+                                              CurBB, PredBB, DT, NewInsts);
+    if (OpVal == 0) return 0;
+    
+    BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
+                                           InVal->getName()+".phi.trans.insert",
+                                                    PredBB->getTerminator());
+    Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
+    Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
+    NewInsts.push_back(Res);
+    return Res;
+  }
+#endif
+  
+  return 0;
+}
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index e767891..8148429 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -35,6 +35,7 @@ namespace {
     LoopInfo *LI;
     std::set<BasicBlock*>  BBToVisit;
     std::map<Loop*,double> LoopExitWeights;
+    std::map<Edge,double>  MinimalWeight;
   public:
     static char ID; // Class identification, replacement for typeinfo
     explicit ProfileEstimatorPass(const double execcount = 0)
@@ -91,7 +92,7 @@ static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
 
 void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
   DEBUG(errs() << "-- Weight of Edge " << E << ":"
-               << format("%g", getEdgeWeight(E)) << "\n");
+               << format("%20.20g", getEdgeWeight(E)) << "\n");
 }
 
 // recurseBasicBlock() - This calculates the ProfileInfo estimation for a
@@ -174,6 +175,12 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
         double w = getEdgeWeight(*ei);
         if (w == MissingValue) {
           Edges.push_back(*ei);
+          // Check if there is a necessary minimal weight, if yes, subtract it 
+          // from weight.
+          if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+            incoming -= MinimalWeight[*ei];
+            DEBUG(errs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+          }
         } else {
           incoming -= w;
         }
@@ -191,11 +198,43 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
         printEdgeWeight(edge);
       }
     }
-    // Distribute remaining weight onto the exit edges.
+
+    // Distribute remaining weight to the exting edges. To prevent fractions
+    // from building up and provoking precision problems the weight which is to
+    // be distributed is split and the rounded, the last edge gets a somewhat
+    // bigger value, but we are close enough for an estimation.
+    double fraction = floor(incoming/Edges.size());
     for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
          ei != ee; ++ei) {
-      EdgeInformation[BB->getParent()][*ei] += incoming/Edges.size();
+      double w = 0;
+      if (ei != (ee-1)) {
+        w = fraction;
+        incoming -= fraction;
+      } else {
+        w = incoming;
+      }
+      EdgeInformation[BB->getParent()][*ei] += w;
+      // Read necessary minimal weight.
+      if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+        EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+        DEBUG(errs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+      }
       printEdgeWeight(*ei);
+      
+      // Add minimal weight to paths to all exit edges, this is used to ensure
+      // that enough flow is reaching this edges.
+      Path p;
+      const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
+      while (Dest != BB) {
+        const BasicBlock *Parent = p.find(Dest)->second;
+        Edge e = getEdge(Parent, Dest);
+        if (MinimalWeight.find(e) == MinimalWeight.end()) {
+          MinimalWeight[e] = 0;
+        }
+        MinimalWeight[e] += w;
+        DEBUG(errs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
+        Dest = Parent;
+      }
     }
     // Increase flow into the loop.
     BBWeight *= (ExecCount+1);
@@ -203,7 +242,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
 
   BlockInformation[BB->getParent()][BB] = BBWeight;
   // Up until now we considered only the loop exiting edges, now we have a
-  // definite block weight and must ditribute this onto the outgoing edges.
+  // definite block weight and must distribute this onto the outgoing edges.
   // Since there may be already flow attached to some of the edges, read this
   // flow first and remember the edges that have still now flow attached.
   Edges.clear();
@@ -225,15 +264,32 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
         BBWeight -= getEdgeWeight(edge);
       } else {
         Edges.push_back(edge);
+        // If minimal weight is necessary, reserve weight by subtracting weight
+        // from block weight, this is readded later on.
+        if (MinimalWeight.find(edge) != MinimalWeight.end()) {
+          BBWeight -= MinimalWeight[edge];
+          DEBUG(errs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
+        }
       }
     }
   }
 
+  double fraction = floor(BBWeight/Edges.size());
   // Finally we know what flow is still not leaving the block, distribute this
   // flow onto the empty edges.
   for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
        ei != ee; ++ei) {
-    EdgeInformation[BB->getParent()][*ei] += BBWeight/Edges.size();
+    if (ei != (ee-1)) {
+      EdgeInformation[BB->getParent()][*ei] += fraction;
+      BBWeight -= fraction;
+    } else {
+      EdgeInformation[BB->getParent()][*ei] += BBWeight;
+    }
+    // Readd minial necessary weight.
+    if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+      EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+      DEBUG(errs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+    }
     printEdgeWeight(*ei);
   }
 
@@ -260,20 +316,24 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
   for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
     BBToVisit.insert(bi);
 
+  // Clear Minimal Edges.
+  MinimalWeight.clear();
+
   DEBUG(errs() << "Working on function " << F.getNameStr() << "\n");
 
   // Since the entry block is the first one and has no predecessors, the edge
   // (0,entry) is inserted with the starting weight of 1.
   BasicBlock *entry = &F.getEntryBlock();
-  BlockInformation[&F][entry] = 1;
+  BlockInformation[&F][entry] = pow(2.0, 32.0);
   Edge edge = getEdge(0,entry);
-  EdgeInformation[&F][edge] = 1;
+  EdgeInformation[&F][edge] = BlockInformation[&F][entry];
   printEdgeWeight(edge);
 
   // Since recurseBasicBlock() maybe returns with a block which was not fully
-  // estimated, use recurseBasicBlock() until everything is calculated. 
+  // estimated, use recurseBasicBlock() until everything is calculated.
+  bool cleanup = false;
   recurseBasicBlock(entry);
-  while (BBToVisit.size() > 0) {
+  while (BBToVisit.size() > 0 && !cleanup) {
     // Remember number of open blocks, this is later used to check if progress
     // was made.
     unsigned size = BBToVisit.size();
@@ -287,21 +347,65 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
       if (BBToVisit.size() < size) break;
     }
 
-    // If there was not a single block resovled, make some assumptions.
+    // If there was not a single block resolved, make some assumptions.
     if (BBToVisit.size() == size) {
-      BasicBlock *BB = *(BBToVisit.begin());
-      // Since this BB was not calculated because of missing incoming edges,
-      // set these edges to zero.
-      for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
-           bbi != bbe; ++bbi) {
-        Edge e = getEdge(*bbi,BB);
-        double w = getEdgeWeight(e);
-        if (w == MissingValue) {
-          EdgeInformation[&F][e] = 0;
-          DEBUG(errs() << "Assuming edge weight: ");
-          printEdgeWeight(e);
+      bool found = false;
+      for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end(); 
+           (BBI != BBE) && (!found); ++BBI) {
+        BasicBlock *BB = *BBI;
+        // Try each predecessor if it can be assumend.
+        for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+             (bbi != bbe) && (!found); ++bbi) {
+          Edge e = getEdge(*bbi,BB);
+          double w = getEdgeWeight(e);
+          // Check that edge from predecessor is still free.
+          if (w == MissingValue) {
+            // Check if there is a circle from this block to predecessor.
+            Path P;
+            const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
+            if (Dest != *bbi) {
+              // If there is no circle, just set edge weight to 0
+              EdgeInformation[&F][e] = 0;
+              DEBUG(errs() << "Assuming edge weight: ");
+              printEdgeWeight(e);
+              found = true;
+            }
+          }
         }
       }
+      if (!found) {
+        cleanup = true;
+        DEBUG(errs() << "No assumption possible in Fuction "<<F.getName()<<", setting all to zero\n");
+      }
+    }
+  }
+  // In case there was no safe way to assume edges, set as a last measure, 
+  // set _everything_ to zero.
+  if (cleanup) {
+    FunctionInformation[&F] = 0;
+    BlockInformation[&F].clear();
+    EdgeInformation[&F].clear();
+    for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+      const BasicBlock *BB = &(*FI);
+      BlockInformation[&F][BB] = 0;
+      pred_const_iterator predi = pred_begin(BB), prede = pred_end(BB);
+      if (predi == prede) {
+        Edge e = getEdge(0,BB);
+        setEdgeWeight(e,0);
+      }
+      for (;predi != prede; ++predi) {
+        Edge e = getEdge(*predi,BB);
+        setEdgeWeight(e,0);
+      }
+      succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
+      if (succi == succe) {
+        Edge e = getEdge(BB,0);
+        setEdgeWeight(e,0);
+      }
+      for (;succi != succe; ++succi) {
+        Edge e = getEdge(*succi,BB);
+        setEdgeWeight(e,0);
+      }
     }
   }
 
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 7f24f5a..c49c6e1 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -11,26 +11,52 @@
 // "no profile" implementation.
 //
 //===----------------------------------------------------------------------===//
-
+#define DEBUG_TYPE "profile-info"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
+#include "llvm/ADT/SmallSet.h"
 #include <set>
+#include <queue>
+#include <limits>
 using namespace llvm;
 
 // Register the ProfileInfo interface, providing a nice name to refer to.
 static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information");
-char ProfileInfo::ID = 0;
 
-ProfileInfo::~ProfileInfo() {}
+namespace llvm {
+
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
+
+template <>
+ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
+  MachineProfile = 0;
+}
+template <>
+ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
+  if (MachineProfile) delete MachineProfile;
+}
+
+template<>
+char ProfileInfoT<Function,BasicBlock>::ID = 0;
+
+template<>
+char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
+
+template<>
+const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
 
-const double ProfileInfo::MissingValue = -1;
+template<> const
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
 
-double ProfileInfo::getExecutionCount(const BasicBlock *BB) {
+template<> double
+ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
   std::map<const Function*, BlockCounts>::iterator J =
     BlockInformation.find(BB->getParent());
   if (J != BlockInformation.end()) {
@@ -39,36 +65,74 @@ double ProfileInfo::getExecutionCount(const BasicBlock *BB) {
       return I->second;
   }
 
+  double Count = MissingValue;
+
   pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB);
 
   // Are there zero predecessors of this block?
   if (PI == PE) {
-    // If this is the entry block, look for the Null -> Entry edge.
-    if (BB == &BB->getParent()->getEntryBlock())
-      return getEdgeWeight(getEdge(0, BB));
-    else
-      return 0;   // Otherwise, this is a dead block.
+    Edge e = getEdge(0,BB);
+    Count = getEdgeWeight(e);
+  } else {
+    // Otherwise, if there are predecessors, the execution count of this block is
+    // the sum of the edge frequencies from the incoming edges.
+    std::set<const BasicBlock*> ProcessedPreds;
+    Count = 0;
+    for (; PI != PE; ++PI)
+      if (ProcessedPreds.insert(*PI).second) {
+        double w = getEdgeWeight(getEdge(*PI, BB));
+        if (w == MissingValue) {
+          Count = MissingValue;
+          break;
+        }
+        Count += w;
+      }
   }
 
-  // Otherwise, if there are predecessors, the execution count of this block is
-  // the sum of the edge frequencies from the incoming edges.
-  std::set<const BasicBlock*> ProcessedPreds;
-  double Count = 0;
-  for (; PI != PE; ++PI)
-    if (ProcessedPreds.insert(*PI).second) {
-      double w = getEdgeWeight(getEdge(*PI, BB));
-      if (w == MissingValue) {
-        Count = MissingValue;
-        break;
-      }
-      Count += w;
+  // If the predecessors did not suffice to get block weight, try successors.
+  if (Count == MissingValue) {
+
+    succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+
+    // Are there zero successors of this block?
+    if (SI == SE) {
+      Edge e = getEdge(BB,0);
+      Count = getEdgeWeight(e);
+    } else {
+      std::set<const BasicBlock*> ProcessedSuccs;
+      Count = 0;
+      for (; SI != SE; ++SI)
+        if (ProcessedSuccs.insert(*SI).second) {
+          double w = getEdgeWeight(getEdge(BB, *SI));
+          if (w == MissingValue) {
+            Count = MissingValue;
+            break;
+          }
+          Count += w;
+        }
     }
+  }
 
   if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
   return Count;
 }
 
-double ProfileInfo::getExecutionCount(const Function *F) {
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+        getExecutionCount(const MachineBasicBlock *MBB) {
+  std::map<const MachineFunction*, BlockCounts>::iterator J =
+    BlockInformation.find(MBB->getParent());
+  if (J != BlockInformation.end()) {
+    BlockCounts::iterator I = J->second.find(MBB);
+    if (I != J->second.end())
+      return I->second;
+  }
+
+  return MissingValue;
+}
+
+template<>
+double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
   std::map<const Function*, double>::iterator J =
     FunctionInformation.find(F);
   if (J != FunctionInformation.end())
@@ -83,35 +147,211 @@ double ProfileInfo::getExecutionCount(const Function *F) {
   return Count;
 }
 
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+        getExecutionCount(const MachineFunction *MF) {
+  std::map<const MachineFunction*, double>::iterator J =
+    FunctionInformation.find(MF);
+  if (J != FunctionInformation.end())
+    return J->second;
+
+  double Count = getExecutionCount(&MF->front());
+  if (Count != MissingValue) FunctionInformation[MF] = Count;
+  return Count;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+        setExecutionCount(const BasicBlock *BB, double w) {
+  DEBUG(errs() << "Creating Block " << BB->getName() 
+               << " (weight: " << format("%.20g",w) << ")\n");
+  BlockInformation[BB->getParent()][BB] = w;
+}
+
+template<>
+void ProfileInfoT<MachineFunction, MachineBasicBlock>::
+        setExecutionCount(const MachineBasicBlock *MBB, double w) {
+  DEBUG(errs() << "Creating Block " << MBB->getBasicBlock()->getName()
+               << " (weight: " << format("%.20g",w) << ")\n");
+  BlockInformation[MBB->getParent()][MBB] = w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
+  double oldw = getEdgeWeight(e);
+  assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
+  DEBUG(errs() << "Adding to Edge " << e
+               << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+  EdgeInformation[getFunction(e)][e] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+        addExecutionCount(const BasicBlock *BB, double w) {
+  double oldw = getExecutionCount(BB);
+  assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
+  DEBUG(errs() << "Adding to Block " << BB->getName()
+               << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+  BlockInformation[BB->getParent()][BB] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
+  std::map<const Function*, BlockCounts>::iterator J =
+    BlockInformation.find(BB->getParent());
+  if (J == BlockInformation.end()) return;
+
+  DEBUG(errs() << "Deleting " << BB->getName() << "\n");
+  J->second.erase(BB);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(getFunction(e));
+  if (J == EdgeInformation.end()) return;
+
+  DEBUG(errs() << "Deleting" << e << "\n");
+  J->second.erase(e);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+        replaceEdge(const Edge &oldedge, const Edge &newedge) {
+  double w;
+  if ((w = getEdgeWeight(newedge)) == MissingValue) {
+    w = getEdgeWeight(oldedge);
+    DEBUG(errs() << "Replacing " << oldedge << " with " << newedge  << "\n");
+  } else {
+    w += getEdgeWeight(oldedge);
+    DEBUG(errs() << "Adding " << oldedge << " to " << newedge  << "\n");
+  }
+  setEdgeWeight(newedge,w);
+  removeEdge(oldedge);
+}
+
+template<>
+const BasicBlock *ProfileInfoT<Function,BasicBlock>::
+        GetPath(const BasicBlock *Src, const BasicBlock *Dest,
+                Path &P, unsigned Mode) {
+  const BasicBlock *BB = 0;
+  bool hasFoundPath = false;
+
+  std::queue<const BasicBlock *> BFS;
+  BFS.push(Src);
+
+  while(BFS.size() && !hasFoundPath) {
+    BB = BFS.front();
+    BFS.pop();
+
+    succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+    if (Succ == End) {
+      P[0] = BB;
+      if (Mode & GetPathToExit) {
+        hasFoundPath = true;
+        BB = 0;
+      }
+    }
+    for(;Succ != End; ++Succ) {
+      if (P.find(*Succ) != P.end()) continue;
+      Edge e = getEdge(BB,*Succ);
+      if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
+      P[*Succ] = BB;
+      BFS.push(*Succ);
+      if ((Mode & GetPathToDest) && *Succ == Dest) {
+        hasFoundPath = true;
+        BB = *Succ;
+        break;
+      }
+      if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
+        hasFoundPath = true;
+        BB = *Succ;
+        break;
+      }
+    }
+  }
+
+  return BB;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+        divertFlow(const Edge &oldedge, const Edge &newedge) {
+  DEBUG(errs() << "Diverting " << oldedge << " via " << newedge );
+
+  // First check if the old edge was taken, if not, just delete it...
+  if (getEdgeWeight(oldedge) == 0) {
+    removeEdge(oldedge);
+    return;
+  }
+
+  Path P;
+  P[newedge.first] = 0;
+  P[newedge.second] = newedge.first;
+  const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
+
+  double w = getEdgeWeight (oldedge);
+  DEBUG(errs() << ", Weight: " << format("%.20g",w) << "\n");
+  do {
+    const BasicBlock *Parent = P.find(BB)->second;
+    Edge e = getEdge(Parent,BB);
+    double oldw = getEdgeWeight(e);
+    double oldc = getExecutionCount(e.first);
+    setEdgeWeight(e, w+oldw);
+    if (Parent != oldedge.first) {
+      setExecutionCount(e.first, w+oldc);
+    }
+    BB = Parent;
+  } while (BB != newedge.first);
+  removeEdge(oldedge);
+}
+
 /// Replaces all occurences of RmBB in the ProfilingInfo with DestBB.
 /// This checks all edges of the function the blocks reside in and replaces the
 /// occurences of RmBB with DestBB.
-void ProfileInfo::replaceAllUses(const BasicBlock *RmBB, 
-                                 const BasicBlock *DestBB) {
-  DEBUG(errs() << "Replacing " << RmBB->getNameStr()
-               << " with " << DestBB->getNameStr() << "\n");
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+        replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
+  DEBUG(errs() << "Replacing " << RmBB->getName()
+               << " with " << DestBB->getName() << "\n");
   const Function *F = DestBB->getParent();
   std::map<const Function*, EdgeWeights>::iterator J =
     EdgeInformation.find(F);
   if (J == EdgeInformation.end()) return;
 
-  for (EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
-       I != E; ++I) {
-    Edge e = I->first;
-    Edge newedge; bool foundedge = false;
+  Edge e, newedge;
+  bool erasededge = false;
+  EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+  while(I != E) {
+    e = (I++)->first;
+    bool foundedge = false; bool eraseedge = false;
     if (e.first == RmBB) {
-      newedge = getEdge(DestBB, e.second);
-      foundedge = true;
+      if (e.second == DestBB) {
+        eraseedge = true;
+      } else {
+        newedge = getEdge(DestBB, e.second);
+        foundedge = true;
+      }
     }
     if (e.second == RmBB) {
-      newedge = getEdge(e.first, DestBB);
-      foundedge = true;
+      if (e.first == DestBB) {
+        eraseedge = true;
+      } else {
+        newedge = getEdge(e.first, DestBB);
+        foundedge = true;
+      }
     }
     if (foundedge) {
-      double w = getEdgeWeight(e);
-      EdgeInformation[F][newedge] = w;
-      DEBUG(errs() << "Replacing " << e << " with " << newedge  << "\n");
-      J->second.erase(e);
+      replaceEdge(e, newedge);
+    }
+    if (eraseedge) {
+      if (erasededge) {
+        Edge newedge = getEdge(DestBB, DestBB);
+        replaceEdge(e, newedge);
+      } else {
+        removeEdge(e);
+        erasededge = true;
+      }
     }
   }
 }
@@ -119,10 +359,11 @@ void ProfileInfo::replaceAllUses(const BasicBlock *RmBB,
 /// Splits an edge in the ProfileInfo and redirects flow over NewBB.
 /// Since its possible that there is more than one edge in the CFG from FristBB
 /// to SecondBB its necessary to redirect the flow proporionally.
-void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
-                            const BasicBlock *SecondBB,
-                            const BasicBlock *NewBB,
-                            bool MergeIdenticalEdges) {
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
+                                                  const BasicBlock *SecondBB,
+                                                  const BasicBlock *NewBB,
+                                                  bool MergeIdenticalEdges) {
   const Function *F = FirstBB->getParent();
   std::map<const Function*, EdgeWeights>::iterator J =
     EdgeInformation.find(F);
@@ -153,7 +394,7 @@ void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
 
   // We know now how many edges there are from FirstBB to SecondBB, reroute a
   // proportional part of the edge weight over NewBB.
-  double neww = w / succ_count;
+  double neww = floor(w / succ_count);
   ECs[n1] += neww;
   ECs[n2] += neww;
   BlockInformation[F][NewBB] += neww;
@@ -164,14 +405,672 @@ void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
   }
 }
 
-raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) {
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
+                                                   const BasicBlock* New) {
+  const Function *F = Old->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  DEBUG(errs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
+
+  std::set<Edge> Edges;
+  for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end(); 
+       ewi != ewe; ++ewi) {
+    Edge old = ewi->first;
+    if (old.first == Old) {
+      Edges.insert(old);
+    }
+  }
+  for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end(); 
+       EI != EE; ++EI) {
+    Edge newedge = getEdge(New, EI->second);
+    replaceEdge(*EI, newedge);
+  }
+
+  double w = getExecutionCount(Old);
+  setEdgeWeight(getEdge(Old, New), w);
+  setExecutionCount(New, w);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
+                                                   const BasicBlock* NewBB,
+                                                   BasicBlock *const *Preds,
+                                                   unsigned NumPreds) {
+  const Function *F = BB->getParent();
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(F);
+  if (J == EdgeInformation.end()) return;
+
+  DEBUG(errs() << "Splitting " << NumPreds << " Edges from " << BB->getName() 
+               << " to " << NewBB->getName() << "\n");
+
+  // Collect weight that was redirected over NewBB.
+  double newweight = 0;
+  
+  std::set<const BasicBlock *> ProcessedPreds;
+  // For all requestes Predecessors.
+  for (unsigned pred = 0; pred < NumPreds; ++pred) {
+    const BasicBlock * Pred = Preds[pred];
+    if (ProcessedPreds.insert(Pred).second) {
+      // Create edges and read old weight.
+      Edge oldedge = getEdge(Pred, BB);
+      Edge newedge = getEdge(Pred, NewBB);
+
+      // Remember how much weight was redirected.
+      newweight += getEdgeWeight(oldedge);
+    
+      replaceEdge(oldedge,newedge);
+    }
+  }
+
+  Edge newedge = getEdge(NewBB,BB);
+  setEdgeWeight(newedge, newweight);
+  setExecutionCount(NewBB, newweight);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
+                                                 const Function *New) {
+  DEBUG(errs() << "Replacing Function " << Old->getName() << " with "
+               << New->getName() << "\n");
+  std::map<const Function*, EdgeWeights>::iterator J =
+    EdgeInformation.find(Old);
+  if(J != EdgeInformation.end()) {
+    EdgeInformation[New] = J->second;
+  }
+  EdgeInformation.erase(Old);
+  BlockInformation.erase(Old);
+  FunctionInformation.erase(Old);
+}
+
+static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
+                                 ProfileInfo::Edge &tocalc, unsigned &uncalc) {
+  if (w == ProfileInfo::MissingValue) {
+    tocalc = edge;
+    uncalc++;
+    return 0;
+  } else {
+    return w;
+  }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::
+        CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
+                             bool assumeEmptySelf) {
+  Edge edgetocalc;
+  unsigned uncalculated = 0;
+
+  // collect weights of all incoming and outgoing edges, rememer edges that
+  // have no value
+  double incount = 0;
+  SmallSet<const BasicBlock*,8> pred_visited;
+  pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+  if (bbi==bbe) {
+    Edge e = getEdge(0,BB);
+    incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+  }
+  for (;bbi != bbe; ++bbi) {
+    if (pred_visited.insert(*bbi)) {
+      Edge e = getEdge(*bbi,BB);
+      incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+    }
+  }
+
+  double outcount = 0;
+  SmallSet<const BasicBlock*,8> succ_visited;
+  succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+  if (sbbi==sbbe) {
+    Edge e = getEdge(BB,0);
+    if (getEdgeWeight(e) == MissingValue) {
+      double w = getExecutionCount(BB);
+      if (w != MissingValue) {
+        setEdgeWeight(e,w);
+        removed = e;
+      }
+    }
+    outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+  }
+  for (;sbbi != sbbe; ++sbbi) {
+    if (succ_visited.insert(*sbbi)) {
+      Edge e = getEdge(BB,*sbbi);
+      outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+    }
+  }
+
+  // if exactly one edge weight was missing, calculate it and remove it from
+  // spanning tree
+  if (uncalculated == 0 ) {
+    return true;
+  } else
+  if (uncalculated == 1) {
+    if (incount < outcount) {
+      EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+    } else {
+      EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+    }
+    DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": "
+                 << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
+    removed = edgetocalc;
+    return true;
+  } else 
+  if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
+    setEdgeWeight(edgetocalc, incount * 10);
+    removed = edgetocalc;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
+  double w = PI->getEdgeWeight(e);
+  if (w != ProfileInfo::MissingValue) {
+    calcw += w;
+  } else {
+    misscount.insert(e);
+  }
+}
+
+template<>
+bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
+  bool hasNoSuccessors = false;
+
+  double inWeight = 0;
+  std::set<Edge> inMissing;
+  std::set<const BasicBlock*> ProcessedPreds;
+  pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+  if (bbi == bbe) {
+    readEdge(this,getEdge(0,BB),inWeight,inMissing);
+  }
+  for( ; bbi != bbe; ++bbi ) {
+    if (ProcessedPreds.insert(*bbi).second) {
+      readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
+    }
+  }
+
+  double outWeight = 0;
+  std::set<Edge> outMissing;
+  std::set<const BasicBlock*> ProcessedSuccs;
+  succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+  if (sbbi == sbbe) {
+    readEdge(this,getEdge(BB,0),outWeight,outMissing);
+    hasNoSuccessors = true;
+  }
+  for ( ; sbbi != sbbe; ++sbbi ) {
+    if (ProcessedSuccs.insert(*sbbi).second) {
+      readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
+    }
+  }
+
+  double share;
+  std::set<Edge>::iterator ei,ee;
+  if (inMissing.size() == 0 && outMissing.size() > 0) {
+    ei = outMissing.begin();
+    ee = outMissing.end();
+    share = inWeight/outMissing.size();
+    setExecutionCount(BB,inWeight);
+  } else
+  if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
+    ei = inMissing.begin();
+    ee = inMissing.end();
+    share = 0;
+    setExecutionCount(BB,0);
+  } else
+  if (inMissing.size() == 0 && outMissing.size() == 0) {
+    setExecutionCount(BB,outWeight);
+    return true;
+  } else {
+    return false;
+  }
+  for ( ; ei != ee; ++ei ) {
+    setEdgeWeight(*ei,share);
+  }
+  return true;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
+//  if (getExecutionCount(&(F->getEntryBlock())) == 0) {
+//    for (Function::const_iterator FI = F->begin(), FE = F->end();
+//         FI != FE; ++FI) {
+//      const BasicBlock* BB = &(*FI);
+//      {
+//        pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+//        if (NBB == End) {
+//          setEdgeWeight(getEdge(0,BB),0);
+//        }
+//        for(;NBB != End; ++NBB) {
+//          setEdgeWeight(getEdge(*NBB,BB),0);
+//        }
+//      }
+//      {
+//        succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+//        if (NBB == End) {
+//          setEdgeWeight(getEdge(0,BB),0);
+//        }
+//        for(;NBB != End; ++NBB) {
+//          setEdgeWeight(getEdge(*NBB,BB),0);
+//        }
+//      }
+//    }
+//    return;
+//  }
+  // The set of BasicBlocks that are still unvisited.
+  std::set<const BasicBlock*> Unvisited;
+
+  // The set of return edges (Edges with no successors).
+  std::set<Edge> ReturnEdges;
+  double ReturnWeight = 0;
+  
+  // First iterate over the whole function and collect:
+  // 1) The blocks in this function in the Unvisited set.
+  // 2) The return edges in the ReturnEdges set.
+  // 3) The flow that is leaving the function already via return edges.
+
+  // Data structure for searching the function.
+  std::queue<const BasicBlock *> BFS;
+  const BasicBlock *BB = &(F->getEntryBlock());
+  BFS.push(BB);
+  Unvisited.insert(BB);
+
+  while (BFS.size()) {
+    BB = BFS.front(); BFS.pop();
+    succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+    if (NBB == End) {
+      Edge e = getEdge(BB,0);
+      double w = getEdgeWeight(e);
+      if (w == MissingValue) {
+        // If the return edge has no value, try to read value from block.
+        double bw = getExecutionCount(BB);
+        if (bw != MissingValue) {
+          setEdgeWeight(e,bw);
+          ReturnWeight += bw;
+        } else {
+          // If both return edge and block provide no value, collect edge.
+          ReturnEdges.insert(e);
+        }
+      } else {
+        // If the return edge has a proper value, collect it.
+        ReturnWeight += w;
+      }
+    }
+    for (;NBB != End; ++NBB) {
+      if (Unvisited.insert(*NBB).second) {
+        BFS.push(*NBB);
+      }
+    }
+  }
+
+  while (Unvisited.size() > 0) {
+    unsigned oldUnvisitedCount = Unvisited.size();
+    bool FoundPath = false;
+
+    // If there is only one edge left, calculate it.
+    if (ReturnEdges.size() == 1) {
+      ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
+
+      Edge e = *ReturnEdges.begin();
+      setEdgeWeight(e,ReturnWeight);
+      setExecutionCount(e.first,ReturnWeight);
+
+      Unvisited.erase(e.first);
+      ReturnEdges.erase(e);
+      continue;
+    }
+
+    // Calculate all blocks where only one edge is missing, this may also
+    // resolve furhter return edges.
+    std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE) {
+      const BasicBlock *BB = *FI; ++FI;
+      Edge e;
+      if(CalculateMissingEdge(BB,e,true)) {
+        if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
+          setExecutionCount(BB,getExecutionCount(BB));
+        }
+        Unvisited.erase(BB);
+        if (e.first != 0 && e.second == 0) {
+          ReturnEdges.erase(e);
+          ReturnWeight += getEdgeWeight(e);
+        }
+      }
+    }
+    if (oldUnvisitedCount > Unvisited.size()) continue;
+
+    // Estimate edge weights by dividing the flow proportionally.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE) {
+      const BasicBlock *BB = *FI; ++FI;
+      const BasicBlock *Dest = 0;
+      bool AllEdgesHaveSameReturn = true;
+      // Check each Successor, these must all end up in the same or an empty
+      // return block otherwise its dangerous to do an estimation on them.
+      for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+           Succ != End; ++Succ) {
+        Path P;
+        GetPath(*Succ, 0, P, GetPathToExit);
+        if (Dest && Dest != P[0]) {
+          AllEdgesHaveSameReturn = false;
+        }
+        Dest = P[0];
+      }
+      if (AllEdgesHaveSameReturn) {
+        if(EstimateMissingEdges(BB)) {
+          Unvisited.erase(BB);
+          break;
+        }
+      }
+    }
+    if (oldUnvisitedCount > Unvisited.size()) continue;
+
+    // Check if there is a path to an block that has a known value and redirect
+    // flow accordingly.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE && !FoundPath) {
+      // Fetch path.
+      const BasicBlock *BB = *FI; ++FI;
+      Path P;
+      const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
+
+      // Calculate incoming flow.
+      double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
+      std::set<const BasicBlock *> Processed;
+      for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+           NBB != End; ++NBB) {
+        if (Processed.insert(*NBB).second) {
+          Edge e = getEdge(*NBB, BB);
+          double ew = getEdgeWeight(e);
+          if (ew != MissingValue) {
+            iw += ew;
+            invalid++;
+          } else {
+            // If the path contains the successor, this means its a backedge,
+            // do not count as missing.
+            if (P.find(*NBB) == P.end())
+              inmissing++;
+          }
+          incount++;
+        }
+      }
+      if (inmissing == incount) continue;
+      if (invalid == 0) continue;
+
+      // Subtract (already) outgoing flow.
+      Processed.clear();
+      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+           NBB != End; ++NBB) {
+        if (Processed.insert(*NBB).second) {
+          Edge e = getEdge(BB, *NBB);
+          double ew = getEdgeWeight(e);
+          if (ew != MissingValue) {
+            iw -= ew;
+          }
+        }
+      }
+      if (iw < 0) continue;
+
+      // Check the recieving end of the path if it can handle the flow.
+      double ow = getExecutionCount(Dest);
+      Processed.clear();
+      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+           NBB != End; ++NBB) {
+        if (Processed.insert(*NBB).second) {
+          Edge e = getEdge(BB, *NBB);
+          double ew = getEdgeWeight(e);
+          if (ew != MissingValue) {
+            ow -= ew;
+          }
+        }
+      }
+      if (ow < 0) continue;
+
+      // Determine how much flow shall be used.
+      double ew = getEdgeWeight(getEdge(P[Dest],Dest));
+      if (ew != MissingValue) {
+        ew = ew<ow?ew:ow;
+        ew = ew<iw?ew:iw;
+      } else {
+        if (inmissing == 0)
+          ew = iw;
+      }
+
+      // Create flow.
+      if (ew != MissingValue) {
+        do {
+          Edge e = getEdge(P[Dest],Dest);
+          if (getEdgeWeight(e) == MissingValue) {
+            setEdgeWeight(e,ew);
+            FoundPath = true;
+          }
+          Dest = P[Dest];
+        } while (Dest != BB);
+      }
+    }
+    if (FoundPath) continue;
+
+    // Calculate a block with self loop.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE && !FoundPath) {
+      const BasicBlock *BB = *FI; ++FI;
+      bool SelfEdgeFound = false;
+      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+           NBB != End; ++NBB) {
+        if (*NBB == BB) {
+          SelfEdgeFound = true;
+          break;
+        }
+      }
+      if (SelfEdgeFound) {
+        Edge e = getEdge(BB,BB);
+        if (getEdgeWeight(e) == MissingValue) {
+          double iw = 0;
+          std::set<const BasicBlock *> Processed;
+          for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+               NBB != End; ++NBB) {
+            if (Processed.insert(*NBB).second) {
+              Edge e = getEdge(*NBB, BB);
+              double ew = getEdgeWeight(e);
+              if (ew != MissingValue) {
+                iw += ew;
+              }
+            }
+          }
+          setEdgeWeight(e,iw * 10);
+          FoundPath = true;
+        }
+      }
+    }
+    if (FoundPath) continue;
+
+    // Determine backedges, set them to zero.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE && !FoundPath) {
+      const BasicBlock *BB = *FI; ++FI;
+      const BasicBlock *Dest;
+      Path P;
+      bool BackEdgeFound = false;
+      for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+           NBB != End; ++NBB) {
+        Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
+        if (Dest == *NBB) {
+          BackEdgeFound = true;
+          break;
+        }
+      }
+      if (BackEdgeFound) {
+        Edge e = getEdge(Dest,BB);
+        double w = getEdgeWeight(e);
+        if (w == MissingValue) {
+          setEdgeWeight(e,0);
+          FoundPath = true;
+        }
+        do {
+          Edge e = getEdge(P[Dest], Dest);
+          double w = getEdgeWeight(e);
+          if (w == MissingValue) {
+            setEdgeWeight(e,0);
+            FoundPath = true;
+          }
+          Dest = P[Dest];
+        } while (Dest != BB);
+      }
+    }
+    if (FoundPath) continue;
+
+    // Channel flow to return block.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE && !FoundPath) {
+      const BasicBlock *BB = *FI; ++FI;
+
+      Path P;
+      const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
+      Dest = P[0];
+      if (!Dest) continue;
+
+      if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
+        // Calculate incoming flow.
+        double iw = 0;
+        std::set<const BasicBlock *> Processed;
+        for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+             NBB != End; ++NBB) {
+          if (Processed.insert(*NBB).second) {
+            Edge e = getEdge(*NBB, BB);
+            double ew = getEdgeWeight(e);
+            if (ew != MissingValue) {
+              iw += ew;
+            }
+          }
+        }
+        do {
+          Edge e = getEdge(P[Dest], Dest);
+          double w = getEdgeWeight(e);
+          if (w == MissingValue) {
+            setEdgeWeight(e,iw);
+            FoundPath = true;
+          } else {
+            assert(0 && "Edge should not have value already!");
+          }
+          Dest = P[Dest];
+        } while (Dest != BB);
+      }
+    }
+    if (FoundPath) continue;
+
+    // Speculatively set edges to zero.
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE && !FoundPath) {
+      const BasicBlock *BB = *FI; ++FI;
+
+      for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+           NBB != End; ++NBB) {
+        Edge e = getEdge(*NBB,BB);
+        double w = getEdgeWeight(e);
+        if (w == MissingValue) {
+          setEdgeWeight(e,0);
+          FoundPath = true;
+          break;
+        }
+      }
+    }
+    if (FoundPath) continue;
+
+    errs() << "{";
+    FI = Unvisited.begin(), FE = Unvisited.end();
+    while(FI != FE) {
+      const BasicBlock *BB = *FI; ++FI;
+      errs() << BB->getName();
+      if (FI != FE)
+        errs() << ",";
+    }
+    errs() << "}";
+
+    errs() << "ASSERT: could not repair function";
+    assert(0 && "could not repair function");
+  }
+
+  EdgeWeights J = EdgeInformation[F];
+  for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
+    Edge e = EI->first;
+
+    bool SuccFound = false;
+    if (e.first != 0) {
+      succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
+      if (NBB == End) {
+        if (0 == e.second) {
+          SuccFound = true;
+        }
+      }
+      for (;NBB != End; ++NBB) {
+        if (*NBB == e.second) {
+          SuccFound = true;
+          break;
+        }
+      }
+      if (!SuccFound) {
+        removeEdge(e);
+      }
+    }
+  }
+}
+
+raw_ostream& operator<<(raw_ostream &O, const Function *F) {
+  return O << F->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
+  return O << MF->getFunction()->getName() << "(MF)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) {
+  return O << BB->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
+  return O << MBB->getBasicBlock()->getName() << "(MB)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) {
   O << "(";
-  O << (E.first ? E.first->getNameStr() : "0");
+
+  if (E.first)
+    O << E.first;
+  else
+    O << "0";
+
+  O << ",";
+
+  if (E.second)
+    O << E.second;
+  else
+    O << "0";
+
+  return O << ")";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
+  O << "(";
+
+  if (E.first)
+    O << E.first;
+  else
+    O << "0";
+
   O << ",";
-  O << (E.second ? E.second->getNameStr() : "0");
+
+  if (E.second)
+    O << E.second;
+  else
+    O << "0";
+
   return O << ")";
 }
 
+} // namespace llvm
+
 //===----------------------------------------------------------------------===//
 //  NoProfile ProfileInfo implementation
 //
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 9e1dfb6..cbd0430 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -74,6 +74,8 @@ X("profile-loader", "Load profile information from llvmprof.out", false, true);
 
 static RegisterAnalysisGroup<ProfileInfo> Y(X);
 
+const PassInfo *llvm::ProfileLoaderPassID = &X;
+
 ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
 
 /// createProfileLoaderPass - This function returns a Pass that loads the
@@ -112,46 +114,9 @@ void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
     recurseBasicBlock(*bbi);
   }
 
-  Edge edgetocalc;
-  unsigned uncalculated = 0;
-
-  // collect weights of all incoming and outgoing edges, rememer edges that
-  // have no value
-  double incount = 0;
-  SmallSet<const BasicBlock*,8> pred_visited;
-  pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
-  if (bbi==bbe) {
-    readEdgeOrRemember(getEdge(0, BB),edgetocalc,uncalculated,incount);
-  }
-  for (;bbi != bbe; ++bbi) {
-    if (pred_visited.insert(*bbi)) {
-      readEdgeOrRemember(getEdge(*bbi, BB),edgetocalc,uncalculated,incount);
-    }
-  }
-
-  double outcount = 0;
-  SmallSet<const BasicBlock*,8> succ_visited;
-  succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
-  if (sbbi==sbbe) {
-    readEdgeOrRemember(getEdge(BB, 0),edgetocalc,uncalculated,outcount);
-  }
-  for (;sbbi != sbbe; ++sbbi) {
-    if (succ_visited.insert(*sbbi)) {
-      readEdgeOrRemember(getEdge(BB, *sbbi),edgetocalc,uncalculated,outcount);
-    }
-  }
-
-  // if exactly one edge weight was missing, calculate it and remove it from
-  // spanning tree
-  if (uncalculated == 1) {
-    if (incount < outcount) {
-      EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
-    } else {
-      EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
-    }
-    DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": "
-                 << format("%g", getEdgeWeight(edgetocalc)) << "\n");
-    SpanningTree.erase(edgetocalc);
+  Edge tocalc;
+  if (CalculateMissingEdge(BB, tocalc)) {
+    SpanningTree.erase(tocalc);
   }
 }
 
@@ -219,9 +184,9 @@ bool LoaderPass::runOnModule(Module &M) {
         }
       }
       while (SpanningTree.size() > 0) {
-#if 0
+
         unsigned size = SpanningTree.size();
-#endif
+
         BBisUnvisited.clear();
         for (std::set<Edge>::iterator ei = SpanningTree.begin(),
              ee = SpanningTree.end(); ei != ee; ++ei) {
@@ -231,17 +196,16 @@ bool LoaderPass::runOnModule(Module &M) {
         while (BBisUnvisited.size() > 0) {
           recurseBasicBlock(*BBisUnvisited.begin());
         }
-#if 0
+
         if (SpanningTree.size() == size) {
           DEBUG(errs()<<"{");
           for (std::set<Edge>::iterator ei = SpanningTree.begin(),
                ee = SpanningTree.end(); ei != ee; ++ei) {
-            DEBUG(errs()<<"("<<(ei->first?ei->first->getName():"0")<<","
-                        <<(ei->second?ei->second->getName():"0")<<"),");
+            DEBUG(errs()<< *ei <<",");
           }
           assert(0 && "No edge calculated!");
         }
-#endif
+
       }
     }
     if (ReadCount != Counters.size()) {
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 5f36294..36a80ba 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/InstIterator.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/Debug.h"
 #include <set>
 using namespace llvm;
@@ -29,44 +30,45 @@ static cl::opt<bool,false>
 ProfileVerifierDisableAssertions("profile-verifier-noassert",
      cl::desc("Disable assertions"));
 
-namespace {
-  class ProfileVerifierPass : public FunctionPass {
+namespace llvm {
+  template<class FType, class BType>
+  class ProfileVerifierPassT : public FunctionPass {
 
     struct DetailedBlockInfo {
-      const BasicBlock *BB;
-      double            BBWeight;
-      double            inWeight;
-      int               inCount;
-      double            outWeight;
-      int               outCount;
+      const BType *BB;
+      double      BBWeight;
+      double      inWeight;
+      int         inCount;
+      double      outWeight;
+      int         outCount;
     };
 
-    ProfileInfo *PI;
-    std::set<const BasicBlock*> BBisVisited;
-    std::set<const Function*>   FisVisited;
+    ProfileInfoT<FType, BType> *PI;
+    std::set<const BType*> BBisVisited;
+    std::set<const FType*>   FisVisited;
     bool DisableAssertions;
 
     // When debugging is enabled, the verifier prints a whole slew of debug
     // information, otherwise its just the assert. These are all the helper
     // functions.
     bool PrintedDebugTree;
-    std::set<const BasicBlock*> BBisPrinted;
+    std::set<const BType*> BBisPrinted;
     void debugEntry(DetailedBlockInfo*);
-    void printDebugInfo(const BasicBlock *BB);
+    void printDebugInfo(const BType *BB);
 
   public:
     static char ID; // Class identification, replacement for typeinfo
 
-    explicit ProfileVerifierPass () : FunctionPass(&ID) {
+    explicit ProfileVerifierPassT () : FunctionPass(&ID) {
       DisableAssertions = ProfileVerifierDisableAssertions;
     }
-    explicit ProfileVerifierPass (bool da) : FunctionPass(&ID), 
-                                             DisableAssertions(da) {
+    explicit ProfileVerifierPassT (bool da) : FunctionPass(&ID), 
+                                              DisableAssertions(da) {
     }
 
     void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.setPreservesAll();
-      AU.addRequired<ProfileInfo>();
+      AU.addRequired<ProfileInfoT<FType, BType> >();
     }
 
     const char *getPassName() const {
@@ -74,271 +76,302 @@ namespace {
     }
 
     /// run - Verify the profile information.
-    bool runOnFunction(Function &F);
-    void recurseBasicBlock(const BasicBlock*);
+    bool runOnFunction(FType &F);
+    void recurseBasicBlock(const BType*);
 
-    bool   exitReachable(const Function*);
-    double ReadOrAssert(ProfileInfo::Edge);
+    bool   exitReachable(const FType*);
+    double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
     void   CheckValue(bool, const char*, DetailedBlockInfo*);
   };
-}  // End of anonymous namespace
-
-char ProfileVerifierPass::ID = 0;
-static RegisterPass<ProfileVerifierPass>
-X("profile-verifier", "Verify profiling information", false, true);
 
-namespace llvm {
-  FunctionPass *createProfileVerifierPass() {
-    return new ProfileVerifierPass(ProfileVerifierDisableAssertions); 
-  }
-}
-
-void ProfileVerifierPass::printDebugInfo(const BasicBlock *BB) {
-
-  if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
-
-  double BBWeight = PI->getExecutionCount(BB);
-  if (BBWeight == ProfileInfo::MissingValue) { BBWeight = 0; }
-  double inWeight = 0;
-  int inCount = 0;
-  std::set<const BasicBlock*> ProcessedPreds;
-  for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
-        bbi != bbe; ++bbi ) {
-    if (ProcessedPreds.insert(*bbi).second) {
-      ProfileInfo::Edge E = PI->getEdge(*bbi,BB);
-      double EdgeWeight = PI->getEdgeWeight(E);
-      if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
-      errs() << "calculated in-edge " << E << ": " << EdgeWeight << "\n";
-      inWeight += EdgeWeight;
-      inCount++;
+  typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
+
+  template<class FType, class BType>
+  void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
+
+    if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+    double BBWeight = PI->getExecutionCount(BB);
+    if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
+    double inWeight = 0;
+    int inCount = 0;
+    std::set<const BType*> ProcessedPreds;
+    for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+          bbi != bbe; ++bbi ) {
+      if (ProcessedPreds.insert(*bbi).second) {
+        typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
+        double EdgeWeight = PI->getEdgeWeight(E);
+        if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+        errs() << "calculated in-edge " << E << ": " 
+               << format("%20.20g",EdgeWeight) << "\n";
+        inWeight += EdgeWeight;
+        inCount++;
+      }
     }
-  }
-  double outWeight = 0;
-  int outCount = 0;
-  std::set<const BasicBlock*> ProcessedSuccs;
-  for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-        bbi != bbe; ++bbi ) {
-    if (ProcessedSuccs.insert(*bbi).second) {
-      ProfileInfo::Edge E = PI->getEdge(BB,*bbi);
-      double EdgeWeight = PI->getEdgeWeight(E);
-      if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
-      errs() << "calculated out-edge " << E << ": " << EdgeWeight << "\n";
-      outWeight += EdgeWeight;
-      outCount++;
+    double outWeight = 0;
+    int outCount = 0;
+    std::set<const BType*> ProcessedSuccs;
+    for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+          bbi != bbe; ++bbi ) {
+      if (ProcessedSuccs.insert(*bbi).second) {
+        typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
+        double EdgeWeight = PI->getEdgeWeight(E);
+        if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+        errs() << "calculated out-edge " << E << ": " 
+               << format("%20.20g",EdgeWeight) << "\n";
+        outWeight += EdgeWeight;
+        outCount++;
+      }
+    }
+    errs() << "Block " << BB->getNameStr()                << " in " 
+           << BB->getParent()->getNameStr()               << ":"
+           << "BBWeight="  << format("%20.20g",BBWeight)  << ","
+           << "inWeight="  << format("%20.20g",inWeight)  << ","
+           << "inCount="   << inCount                     << ","
+           << "outWeight=" << format("%20.20g",outWeight) << ","
+           << "outCount"   << outCount                    << "\n";
+
+    // mark as visited and recurse into subnodes
+    BBisPrinted.insert(BB);
+    for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); 
+          bbi != bbe; ++bbi ) {
+      printDebugInfo(*bbi);
     }
   }
-  errs()<<"Block "<<BB->getNameStr()<<" in "<<BB->getParent()->getNameStr()
-        <<",BBWeight="<<BBWeight<<",inWeight="<<inWeight<<",inCount="<<inCount
-        <<",outWeight="<<outWeight<<",outCount"<<outCount<<"\n";
-
-  // mark as visited and recurse into subnodes
-  BBisPrinted.insert(BB);
-  for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); 
-        bbi != bbe; ++bbi ) {
-    printDebugInfo(*bbi);
-  }
-}
 
-void ProfileVerifierPass::debugEntry (DetailedBlockInfo *DI) {
-  errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
-         << DI->BB->getParent()->getNameStr()  << ":";
-  errs() << "BBWeight="  << DI->BBWeight   << ",";
-  errs() << "inWeight="  << DI->inWeight   << ",";
-  errs() << "inCount="   << DI->inCount    << ",";
-  errs() << "outWeight=" << DI->outWeight  << ",";
-  errs() << "outCount="  << DI->outCount   << "\n";
-  if (!PrintedDebugTree) {
-    PrintedDebugTree = true;
-    printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+  template<class FType, class BType>
+  void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
+    errs() << "TROUBLE: Block " << DI->BB->getNameStr()       << " in "
+           << DI->BB->getParent()->getNameStr()               << ":"
+           << "BBWeight="  << format("%20.20g",DI->BBWeight)  << ","
+           << "inWeight="  << format("%20.20g",DI->inWeight)  << ","
+           << "inCount="   << DI->inCount                     << ","
+           << "outWeight=" << format("%20.20g",DI->outWeight) << ","
+           << "outCount="  << DI->outCount                    << "\n";
+    if (!PrintedDebugTree) {
+      PrintedDebugTree = true;
+      printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+    }
   }
-}
 
-// This compares A and B but considering maybe small differences.
-static bool Equals(double A, double B) { 
-  double maxRelativeError = 0.0000001;
-  if (A == B)
-    return true;
-  double relativeError;
-  if (fabs(B) > fabs(A)) 
-    relativeError = fabs((A - B) / B);
-  else 
-    relativeError = fabs((A - B) / A);
-  if (relativeError <= maxRelativeError) return true; 
-  return false; 
-}
+  // This compares A and B for equality.
+  static bool Equals(double A, double B) {
+    return A == B;
+  }
 
-// This checks if the function "exit" is reachable from an given function
-// via calls, this is necessary to check if a profile is valid despite the
-// counts not fitting exactly.
-bool ProfileVerifierPass::exitReachable(const Function *F) {
-  if (!F) return false;
+  // This checks if the function "exit" is reachable from an given function
+  // via calls, this is necessary to check if a profile is valid despite the
+  // counts not fitting exactly.
+  template<class FType, class BType>
+  bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
+    if (!F) return false;
 
-  if (FisVisited.count(F)) return false;
+    if (FisVisited.count(F)) return false;
 
-  Function *Exit = F->getParent()->getFunction("exit");
-  if (Exit == F) {
-    return true;
-  }
+    FType *Exit = F->getParent()->getFunction("exit");
+    if (Exit == F) {
+      return true;
+    }
 
-  FisVisited.insert(F);
-  bool exits = false;
-  for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
-    if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
-      exits |= exitReachable(CI->getCalledFunction());
-      if (exits) break;
+    FisVisited.insert(F);
+    bool exits = false;
+    for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+      if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+        FType *F = CI->getCalledFunction();
+        if (F) {
+          exits |= exitReachable(F);
+        } else {
+          // This is a call to a pointer, all bets are off...
+          exits = true;
+        }
+        if (exits) break;
+      }
     }
+    return exits;
   }
-  return exits;
-}
 
-#define ASSERTMESSAGE(M) \
-    errs() << (M) << "\n"; \
-    if (!DisableAssertions) assert(0 && (M));
-
-double ProfileVerifierPass::ReadOrAssert(ProfileInfo::Edge E) {
-  double EdgeWeight = PI->getEdgeWeight(E);
-  if (EdgeWeight == ProfileInfo::MissingValue) {
-    errs() << "Edge " << E << " in Function " 
-           << ProfileInfo::getFunction(E)->getNameStr() << ": ";
-    ASSERTMESSAGE("ASSERT:Edge has missing value");
-    return 0;
-  } else {
-    return EdgeWeight;
+  #define ASSERTMESSAGE(M) \
+    { errs() << "ASSERT:" << (M) << "\n"; \
+      if (!DisableAssertions) assert(0 && (M)); }
+
+  template<class FType, class BType>
+  double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
+    double EdgeWeight = PI->getEdgeWeight(E);
+    if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
+      errs() << "Edge " << E << " in Function " 
+             << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+      ASSERTMESSAGE("Edge has missing value");
+      return 0;
+    } else {
+      if (EdgeWeight < 0) {
+        errs() << "Edge " << E << " in Function " 
+               << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+        ASSERTMESSAGE("Edge has negative value");
+      }
+      return EdgeWeight;
+    }
   }
-}
 
-void ProfileVerifierPass::CheckValue(bool Error, const char *Message,
-                                     DetailedBlockInfo *DI) {
-  if (Error) {
-    DEBUG(debugEntry(DI));
-    errs() << "Block " << DI->BB->getNameStr() << " in Function " 
-           << DI->BB->getParent()->getNameStr() << ": ";
-    ASSERTMESSAGE(Message);
+  template<class FType, class BType>
+  void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error, 
+                                                      const char *Message,
+                                                      DetailedBlockInfo *DI) {
+    if (Error) {
+      DEBUG(debugEntry(DI));
+      errs() << "Block " << DI->BB->getNameStr() << " in Function " 
+             << DI->BB->getParent()->getNameStr() << ": ";
+      ASSERTMESSAGE(Message);
+    }
+    return;
   }
-  return;
-}
 
-// This calculates the Information for a block and then recurses into the
-// successors.
-void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) {
-
-  // Break the recursion by remembering all visited blocks.
-  if (BBisVisited.find(BB) != BBisVisited.end()) return;
-
-  // Use a data structure to store all the information, this can then be handed
-  // to debug printers.
-  DetailedBlockInfo DI;
-  DI.BB = BB;
-  DI.outCount = DI.inCount = 0;
-  DI.inWeight = DI.outWeight = 0.0;
-
-  // Read predecessors.
-  std::set<const BasicBlock*> ProcessedPreds;
-  pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
-  // If there are none, check for (0,BB) edge.
-  if (bpi == bpe) {
-    DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
-    DI.inCount++;
-  }
-  for (;bpi != bpe; ++bpi) {
-    if (ProcessedPreds.insert(*bpi).second) {
-      DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+  // This calculates the Information for a block and then recurses into the
+  // successors.
+  template<class FType, class BType>
+  void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
+
+    // Break the recursion by remembering all visited blocks.
+    if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+    // Use a data structure to store all the information, this can then be handed
+    // to debug printers.
+    DetailedBlockInfo DI;
+    DI.BB = BB;
+    DI.outCount = DI.inCount = 0;
+    DI.inWeight = DI.outWeight = 0;
+
+    // Read predecessors.
+    std::set<const BType*> ProcessedPreds;
+    pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+    // If there are none, check for (0,BB) edge.
+    if (bpi == bpe) {
+      DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
       DI.inCount++;
     }
-  }
+    for (;bpi != bpe; ++bpi) {
+      if (ProcessedPreds.insert(*bpi).second) {
+        DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+        DI.inCount++;
+      }
+    }
 
-  // Read successors.
-  std::set<const BasicBlock*> ProcessedSuccs;
-  succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-  // If there is an (0,BB) edge, consider it too. (This is done not only when
-  // there are no successors, but every time; not every function contains
-  // return blocks with no successors (think loop latch as return block)).
-  double w = PI->getEdgeWeight(PI->getEdge(BB,0));
-  if (w != ProfileInfo::MissingValue) {
-    DI.outWeight += w;
-    DI.outCount++;
-  }
-  for (;bbi != bbe; ++bbi) {
-    if (ProcessedSuccs.insert(*bbi).second) {
-      DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+    // Read successors.
+    std::set<const BType*> ProcessedSuccs;
+    succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+    // If there is an (0,BB) edge, consider it too. (This is done not only when
+    // there are no successors, but every time; not every function contains
+    // return blocks with no successors (think loop latch as return block)).
+    double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+    if (w != ProfileInfoT<FType, BType>::MissingValue) {
+      DI.outWeight += w;
       DI.outCount++;
     }
-  }
+    for (;bbi != bbe; ++bbi) {
+      if (ProcessedSuccs.insert(*bbi).second) {
+        DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+        DI.outCount++;
+      }
+    }
 
-  // Read block weight.
-  DI.BBWeight = PI->getExecutionCount(BB);
-  CheckValue(DI.BBWeight == ProfileInfo::MissingValue,
-             "ASSERT:BasicBlock has missing value", &DI);
-
-  // Check if this block is a setjmp target.
-  bool isSetJmpTarget = false;
-  if (DI.outWeight > DI.inWeight) {
-    for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
-         i != ie; ++i) {
-      if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
-        Function *F = CI->getCalledFunction();
-        if (F && (F->getNameStr() == "_setjmp")) {
-          isSetJmpTarget = true; break;
+    // Read block weight.
+    DI.BBWeight = PI->getExecutionCount(BB);
+    CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
+               "BasicBlock has missing value", &DI);
+    CheckValue(DI.BBWeight < 0,
+               "BasicBlock has negative value", &DI);
+
+    // Check if this block is a setjmp target.
+    bool isSetJmpTarget = false;
+    if (DI.outWeight > DI.inWeight) {
+      for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+           i != ie; ++i) {
+        if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+          FType *F = CI->getCalledFunction();
+          if (F && (F->getNameStr() == "_setjmp")) {
+            isSetJmpTarget = true; break;
+          }
         }
       }
     }
-  }
-  // Check if this block is eventually reaching exit.
-  bool isExitReachable = false;
-  if (DI.inWeight > DI.outWeight) {
-    for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
-         i != ie; ++i) {
-      if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
-        FisVisited.clear();
-        isExitReachable |= exitReachable(CI->getCalledFunction());
-        if (isExitReachable) break;
+    // Check if this block is eventually reaching exit.
+    bool isExitReachable = false;
+    if (DI.inWeight > DI.outWeight) {
+      for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+           i != ie; ++i) {
+        if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+          FType *F = CI->getCalledFunction();
+          if (F) {
+            FisVisited.clear();
+            isExitReachable |= exitReachable(F);
+          } else {
+            // This is a call to a pointer, all bets are off...
+            isExitReachable = true;
+          }
+          if (isExitReachable) break;
+        }
       }
     }
-  }
 
-  if (DI.inCount > 0 && DI.outCount == 0) {
-     // If this is a block with no successors.
-    if (!isSetJmpTarget) {
-      CheckValue(!Equals(DI.inWeight,DI.BBWeight), 
-                 "ASSERT:inWeight and BBWeight do not match", &DI);
+    if (DI.inCount > 0 && DI.outCount == 0) {
+       // If this is a block with no successors.
+      if (!isSetJmpTarget) {
+        CheckValue(!Equals(DI.inWeight,DI.BBWeight), 
+                   "inWeight and BBWeight do not match", &DI);
+      }
+    } else if (DI.inCount == 0 && DI.outCount > 0) {
+      // If this is a block with no predecessors.
+      if (!isExitReachable)
+        CheckValue(!Equals(DI.BBWeight,DI.outWeight), 
+                   "BBWeight and outWeight do not match", &DI);
+    } else {
+      // If this block has successors and predecessors.
+      if (DI.inWeight > DI.outWeight && !isExitReachable)
+        CheckValue(!Equals(DI.inWeight,DI.outWeight), 
+                   "inWeight and outWeight do not match", &DI);
+      if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+        CheckValue(!Equals(DI.inWeight,DI.outWeight), 
+                   "inWeight and outWeight do not match", &DI);
     }
-  } else if (DI.inCount == 0 && DI.outCount > 0) {
-    // If this is a block with no predecessors.
-    if (!isExitReachable)
-      CheckValue(!Equals(DI.BBWeight,DI.outWeight), 
-                 "ASSERT:BBWeight and outWeight do not match", &DI);
-  } else {
-    // If this block has successors and predecessors.
-    if (DI.inWeight > DI.outWeight && !isExitReachable)
-      CheckValue(!Equals(DI.inWeight,DI.outWeight), 
-                 "ASSERT:inWeight and outWeight do not match", &DI);
-    if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
-      CheckValue(!Equals(DI.inWeight,DI.outWeight), 
-                 "ASSERT:inWeight and outWeight do not match", &DI);
-  }
 
 
-  // Mark this block as visited, rescurse into successors.
-  BBisVisited.insert(BB);
-  for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); 
-        bbi != bbe; ++bbi ) {
-    recurseBasicBlock(*bbi);
+    // Mark this block as visited, rescurse into successors.
+    BBisVisited.insert(BB);
+    for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); 
+          bbi != bbe; ++bbi ) {
+      recurseBasicBlock(*bbi);
+    }
   }
-}
 
-bool ProfileVerifierPass::runOnFunction(Function &F) {
-  PI = &getAnalysis<ProfileInfo>();
+  template<class FType, class BType>
+  bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
+    PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
+    if (!PI)
+      ASSERTMESSAGE("No ProfileInfo available");
+
+    // Prepare global variables.
+    PrintedDebugTree = false;
+    BBisVisited.clear();
+
+    // Fetch entry block and recurse into it.
+    const BType *entry = &F.getEntryBlock();
+    recurseBasicBlock(entry);
+
+    if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
+      ASSERTMESSAGE("Function count and entry block count do not match");
 
-  // Prepare global variables.
-  PrintedDebugTree = false;
-  BBisVisited.clear();
+    return false;
+  }
+
+  template<class FType, class BType>
+  char ProfileVerifierPassT<FType, BType>::ID = 0;
+}
 
-  // Fetch entry block and recurse into it.
-  const BasicBlock *entry = &F.getEntryBlock();
-  recurseBasicBlock(entry);
+static RegisterPass<ProfileVerifierPass>
+X("profile-verifier", "Verify profiling information", false, true);
 
-  if (!DisableAssertions)
-    assert((PI->getExecutionCount(&F)==PI->getExecutionCount(entry)) &&
-           "Function count and entry block count do not match");
-  return false;
+namespace llvm {
+  FunctionPass *createProfileVerifierPass() {
+    return new ProfileVerifierPass(ProfileVerifierDisableAssertions); 
+  }
 }
+
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index d674ee8..7157d47 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -357,7 +357,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
   // without the other.
   SplitAddRecs(Ops, Ty, SE);
 
-  // Decend down the pointer's type and attempt to convert the other
+  // Descend down the pointer's type and attempt to convert the other
   // operands into GEP indices, at each level. The first index in a GEP
   // indexes into the array implied by the pointer operand; the rest of
   // the indices index into the element or field type selected by the
@@ -628,7 +628,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
     BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
     BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
     BasicBlock::iterator NewInsertPt =
-      next(BasicBlock::iterator(cast<Instruction>(V)));
+      llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
     while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
     V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
                       NewInsertPt);
@@ -844,7 +844,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
       if (L && S->hasComputableLoopEvolution(L))
         InsertPt = L->getHeader()->getFirstNonPHI();
       while (isInsertedInstruction(InsertPt))
-        InsertPt = next(BasicBlock::iterator(InsertPt));
+        InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
       break;
     }
 
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 31d3ccc..22c6e3b 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -659,7 +659,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
   switch (Operator::getOpcode(V)) {
   default: break;
   case Instruction::SExt:
-    Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth();
+    Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
     return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
     
   case Instruction::AShr:
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 1b7c9c6..cad1d3b 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -540,6 +540,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(arm_apcscc);
   KEYWORD(arm_aapcscc);
   KEYWORD(arm_aapcs_vfpcc);
+  KEYWORD(msp430_intrcc);
 
   KEYWORD(cc);
   KEYWORD(c);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index a92dbf8..0333eed 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -581,6 +581,37 @@ bool LLParser::ParseStandaloneMetadata() {
   return false;
 }
 
+/// ParseInlineMetadata:
+///   !{type %instr}
+///   !{...} MDNode
+///   !"foo" MDString
+bool LLParser::ParseInlineMetadata(Value *&V, PerFunctionState &PFS) {
+  assert(Lex.getKind() == lltok::Metadata && "Only for Metadata");
+  V = 0;
+
+  Lex.Lex();
+  if (Lex.getKind() == lltok::lbrace) {
+    Lex.Lex();
+    if (ParseTypeAndValue(V, PFS) ||
+        ParseToken(lltok::rbrace, "expected end of metadata node"))
+      return true;
+
+    Value *Vals[] = { V };
+    V = MDNode::get(Context, Vals, 1);
+    return false;
+  }
+
+  // Standalone metadata reference
+  // !{ ..., !42, ... }
+  if (!ParseMDNode((MetadataBase *&)V))
+    return false;
+
+  // MDString:
+  // '!' STRINGCONSTANT
+  if (ParseMDString((MetadataBase *&)V)) return true;
+  return false;
+}
+
 /// ParseAlias:
 ///   ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee
 /// Aliasee
@@ -1043,6 +1074,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
 ///   ::= 'arm_apcscc'
 ///   ::= 'arm_aapcscc'
 ///   ::= 'arm_aapcs_vfpcc'
+///   ::= 'msp430_intrcc'
 ///   ::= 'cc' UINT
 ///
 bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
@@ -1056,6 +1088,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
   case lltok::kw_arm_apcscc:     CC = CallingConv::ARM_APCS; break;
   case lltok::kw_arm_aapcscc:    CC = CallingConv::ARM_AAPCS; break;
   case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
+  case lltok::kw_msp430_intrcc:  CC = CallingConv::MSP430_INTR; break;
   case lltok::kw_cc: {
       unsigned ArbitraryCC;
       Lex.Lex();
@@ -1377,15 +1410,23 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
     // Parse the argument.
     LocTy ArgLoc;
     PATypeHolder ArgTy(Type::getVoidTy(Context));
-    unsigned ArgAttrs1, ArgAttrs2;
+    unsigned ArgAttrs1 = Attribute::None;
+    unsigned ArgAttrs2 = Attribute::None;
     Value *V;
-    if (ParseType(ArgTy, ArgLoc) ||
-        ParseOptionalAttrs(ArgAttrs1, 0) ||
-        ParseValue(ArgTy, V, PFS) ||
-        // FIXME: Should not allow attributes after the argument, remove this in
-        // LLVM 3.0.
-        ParseOptionalAttrs(ArgAttrs2, 3))
+    if (ParseType(ArgTy, ArgLoc))
       return true;
+
+    if (Lex.getKind() == lltok::Metadata) {
+      if (ParseInlineMetadata(V, PFS))
+        return true;
+    } else {
+      if (ParseOptionalAttrs(ArgAttrs1, 0) ||
+          ParseValue(ArgTy, V, PFS) ||
+          // FIXME: Should not allow attributes after the argument, remove this
+          // in LLVM 3.0.
+          ParseOptionalAttrs(ArgAttrs2, 3))
+        return true;
+    }
     ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2));
   }
 
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 1112dc4..d14b1cb 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -279,7 +279,9 @@ namespace llvm {
       LocTy Loc;
       return ParseTypeAndBasicBlock(BB, Loc, PFS);
     }
-  
+
+    bool ParseInlineMetadata(Value *&V, PerFunctionState &PFS);
+
     struct ParamInfo {
       LocTy Loc;
       Value *V;
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 797c32e..1165766 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -69,6 +69,7 @@ namespace lltok {
     kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
     kw_x86_stdcallcc, kw_x86_fastcallcc,
     kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
+    kw_msp430_intrcc,
 
     kw_signext,
     kw_zeroext,
diff --git a/lib/Bitcode/Reader/Deserialize.cpp b/lib/Bitcode/Reader/Deserialize.cpp
index 67607ef..b8e720a 100644
--- a/lib/Bitcode/Reader/Deserialize.cpp
+++ b/lib/Bitcode/Reader/Deserialize.cpp
@@ -413,7 +413,7 @@ uintptr_t Deserializer::ReadInternalRefPtr() {
   return GetFinalPtr(E);
 }
 
-void Deserializer::BPEntry::SetPtr(BPNode*& FreeList, void* P) {
+void BPEntry::SetPtr(BPNode*& FreeList, void* P) {
   BPNode* Last = NULL;
   
   for (BPNode* N = Head; N != NULL; N=N->Next) {
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 8e3f8e7..bb61682 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -38,16 +38,19 @@ DebugMod("agg-antidep-debugmod",
                       cl::desc("Debug control for aggressive anti-dep breaker"),
                       cl::init(0), cl::Hidden);
 
-AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) :
-  GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) {
-  // Initialize all registers to be in their own group. Initially we
-  // assign the register to the same-indexed GroupNode.
-  for (unsigned i = 0; i < TargetRegisterInfo::FirstVirtualRegister; ++i)
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+                                               MachineBasicBlock *BB) :
+  NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) {
+
+  const unsigned BBSize = BB->size();
+  for (unsigned i = 0; i < NumTargetRegs; ++i) {
+    // Initialize all registers to be in their own group. Initially we
+    // assign the register to the same-indexed GroupNode.
     GroupNodeIndices[i] = i;
-
-  // Initialize the indices to indicate that no registers are live.
-  std::fill(KillIndices, array_endof(KillIndices), ~0u);
-  std::fill(DefIndices, array_endof(DefIndices), BB->size());
+    // Initialize the indices to indicate that no registers are live.
+    KillIndices[i] = ~0u;
+    DefIndices[i] = BBSize;
+  }
 }
 
 unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
@@ -64,7 +67,7 @@ void AggressiveAntiDepState::GetGroupRegs(
   std::vector<unsigned> &Regs,
   std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
 {
-  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
+  for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
     if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
       Regs.push_back(Reg);
   }
@@ -137,7 +140,7 @@ AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
 
 void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
   assert(State == NULL);
-  State = new AggressiveAntiDepState(BB);
+  State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
 
   bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
   unsigned *KillIndices = State->GetKillIndices();
@@ -220,7 +223,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
   DEBUG(errs() << "\tRegs:");
 
   unsigned *DefIndices = State->GetDefIndices();
-  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
+  for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
     // If Reg is current live, then mark that it can't be renamed as
     // we don't know the extent of its live-range anymore (now that it
     // has been scheduled). If it is not live but was defined in the
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 8154d2d..d385a21 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -44,6 +44,10 @@ namespace llvm {
     } RegisterReference;
 
   private:
+    /// NumTargetRegs - Number of non-virtual target registers
+    /// (i.e. TRI->getNumRegs()).
+    const unsigned NumTargetRegs;
+
     /// GroupNodes - Implements a disjoint-union data structure to
     /// form register groups. A node is represented by an index into
     /// the vector. A node can "point to" itself to indicate that it
@@ -69,7 +73,7 @@ namespace llvm {
     unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
 
   public:
-    AggressiveAntiDepState(MachineBasicBlock *BB);
+    AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
     
     /// GetKillIndices - Return the kill indices.
     unsigned *GetKillIndices() { return KillIndices; }
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 993cdbf..44fd176 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1374,6 +1374,7 @@ void AsmPrinter::processDebugLoc(const MachineInstr *MI,
       unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
                                         CurDLT.Scope);
       printLabel(L);
+      O << '\n';
       DW->BeginScope(MI, L);
       PrevDLT = CurDLT;
     }
@@ -1837,15 +1838,16 @@ void AsmPrinter::EmitComments(const MachineInstr &MI) const {
 
     // Print source line info.
     O.PadToColumn(MAI->getCommentColumn());
-    O << MAI->getCommentString() << " SrcLine ";
-    if (DLT.Scope) {
-      DICompileUnit CU(DLT.Scope);
-      if (!CU.isNull())
-        O << CU.getFilename() << " ";
-    }
-    O << DLT.Line;
+    O << MAI->getCommentString() << ' ';
+    DIScope Scope(DLT.Scope);
+    // Omit the directory, because it's likely to be long and uninteresting.
+    if (!Scope.isNull())
+      O << Scope.getFilename();
+    else
+      O << "<unknown>";
+    O << ':' << DLT.Line;
     if (DLT.Col != 0)
-      O << ":" << DLT.Col;
+      O << ':' << DLT.Col;
     Newline = true;
   }
 
@@ -1857,35 +1859,40 @@ void AsmPrinter::EmitComments(const MachineInstr &MI) const {
 
   // We assume a single instruction only has a spill or reload, not
   // both.
+  const MachineMemOperand *MMO;
   if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
     if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+      MMO = *MI.memoperands_begin();
       if (Newline) O << '\n';
       O.PadToColumn(MAI->getCommentColumn());
-      O << MAI->getCommentString() << " Reload";
+      O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Reload";
       Newline = true;
     }
   }
-  else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) {
+  else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
     if (FrameInfo->isSpillSlotObjectIndex(FI)) {
       if (Newline) O << '\n';
       O.PadToColumn(MAI->getCommentColumn());
-      O << MAI->getCommentString() << " Folded Reload";
+      O << MAI->getCommentString() << ' '
+        << MMO->getSize() << "-byte Folded Reload";
       Newline = true;
     }
   }
   else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
     if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+      MMO = *MI.memoperands_begin();
       if (Newline) O << '\n';
       O.PadToColumn(MAI->getCommentColumn());
-      O << MAI->getCommentString() << " Spill";
+      O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Spill";
       Newline = true;
     }
   }
-  else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) {
+  else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
     if (FrameInfo->isSpillSlotObjectIndex(FI)) {
       if (Newline) O << '\n';
       O.PadToColumn(MAI->getCommentColumn());
-      O << MAI->getCommentString() << " Folded Spill";
+      O << MAI->getCommentString() << ' '
+        << MMO->getSize() << "-byte Folded Spill";
       Newline = true;
     }
   }
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index dc6a70a..cad8b89 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -274,7 +274,7 @@ namespace llvm {
   };
 
   //===--------------------------------------------------------------------===//
-  /// DIEString - A string value DIE.
+  /// DIEString - A string value DIE. This DIE keeps string reference only.
   ///
   class DIEString : public DIEValue {
     const StringRef Str;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c2e1e05..c200a46 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -330,8 +330,8 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
   Die->addValue(Attribute, Form, Value);
 }
 
-/// addString - Add a string attribute data and value.
-///
+/// addString - Add a string attribute data and value. DIEString only
+/// keeps string reference. 
 void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
                            const StringRef String) {
   DIEValue *Value = new DIEString(String);
@@ -393,7 +393,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
     return;
 
   unsigned Line = V->getLineNumber();
-  unsigned FileID = findCompileUnit(V->getCompileUnit()).getID();
+  unsigned FileID = findCompileUnit(V->getCompileUnit())->getID();
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -407,7 +407,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
     return;
 
   unsigned Line = G->getLineNumber();
-  unsigned FileID = findCompileUnit(G->getCompileUnit()).getID();
+  unsigned FileID = findCompileUnit(G->getCompileUnit())->getID();
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -425,7 +425,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
 
 
   unsigned Line = SP->getLineNumber();
-  unsigned FileID = findCompileUnit(SP->getCompileUnit()).getID();
+  unsigned FileID = findCompileUnit(SP->getCompileUnit())->getID();
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -440,7 +440,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
     return;
 
   unsigned Line = Ty->getLineNumber();
-  unsigned FileID = findCompileUnit(CU).getID();
+  unsigned FileID = findCompileUnit(CU)->getID();
   assert(FileID && "Invalid file id");
   addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
   addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -738,13 +738,49 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
   addBlock(Die, Attribute, 0, Block);
 }
 
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
+  if (Context.isNull())
+    ModuleCU->addDie(Die);
+  else if (Context.isType()) {
+    DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode()));
+    ContextDIE->addChild(Die);
+  } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode()))
+    ContextDIE->addChild(Die);
+  else 
+    ModuleCU->addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
+  DIE *TyDIE = ModuleCU->getDIE(Ty.getNode());
+  if (TyDIE)
+    return TyDIE;
+
+  // Create new type.
+  TyDIE = new DIE(dwarf::DW_TAG_base_type);
+  ModuleCU->insertDIE(Ty.getNode(), TyDIE);
+  if (Ty.isBasicType())
+    constructTypeDIE(*TyDIE, DIBasicType(Ty.getNode()));
+  else if (Ty.isCompositeType())
+    constructTypeDIE(*TyDIE, DICompositeType(Ty.getNode()));
+  else {
+    assert(Ty.isDerivedType() && "Unknown kind of DIType");
+    constructTypeDIE(*TyDIE, DIDerivedType(Ty.getNode()));
+  }
+
+  addToContextOwner(TyDIE, Ty.getContext());
+  return TyDIE;
+}
+
 /// addType - Add a new type attribute to the specified entity.
-void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
+void DwarfDebug::addType(DIE *Entity, DIType Ty) {
   if (Ty.isNull())
     return;
 
   // Check for pre-existence.
-  DIEEntry *Entry = DW_Unit->getDIEEntry(Ty.getNode());
+  DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode());
 
   // If it exists then use the existing value.
   if (Entry) {
@@ -754,36 +790,17 @@ void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
 
   // Set up proxy.
   Entry = createDIEEntry();
-  DW_Unit->insertDIEEntry(Ty.getNode(), Entry);
+  ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
 
   // Construct type.
-  DIE *Buffer = new DIE(dwarf::DW_TAG_base_type);
-  if (Ty.isBasicType())
-    constructTypeDIE(DW_Unit, *Buffer, DIBasicType(Ty.getNode()));
-  else if (Ty.isCompositeType())
-    constructTypeDIE(DW_Unit, *Buffer, DICompositeType(Ty.getNode()));
-  else {
-    assert(Ty.isDerivedType() && "Unknown kind of DIType");
-    constructTypeDIE(DW_Unit, *Buffer, DIDerivedType(Ty.getNode()));
-  }
+  DIE *Buffer = getOrCreateTypeDIE(Ty);
 
-  // Add debug information entry to entity and appropriate context.
-  DIE *Die = NULL;
-  DIDescriptor Context = Ty.getContext();
-  if (!Context.isNull())
-    Die = DW_Unit->getDIE(Context.getNode());
-
-  if (Die)
-    Die->addChild(Buffer);
-  else
-    DW_Unit->addDie(Buffer);
   Entry->setEntry(Buffer);
   Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
 }
 
 /// constructTypeDIE - Construct basic type die from DIBasicType.
-void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
-                                  DIBasicType BTy) {
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
   // Get core information.
   StringRef Name = BTy.getName();
   Buffer.setTag(dwarf::DW_TAG_base_type);
@@ -798,8 +815,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
 }
 
 /// constructTypeDIE - Construct derived type die from DIDerivedType.
-void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
-                                  DIDerivedType DTy) {
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
   // Get core information.
   StringRef Name = DTy.getName();
   uint64_t Size = DTy.getSizeInBits() >> 3;
@@ -812,7 +828,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
 
   // Map to main type, void will not have a type.
   DIType FromTy = DTy.getTypeDerivedFrom();
-  addType(DW_Unit, &Buffer, FromTy);
+  addType(&Buffer, FromTy);
 
   // Add name if not anonymous or intermediate type.
   if (!Name.empty())
@@ -828,8 +844,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
 }
 
 /// constructTypeDIE - Construct type DIE from DICompositeType.
-void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
-                                  DICompositeType CTy) {
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
   // Get core information.
   StringRef Name = CTy.getName();
 
@@ -840,7 +855,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
   switch (Tag) {
   case dwarf::DW_TAG_vector_type:
   case dwarf::DW_TAG_array_type:
-    constructArrayTypeDIE(DW_Unit, Buffer, &CTy);
+    constructArrayTypeDIE(Buffer, &CTy);
     break;
   case dwarf::DW_TAG_enumeration_type: {
     DIArray Elements = CTy.getTypeArray();
@@ -850,7 +865,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
       DIE *ElemDie = NULL;
       DIEnumerator Enum(Elements.getElement(i).getNode());
       if (!Enum.isNull()) {
-        ElemDie = constructEnumTypeDIE(DW_Unit, &Enum);
+        ElemDie = constructEnumTypeDIE(&Enum);
         Buffer.addChild(ElemDie);
       }
     }
@@ -860,7 +875,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
     // Add return type.
     DIArray Elements = CTy.getTypeArray();
     DIDescriptor RTy = Elements.getElement(0);
-    addType(DW_Unit, &Buffer, DIType(RTy.getNode()));
+    addType(&Buffer, DIType(RTy.getNode()));
 
     // Add prototype flag.
     addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
@@ -869,7 +884,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
     for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
       DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
       DIDescriptor Ty = Elements.getElement(i);
-      addType(DW_Unit, Arg, DIType(Ty.getNode()));
+      addType(Arg, DIType(Ty.getNode()));
       Buffer.addChild(Arg);
     }
   }
@@ -891,11 +906,9 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
         continue;
       DIE *ElemDie = NULL;
       if (Element.getTag() == dwarf::DW_TAG_subprogram)
-        ElemDie = createSubprogramDIE(DW_Unit,
-                                      DISubprogram(Element.getNode()));
+        ElemDie = createSubprogramDIE(DISubprogram(Element.getNode()));
       else
-        ElemDie = createMemberDIE(DW_Unit,
-                                  DIDerivedType(Element.getNode()));
+        ElemDie = createMemberDIE(DIDerivedType(Element.getNode()));
       Buffer.addChild(ElemDie);
     }
 
@@ -944,33 +957,32 @@ void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
   addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
   if (L)
     addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
-  if (H)
-    addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+  addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
 
   Buffer.addChild(DW_Subrange);
 }
 
 /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+void DwarfDebug::constructArrayTypeDIE(DIE &Buffer,
                                        DICompositeType *CTy) {
   Buffer.setTag(dwarf::DW_TAG_array_type);
   if (CTy->getTag() == dwarf::DW_TAG_vector_type)
     addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
 
   // Emit derived type.
-  addType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom());
+  addType(&Buffer, CTy->getTypeDerivedFrom());
   DIArray Elements = CTy->getTypeArray();
 
   // Get an anonymous type for index type.
-  DIE *IdxTy = DW_Unit->getIndexTyDie();
+  DIE *IdxTy = ModuleCU->getIndexTyDie();
   if (!IdxTy) {
     // Construct an anonymous type for index type.
     IdxTy = new DIE(dwarf::DW_TAG_base_type);
     addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
     addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
             dwarf::DW_ATE_signed);
-    DW_Unit->addDie(IdxTy);
-    DW_Unit->setIndexTyDie(IdxTy);
+    ModuleCU->addDie(IdxTy);
+    ModuleCU->setIndexTyDie(IdxTy);
   }
 
   // Add subranges to array type.
@@ -982,7 +994,7 @@ void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
 }
 
 /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
+DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) {
   DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
   StringRef Name = ETy->getName();
   addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
@@ -992,8 +1004,7 @@ DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
 }
 
 /// createGlobalVariableDIE - Create new DIE using GV.
-DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit,
-                                         const DIGlobalVariable &GV) {
+DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
   // If the global variable was optmized out then no need to create debug info
   // entry.
   if (!GV.getGlobal()) return NULL;
@@ -1014,7 +1025,7 @@ DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit,
     addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
               LinkageName);
   }
-  addType(DW_Unit, GVDie, GV.getType());
+  addType(GVDie, GV.getType());
   if (!GV.isLocalToUnit())
     addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
   addSourceLine(GVDie, &GV);
@@ -1030,13 +1041,13 @@ DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit,
 }
 
 /// createMemberDIE - Create new member DIE.
-DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
+DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
   DIE *MemberDie = new DIE(DT.getTag());
   StringRef Name = DT.getName();
   if (!Name.empty())
     addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
   
-  addType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
+  addType(MemberDie, DT.getTypeDerivedFrom());
 
   addSourceLine(MemberDie, &DT);
 
@@ -1073,21 +1084,27 @@ DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
   addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
 
   if (DT.isProtected())
-    addUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
             dwarf::DW_ACCESS_protected);
   else if (DT.isPrivate())
-    addUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
             dwarf::DW_ACCESS_private);
-
+  else if (DT.getTag() == dwarf::DW_TAG_inheritance)
+    addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+            dwarf::DW_ACCESS_public);
+  if (DT.isVirtual())
+    addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+            dwarf::DW_VIRTUALITY_virtual);
   return MemberDie;
 }
 
 /// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit,
-                                     const DISubprogram &SP,
-                                     bool IsConstructor,
-                                     bool IsInlined) {
-  DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram);
+DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
+  DIE *SPDie = ModuleCU->getDIE(SP.getNode());
+  if (SPDie)
+    return SPDie;
+
+  SPDie = new DIE(dwarf::DW_TAG_subprogram);
   addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
 
   StringRef LinkageName = SP.getLinkageName();
@@ -1103,9 +1120,6 @@ DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit,
   }
   addSourceLine(SPDie, &SP);
 
-  DICompositeType SPTy = SP.getType();
-  DIArray Args = SPTy.getTypeArray();
-
   // Add prototyped tag, if C or ObjC.
   unsigned Lang = SP.getCompileUnit().getLanguage();
   if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
@@ -1113,98 +1127,56 @@ DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit,
     addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
 
   // Add Return Type.
+  DICompositeType SPTy = SP.getType();
+  DIArray Args = SPTy.getTypeArray();
   unsigned SPTag = SPTy.getTag();
-  if (!IsConstructor) {
-    if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
-      addType(DW_Unit, SPDie, SPTy);
-    else
-      addType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode()));
+
+  if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
+    addType(SPDie, SPTy);
+  else
+    addType(SPDie, DIType(Args.getElement(0).getNode()));
+
+  unsigned VK = SP.getVirtuality();
+  if (VK) {
+    addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+    DIEBlock *Block = new DIEBlock();
+    addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+    addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
+    addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+    ContainingTypeMap.insert(std::make_pair(SPDie, WeakVH(SP.getContainingType().getNode())));
   }
 
-  if (!SP.isDefinition()) {
+  if (MakeDecl || !SP.isDefinition()) {
     addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
 
     // Add arguments. Do not add arguments for subprogram definition. They will
-    // be handled through RecordVariable.
+    // be handled while processing variables.
+    DICompositeType SPTy = SP.getType();
+    DIArray Args = SPTy.getTypeArray();
+    unsigned SPTag = SPTy.getTag();
+
     if (SPTag == dwarf::DW_TAG_subroutine_type)
       for (unsigned i = 1, N =  Args.getNumElements(); i < N; ++i) {
         DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
-        addType(DW_Unit, Arg, DIType(Args.getElement(i).getNode()));
+        addType(Arg, DIType(Args.getElement(i).getNode()));
         addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
         SPDie->addChild(Arg);
       }
   }
 
   // DW_TAG_inlined_subroutine may refer to this DIE.
-  DW_Unit->insertDIE(SP.getNode(), SPDie);
+  ModuleCU->insertDIE(SP.getNode(), SPDie);
   return SPDie;
 }
 
 /// findCompileUnit - Get the compile unit for the given descriptor.
 ///
-CompileUnit &DwarfDebug::findCompileUnit(DICompileUnit Unit) const {
+CompileUnit *DwarfDebug::findCompileUnit(DICompileUnit Unit) {
   DenseMap<Value *, CompileUnit *>::const_iterator I =
     CompileUnitMap.find(Unit.getNode());
-  assert(I != CompileUnitMap.end() && "Missing compile unit.");
-  return *I->second;
-}
-
-/// createDbgScopeVariable - Create a new scope variable.
-///
-DIE *DwarfDebug::createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
-  // Get the descriptor.
-  const DIVariable &VD = DV->getVariable();
-  StringRef Name = VD.getName();
-  if (Name.empty())
-    return NULL;
-
-  // Translate tag to proper Dwarf tag.  The result variable is dropped for
-  // now.
-  unsigned Tag;
-  switch (VD.getTag()) {
-  case dwarf::DW_TAG_return_variable:
-    return NULL;
-  case dwarf::DW_TAG_arg_variable:
-    Tag = dwarf::DW_TAG_formal_parameter;
-    break;
-  case dwarf::DW_TAG_auto_variable:    // fall thru
-  default:
-    Tag = dwarf::DW_TAG_variable;
-    break;
-  }
-
-  // Define variable debug information entry.
-  DIE *VariableDie = new DIE(Tag);
-  addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
-  // Add source line info if available.
-  addSourceLine(VariableDie, &VD);
-
-  // Add variable type.
-  // FIXME: isBlockByrefVariable should be reformulated in terms of complex
-  // addresses instead.
-  if (VD.isBlockByrefVariable())
-    addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name));
-  else
-    addType(Unit, VariableDie, VD.getType());
-
-  // Add variable address.
-  // Variables for abstract instances of inlined functions don't get a
-  // location.
-  MachineLocation Location;
-  unsigned FrameReg;
-  int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
-  Location.set(FrameReg, Offset);
-
-
-  if (VD.hasComplexAddress())
-    addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
-  else if (VD.isBlockByrefVariable())
-    addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
-  else
-    addAddress(VariableDie, dwarf::DW_AT_location, Location);
-
-  return VariableDie;
+  if (I == CompileUnitMap.end())
+    return constructCompileUnit(Unit.getNode());
+  return I->second;
 }
 
 /// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction.
@@ -1295,6 +1267,28 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
 
  DIE *SPDie = ModuleCU->getDIE(SPNode);
  assert (SPDie && "Unable to find subprogram DIE!");
+ DISubprogram SP(SPNode);
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit()) {
+   addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+  // Add arguments. 
+   DICompositeType SPTy = SP.getType();
+   DIArray Args = SPTy.getTypeArray();
+   unsigned SPTag = SPTy.getTag();
+   if (SPTag == dwarf::DW_TAG_subroutine_type)
+     for (unsigned i = 1, N =  Args.getNumElements(); i < N; ++i) {
+       DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+       addType(Arg, DIType(Args.getElement(i).getNode()));
+       addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
+       SPDie->addChild(Arg);
+     }
+   DIE *SPDeclDie = SPDie;
+   SPDie = new DIE(dwarf::DW_TAG_subprogram);
+   addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, 
+               SPDeclDie);
+   
+   ModuleCU->addDie(SPDie);
+ }
+   
  addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
           DWLabel("func_begin", SubprogramCount));
  addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
@@ -1305,19 +1299,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
  if (!DISubprogram(SPNode).isLocalToUnit())
    addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
 
- // If there are global variables at this scope then add their dies.
- for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
-        SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
-   MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
-   if (!N) continue;
-   DIGlobalVariable GV(N);
-   if (GV.getContext().getNode() == SPNode) {
-     DIE *ScopedGVDie = createGlobalVariableDIE(ModuleCU, GV);
-     if (ScopedGVDie)
-       SPDie->addChild(ScopedGVDie);
-   }
- }
- 
  return SPDie;
 }
 
@@ -1401,8 +1382,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
 
 
 /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV,
-                                      DbgScope *Scope, CompileUnit *Unit) {
+DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
   // Get the descriptor.
   const DIVariable &VD = DV->getVariable();
   StringRef Name = VD.getName();
@@ -1451,9 +1431,9 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV,
     // FIXME: isBlockByrefVariable should be reformulated in terms of complex
     // addresses instead.
     if (VD.isBlockByrefVariable())
-      addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name));
+      addType(VariableDie, getBlockByrefType(VD.getType(), Name));
     else
-      addType(Unit, VariableDie, VD.getType());
+      addType(VariableDie, VD.getType());
   }
 
   // Add variable address.
@@ -1522,7 +1502,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
   // Add variables to scope.
   SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
   for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
-    DIE *VariableDIE = constructVariableDIE(Variables[i], Scope, ModuleCU);
+    DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
     if (VariableDIE)
       ScopeDIE->addChild(VariableDIE);
   }
@@ -1579,7 +1559,7 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName)
   return SrcId;
 }
 
-void DwarfDebug::constructCompileUnit(MDNode *N) {
+CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) {
   DICompileUnit DIUnit(N);
   StringRef FN = DIUnit.getFilename();
   StringRef Dir = DIUnit.getDirectory();
@@ -1618,6 +1598,7 @@ void DwarfDebug::constructCompileUnit(MDNode *N) {
 
   CompileUnitMap[DIUnit.getNode()] = Unit;
   CompileUnits.push_back(Unit);
+  return Unit;
 }
 
 void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
@@ -1631,14 +1612,16 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
   if (ModuleCU->getDIE(DI_GV.getNode()))
     return;
 
-  DIE *VariableDie = createGlobalVariableDIE(ModuleCU, DI_GV);
+  DIE *VariableDie = createGlobalVariableDIE(DI_GV);
+  if (!VariableDie)
+    return;
 
   // Add to map.
   ModuleCU->insertDIE(N, VariableDie);
 
   // Add to context owner.
-  ModuleCU->getCUDie()->addChild(VariableDie);
-
+  addToContextOwner(VariableDie, DI_GV.getContext());
+  
   // Expose as global. FIXME - need to check external flag.
   ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
 
@@ -1663,13 +1646,15 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) {
     // class type.
     return;
 
-  DIE *SubprogramDie = createSubprogramDIE(ModuleCU, SP);
+  DIE *SubprogramDie = createSubprogramDIE(SP);
 
   // Add to map.
   ModuleCU->insertDIE(N, SubprogramDie);
 
   // Add to context owner.
-  ModuleCU->getCUDie()->addChild(SubprogramDie);
+  if (SP.getContext().getNode() == SP.getCompileUnit().getNode())
+    if (TopLevelDIEs.insert(SubprogramDie))
+      TopLevelDIEsVector.push_back(SubprogramDie);
 
   // Expose as global.
   ModuleCU->addGlobal(SP.getName(), SubprogramDie);
@@ -1709,21 +1694,16 @@ void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
   if (!ModuleCU)
     ModuleCU = CompileUnits[0];
 
-  // Create DIEs for each of the externally visible global variables.
-  for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
-         E = DbgFinder.global_variable_end(); I != E; ++I) {
-    DIGlobalVariable GV(*I);
-    if (GV.getContext().getNode() != GV.getCompileUnit().getNode())
-      ScopedGVs.push_back(*I);
-    else
-      constructGlobalVariableDIE(*I);
-  }
-
   // Create DIEs for each subprogram.
   for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
          E = DbgFinder.subprogram_end(); I != E; ++I)
     constructSubprogramDIE(*I);
 
+  // Create DIEs for each global variable.
+  for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+         E = DbgFinder.global_variable_end(); I != E; ++I)
+    constructGlobalVariableDIE(*I);
+
   MMI = mmi;
   shouldEmit = true;
   MMI->setDebugInfoAvailability(true);
@@ -1770,6 +1750,22 @@ void DwarfDebug::endModule() {
     addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
   }
 
+  // Insert top level DIEs.
+  for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(),
+         TE = TopLevelDIEsVector.end(); TI != TE; ++TI)
+    ModuleCU->getCUDie()->addChild(*TI);
+
+  for (DenseMap<DIE *, WeakVH>::iterator CI = ContainingTypeMap.begin(),
+         CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+    DIE *SPDie = CI->first;
+    MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+    if (!N) continue;
+    DIE *NDie = ModuleCU->getDIE(N);
+    if (!NDie) continue;
+    addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+    addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+  }
+
   // Standard sections final addresses.
   Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
   EmitLabel("text_end", 0);
@@ -1898,6 +1894,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
 
   unsigned Label = MMI->NextLabelID();
   Asm->printLabel(Label);
+  O << '\n';
 
   SmallVector<DbgScope *, 2> &SD = I->second;
   for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
@@ -2092,17 +2089,15 @@ void DwarfDebug::endFunction(MachineFunction *MF) {
                                                MMI->getFrameMoves()));
 
   // Clear debug info
-  if (CurrentFnDbgScope) {
-    CurrentFnDbgScope = NULL;
-    DbgScopeMap.clear();
-    DbgScopeBeginMap.clear();
-    DbgScopeEndMap.clear();
-    ConcreteScopes.clear();
-    AbstractScopesList.clear();
-  }
+  CurrentFnDbgScope = NULL;
+  DbgScopeMap.clear();
+  DbgScopeBeginMap.clear();
+  DbgScopeEndMap.clear();
+  ConcreteScopes.clear();
+  AbstractScopesList.clear();
 
   Lines.clear();
-
+  
   if (TimePassesIsEnabled)
     DebugTimer->stopTimer();
 }
@@ -2337,13 +2332,16 @@ void DwarfDebug::emitDIE(DIE *Die) {
   }
 }
 
-/// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section.
+/// emitDebugInfo - Emit the debug info section.
 ///
-void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) {
-  DIE *Die = Unit->getCUDie();
+void DwarfDebug::emitDebugInfo() {
+  // Start debug info section.
+  Asm->OutStreamer.SwitchSection(
+                            Asm->getObjFileLowering().getDwarfInfoSection());
+  DIE *Die = ModuleCU->getCUDie();
 
   // Emit the compile units header.
-  EmitLabel("info_begin", Unit->getID());
+  EmitLabel("info_begin", ModuleCU->getID());
 
   // Emit size of content not including length itself
   unsigned ContentSize = Die->getSize() +
@@ -2364,17 +2362,10 @@ void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) {
   Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
   Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
   Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
-  EmitLabel("info_end", Unit->getID());
+  EmitLabel("info_end", ModuleCU->getID());
 
   Asm->EOL();
-}
-
-void DwarfDebug::emitDebugInfo() {
-  // Start debug info section.
-  Asm->OutStreamer.SwitchSection(
-                            Asm->getObjFileLowering().getDwarfInfoSection());
 
-  emitDebugInfoPerCU(ModuleCU);
 }
 
 /// emitAbbreviations - Emit the abbreviation section.
@@ -2534,9 +2525,9 @@ void DwarfDebug::emitDebugLines() {
         std::pair<unsigned, unsigned> SourceID =
           getSourceDirectoryAndFileIds(LineInfo.getSourceID());
         O << '\t' << MAI->getCommentString() << ' '
-          << getSourceDirectoryName(SourceID.first) << ' '
+          << getSourceDirectoryName(SourceID.first) << '/'
           << getSourceFileName(SourceID.second)
-          <<" :" << utostr_32(LineInfo.getLine()) << '\n';
+          << ':' << utostr_32(LineInfo.getLine()) << '\n';
       }
 
       // Define the line address.
@@ -2672,24 +2663,30 @@ DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
   Asm->EOL();
 }
 
-void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) {
-  EmitDifference("pubnames_end", Unit->getID(),
-                 "pubnames_begin", Unit->getID(), true);
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubNames() {
+  // Start the dwarf pubnames section.
+  Asm->OutStreamer.SwitchSection(
+                          Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+  EmitDifference("pubnames_end", ModuleCU->getID(),
+                 "pubnames_begin", ModuleCU->getID(), true);
   Asm->EOL("Length of Public Names Info");
 
-  EmitLabel("pubnames_begin", Unit->getID());
+  EmitLabel("pubnames_begin", ModuleCU->getID());
 
   Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version");
 
   EmitSectionOffset("info_begin", "section_info",
-                    Unit->getID(), 0, true, false);
+                    ModuleCU->getID(), 0, true, false);
   Asm->EOL("Offset of Compilation Unit Info");
 
-  EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(),
+  EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(),
                  true);
   Asm->EOL("Compilation Unit Length");
 
-  const StringMap<DIE*> &Globals = Unit->getGlobals();
+  const StringMap<DIE*> &Globals = ModuleCU->getGlobals();
   for (StringMap<DIE*>::const_iterator
          GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
     const char *Name = GI->getKeyData();
@@ -2700,21 +2697,11 @@ void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) {
   }
 
   Asm->EmitInt32(0); Asm->EOL("End Mark");
-  EmitLabel("pubnames_end", Unit->getID());
+  EmitLabel("pubnames_end", ModuleCU->getID());
 
   Asm->EOL();
 }
 
-/// emitDebugPubNames - Emit visible names into a debug pubnames section.
-///
-void DwarfDebug::emitDebugPubNames() {
-  // Start the dwarf pubnames section.
-  Asm->OutStreamer.SwitchSection(
-                          Asm->getObjFileLowering().getDwarfPubNamesSection());
-
-  emitDebugPubNamesPerCU(ModuleCU);
-}
-
 void DwarfDebug::emitDebugPubTypes() {
   // Start the dwarf pubnames section.
   Asm->OutStreamer.SwitchSection(
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 679d9b9..12ad322 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -154,12 +154,14 @@ class DwarfDebug : public Dwarf {
   /// (at the end of the module) as DW_AT_inline.
   SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
 
+  DenseMap<DIE *, WeakVH> ContainingTypeMap;
+
   /// AbstractSubprogramDIEs - Collection of abstruct subprogram DIEs.
   SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs;
 
-  /// ScopedGVs - Tracks global variables that are not at file scope.
-  /// For example void f() { static int b = 42; }
-  SmallVector<WeakVH, 4> ScopedGVs;
+  /// TopLevelDIEs - Collection of top level DIEs. 
+  SmallPtrSet<DIE *, 4> TopLevelDIEs;
+  SmallVector<DIE *, 4> TopLevelDIEsVector;
 
   typedef SmallVector<DbgScope *, 2> ScopeVector;
   typedef DenseMap<const MachineInstr *, ScopeVector>
@@ -307,53 +309,52 @@ class DwarfDebug : public Dwarf {
   void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
                             const MachineLocation &Location);
 
+  /// addToContextOwner - Add Die into the list of its context owner's children.
+  void addToContextOwner(DIE *Die, DIDescriptor Context);
+
   /// addType - Add a new type attribute to the specified entity.
-  void addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+  void addType(DIE *Entity, DIType Ty);
+
+  /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+  /// given DIType.
+  DIE *getOrCreateTypeDIE(DIType Ty);
 
   void addPubTypes(DISubprogram SP);
 
   /// constructTypeDIE - Construct basic type die from DIBasicType.
-  void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+  void constructTypeDIE(DIE &Buffer,
                         DIBasicType BTy);
 
   /// constructTypeDIE - Construct derived type die from DIDerivedType.
-  void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+  void constructTypeDIE(DIE &Buffer,
                         DIDerivedType DTy);
 
   /// constructTypeDIE - Construct type DIE from DICompositeType.
-  void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+  void constructTypeDIE(DIE &Buffer,
                         DICompositeType CTy);
 
   /// constructSubrangeDIE - Construct subrange DIE from DISubrange.
   void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
 
   /// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-  void constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer, 
+  void constructArrayTypeDIE(DIE &Buffer, 
                              DICompositeType *CTy);
 
   /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-  DIE *constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
+  DIE *constructEnumTypeDIE(DIEnumerator *ETy);
 
   /// createGlobalVariableDIE - Create new DIE using GV.
-  DIE *createGlobalVariableDIE(CompileUnit *DW_Unit,
-                               const DIGlobalVariable &GV);
+  DIE *createGlobalVariableDIE(const DIGlobalVariable &GV);
 
   /// createMemberDIE - Create new member DIE.
-  DIE *createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
+  DIE *createMemberDIE(const DIDerivedType &DT);
 
   /// createSubprogramDIE - Create new DIE using SP.
-  DIE *createSubprogramDIE(CompileUnit *DW_Unit,
-                           const DISubprogram &SP,
-                           bool IsConstructor = false,
-                           bool IsInlined = false);
+  DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
 
   /// findCompileUnit - Get the compile unit for the given descriptor. 
   ///
-  CompileUnit &findCompileUnit(DICompileUnit Unit) const;
-
-  /// createDbgScopeVariable - Create a new scope variable.
-  ///
-  DIE *createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
+  CompileUnit *findCompileUnit(DICompileUnit Unit);
 
   /// getUpdatedDbgScope - Find or create DbgScope assicated with 
   /// the instruction. Initialize scope and update scope hierarchy.
@@ -384,7 +385,7 @@ class DwarfDebug : public Dwarf {
   DIE *constructInlinedScopeDIE(DbgScope *Scope);
 
   /// constructVariableDIE - Construct a DIE for the given DbgVariable.
-  DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
+  DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
 
   /// constructScopeDIE - Construct a DIE for this scope.
   DIE *constructScopeDIE(DbgScope *Scope);
@@ -405,10 +406,8 @@ class DwarfDebug : public Dwarf {
   ///
   void computeSizeAndOffsets();
 
-  /// EmitDebugInfo / emitDebugInfoPerCU - Emit the debug info section.
+  /// EmitDebugInfo - Emit the debug info section.
   ///
-  void emitDebugInfoPerCU(CompileUnit *Unit);
-
   void emitDebugInfo();
 
   /// emitAbbreviations - Emit the abbreviation section.
@@ -432,8 +431,6 @@ class DwarfDebug : public Dwarf {
   /// section.
   void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
 
-  void emitDebugPubNamesPerCU(CompileUnit *Unit);
-
   /// emitDebugPubNames - Emit visible names into a debug pubnames section.
   ///
   void emitDebugPubNames();
@@ -488,7 +485,7 @@ class DwarfDebug : public Dwarf {
   /// as well.
   unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
 
-  void constructCompileUnit(MDNode *N);
+  CompileUnit *constructCompileUnit(MDNode *N);
 
   void constructGlobalVariableDIE(MDNode *N);
 
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 1c8b8f4..3fd077f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -292,14 +292,13 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
       Asm->EmitULEB128Bytes(is4Byte ? 4 : 8);
       Asm->EOL("Augmentation size");
 
+      // We force 32-bits here because we've encoded our LSDA in the CIE with
+      // `dwarf::DW_EH_PE_sdata4'. And the CIE and FDE should agree.
       if (EHFrameInfo.hasLandingPads)
-        EmitReference("exception", EHFrameInfo.Number, true, false);
-      else {
-        if (is4Byte)
-          Asm->EmitInt32((int)0);
-        else
-          Asm->EmitInt64((int)0);
-      }
+        EmitReference("exception", EHFrameInfo.Number, true, true);
+      else
+        Asm->EmitInt32((int)0);
+
       Asm->EOL("Language Specific Data Area");
     } else {
       Asm->EmitULEB128Bytes(0);
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index aff1665..aa01c5b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -119,7 +119,6 @@ class DwarfException : public Dwarf {
     static inline unsigned getTombstoneKey() { return -2U; }
     static unsigned getHashValue(const unsigned &Key) { return Key; }
     static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
-    static bool isPod() { return true; }
   };
 
   /// PadRange - Structure holding a try-range and the associated landing pad.
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 8a62eb2..3887e6d 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -427,7 +427,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
 static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
                     const TargetInstrInfo *TII) {
   MachineFunction *MF = CurMBB->getParent();
-  MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
+  MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
   MachineBasicBlock *TBB = 0, *FBB = 0;
   SmallVector<MachineOperand, 4> Cond;
   if (I != MF->end() &&
@@ -805,7 +805,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
   // a compile-time infinite loop repeatedly doing and undoing the same
   // transformations.)
 
-  for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+  for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
        I != E; ++I) {
     if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
       SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
@@ -833,7 +833,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
               continue;
             // This is the QBB case described above
             if (!FBB)
-              FBB = next(MachineFunction::iterator(PBB));
+              FBB = llvm::next(MachineFunction::iterator(PBB));
           }
           // Failing case:  the only way IBB can be reached from PBB is via
           // exception handling.  Happens for landing pads.  Would be nice
@@ -1140,7 +1140,7 @@ ReoptimizeBlock:
       // falls through into MBB and we can't understand the prior block's branch
       // condition.
       if (MBB->empty()) {
-        bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB);
+        bool PredHasNoFallThrough = !PrevBB.canFallThrough();
         if (PredHasNoFallThrough || !PriorUnAnalyzable ||
             !PrevBB.isSuccessor(MBB)) {
           // If the prior block falls through into us, turn it into an
@@ -1239,7 +1239,7 @@ ReoptimizeBlock:
           // B elsewhere
           // next:
           if (CurFallsThru) {
-            MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+            MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
             CurCond.clear();
             TII->InsertBranch(*MBB, NextBB, 0, CurCond);
           }
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 6f86614..7a969f0 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_llvm_library(LLVMCodeGen
   AggressiveAntiDepBreaker.cpp
   BranchFolding.cpp
+  CalcSpillWeights.cpp
   CodePlacementOpt.cpp
   CriticalAntiDepBreaker.cpp
   DeadMachineInstructionElim.cpp
@@ -35,7 +36,9 @@ add_llvm_library(LLVMCodeGen
   MachinePassRegistry.cpp
   MachineRegisterInfo.cpp
   MachineSink.cpp
+  MachineSSAUpdater.cpp
   MachineVerifier.cpp
+  MaxStackAlignment.cpp
   ObjectCodeEmitter.cpp
   OcamlGC.cpp
   PHIElimination.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 0000000..dcffb8a2
--- /dev/null
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,154 @@
+//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+static RegisterPass<CalculateSpillWeights> X("calcspillweights",
+                                             "Calculate spill weights");
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+  au.addRequired<LiveIntervals>();
+  au.addRequired<MachineLoopInfo>();
+  au.setPreservesAll();
+  MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+
+  DEBUG(errs() << "********** Compute Spill Weights **********\n"
+               << "********** Function: "
+               << fn.getFunction()->getName() << '\n');
+
+  LiveIntervals *lis = &getAnalysis<LiveIntervals>();
+  MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>();
+  const TargetInstrInfo *tii = fn.getTarget().getInstrInfo();
+  MachineRegisterInfo *mri = &fn.getRegInfo();
+
+  SmallSet<unsigned, 4> processed;
+  for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end();
+       mbbi != mbbe; ++mbbi) {
+    MachineBasicBlock* mbb = mbbi;
+    SlotIndex mbbEnd = lis->getMBBEndIdx(mbb);
+    MachineLoop* loop = loopInfo->getLoopFor(mbb);
+    unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
+    bool isExiting = loop ? loop->isLoopExiting(mbb) : false;
+
+    for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end();
+         mii != mie; ++mii) {
+      const MachineInstr *mi = mii;
+      if (tii->isIdentityCopy(*mi))
+        continue;
+
+      if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+        continue;
+
+      for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+        const MachineOperand &mopi = mi->getOperand(i);
+        if (!mopi.isReg() || mopi.getReg() == 0)
+          continue;
+        unsigned reg = mopi.getReg();
+        if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
+          continue;
+        // Multiple uses of reg by the same instruction. It should not
+        // contribute to spill weight again.
+        if (!processed.insert(reg))
+          continue;
+
+        bool hasDef = mopi.isDef();
+        bool hasUse = !hasDef;
+        for (unsigned j = i+1; j != e; ++j) {
+          const MachineOperand &mopj = mi->getOperand(j);
+          if (!mopj.isReg() || mopj.getReg() != reg)
+            continue;
+          hasDef |= mopj.isDef();
+          hasUse |= mopj.isUse();
+          if (hasDef && hasUse)
+            break;
+        }
+
+        LiveInterval &regInt = lis->getInterval(reg);
+        float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth);
+        if (hasDef && isExiting) {
+          // Looks like this is a loop count variable update.
+          SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex();
+          const LiveRange *dlr =
+            lis->getInterval(reg).getLiveRangeContaining(defIdx);
+          if (dlr->end > mbbEnd)
+            weight *= 3.0F;
+        }
+        regInt.weight += weight;
+      }
+      processed.clear();
+    }
+  }
+
+  for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) {
+    LiveInterval &li = *I->second;
+    if (TargetRegisterInfo::isVirtualRegister(li.reg)) {
+      // If the live interval length is essentially zero, i.e. in every live
+      // range the use follows def immediately, it doesn't make sense to spill
+      // it and hope it will be easier to allocate for this li.
+      if (isZeroLengthInterval(&li)) {
+        li.weight = HUGE_VALF;
+        continue;
+      }
+
+      bool isLoad = false;
+      SmallVector<LiveInterval*, 4> spillIs;
+      if (lis->isReMaterializable(li, spillIs, isLoad)) {
+        // If all of the definitions of the interval are re-materializable,
+        // it is a preferred candidate for spilling. If non of the defs are
+        // loads, then it's potentially very cheap to re-materialize.
+        // FIXME: this gets much more complicated once we support non-trivial
+        // re-materialization.
+        if (isLoad)
+          li.weight *= 0.9F;
+        else
+          li.weight *= 0.5F;
+      }
+
+      // Slightly prefer live interval that has been assigned a preferred reg.
+      std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg);
+      if (Hint.first || Hint.second)
+        li.weight *= 1.01F;
+
+      // Divide the weight of the interval by its size.  This encourages
+      // spilling of intervals that are large and have few uses, and
+      // discourages spilling of small intervals with many uses.
+      li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM;
+    }
+  }
+  
+  return false;
+}
+
+/// Returns true if the given live interval is zero length.
+bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const {
+  for (LiveInterval::Ranges::const_iterator
+       i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
+    if (i->end.getPrevIndex() > i->start)
+      return false;
+  return true;
+}
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index e9844d8..ff71f6b 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -182,7 +182,7 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
       // Move it and all the blocks that can reach it via fallthrough edges
       // exclusively, to keep existing fallthrough edges intact.
       MachineFunction::iterator Begin = Pred;
-      MachineFunction::iterator End = next(Begin);
+      MachineFunction::iterator End = llvm::next(Begin);
       while (Begin != MF.begin()) {
         MachineFunction::iterator Prior = prior(Begin);
         if (Prior == MF.begin())
@@ -255,7 +255,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
   // to the top of the loop to avoid loosing that fallthrough. Otherwise append
   // them to the bottom, even if it previously had a fallthrough, on the theory
   // that it's worth an extra branch to keep the loop contiguous.
-  MachineFunction::iterator InsertPt = next(MachineFunction::iterator(BotMBB));
+  MachineFunction::iterator InsertPt =
+    llvm::next(MachineFunction::iterator(BotMBB));
   bool InsertAtTop = false;
   if (TopMBB != MF.begin() &&
       !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
@@ -268,7 +269,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
   // with the loop header.
   SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
   for (MachineFunction::iterator I = TopMBB,
-       E = next(MachineFunction::iterator(BotMBB)); I != E; ++I)
+       E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
     ContiguousBlocks.insert(I);
 
   // Find non-contigous blocks and fix them.
@@ -301,7 +302,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
       // Process this block and all loop blocks contiguous with it, to keep
       // them in their relative order.
       MachineFunction::iterator Begin = BB;
-      MachineFunction::iterator End = next(MachineFunction::iterator(BB));
+      MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
       for (; End != MF.end(); ++End) {
         if (!L->contains(End)) break;
         if (!HasAnalyzableTerminator(End)) break;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 1b39fec..3c7961c2 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -43,8 +43,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
             static_cast<const TargetRegisterClass *>(0));
 
   // Initialize the indices to indicate that no registers are live.
-  std::fill(KillIndices, array_endof(KillIndices), ~0u);
-  std::fill(DefIndices, array_endof(DefIndices), BB->size());
+  const unsigned BBSize = BB->size();
+  for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+    KillIndices[i] = ~0u;
+    DefIndices[i] = BBSize;
+  }
 
   // Clear "do not change" set.
   KeepRegs.clear();
@@ -122,7 +125,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
   // may have been rescheduled and its lifetime may overlap with registers
   // in ways not reflected in our current liveness state. For each such
   // register, adjust the liveness state to be conservatively correct.
-  for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
+  for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
     if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
       assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
       // Mark this register to be non-renamable.
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 242cba5..297dd31 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -74,6 +74,9 @@ EnableFastISelOption("fast-isel", cl::Hidden,
 static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden,
     cl::desc("Split GEPs and run no-load GVN"));
 
+static cl::opt<bool> PreAllocTailDup("pre-regalloc-taildup", cl::Hidden,
+    cl::desc("Pre-register allocation tail duplication"));
+
 LLVMTargetMachine::LLVMTargetMachine(const Target &T,
                                      const std::string &TargetTriple)
   : TargetMachine(T) {
@@ -302,6 +305,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
                    /* allowDoubleDefs= */ true);
   }
 
+  // Pre-ra tail duplication.
+  if (OptLevel != CodeGenOpt::None &&
+      !DisableTailDuplicate && PreAllocTailDup) {
+    PM.add(createTailDuplicatePass(true));
+    printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+  }
+
   // Run pre-ra passes.
   if (addPreRegAlloc(PM, OptLevel))
     printAndVerify(PM, "After PreRegAlloc passes",
@@ -348,7 +358,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
 
   // Tail duplication.
   if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
-    PM.add(createTailDuplicatePass());
+    PM.add(createTailDuplicatePass(false));
     printAndVerify(PM, "After TailDuplicate");
   }
 
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 8d632cb..cc286aa 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -847,7 +847,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
       if (vni->isUnused()) {
         OS << "x";
       } else {
-        if (!vni->isDefAccurate())
+        if (!vni->isDefAccurate() && !vni->isPHIDef())
           OS << "?";
         else
           OS << vni->def;
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 24adf36..8806439f 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -149,44 +149,69 @@ void LiveIntervals::dumpInstrs() const {
   printInstrs(errs());
 }
 
-/// conflictsWithPhysRegDef - Returns true if the specified register
-/// is defined during the duration of the specified interval.
-bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li,
-                                            VirtRegMap &vrm, unsigned reg) {
-  for (LiveInterval::Ranges::const_iterator
-         I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
-    for (SlotIndex index = I->start.getBaseIndex(),
-           end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
-           index != end;
-           index = index.getNextIndex()) {
-      // skip deleted instructions
-      while (index != end && !getInstructionFromIndex(index))
-        index = index.getNextIndex();
-      if (index == end) break;
+bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
+                                         VirtRegMap &vrm, unsigned reg) {
+  // We don't handle fancy stuff crossing basic block boundaries
+  if (li.ranges.size() != 1)
+    return true;
+  const LiveRange &range = li.ranges.front();
+  SlotIndex idx = range.start.getBaseIndex();
+  SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
+
+  // Skip deleted instructions
+  MachineInstr *firstMI = getInstructionFromIndex(idx);
+  while (!firstMI && idx != end) {
+    idx = idx.getNextIndex();
+    firstMI = getInstructionFromIndex(idx);
+  }
+  if (!firstMI)
+    return false;
 
-      MachineInstr *MI = getInstructionFromIndex(index);
-      unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
-      if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
-        if (SrcReg == li.reg || DstReg == li.reg)
-          continue;
-      for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
-        MachineOperand& mop = MI->getOperand(i);
-        if (!mop.isReg())
-          continue;
-        unsigned PhysReg = mop.getReg();
-        if (PhysReg == 0 || PhysReg == li.reg)
+  // Find last instruction in range
+  SlotIndex lastIdx = end.getPrevIndex();
+  MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
+  while (!lastMI && lastIdx != idx) {
+    lastIdx = lastIdx.getPrevIndex();
+    lastMI = getInstructionFromIndex(lastIdx);
+  }
+  if (!lastMI)
+    return false;
+
+  // Range cannot cross basic block boundaries or terminators
+  MachineBasicBlock *MBB = firstMI->getParent();
+  if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
+    return true;
+
+  MachineBasicBlock::const_iterator E = lastMI;
+  ++E;
+  for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
+    const MachineInstr &MI = *I;
+
+    // Allow copies to and from li.reg
+    unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+    if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+      if (SrcReg == li.reg || DstReg == li.reg)
+        continue;
+
+    // Check for operands using reg
+    for (unsigned i = 0, e = MI.getNumOperands(); i != e;  ++i) {
+      const MachineOperand& mop = MI.getOperand(i);
+      if (!mop.isReg())
+        continue;
+      unsigned PhysReg = mop.getReg();
+      if (PhysReg == 0 || PhysReg == li.reg)
+        continue;
+      if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+        if (!vrm.hasPhys(PhysReg))
           continue;
-        if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
-          if (!vrm.hasPhys(PhysReg))
-            continue;
-          PhysReg = vrm.getPhys(PhysReg);
-        }
-        if (PhysReg && tri_->regsOverlap(PhysReg, reg))
-          return true;
+        PhysReg = vrm.getPhys(PhysReg);
       }
+      if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+        return true;
     }
   }
 
+  // No conflicts found.
   return false;
 }
 
@@ -201,15 +226,9 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
            end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
            index != end;
            index = index.getNextIndex()) {
-      // Skip deleted instructions.
-      MachineInstr *MI = 0;
-      while (index != end) {
-        MI = getInstructionFromIndex(index);
-        if (MI)
-          break;
-        index = index.getNextIndex();
-      }
-      if (index == end) break;
+      MachineInstr *MI = getInstructionFromIndex(index);
+      if (!MI)
+        continue;               // skip deleted instructions
 
       if (JoinedCopies.count(MI))
         continue;
@@ -374,8 +393,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
       // Value#0 is now defined by the 2-addr instruction.
       OldValNo->def  = RedefIndex;
       OldValNo->setCopy(0);
-      if (MO.isEarlyClobber())
-        OldValNo->setHasRedefByEC(true);
       
       // Add the new live interval which replaces the range for the input copy.
       LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -411,7 +428,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
         interval.removeRange(Start, End);        
         assert(interval.ranges.size() == 1 &&
                "Newly discovered PHI interval has >1 ranges.");
-        MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex());
+        MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def);
         VNI->addKill(indexes_->getTerminatorGap(killMBB));
         VNI->setHasPHIKill(true);
         DEBUG({
@@ -422,7 +439,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
         // Replace the interval with one of a NEW value number.  Note that this
         // value number isn't actually defined by an instruction, weird huh? :)
         LiveRange LR(Start, End,
-                     interval.getNextValue(SlotIndex(getMBBStartIdx(mbb), true),
+                     interval.getNextValue(SlotIndex(getMBBStartIdx(Killer->getParent()), true),
                        0, false, VNInfoAllocator));
         LR.valno->setIsPHIDef(true);
         DEBUG(errs() << " replace range with " << LR);
@@ -513,8 +530,6 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
         if (mi->isRegTiedToUseOperand(DefIdx)) {
           // Two-address instruction.
           end = baseIndex.getDefIndex();
-          assert(!mi->getOperand(DefIdx).isEarlyClobber() &&
-                 "Two address instruction is an early clobber?"); 
         } else {
           // Another instruction redefines the register before it is ever read.
           // Then the register is essentially dead at the instruction that defines
@@ -730,8 +745,16 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
   if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
     // If it's extracting out of a physical register, return the sub-register.
     unsigned Reg = VNI->getCopy()->getOperand(1).getReg();
-    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+      unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm();
+      unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg();
+      if (SrcSubReg == DstSubReg)
+        // %reg1034:3<def> = EXTRACT_SUBREG %EDX, 3
+        // reg1034 can still be coalesced to EDX.
+        return Reg;
+      assert(DstSubReg == 0);
       Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm());
+    }
     return Reg;
   } else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
              VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 68f80ac..3c88e37 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -720,6 +720,51 @@ bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
   return findKill(&MBB);
 }
 
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+  LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+  // Loop over all of the successors of the basic block, checking to see if
+  // the value is either live in the block, or if it is killed in the block.
+  std::vector<MachineBasicBlock*> OpSuccBlocks;
+  for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+         E = MBB.succ_end(); SI != E; ++SI) {
+    MachineBasicBlock *SuccMBB = *SI;
+
+    // Is it alive in this successor?
+    unsigned SuccIdx = SuccMBB->getNumber();
+    if (VI.AliveBlocks.test(SuccIdx))
+      return true;
+    OpSuccBlocks.push_back(SuccMBB);
+  }
+
+  // Check to see if this value is live because there is a use in a successor
+  // that kills it.
+  switch (OpSuccBlocks.size()) {
+  case 1: {
+    MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (VI.Kills[i]->getParent() == SuccMBB)
+        return true;
+    break;
+  }
+  case 2: {
+    MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (VI.Kills[i]->getParent() == SuccMBB1 ||
+          VI.Kills[i]->getParent() == SuccMBB2)
+        return true;
+    break;
+  }
+  default:
+    std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+      if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+                             VI.Kills[i]->getParent()))
+        return true;
+  }
+  return false;
+}
+
 /// addNewBlock - Add a new basic block BB as an empty succcessor to DomBB. All
 /// variables that are live out of DomBB will be marked as passing live through
 /// BB.
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index 30636a8..80eb6cd 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -312,7 +312,7 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
        mbbi != mbbe; ++mbbi) {
     for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
          mi != me;) {
-      MachineBasicBlock::iterator nmi = next(mi);
+      MachineBasicBlock::iterator nmi = llvm::next(mi);
       MachineInstr *MI = mi;
       if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
         MadeChange |= LowerExtract(MI);
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index e55e369..a58286d 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -290,7 +290,7 @@ void MachineBasicBlock::updateTerminator() {
     } else {
       // The block has a fallthrough conditional branch.
       MachineBasicBlock *MBBA = *succ_begin();
-      MachineBasicBlock *MBBB = *next(succ_begin());
+      MachineBasicBlock *MBBB = *llvm::next(succ_begin());
       if (MBBA == TBB) std::swap(MBBB, MBBA);
       if (isLayoutSuccessor(TBB)) {
         if (TII->ReverseBranchCondition(Cond)) {
@@ -359,15 +359,10 @@ bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
 
 bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
   MachineFunction::const_iterator I(this);
-  return next(I) == MachineFunction::const_iterator(MBB);
+  return llvm::next(I) == MachineFunction::const_iterator(MBB);
 }
 
 bool MachineBasicBlock::canFallThrough() {
-  MachineBasicBlock *TBB = 0, *FBB = 0;
-  SmallVector<MachineOperand, 4> Cond;
-  const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
-  bool BranchUnAnalyzable = TII->AnalyzeBranch(*this, TBB, FBB, Cond, true);
-
   MachineFunction::iterator Fallthrough = this;
   ++Fallthrough;
   // If FallthroughBlock is off the end of the function, it can't fall through.
@@ -378,16 +373,21 @@ bool MachineBasicBlock::canFallThrough() {
   if (!isSuccessor(Fallthrough))
     return false;
 
-  // If we couldn't analyze the branch, examine the last instruction.
-  // If the block doesn't end in a known control barrier, assume fallthrough
-  // is possible. The isPredicable check is needed because this code can be
-  // called during IfConversion, where an instruction which is normally a
-  // Barrier is predicated and thus no longer an actual control barrier. This
-  // is over-conservative though, because if an instruction isn't actually
-  // predicated we could still treat it like a barrier.
-  if (BranchUnAnalyzable)
+  // Analyze the branches, if any, at the end of the block.
+  MachineBasicBlock *TBB = 0, *FBB = 0;
+  SmallVector<MachineOperand, 4> Cond;
+  const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+  if (TII->AnalyzeBranch(*this, TBB, FBB, Cond, true)) {
+    // If we couldn't analyze the branch, examine the last instruction.
+    // If the block doesn't end in a known control barrier, assume fallthrough
+    // is possible. The isPredicable check is needed because this code can be
+    // called during IfConversion, where an instruction which is normally a
+    // Barrier is predicated and thus no longer an actual control barrier. This
+    // is over-conservative though, because if an instruction isn't actually
+    // predicated we could still treat it like a barrier.
     return empty() || !back().getDesc().isBarrier() ||
            back().getDesc().isPredicable();
+  }
 
   // If there is no branch, control always falls through.
   if (TBB == 0) return true;
@@ -461,18 +461,19 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
   bool MadeChange = false;
   bool AddedFallThrough = false;
 
-  MachineFunction::iterator FallThru = next(MachineFunction::iterator(this));
+  MachineFunction::iterator FallThru =
+    llvm::next(MachineFunction::iterator(this));
   
-  // If this block ends with a conditional branch that falls through to its
-  // successor, set DestB as the successor.
   if (isCond) {
+    // If this block ends with a conditional branch that falls through to its
+    // successor, set DestB as the successor.
     if (DestB == 0 && FallThru != getParent()->end()) {
       DestB = FallThru;
       AddedFallThrough = true;
     }
   } else {
     // If this is an unconditional branch with no explicit dest, it must just be
-    // a fallthrough into DestB.
+    // a fallthrough into DestA.
     if (DestA == 0 && FallThru != getParent()->end()) {
       DestA = FallThru;
       AddedFallThrough = true;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index d20f446..dd6fd7e 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -328,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS) const {
       if (I->second)
         OS << " in reg%" << I->second;
 
-      if (next(I) != E)
+      if (llvm::next(I) != E)
         OS << ", ";
     }
     OS << '\n';
@@ -342,7 +342,7 @@ void MachineFunction::print(raw_ostream &OS) const {
       else
         OS << "%physreg" << *I;
 
-      if (next(I) != E)
+      if (llvm::next(I) != E)
         OS << " ";
     }
     OS << '\n';
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index f73a5a3..12b974d 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1058,6 +1058,22 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
   return true;
 }
 
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+  if (getOpcode() != TargetInstrInfo::PHI)
+    return 0;
+  assert(getNumOperands() >= 3 &&
+         "It's illegal to have a PHI without source operands");
+
+  unsigned Reg = getOperand(1).getReg();
+  for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+    if (getOperand(i).getReg() != Reg)
+      return 0;
+  return Reg;
+}
+
 void MachineInstr::dump() const {
   errs() << "  " << *this;
 }
@@ -1150,9 +1166,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
     DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
     DIScope Scope(DLT.Scope);
     OS << " dbg:";
+    // Omit the directory, since it's usually long and uninteresting.
     if (!Scope.isNull())
-      OS << Scope.getDirectory() << ':' << Scope.getFilename() << ':';
-    OS << DLT.Line << ":" << DLT.Col;
+      OS << Scope.getFilename();
+    else
+      OS << "<unknown>";
+    OS << ':' << DLT.Line;
+    if (DLT.Col != 0)
+      OS << ':' << DLT.Col;
   }
 
   OS << "\n";
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index db77d19..63f4f18 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -62,11 +62,11 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
   MachineBasicBlock *BotMBB = getHeader();
   MachineFunction::iterator End = BotMBB->getParent()->end();
   if (BotMBB != prior(End)) {
-    MachineBasicBlock *NextMBB = next(MachineFunction::iterator(BotMBB));
+    MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
     while (contains(NextMBB)) {
       BotMBB = NextMBB;
-      if (BotMBB == next(MachineFunction::iterator(BotMBB))) break;
-      NextMBB = next(MachineFunction::iterator(BotMBB));
+      if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break;
+      NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
     }
   }
   return BotMBB;
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 0000000..292096f
--- /dev/null
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,393 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It's based on SSAUpdater
+// class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+typedef std::vector<std::pair<MachineBasicBlock*, unsigned> >
+                IncomingPredInfoTy;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+  return *static_cast<AvailableValsTy*>(AV);
+}
+
+static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
+  return *static_cast<IncomingPredInfoTy*>(IPI);
+}
+
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+                                     SmallVectorImpl<MachineInstr*> *NewPHI)
+  : AV(0), IPI(0), InsertedPHIs(NewPHI) {
+  TII = MF.getTarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+  delete &getAvailableVals(AV);
+  delete &getIncomingPredInfo(IPI);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates.  ProtoValue is the value used to name PHI nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+  if (AV == 0)
+    AV = new AvailableValsTy();
+  else
+    getAvailableVals(AV).clear();
+
+  if (IPI == 0)
+    IPI = new IncomingPredInfoTy();
+  else
+    getIncomingPredInfo(IPI).clear();
+
+  VR = V;
+  VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+  return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+  getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+  return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+          SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+  if (BB->empty())
+    return 0;
+
+  MachineBasicBlock::iterator I = BB->front();
+  if (I->getOpcode() != TargetInstrInfo::PHI)
+    return 0;
+
+  AvailableValsTy AVals;
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+    AVals[PredValues[i].first] = PredValues[i].second;
+  while (I != BB->end() && I->getOpcode() == TargetInstrInfo::PHI) {
+    bool Same = true;
+    for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+      unsigned SrcReg = I->getOperand(i).getReg();
+      MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+      if (AVals[SrcBB] != SrcReg) {
+        Same = false;
+        break;
+      }
+    }
+    if (Same)
+      return I->getOperand(0).getReg();
+    ++I;
+  }
+  return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which define
+/// a value of the given register class at the start of the specified basic
+/// block. It returns the virtual register defined by the instruction.
+static
+MachineInstr *InsertNewDef(unsigned Opcode,
+                           MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+                           const TargetRegisterClass *RC,
+                           MachineRegisterInfo *MRI, const TargetInstrInfo *TII) {
+  unsigned NewVR = MRI->createVirtualRegister(RC);
+  return BuildMI(*BB, I, DebugLoc::getUnknownLoc(), TII->get(Opcode), NewVR);
+}
+                          
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB.  Consider code like this:
+///
+///      X1 = ...
+///   SomeBB:
+///      use(X)
+///      X2 = ...
+///      br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks.  However, the use of X happens in the *middle* of
+/// a block.  Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+  // If there is no definition of the renamed variable in this block, just use
+  // GetValueAtEndOfBlock to do our work.
+  if (!getAvailableVals(AV).count(BB))
+    return GetValueAtEndOfBlockInternal(BB);
+
+  // If there are no predecessors, just return undef.
+  if (BB->pred_empty()) {
+    // Insert an implicit_def to represent an undef value.
+    MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF,
+                                        BB, BB->getFirstTerminator(),
+                                        VRC, MRI, TII);
+    return NewDef->getOperand(0).getReg();
+  }
+
+  // Otherwise, we have the hard case.  Get the live-in values for each
+  // predecessor.
+  SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+  unsigned SingularValue = 0;
+
+  bool isFirstPred = true;
+  for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+         E = BB->pred_end(); PI != E; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+    unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+    PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+    // Compute SingularValue.
+    if (isFirstPred) {
+      SingularValue = PredVal;
+      isFirstPred = false;
+    } else if (PredVal != SingularValue)
+      SingularValue = 0;
+  }
+
+  // Otherwise, if all the merged values are the same, just use it.
+  if (SingularValue != 0)
+    return SingularValue;
+
+  // If an identical PHI is already in BB, just reuse it.
+  unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+  if (DupPHI)
+    return DupPHI;
+
+  // Otherwise, we do need a PHI: insert one now.
+  MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+  MachineInstr *InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB,
+                                           Loc, VRC, MRI, TII);
+
+  // Fill in all the predecessors of the PHI.
+  MachineInstrBuilder MIB(InsertedPHI);
+  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+    MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+  // See if the PHI node can be merged to a single value.  This can happen in
+  // loop cases when we get a PHI of itself and one other value.
+  if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+    InsertedPHI->eraseFromParent();
+    return ConstVal;
+  }
+
+  // If the client wants to know about all new instructions, tell it.
+  if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+  DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+  return InsertedPHI->getOperand(0).getReg();
+}
+
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+                                         MachineOperand *U) {
+  for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+    if (&MI->getOperand(i) == U)
+      return MI->getOperand(i+1).getMBB();
+  }
+
+  llvm_unreachable("MachineOperand::getParent() failure?");
+  return 0;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value.  This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+  MachineInstr *UseMI = U.getParent();
+  unsigned NewVR = 0;
+  if (UseMI->getOpcode() == TargetInstrInfo::PHI) {
+    MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+    NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+  } else {
+    NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+  }
+
+  U.setReg(NewVR);
+}
+
+void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
+  MRI->replaceRegWith(OldReg, NewReg);
+
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+  for (DenseMap<MachineBasicBlock*, unsigned>::iterator
+         I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
+    if (I->second == OldReg)
+      I->second = NewReg;
+}
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it.  If not, construct SSA form by
+/// walking predecessors inserting PHI nodes as needed until we get to a block
+/// where the value is available.
+///
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+  AvailableValsTy &AvailableVals = getAvailableVals(AV);
+
+  // Query AvailableVals by doing an insertion of null.
+  std::pair<AvailableValsTy::iterator, bool> InsertRes =
+    AvailableVals.insert(std::make_pair(BB, 0));
+
+  // Handle the case when the insertion fails because we have already seen BB.
+  if (!InsertRes.second) {
+    // If the insertion failed, there are two cases.  The first case is that the
+    // value is already available for the specified block.  If we get this, just
+    // return the value.
+    if (InsertRes.first->second != 0)
+      return InsertRes.first->second;
+
+    // Otherwise, if the value we find is null, then this is the value is not
+    // known but it is being computed elsewhere in our recursion.  This means
+    // that we have a cycle.  Handle this by inserting a PHI node and returning
+    // it.  When we get back to the first instance of the recursion we will fill
+    // in the PHI node.
+    MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+    MachineInstr *NewPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc,
+                                        VRC, MRI,TII);
+    unsigned NewVR = NewPHI->getOperand(0).getReg();
+    InsertRes.first->second = NewVR;
+    return NewVR;
+  }
+
+  // If there are no predecessors, then we must have found an unreachable block
+  // just return 'undef'.  Since there are no predecessors, InsertRes must not
+  // be invalidated.
+  if (BB->pred_empty()) {
+    // Insert an implicit_def to represent an undef value.
+    MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF,
+                                        BB, BB->getFirstTerminator(),
+                                        VRC, MRI, TII);
+    return InsertRes.first->second = NewDef->getOperand(0).getReg();
+  }
+
+  // Okay, the value isn't in the map and we just inserted a null in the entry
+  // to indicate that we're processing the block.  Since we have no idea what
+  // value is in this block, we have to recurse through our predecessors.
+  //
+  // While we're walking our predecessors, we keep track of them in a vector,
+  // then insert a PHI node in the end if we actually need one.  We could use a
+  // smallvector here, but that would take a lot of stack space for every level
+  // of the recursion, just use IncomingPredInfo as an explicit stack.
+  IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
+  unsigned FirstPredInfoEntry = IncomingPredInfo.size();
+
+  // As we're walking the predecessors, keep track of whether they are all
+  // producing the same value.  If so, this value will capture it, if not, it
+  // will get reset to null.  We distinguish the no-predecessor case explicitly
+  // below.
+  unsigned SingularValue = 0;
+  bool isFirstPred = true;
+  for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+         E = BB->pred_end(); PI != E; ++PI) {
+    MachineBasicBlock *PredBB = *PI;
+    unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+    IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+
+    // Compute SingularValue.
+    if (isFirstPred) {
+      SingularValue = PredVal;
+      isFirstPred = false;
+    } else if (PredVal != SingularValue)
+      SingularValue = 0;
+  }
+
+  /// Look up BB's entry in AvailableVals.  'InsertRes' may be invalidated.  If
+  /// this block is involved in a loop, a no-entry PHI node will have been
+  /// inserted as InsertedVal.  Otherwise, we'll still have the null we inserted
+  /// above.
+  unsigned &InsertedVal = AvailableVals[BB];
+
+  // If all the predecessor values are the same then we don't need to insert a
+  // PHI.  This is the simple and common case.
+  if (SingularValue) {
+    // If a PHI node got inserted, replace it with the singlar value and delete
+    // it.
+    if (InsertedVal) {
+      MachineInstr *OldVal = MRI->getVRegDef(InsertedVal);
+      // Be careful about dead loops.  These RAUW's also update InsertedVal.
+      assert(InsertedVal != SingularValue && "Dead loop?");
+      ReplaceRegWith(InsertedVal, SingularValue);
+      OldVal->eraseFromParent();
+    }
+
+    InsertedVal = SingularValue;
+
+    // Drop the entries we added in IncomingPredInfo to restore the stack.
+    IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+                           IncomingPredInfo.end());
+    return InsertedVal;
+  }
+
+
+  // Otherwise, we do need a PHI: insert one now if we don't already have one.
+  MachineInstr *InsertedPHI;
+  if (InsertedVal == 0) {
+    MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+    InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc,
+                               VRC, MRI, TII);
+    InsertedVal = InsertedPHI->getOperand(0).getReg();
+  } else {
+    InsertedPHI = MRI->getVRegDef(InsertedVal);
+  }
+
+  // Fill in all the predecessors of the PHI.
+  MachineInstrBuilder MIB(InsertedPHI);
+  for (IncomingPredInfoTy::iterator I =
+         IncomingPredInfo.begin()+FirstPredInfoEntry,
+         E = IncomingPredInfo.end(); I != E; ++I)
+    MIB.addReg(I->second).addMBB(I->first);
+
+  // Drop the entries we added in IncomingPredInfo to restore the stack.
+  IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+                         IncomingPredInfo.end());
+
+  // See if the PHI node can be merged to a single value.  This can happen in
+  // loop cases when we get a PHI of itself and one other value.
+  if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+    MRI->replaceRegWith(InsertedVal, ConstVal);
+    InsertedPHI->eraseFromParent();
+    InsertedVal = ConstVal;
+  } else {
+    DEBUG(errs() << "  Inserted PHI: " << *InsertedPHI << "\n");
+
+    // If the client wants to know about all new instructions, tell it.
+    if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+  }
+
+  return InsertedVal;
+}
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index d9f4c99..917d053 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -376,15 +376,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
         report("MBB doesn't fall through but is empty!", MBB);
       }
     }
-    if (TII->BlockHasNoFallThrough(*MBB)) {
-      if (MBB->empty()) {
-        report("TargetInstrInfo says the block has no fall through, but the "
-               "block is empty!", MBB);
-      } else if (!MBB->back().getDesc().isBarrier()) {
-        report("TargetInstrInfo says the block has no fall through, but the "
-               "block does not end in a barrier!", MBB);
-      }
-    }
   } else {
     // Block is last in function.
     if (MBB->empty()) {
diff --git a/lib/CodeGen/MaxStackAlignment.cpp b/lib/CodeGen/MaxStackAlignment.cpp
new file mode 100644
index 0000000..d327cfa
--- /dev/null
+++ b/lib/CodeGen/MaxStackAlignment.cpp
@@ -0,0 +1,70 @@
+//===-- MaxStackAlignment.cpp - Compute the required stack alignment -- ---===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for vector register usage and aligned local objects to
+// calculate the maximum required alignment for a function. This is used by
+// targets which support it to determine if dynamic stack realignment is
+// necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace {
+  struct MaximalStackAlignmentCalculator : public MachineFunctionPass {
+    static char ID;
+    MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {}
+
+    virtual bool runOnMachineFunction(MachineFunction &MF) {
+      MachineFrameInfo *FFI = MF.getFrameInfo();
+      MachineRegisterInfo &RI = MF.getRegInfo();
+
+      // Calculate max stack alignment of all already allocated stack objects.
+      FFI->calculateMaxStackAlignment();
+      unsigned MaxAlign = FFI->getMaxAlignment();
+
+      // Be over-conservative: scan over all vreg defs and find whether vector
+      // registers are used. If yes, there is probability that vector registers
+      // will be spilled and thus the stack needs to be aligned properly.
+      // FIXME: It would be better to only do this if a spill actually
+      // happens rather than conseratively aligning the stack regardless.
+      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+           RegNum < RI.getLastVirtReg(); ++RegNum)
+        MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+      if (FFI->getMaxAlignment() == MaxAlign)
+        return false;
+
+      FFI->setMaxAlignment(MaxAlign);
+      return true;
+    }
+
+    virtual const char *getPassName() const {
+      return "Stack Alignment Requirements Auto-Detector";
+    }
+
+    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+      AU.setPreservesCFG();
+      MachineFunctionPass::getAnalysisUsage(AU);
+    }
+  };
+
+  char MaximalStackAlignmentCalculator::ID = 0;
+}
+
+FunctionPass*
+llvm::createMaxStackAlignmentCalculatorPass() {
+  return new MaximalStackAlignmentCalculator();
+}
+
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 2e30cc6..c62d179 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -287,7 +287,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
 
     // Okay, if we now know that the value is not live out of the block, we can
     // add a kill marker in this block saying that it kills the incoming value!
-    if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) {
+    if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
       // In our final twist, we have to decide which instruction kills the
       // register.  In most cases this is the copy, however, the first
       // terminator instruction at the end of the block may also use the value.
@@ -301,8 +301,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
 
         // Check that no other terminators use values.
 #ifndef NDEBUG
-        for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end();
-             ++TI) {
+        for (MachineBasicBlock::iterator TI = llvm::next(Term);
+             TI != opBlock.end(); ++TI) {
           assert(!TI->readsRegister(SrcReg) &&
                  "Terminator instructions cannot use virtual registers unless"
                  "they are the first terminator in a block!");
@@ -353,59 +353,13 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
       // We break edges when registers are live out from the predecessor block
       // (not considering PHI nodes). If the register is live in to this block
       // anyway, we would gain nothing from splitting.
-      if (!LV.isLiveIn(Reg, MBB) && isLiveOut(Reg, *PreMBB, LV))
+      if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
         SplitCriticalEdge(PreMBB, &MBB);
     }
   }
   return true;
 }
 
-bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
-                                     LiveVariables &LV) {
-  LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
-
-  // Loop over all of the successors of the basic block, checking to see if
-  // the value is either live in the block, or if it is killed in the block.
-  std::vector<MachineBasicBlock*> OpSuccBlocks;
-  for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
-         E = MBB.succ_end(); SI != E; ++SI) {
-    MachineBasicBlock *SuccMBB = *SI;
-
-    // Is it alive in this successor?
-    unsigned SuccIdx = SuccMBB->getNumber();
-    if (VI.AliveBlocks.test(SuccIdx))
-      return true;
-    OpSuccBlocks.push_back(SuccMBB);
-  }
-
-  // Check to see if this value is live because there is a use in a successor
-  // that kills it.
-  switch (OpSuccBlocks.size()) {
-  case 1: {
-    MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
-    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
-      if (VI.Kills[i]->getParent() == SuccMBB)
-        return true;
-    break;
-  }
-  case 2: {
-    MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
-    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
-      if (VI.Kills[i]->getParent() == SuccMBB1 ||
-          VI.Kills[i]->getParent() == SuccMBB2)
-        return true;
-    break;
-  }
-  default:
-    std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
-    for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
-      if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
-                             VI.Kills[i]->getParent()))
-        return true;
-  }
-  return false;
-}
-
 MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
                                                      MachineBasicBlock *B) {
   assert(A && B && "Missing MBB end point");
@@ -423,7 +377,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
   ++NumSplits;
 
   MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
-  MF->insert(next(MachineFunction::iterator(A)), NMBB);
+  MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB);
   DEBUG(errs() << "PHIElimination splitting critical edge:"
         " BB#" << A->getNumber()
         << " -- BB#" << NMBB->getNumber()
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index f5872cb..b0b71ce 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -93,12 +93,6 @@ namespace llvm {
     bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
                        LiveVariables &LV);
 
-    /// isLiveOut - Determine if Reg is live out from MBB, when not
-    /// considering PHI nodes. This means that Reg is either killed by
-    /// a successor block or passed through one.
-    bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
-                   LiveVariables &LV);
-
     /// SplitCriticalEdge - Split a critical edge from A to B by
     /// inserting a new MBB. Update branches in A and PHI instructions
     /// in B. Return the new block.
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 9101fce2..79be295 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -373,7 +373,8 @@ void SchedulePostRATDList::FinishBlock() {
 ///
 void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
   // Initialize the indices to indicate that no registers are live.
-  std::fill(KillIndices, array_endof(KillIndices), ~0u);
+  for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
+    KillIndices[i] = ~0u;
 
   // Determine the live-out physregs for this block.
   if (!BB->empty() && BB->back().getDesc().isReturn()) {
@@ -510,12 +511,9 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
       }
       
       if (MO.isKill() != kill) {
-        bool removed = ToggleKillFlag(MI, MO);
-        if (removed) {
-          DEBUG(errs() << "Fixed <removed> in ");
-        } else {
-          DEBUG(errs() << "Fixed " << MO << " in ");
-        }
+        DEBUG(errs() << "Fixing " << MO << " in ");
+        // Warning: ToggleKillFlag may invalidate MO.
+        ToggleKillFlag(MI, MO);
         DEBUG(MI->dump());
       }
       
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 8f62345..b0d7a47 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -16,6 +16,7 @@
 
 #define DEBUG_TYPE "pre-alloc-split"
 #include "VirtRegMap.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineDominators.h"
@@ -104,6 +105,7 @@ namespace {
       AU.addRequired<LiveStacks>();
       AU.addPreserved<LiveStacks>();
       AU.addPreserved<RegisterCoalescer>();
+      AU.addPreserved<CalculateSpillWeights>();
       if (StrongPHIElim)
         AU.addPreservedID(StrongPHIEliminationID);
       else
@@ -876,7 +878,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
   if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
     KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
   else
-    KillPt = next(MachineBasicBlock::iterator(DefMI));
+    KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
   
   if (KillPt == DefMI->getParent()->end())
     return false;
@@ -1118,7 +1120,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
           return false; // No gap to insert spill.
         }
       } else {
-        SpillPt = next(MachineBasicBlock::iterator(DefMI));
+        SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
         if (SpillPt == DefMBB->end()) {
           DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
           return false; // No gap to insert spill.
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 8905f75..e94247f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -136,9 +136,10 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
 /// pseudo instructions.
 void PEI::calculateCallsInformation(MachineFunction &Fn) {
   const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+  MachineFrameInfo *FFI = Fn.getFrameInfo();
 
   unsigned MaxCallFrameSize = 0;
-  bool HasCalls = false;
+  bool HasCalls = FFI->hasCalls();
 
   // Get the function call frame set-up and tear-down instruction opcode
   int FrameSetupOpcode   = RegInfo->getCallFrameSetupOpcode();
@@ -166,7 +167,6 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
         HasCalls = true;
       }
 
-  MachineFrameInfo *FFI = Fn.getFrameInfo();
   FFI->setHasCalls(HasCalls);
   FFI->setMaxCallFrameSize(MaxCallFrameSize);
 
@@ -674,7 +674,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
         if (PrevI == BB->end())
           I = BB->begin();     // The replaced instr was the first in the block.
         else
-          I = next(PrevI);
+          I = llvm::next(PrevI);
         continue;
       }
 
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 4ff5129..c02d47b 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -16,6 +16,7 @@
 #include "VirtRegRewriter.h"
 #include "Spiller.h"
 #include "llvm/Function.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -59,6 +60,11 @@ PreSplitIntervals("pre-alloc-split",
                   cl::desc("Pre-register allocation live interval splitting"),
                   cl::init(false), cl::Hidden);
 
+static cl::opt<bool>
+TrivCoalesceEnds("trivial-coalesce-ends",
+                  cl::desc("Attempt trivial coalescing of interval ends"),
+                  cl::init(false), cl::Hidden);
+
 static RegisterRegAlloc
 linearscanRegAlloc("linearscan", "linear scan register allocator",
                    createLinearScanRegisterAllocator);
@@ -182,6 +188,7 @@ namespace {
       // Make sure PassManager knows which analyses to make available
       // to coalescing and which analyses coalescing invalidates.
       AU.addRequiredTransitive<RegisterCoalescer>();
+      AU.addRequired<CalculateSpillWeights>();
       if (PreSplitIntervals)
         AU.addRequiredID(PreAllocSplittingID);
       AU.addRequired<LiveStacks>();
@@ -390,66 +397,71 @@ void RALinScan::ComputeRelatedRegClasses() {
         RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
 }
 
-/// attemptTrivialCoalescing - If a simple interval is defined by a copy,
-/// try allocate the definition the same register as the source register
-/// if the register is not defined during live time of the interval. This
-/// eliminate a copy. This is used to coalesce copies which were not
-/// coalesced away before allocation either due to dest and src being in
-/// different register classes or because the coalescer was overly
-/// conservative.
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
+/// allocate the definition the same register as the source register if the
+/// register is not defined during live time of the interval. If the interval is
+/// killed by a copy, try to use the destination register. This eliminates a
+/// copy. This is used to coalesce copies which were not coalesced away before
+/// allocation either due to dest and src being in different register classes or
+/// because the coalescer was overly conservative.
 unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
   unsigned Preference = vrm_->getRegAllocPref(cur.reg);
   if ((Preference && Preference == Reg) || !cur.containsOneValue())
     return Reg;
 
-  VNInfo *vni = cur.begin()->valno;
-  if ((vni->def == SlotIndex()) ||
-      vni->isUnused() || !vni->isDefAccurate())
+  // We cannot handle complicated live ranges. Simple linear stuff only.
+  if (cur.ranges.size() != 1)
     return Reg;
-  MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
-  unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg;
-  if (!CopyMI ||
-      !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+
+  const LiveRange &range = cur.ranges.front();
+
+  VNInfo *vni = range.valno;
+  if (vni->isUnused())
     return Reg;
-  PhysReg = SrcReg;
-  if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
-    if (!vrm_->isAssignedReg(SrcReg))
+
+  unsigned CandReg;
+  {
+    MachineInstr *CopyMI;
+    unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+    if (vni->def != SlotIndex() && vni->isDefAccurate() &&
+        (CopyMI = li_->getInstructionFromIndex(vni->def)) &&
+        tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+      // Defined by a copy, try to extend SrcReg forward
+      CandReg = SrcReg;
+    else if (TrivCoalesceEnds &&
+             (CopyMI =
+              li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+             tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+             cur.reg == SrcReg)
+      // Only used by a copy, try to extend DstReg backwards
+      CandReg = DstReg;
+    else
+      return Reg;
+  }
+
+  if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
+    if (!vrm_->isAssignedReg(CandReg))
       return Reg;
-    PhysReg = vrm_->getPhys(SrcReg);
+    CandReg = vrm_->getPhys(CandReg);
   }
-  if (Reg == PhysReg)
+  if (Reg == CandReg)
     return Reg;
 
   const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
-  if (!RC->contains(PhysReg))
+  if (!RC->contains(CandReg))
     return Reg;
 
-  // Try to coalesce.
-  if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) {
-    DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg)
-                 << '\n');
-    vrm_->clearVirt(cur.reg);
-    vrm_->assignVirt2Phys(cur.reg, PhysReg);
-
-    // Remove unnecessary kills since a copy does not clobber the register.
-    if (li_->hasInterval(SrcReg)) {
-      LiveInterval &SrcLI = li_->getInterval(SrcReg);
-      for (MachineRegisterInfo::use_iterator I = mri_->use_begin(cur.reg),
-             E = mri_->use_end(); I != E; ++I) {
-        MachineOperand &O = I.getOperand();
-        if (!O.isKill())
-          continue;
-        MachineInstr *MI = &*I;
-        if (SrcLI.liveAt(li_->getInstructionIndex(MI).getDefIndex()))
-          O.setIsKill(false);
-      }
-    }
+  if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
+    return Reg;
 
-    ++NumCoalesce;
-    return PhysReg;
-  }
+  // Try to coalesce.
+  DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+        << '\n');
+  vrm_->clearVirt(cur.reg);
+  vrm_->assignVirt2Phys(cur.reg, CandReg);
 
-  return Reg;
+  ++NumCoalesce;
+  return CandReg;
 }
 
 bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
@@ -1261,9 +1273,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
 
   // The earliest start of a Spilled interval indicates up to where
   // in handled we need to roll back
+  assert(!spillIs.empty() && "No spill intervals?"); 
+  SlotIndex earliestStart = spillIs[0]->beginIndex();
   
-  LiveInterval *earliestStartInterval = cur;
-
   // Spill live intervals of virtual regs mapped to the physical register we
   // want to clear (and its aliases).  We only spill those that overlap with the
   // current interval as the rest do not affect its allocation. we also keep
@@ -1274,19 +1286,16 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
     LiveInterval *sli = spillIs.back();
     spillIs.pop_back();
     DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n');
-    earliestStartInterval =
-      (earliestStartInterval->beginIndex() < sli->beginIndex()) ?
-         earliestStartInterval : sli;
+    if (sli->beginIndex() < earliestStart)
+      earliestStart = sli->beginIndex();
        
     std::vector<LiveInterval*> newIs;
-    newIs = spiller_->spill(sli, spillIs);
+    newIs = spiller_->spill(sli, spillIs, &earliestStart);
     addStackInterval(sli, ls_, li_, mri_, *vrm_);
     std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
     spilled.insert(sli->reg);
   }
 
-  SlotIndex earliestStart = earliestStartInterval->beginIndex();
-
   DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n');
 
   // Scan handled in reverse order up to the earliest start of a
@@ -1295,7 +1304,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
   while (!handled_.empty()) {
     LiveInterval* i = handled_.back();
     // If this interval starts before t we are done.
-    if (i->beginIndex() < earliestStart)
+    if (!i->empty() && i->beginIndex() < earliestStart)
       break;
     DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n');
     handled_.pop_back();
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index c677d34..c2014a7 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -36,6 +36,7 @@
 #include "PBQP/Heuristics/Briggs.h"
 #include "VirtRegMap.h"
 #include "VirtRegRewriter.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
 #include "llvm/CodeGen/LiveIntervalAnalysis.h"
 #include "llvm/CodeGen/LiveStackAnalysis.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -90,6 +91,7 @@ namespace {
       au.addRequired<LiveIntervals>();
       //au.addRequiredID(SplitCriticalEdgesID);
       au.addRequired<RegisterCoalescer>();
+      au.addRequired<CalculateSpillWeights>();
       au.addRequired<LiveStacks>();
       au.addPreserved<LiveStacks>();
       au.addRequired<MachineLoopInfo>();
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 94680ed..67bf209 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -125,7 +125,7 @@ void RegScavenger::forward() {
     Tracking = true;
   } else {
     assert(MBBI != MBB->end() && "Already at the end of the basic block!");
-    MBBI = next(MBBI);
+    MBBI = llvm::next(MBBI);
   }
 
   MachineInstr *MI = MBBI;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 06ffdd6..2b52187 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -119,7 +119,8 @@ namespace {
     /// it can be simplified or if things it uses can be simplified by bit
     /// propagation.  If so, return true.
     bool SimplifyDemandedBits(SDValue Op) {
-      APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits());
+      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+      APInt Demanded = APInt::getAllOnesValue(BitWidth);
       return SimplifyDemandedBits(Op, Demanded);
     }
 
@@ -546,7 +547,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
         To[0].getNode()->dump(&DAG);
         errs() << " and " << NumTo-1 << " other values\n";
         for (unsigned i = 0, e = NumTo; i != e; ++i)
-          assert(N->getValueType(i) == To[i].getValueType() &&
+          assert((!To[i].getNode() ||
+                  N->getValueType(i) == To[i].getValueType()) &&
                  "Cannot combine value to value of different type!"));
   WorkListRemover DeadNodes(*this);
   DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
@@ -1687,10 +1689,14 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+  //
+  // do not sink logical op inside of a vector extend, since it may combine
+  // into a vsetcc.
   if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
        N0.getOpcode() == ISD::SIGN_EXTEND ||
        (N0.getOpcode() == ISD::TRUNCATE &&
         !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+      !VT.isVector() &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
       (!LegalOperations ||
        TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
@@ -1943,8 +1949,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
   }
 
   // fold (or x, undef) -> -1
-  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
-    return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+  }
   // fold (or c1, c2) -> c1|c2
   if (N0C && N1C)
     return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
@@ -2434,7 +2442,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N0.getValueType();
-  unsigned OpSizeInBits = VT.getSizeInBits();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
 
   // fold (shl c1, c2) -> c1<<c2
   if (N0C && N1C)
@@ -2450,7 +2458,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
     return N0;
   // if (shl x, c) is known to be zero, return 0
   if (DAG.MaskedValueIsZero(SDValue(N, 0),
-                            APInt::getAllOnesValue(VT.getSizeInBits())))
+                            APInt::getAllOnesValue(OpSizeInBits)))
     return DAG.getConstant(0, VT);
   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
   if (N1.getOpcode() == ISD::TRUNCATE &&
@@ -2526,6 +2534,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N0.getValueType();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
 
   // fold (sra c1, c2) -> (sra c1, c2)
   if (N0C && N1C)
@@ -2537,7 +2546,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   if (N0C && N0C->isAllOnesValue())
     return N0;
   // fold (sra x, (setge c, size(x))) -> undef
-  if (N1C && N1C->getZExtValue() >= VT.getSizeInBits())
+  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
     return DAG.getUNDEF(VT);
   // fold (sra x, 0) -> x
   if (N1C && N1C->isNullValue())
@@ -2545,7 +2554,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
   // sext_inreg.
   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
-    unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
+    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
     EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
     if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
@@ -2556,7 +2565,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
   if (N1C && N0.getOpcode() == ISD::SRA) {
     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
       unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
-      if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
       return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
                          DAG.getConstant(Sum, N1C->getValueType(0)));
     }
@@ -2572,9 +2581,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
     const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
     if (N01C && N1C) {
       // Determine what the truncate's result bitsize and type would be.
-      unsigned VTValSize = VT.getSizeInBits();
       EVT TruncVT =
-        EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue());
+        EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
       // Determine the residual right-shift amount.
       signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
 
@@ -2607,7 +2615,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
       EVT TruncVT = N1.getValueType();
       SDValue N100 = N1.getOperand(0).getOperand(0);
       APInt TruncC = N101C->getAPIntValue();
-      TruncC.trunc(TruncVT.getSizeInBits());
+      TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
       return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                          DAG.getNode(ISD::AND, N->getDebugLoc(),
                                      TruncVT,
@@ -2636,7 +2644,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N0.getValueType();
-  unsigned OpSizeInBits = VT.getSizeInBits();
+  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
 
   // fold (srl c1, c2) -> c1 >>u c2
   if (N0C && N1C)
@@ -3029,7 +3037,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
       else if (Op.getValueType().bitsGT(VT))
         Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
       return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
-                         DAG.getValueType(N0.getValueType()));
+                         DAG.getValueType(N0.getValueType().getScalarType()));
     }
   }
 
@@ -3170,7 +3178,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     } else if (Op.getValueType().bitsGT(VT)) {
       Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
     }
-    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
+    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+                                  N0.getValueType().getScalarType());
   }
 
   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
@@ -3193,6 +3202,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
                        X, DAG.getConstant(Mask, VT));
   }
 
+  // Fold (zext (and x, cst)) -> (and (zext x), cst)
+  if (N0.getOpcode() == ISD::AND &&
+      N0.getOperand(1).getOpcode() == ISD::Constant &&
+      N0.getOperand(0).getOpcode() != ISD::TRUNCATE &&
+      N0.getOperand(0).hasOneUse()) {
+    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+    Mask.zext(VT.getSizeInBits());
+    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                       DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+                                   N0.getOperand(0)),
+                       DAG.getConstant(Mask, VT));
+  }
+
   // fold (zext (load x)) -> (zext (truncate (zextload x)))
   if (ISD::isNON_EXTLoad(N0.getNode()) &&
       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
@@ -3269,6 +3291,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
     if (SCC.getNode()) return SCC;
   }
 
+  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
+  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+      isa<ConstantSDNode>(N0.getOperand(1)) &&
+      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+      N0.hasOneUse()) {
+    if (N0.getOpcode() == ISD::SHL) {
+      // If the original shl may be shifting out bits, do not perform this
+      // transformation.
+      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+      unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
+        N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
+      if (ShAmt > KnownZeroBits)
+        return SDValue();
+    }
+    DebugLoc dl = N->getDebugLoc();
+    return DAG.getNode(N0.getOpcode(), dl, VT,
+                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
+                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1)));
+  }
+
   return SDValue();
 }
 
@@ -3529,7 +3571,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   SDValue N1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
   EVT EVT = cast<VTSDNode>(N1)->getVT();
-  unsigned VTBits = VT.getSizeInBits();
+  unsigned VTBits = VT.getScalarType().getSizeInBits();
   unsigned EVTBits = EVT.getSizeInBits();
 
   // fold (sext_in_reg c1) -> c1
@@ -3537,7 +3579,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
     return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
 
   // If the input is already sign extended, just drop the extension.
-  if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1)
+  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
     return N0;
 
   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -3552,7 +3594,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   // if x is small enough.
   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
     SDValue N00 = N0.getOperand(0);
-    if (N00.getValueType().getSizeInBits() < EVTBits)
+    if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
       return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
   }
 
@@ -3576,11 +3618,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
   if (N0.getOpcode() == ISD::SRL) {
     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
-      if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) {
+      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
         // We can turn this into an SRA iff the input to the SRL is already sign
         // extended enough.
         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
-        if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
           return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
                              N0.getOperand(0), N0.getOperand(1));
       }
@@ -3681,7 +3723,6 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
     return SDValue();
   EVT LD1VT = LD1->getValueType(0);
-  const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
 
   if (ISD::isNON_EXTLoad(LD2) &&
       LD2->hasOneUse() &&
@@ -3689,7 +3730,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
       // If one is volatile it might be ok, but play conservative and bail out.
       !LD1->isVolatile() &&
       !LD2->isVolatile() &&
-      TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
+      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
     unsigned Align = LD1->getAlignment();
     unsigned NewAlign = TLI.getTargetData()->
       getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
@@ -4804,49 +4845,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
   return false;
 }
 
-/// InferAlignment - If we can infer some alignment information from this
-/// pointer, return it.
-static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) {
-  // If this is a direct reference to a stack slot, use information about the
-  // stack slot's alignment.
-  int FrameIdx = 1 << 31;
-  int64_t FrameOffset = 0;
-  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
-    FrameIdx = FI->getIndex();
-  } else if (Ptr.getOpcode() == ISD::ADD &&
-             isa<ConstantSDNode>(Ptr.getOperand(1)) &&
-             isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
-    FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
-    FrameOffset = Ptr.getConstantOperandVal(1);
-  }
-
-  if (FrameIdx != (1 << 31)) {
-    // FIXME: Handle FI+CST.
-    const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
-    if (MFI.isFixedObjectIndex(FrameIdx)) {
-      int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
-
-      // The alignment of the frame index can be determined from its offset from
-      // the incoming frame position.  If the frame object is at offset 32 and
-      // the stack is guaranteed to be 16-byte aligned, then we know that the
-      // object is 16-byte aligned.
-      unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment();
-      unsigned Align = MinAlign(ObjectOffset, StackAlign);
-
-      // Finally, the frame object itself may have a known alignment.  Factor
-      // the alignment + offset into a new alignment.  For example, if we know
-      // the  FI is 8 byte aligned, but the pointer is 4 off, we really have a
-      // 4-byte alignment of the resultant pointer.  Likewise align 4 + 4-byte
-      // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
-      unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
-                                      FrameOffset);
-      return std::max(Align, FIInfoAlign);
-    }
-  }
-
-  return 0;
-}
-
 SDValue DAGCombiner::visitLOAD(SDNode *N) {
   LoadSDNode *LD  = cast<LoadSDNode>(N);
   SDValue Chain = LD->getChain();
@@ -4854,7 +4852,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
 
   // Try to infer better alignment information than the load already has.
   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
-    if (unsigned Align = InferAlignment(Ptr, DAG)) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
       if (Align > LD->getAlignment())
         return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
                               LD->getValueType(0),
@@ -5079,7 +5077,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
 
   // Try to infer better alignment information than the store already has.
   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
-    if (unsigned Align = InferAlignment(Ptr, DAG)) {
+    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
       if (Align > ST->getAlignment())
         return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
                                  Ptr, ST->getSrcValue(),
@@ -5231,7 +5229,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     // SimplifyDemandedBits, which only works if the value has a single use.
     if (SimplifyDemandedBits(Value,
                              APInt::getLowBitsSet(
-                               Value.getValueSizeInBits(),
+                               Value.getValueType().getScalarType().getSizeInBits(),
                                ST->getMemoryVT().getSizeInBits())))
       return SDValue(N, 0);
   }
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 5eb9ca1..4ead9c9 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -532,7 +532,15 @@ bool FastISel::SelectBitCast(User *I) {
 
 bool
 FastISel::SelectInstruction(Instruction *I) {
-  return SelectOperator(I, I->getOpcode());
+  // First, try doing target-independent selection.
+  if (SelectOperator(I, I->getOpcode()))
+    return true;
+
+  // Next, try calling the target to attempt to handle the instruction.
+  if (TargetSelectInstruction(I))
+    return true;
+
+  return false;
 }
 
 /// FastEmitBranch - Emit an unconditional branch to the given block,
@@ -541,7 +549,7 @@ FastISel::SelectInstruction(Instruction *I) {
 void
 FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
   MachineFunction::iterator NextMBB =
-     next(MachineFunction::iterator(MBB));
+     llvm::next(MachineFunction::iterator(MBB));
 
   if (MBB->isLayoutSuccessor(MSucc)) {
     // The unconditional fall-through case, which needs no instructions.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 273dbf0..f9c05d0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -232,7 +232,7 @@ void SelectionDAGLegalize::LegalizeDAG() {
   // node is only legalized after all of its operands are legalized.
   DAG.AssignTopologicalOrder();
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
-       E = prior(DAG.allnodes_end()); I != next(E); ++I)
+       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
     LegalizeOp(SDValue(I, 0));
 
   // Finally, it's possible the root changed.  Get the new root.
@@ -2294,9 +2294,15 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
     // NOTE: we could fall back on load/store here too for targets without
     // SAR.  However, it is doubtful that any exist.
     EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
-    unsigned BitsDiff = Node->getValueType(0).getSizeInBits() -
+    EVT VT = Node->getValueType(0);
+    EVT ShiftAmountTy = TLI.getShiftAmountTy();
+    if (VT.isVector()) {
+      ShiftAmountTy = VT;
+      VT = VT.getVectorElementType();
+    }
+    unsigned BitsDiff = VT.getSizeInBits() -
                         ExtraVT.getSizeInBits();
-    SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+    SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
     Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
                        Node->getOperand(0), ShiftCst);
     Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
@@ -3059,8 +3065,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
 
 // SelectionDAG::Legalize - This is the entry point for the file.
 //
-void SelectionDAG::Legalize(bool TypesNeedLegalizing,
-                            CodeGenOpt::Level OptLevel) {
+void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) {
   /// run - This is the main entry point to this class.
   ///
   SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8ac8063..2f4457e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1167,55 +1167,62 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
   GetExpandedInteger(N->getOperand(0), InL, InH);
 
   SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
-  SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
-  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
-                             Amt, NVBitsNode, ISD::SETULT);
+  SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+  SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+  SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+                                 Amt, NVBitsNode, ISD::SETULT);
 
-  SDValue Lo1, Hi1, Lo2, Hi2;
+  SDValue LoS, HiS, LoL, HiL;
   switch (N->getOpcode()) {
   default: llvm_unreachable("Unknown shift");
   case ISD::SHL:
-    // ShAmt < NVTBits
-    Lo1 = DAG.getConstant(0, NVT);                  // Low part is zero.
-    Hi1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
-
-    // ShAmt >= NVTBits
-    Lo2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
-    Hi2 = DAG.getNode(ISD::OR, dl, NVT,
+    // Short: ShAmt < NVTBits
+    LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+    HiS = DAG.getNode(ISD::OR, dl, NVT,
                       DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
-                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2));
+    // FIXME: If Amt is zero, the following shift generates an undefined result
+    // on some architectures.
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+    // Long: ShAmt >= NVTBits
+    LoL = DAG.getConstant(0, NVT);                        // Lo part is zero.
+    HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
 
-    Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
-    Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
     return true;
   case ISD::SRL:
-    // ShAmt < NVTBits
-    Hi1 = DAG.getConstant(0, NVT);                  // Hi part is zero.
-    Lo1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
-
-    // ShAmt >= NVTBits
-    Hi2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
-    Lo2 = DAG.getNode(ISD::OR, dl, NVT,
-                     DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
-                     DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2));
-
-    Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
-    Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+    // Short: ShAmt < NVTBits
+    HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+    LoS = DAG.getNode(ISD::OR, dl, NVT,
+                      DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+    // FIXME: If Amt is zero, the following shift generates an undefined result
+    // on some architectures.
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+    // Long: ShAmt >= NVTBits
+    HiL = DAG.getConstant(0, NVT);                        // Hi part is zero.
+    LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
     return true;
   case ISD::SRA:
-    // ShAmt < NVTBits
-    Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH,       // Sign extend high part.
-                       DAG.getConstant(NVTBits-1, ShTy));
-    Lo1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
-
-    // ShAmt >= NVTBits
-    Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
-    Lo2 = DAG.getNode(ISD::OR, dl, NVT,
+    // Short: ShAmt < NVTBits
+    HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+    LoS = DAG.getNode(ISD::OR, dl, NVT,
                       DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
-                      DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2));
+    // FIXME: If Amt is zero, the following shift generates an undefined result
+    // on some architectures.
+                      DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+    // Long: ShAmt >= NVTBits
+    HiL = DAG.getNode(ISD::SRA, dl, NVT, InH,             // Sign of Hi part.
+                      DAG.getConstant(NVTBits-1, ShTy));
+    LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
 
-    Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
-    Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+    Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+    Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
     return true;
   }
 
@@ -1989,7 +1996,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
   case ISD::SRA:
   case ISD::SRL:
   case ISD::ROTL:
-  case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+  case ISD::ROTR:              Res = ExpandIntOp_Shift(N); break;
+  case ISD::RETURNADDR:
+  case ISD::FRAMEADDR:         Res = ExpandIntOp_RETURNADDR(N); break;
   }
 
   // If the result is null, the sub-method took care of registering results etc.
@@ -2173,6 +2182,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
   return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
 }
 
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of RETURNADDR / FRAMEADDR builtin is 32 bit contant.  This
+  // surely makes pretty nice problems on 8/16 bit targets. Just truncate this
+  // constant to valid type.
+  SDValue Lo, Hi;
+  GetExpandedInteger(N->getOperand(0), Lo, Hi);
+  return DAG.UpdateNodeOperands(SDValue(N, 0), Lo);
+}
+
 SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
   SDValue Op = N->getOperand(0);
   EVT DstVT = N->getValueType(0);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 2ee9f8a..c35f7ad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -362,6 +362,7 @@ private:
   SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue ExpandIntOp_TRUNCATE(SDNode *N);
   SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+  SDValue ExpandIntOp_RETURNADDR(SDNode *N);
 
   void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
                                   ISD::CondCode &CCCode, DebugLoc dl);
@@ -516,6 +517,7 @@ private:
   SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
   SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+  SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
   SDValue ScalarizeVecRes_SELECT(SDNode *N);
   SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
   SDValue ScalarizeVecRes_SETCC(SDNode *N);
@@ -559,6 +561,7 @@ private:
   void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+  void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
   void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
@@ -601,6 +604,7 @@ private:
   SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
   SDValue WidenVecRes_LOAD(SDNode* N);
   SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+  SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
   SDValue WidenVecRes_SELECT(SDNode* N);
   SDValue WidenVecRes_SELECT_CC(SDNode* N);
   SDValue WidenVecRes_UNDEF(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 785c2ad..2625245 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -79,7 +79,7 @@ bool VectorLegalizer::Run() {
   // node is only legalized after all of its operands are legalized.
   DAG.AssignTopologicalOrder();
   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
-       E = prior(DAG.allnodes_end()); I != next(E); ++I)
+       E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
     LegalizeOp(SDValue(I, 0));
 
   // Finally, it's possible the root changed.  Get the new root.
@@ -179,6 +179,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::FRINT:
   case ISD::FNEARBYINT:
   case ISD::FFLOOR:
+  case ISD::SIGN_EXTEND_INREG:
     QueryType = Node->getValueType(0);
     break;
   case ISD::SINT_TO_FP:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 023324b..cf67ab9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -54,6 +54,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
   case ISD::LOAD:           R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
   case ISD::SCALAR_TO_VECTOR:  R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_SIGN_EXTEND_INREG(N); break;
   case ISD::SELECT:            R = ScalarizeVecRes_SELECT(N); break;
   case ISD::SELECT_CC:         R = ScalarizeVecRes_SELECT_CC(N); break;
   case ISD::SETCC:             R = ScalarizeVecRes_SETCC(N); break;
@@ -195,6 +196,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
   return InOp;
 }
 
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) {
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  SDValue LHS = GetScalarizedVector(N->getOperand(0));
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT,
+                     LHS, N->getOperand(1));
+}
+
 SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
   SDValue LHS = GetScalarizedVector(N->getOperand(1));
   return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -401,6 +409,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::FPOWI:             SplitVecRes_FPOWI(N, Lo, Hi); break;
   case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
   case ISD::SCALAR_TO_VECTOR:  SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+  case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
   case ISD::LOAD:
     SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
     break;
@@ -700,6 +709,18 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
   Hi = DAG.getUNDEF(HiVT);
 }
 
+void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo,
+                                                     SDValue &Hi) {
+  SDValue LHSLo, LHSHi;
+  GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+  DebugLoc dl = N->getDebugLoc();
+
+  Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+                   N->getOperand(1));
+  Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+                   N->getOperand(1));
+}
+
 void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
                                         SDValue &Hi) {
   assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -1141,6 +1162,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
   case ISD::LOAD:              Res = WidenVecRes_LOAD(N); break;
   case ISD::SCALAR_TO_VECTOR:  Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+  case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); break;
   case ISD::SELECT:            Res = WidenVecRes_SELECT(N); break;
   case ISD::SELECT_CC:         Res = WidenVecRes_SELECT_CC(N); break;
   case ISD::UNDEF:             Res = WidenVecRes_UNDEF(N); break;
@@ -1691,6 +1713,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
                      WidenVT, N->getOperand(0));
 }
 
+SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) {
+  EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+  SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+  return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+                     WidenVT, WidenLHS, N->getOperand(1));
+}
+
 SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
   EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
   unsigned WidenNumElts = WidenVT.getVectorNumElements();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index d53de34..b2ee8bb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -20,10 +20,16 @@
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 using namespace llvm;
 
+cl::opt<bool>
+DisableInstScheduling("disable-inst-scheduling",
+                      cl::init(false),
+                      cl::desc("Disable instruction scheduling"));
+
 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
   : ScheduleDAG(mf) {
 }
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c38c79b..da55e6b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -27,6 +27,7 @@
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/Target/TargetRegisterInfo.h"
 #include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetOptions.h"
 #include "llvm/Target/TargetInstrInfo.h"
@@ -47,6 +48,8 @@
 #include <cmath>
 using namespace llvm;
 
+extern cl::opt<bool> DisableInstScheduling;
+
 /// makeVTList - Return an instance of the SDVTList struct initialized with the
 /// specified members.
 static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
@@ -551,6 +554,9 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
     }
 
     DeallocateNode(N);
+
+    // Remove the ordering of this node.
+    if (Ordering) Ordering->remove(N);
   }
 }
 
@@ -576,6 +582,9 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
   N->DropOperands();
 
   DeallocateNode(N);
+
+  // Remove the ordering of this node.
+  if (Ordering) Ordering->remove(N);
 }
 
 void SelectionDAG::DeallocateNode(SDNode *N) {
@@ -587,6 +596,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
   N->NodeType = ISD::DELETED_NODE;
 
   NodeAllocator.Deallocate(AllNodes.remove(N));
+
+  // Remove the ordering of this node.
+  if (Ordering) Ordering->remove(N);
 }
 
 /// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
@@ -690,7 +702,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
   AddNodeIDCustom(ID, N);
-  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  if (Ordering) Ordering->remove(Node);
+  return Node;
 }
 
 /// FindModifiedNodeSlot - Find a slot for the specified node if its operands
@@ -707,7 +721,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
   AddNodeIDCustom(ID, N);
-  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  if (Ordering) Ordering->remove(Node);
+  return Node;
 }
 
 
@@ -724,7 +740,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
   AddNodeIDCustom(ID, N);
-  return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+  if (Ordering) Ordering->remove(Node);
+  return Node;
 }
 
 /// VerifyNode - Sanity check the given node.  Aborts if it is invalid.
@@ -777,8 +795,13 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
 SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
   : TLI(tli), FLI(fli), DW(0),
     EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(),
-    getVTList(MVT::Other)), Root(getEntryNode()) {
+              getVTList(MVT::Other)),
+    Root(getEntryNode()), Ordering(0) {
   AllNodes.push_back(&EntryNode);
+  if (DisableInstScheduling) {
+    Ordering = new NodeOrdering();
+    Ordering->add(&EntryNode);
+  }
 }
 
 void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
@@ -791,6 +814,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
 
 SelectionDAG::~SelectionDAG() {
   allnodes_clear();
+  delete Ordering;
 }
 
 void SelectionDAG::allnodes_clear() {
@@ -816,6 +840,10 @@ void SelectionDAG::clear() {
   EntryNode.UseList = 0;
   AllNodes.push_back(&EntryNode);
   Root = getEntryNode();
+  if (DisableInstScheduling) {
+    Ordering = new NodeOrdering();
+    Ordering->add(&EntryNode);
+  }
 }
 
 SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
@@ -831,8 +859,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
 }
 
 SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
+  assert(!VT.isVector() &&
+         "getZeroExtendInReg should use the vector element type instead of "
+         "the vector type!");
   if (Op.getValueType() == VT) return Op;
-  APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(),
+  unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+  APInt Imm = APInt::getLowBitsSet(BitWidth,
                                    VT.getSizeInBits());
   return getNode(ISD::AND, DL, Op.getValueType(), Op,
                  getConstant(Imm, Op.getValueType()));
@@ -872,14 +904,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
   ID.AddPointer(&Val);
   void *IP = 0;
   SDNode *N = NULL;
-  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) {
+    if (Ordering) Ordering->add(N);
     if (!VT.isVector())
       return SDValue(N, 0);
+  }
   if (!N) {
     N = NodeAllocator.Allocate<ConstantSDNode>();
     new (N) ConstantSDNode(isT, &Val, EltVT);
     CSEMap.InsertNode(N, IP);
     AllNodes.push_back(N);
+    if (Ordering) Ordering->add(N);
   }
 
   SDValue Result(N, 0);
@@ -916,14 +951,17 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
   ID.AddPointer(&V);
   void *IP = 0;
   SDNode *N = NULL;
-  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+  if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) {
+    if (Ordering) Ordering->add(N);
     if (!VT.isVector())
       return SDValue(N, 0);
+  }
   if (!N) {
     N = NodeAllocator.Allocate<ConstantFPSDNode>();
     new (N) ConstantFPSDNode(isTarget, &V, EltVT);
     CSEMap.InsertNode(N, IP);
     AllNodes.push_back(N);
+    if (Ordering) Ordering->add(N);
   }
 
   SDValue Result(N, 0);
@@ -978,12 +1016,15 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
   ID.AddInteger(Offset);
   ID.AddInteger(TargetFlags);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
   new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -993,12 +1034,15 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
   AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
   ID.AddInteger(FI);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
   new (N) FrameIndexSDNode(FI, VT, isTarget);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1012,12 +1056,15 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
   ID.AddInteger(JTI);
   ID.AddInteger(TargetFlags);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
   new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1037,12 +1084,15 @@ SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
   ID.AddPointer(C);
   ID.AddInteger(TargetFlags);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
   new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1063,12 +1113,15 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
   C->AddSelectionDAGCSEId(ID);
   ID.AddInteger(TargetFlags);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
   new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1077,12 +1130,15 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
   AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
   ID.AddPointer(MBB);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
   new (N) BasicBlockSDNode(MBB);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1098,6 +1154,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
   N = NodeAllocator.Allocate<VTSDNode>();
   new (N) VTSDNode(VT);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1107,6 +1164,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
   N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
   new (N) ExternalSymbolSDNode(false, Sym, 0, VT);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1119,6 +1177,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
   N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
   new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1131,6 +1190,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
     new (N) CondCodeSDNode(Cond);
     CondCodeNodes[Cond] = N;
     AllNodes.push_back(N);
+    if (Ordering) Ordering->add(N);
   }
   return SDValue(CondCodeNodes[Cond], 0);
 }
@@ -1223,8 +1283,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
     ID.AddInteger(MaskVec[i]);
 
   void* IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
 
   // Allocate the mask array for the node out of the BumpPtrAllocator, since
   // SDNode doesn't have access to it.  This memory will be "leaked" when
@@ -1236,6 +1298,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
   new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1253,12 +1316,15 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
   SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
   AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
   void* IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
   new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1267,12 +1333,15 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
   AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
   ID.AddInteger(RegNo);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
   new (N) RegisterSDNode(RegNo, VT);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1284,12 +1353,15 @@ SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
   AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
   ID.AddInteger(LabelID);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
   new (N) LabelSDNode(Opcode, dl, Root, LabelID);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1303,12 +1375,15 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
   ID.AddPointer(BA);
   ID.AddInteger(TargetFlags);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
   new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1321,13 +1396,16 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
   ID.AddPointer(V);
 
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
 
   SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
   new (N) SrcValueSDNode(V);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -1480,7 +1558,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
   if (Op.getValueType().isVector())
     return false;
 
-  unsigned BitWidth = Op.getValueSizeInBits();
+  unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
   return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
 }
 
@@ -1503,7 +1581,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
                                      APInt &KnownZero, APInt &KnownOne,
                                      unsigned Depth) const {
   unsigned BitWidth = Mask.getBitWidth();
-  assert(BitWidth == Op.getValueType().getSizeInBits() &&
+  assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
          "Mask size mismatches value type size!");
 
   KnownZero = KnownOne = APInt(BitWidth, 0);   // Don't know anything.
@@ -1760,7 +1838,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
   }
   case ISD::ZERO_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getSizeInBits();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
     APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
     APInt InMask    = Mask;
     InMask.trunc(InBits);
@@ -1774,7 +1852,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
   }
   case ISD::SIGN_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getSizeInBits();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
     APInt InSignBit = APInt::getSignBit(InBits);
     APInt NewBits   = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
     APInt InMask = Mask;
@@ -1815,7 +1893,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
   }
   case ISD::ANY_EXTEND: {
     EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getSizeInBits();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
     APInt InMask = Mask;
     InMask.trunc(InBits);
     KnownZero.trunc(InBits);
@@ -1827,7 +1905,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
   }
   case ISD::TRUNCATE: {
     EVT InVT = Op.getOperand(0).getValueType();
-    unsigned InBits = InVT.getSizeInBits();
+    unsigned InBits = InVT.getScalarType().getSizeInBits();
     APInt InMask = Mask;
     InMask.zext(InBits);
     KnownZero.zext(InBits);
@@ -1960,7 +2038,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
 unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
   EVT VT = Op.getValueType();
   assert(VT.isInteger() && "Invalid VT!");
-  unsigned VTBits = VT.getSizeInBits();
+  unsigned VTBits = VT.getScalarType().getSizeInBits();
   unsigned Tmp, Tmp2;
   unsigned FirstAnswer = 1;
 
@@ -1987,7 +2065,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
   }
 
   case ISD::SIGN_EXTEND:
-    Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits();
+    Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
     return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
 
   case ISD::SIGN_EXTEND_INREG:
@@ -2238,13 +2316,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<SDNode>();
   new (N) SDNode(Opcode, DL, getVTList(VT));
   CSEMap.InsertNode(N, IP);
 
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
 #ifndef NDEBUG
   VerifyNode(N);
 #endif
@@ -2349,6 +2430,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     assert(VT.isFloatingPoint() &&
            Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
     if (Operand.getValueType() == VT) return Operand;  // noop conversion.
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
     if (Operand.getOpcode() == ISD::UNDEF)
       return getUNDEF(VT);
     break;
@@ -2356,8 +2441,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid SIGN_EXTEND!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
-    assert(Operand.getValueType().bitsLT(VT)
-           && "Invalid sext node, dst < src!");
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid sext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
     if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
       return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
     break;
@@ -2365,8 +2454,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid ZERO_EXTEND!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
-    assert(Operand.getValueType().bitsLT(VT)
-           && "Invalid zext node, dst < src!");
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid zext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
     if (OpOpcode == ISD::ZERO_EXTEND)   // (zext (zext x)) -> (zext x)
       return getNode(ISD::ZERO_EXTEND, DL, VT,
                      Operand.getNode()->getOperand(0));
@@ -2375,8 +2468,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid ANY_EXTEND!");
     if (Operand.getValueType() == VT) return Operand;   // noop extension
-    assert(Operand.getValueType().bitsLT(VT)
-           && "Invalid anyext node, dst < src!");
+    assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+           "Invalid anyext node, dst < src!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
     if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
       // (ext (zext x)) -> (zext x)  and  (ext (sext x)) -> (sext x)
       return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
@@ -2385,14 +2482,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     assert(VT.isInteger() && Operand.getValueType().isInteger() &&
            "Invalid TRUNCATE!");
     if (Operand.getValueType() == VT) return Operand;   // noop truncate
-    assert(Operand.getValueType().bitsGT(VT)
-           && "Invalid truncate node, src < dst!");
+    assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+           "Invalid truncate node, src < dst!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() ==
+            Operand.getValueType().getVectorNumElements()) &&
+           "Vector element count mismatch!");
     if (OpOpcode == ISD::TRUNCATE)
       return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
     else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
              OpOpcode == ISD::ANY_EXTEND) {
       // If the source is smaller than the dest, we still need an extend.
-      if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT))
+      if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+            .bitsLT(VT.getScalarType()))
         return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
       else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
         return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
@@ -2447,8 +2549,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
     SDValue Ops[1] = { Operand };
     AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
     void *IP = 0;
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      if (Ordering) Ordering->add(E);
       return SDValue(E, 0);
+    }
     N = NodeAllocator.Allocate<UnarySDNode>();
     new (N) UnarySDNode(Opcode, DL, VTs, Operand);
     CSEMap.InsertNode(N, IP);
@@ -2458,6 +2562,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
   }
 
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
 #ifndef NDEBUG
   VerifyNode(N);
 #endif
@@ -2623,6 +2728,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     assert(VT == N1.getValueType() && "Not an inreg extend!");
     assert(VT.isInteger() && EVT.isInteger() &&
            "Cannot *_EXTEND_INREG FP types");
+    assert(!EVT.isVector() &&
+           "AssertSExt/AssertZExt type should be the vector element type "
+           "rather than the vector type!");
     assert(EVT.bitsLE(VT) && "Not extending!");
     if (VT == EVT) return N1; // noop assertion.
     break;
@@ -2632,12 +2740,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     assert(VT == N1.getValueType() && "Not an inreg extend!");
     assert(VT.isInteger() && EVT.isInteger() &&
            "Cannot *_EXTEND_INREG FP types");
-    assert(EVT.bitsLE(VT) && "Not extending!");
+    assert(!EVT.isVector() &&
+           "SIGN_EXTEND_INREG type should be the vector element type rather "
+           "than the vector type!");
+    assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
     if (EVT == VT) return N1;  // Not actually extending
 
     if (N1C) {
       APInt Val = N1C->getAPIntValue();
-      unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits();
+      unsigned FromBits = EVT.getSizeInBits();
       Val <<= Val.getBitWidth()-FromBits;
       Val = Val.ashr(Val.getBitWidth()-FromBits);
       return getConstant(Val, VT);
@@ -2859,8 +2970,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
     void *IP = 0;
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      if (Ordering) Ordering->add(E);
       return SDValue(E, 0);
+    }
     N = NodeAllocator.Allocate<BinarySDNode>();
     new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
     CSEMap.InsertNode(N, IP);
@@ -2870,6 +2983,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   }
 
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
 #ifndef NDEBUG
   VerifyNode(N);
 #endif
@@ -2936,8 +3050,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
     void *IP = 0;
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      if (Ordering) Ordering->add(E);
       return SDValue(E, 0);
+    }
     N = NodeAllocator.Allocate<TernarySDNode>();
     new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
     CSEMap.InsertNode(N, IP);
@@ -2945,7 +3061,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     N = NodeAllocator.Allocate<TernarySDNode>();
     new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
   }
+
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
 #ifndef NDEBUG
   VerifyNode(N);
 #endif
@@ -3541,12 +3659,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
   void* IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<AtomicSDNode>(E)->refineAlignment(MMO);
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
   }
   SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
   new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -3604,12 +3724,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
   void* IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<AtomicSDNode>(E)->refineAlignment(MMO);
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
   }
   SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
   new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -3682,6 +3804,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
     void *IP = 0;
     if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
       cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+      if (Ordering) Ordering->add(E);
       return SDValue(E, 0);
     }
 
@@ -3693,6 +3816,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
     new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
   }
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -3732,16 +3856,15 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
     assert(VT == MemVT && "Non-extending load from different memory type!");
   } else {
     // Extending load.
-    if (VT.isVector())
-      assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() &&
-             "Invalid vector extload!");
-    else
-      assert(MemVT.bitsLT(VT) &&
-             "Should only be an extending load, not truncating!");
-    assert((ExtType == ISD::EXTLOAD || VT.isInteger()) &&
-           "Cannot sign/zero extend a FP/Vector load!");
+    assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+           "Should only be an extending load, not truncating!");
     assert(VT.isInteger() == MemVT.isInteger() &&
            "Cannot convert from FP to Int or Int -> FP!");
+    assert(VT.isVector() == MemVT.isVector() &&
+           "Cannot use trunc store to convert to or from a vector!");
+    assert((!VT.isVector() ||
+            VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+           "Cannot use trunc store to change the number of vector elements!");
   }
 
   bool Indexed = AM != ISD::UNINDEXED;
@@ -3758,12 +3881,14 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<LoadSDNode>(E)->refineAlignment(MMO);
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
   }
   SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
   new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -3834,12 +3959,14 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<StoreSDNode>(E)->refineAlignment(MMO);
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
   }
   SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
   new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -3874,10 +4001,15 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
   if (VT == SVT)
     return getStore(Chain, dl, Val, Ptr, MMO);
 
-  assert(VT.bitsGT(SVT) && "Not a truncation?");
+  assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+         "Should only be a truncating store, not extending!");
   assert(VT.isInteger() == SVT.isInteger() &&
          "Can't do FP-INT conversion!");
-
+  assert(VT.isVector() == SVT.isVector() &&
+         "Cannot use trunc store to convert to or from a vector!");
+  assert((!VT.isVector() ||
+          VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+         "Cannot use trunc store to change the number of vector elements!");
 
   SDVTList VTs = getVTList(MVT::Other);
   SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -3889,12 +4021,14 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
   void *IP = 0;
   if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
     cast<StoreSDNode>(E)->refineAlignment(MMO);
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
   }
   SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
   new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO);
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -3911,14 +4045,17 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
   ID.AddInteger(ST->getMemoryVT().getRawBits());
   ID.AddInteger(ST->getRawSubclassData());
   void *IP = 0;
-  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+  if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+    if (Ordering) Ordering->add(E);
     return SDValue(E, 0);
+  }
   SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
   new (N) StoreSDNode(Ops, dl, VTs, AM,
                       ST->isTruncatingStore(), ST->getMemoryVT(),
                       ST->getMemOperand());
   CSEMap.InsertNode(N, IP);
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
   return SDValue(N, 0);
 }
 
@@ -3984,8 +4121,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
     AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
     void *IP = 0;
 
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      if (Ordering) Ordering->add(E);
       return SDValue(E, 0);
+    }
 
     N = NodeAllocator.Allocate<SDNode>();
     new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
@@ -3996,6 +4135,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
   }
 
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
 #ifndef NDEBUG
   VerifyNode(N);
 #endif
@@ -4051,8 +4191,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
     void *IP = 0;
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      if (Ordering) Ordering->add(E);
       return SDValue(E, 0);
+    }
     if (NumOps == 1) {
       N = NodeAllocator.Allocate<UnarySDNode>();
       new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
@@ -4083,6 +4225,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
     }
   }
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
 #ifndef NDEBUG
   VerifyNode(N);
 #endif
@@ -4166,7 +4309,7 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
                           I->VTs[2] == VT3 && I->VTs[3] == VT4)
       return *I;
 
-  EVT *Array = Allocator.Allocate<EVT>(3);
+  EVT *Array = Allocator.Allocate<EVT>(4);
   Array[0] = VT1;
   Array[1] = VT2;
   Array[2] = VT3;
@@ -4545,8 +4688,10 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
   if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
-    if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+    if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      if (Ordering) Ordering->add(ON);
       return ON;
+    }
   }
 
   if (!RemoveNodeFromCSEMaps(N))
@@ -4610,6 +4755,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
 
   if (IP)
     CSEMap.InsertNode(N, IP);   // Memoize the new node.
+  if (Ordering) Ordering->add(N);
   return N;
 }
 
@@ -4748,8 +4894,10 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
     IP = 0;
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      if (Ordering) Ordering->add(E);
       return cast<MachineSDNode>(E);
+    }
   }
 
   // Allocate a new MachineSDNode.
@@ -4771,6 +4919,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
     CSEMap.InsertNode(N, IP);
 
   AllNodes.push_back(N);
+  if (Ordering) Ordering->add(N);
 #ifndef NDEBUG
   VerifyNode(N);
 #endif
@@ -4807,8 +4956,10 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
     FoldingSetNodeID ID;
     AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
     void *IP = 0;
-    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+    if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+      if (Ordering) Ordering->add(E);
       return E;
+    }
   }
   return NULL;
 }
@@ -5867,6 +6018,99 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
                  &Scalars[0], Scalars.size());
 }
 
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
+/// location that is 'Dist' units away from the location that the 'Base' load 
+/// is loading from.
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, 
+                                     unsigned Bytes, int Dist) const {
+  if (LD->getChain() != Base->getChain())
+    return false;
+  EVT VT = LD->getValueType(0);
+  if (VT.getSizeInBits() / 8 != Bytes)
+    return false;
+
+  SDValue Loc = LD->getOperand(1);
+  SDValue BaseLoc = Base->getOperand(1);
+  if (Loc.getOpcode() == ISD::FrameIndex) {
+    if (BaseLoc.getOpcode() != ISD::FrameIndex)
+      return false;
+    const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
+    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+    int FS  = MFI->getObjectSize(FI);
+    int BFS = MFI->getObjectSize(BFI);
+    if (FS != BFS || FS != (int)Bytes) return false;
+    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+  }
+  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+    if (V && (V->getSExtValue() == Dist*Bytes))
+      return true;
+  }
+
+  GlobalValue *GV1 = NULL;
+  GlobalValue *GV2 = NULL;
+  int64_t Offset1 = 0;
+  int64_t Offset2 = 0;
+  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+  if (isGA1 && isGA2 && GV1 == GV2)
+    return Offset1 == (Offset2 + Dist*Bytes);
+  return false;
+}
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+  // If this is a GlobalAddress + cst, return the alignment.
+  GlobalValue *GV;
+  int64_t GVOffset = 0;
+  if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
+    return MinAlign(GV->getAlignment(), GVOffset);
+
+  // If this is a direct reference to a stack slot, use information about the
+  // stack slot's alignment.
+  int FrameIdx = 1 << 31;
+  int64_t FrameOffset = 0;
+  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+    FrameIdx = FI->getIndex();
+  } else if (Ptr.getOpcode() == ISD::ADD &&
+             isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+             isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+    FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+    FrameOffset = Ptr.getConstantOperandVal(1);
+  }
+
+  if (FrameIdx != (1 << 31)) {
+    // FIXME: Handle FI+CST.
+    const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+    unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+                                    FrameOffset);
+    if (MFI.isFixedObjectIndex(FrameIdx)) {
+      int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
+
+      // The alignment of the frame index can be determined from its offset from
+      // the incoming frame position.  If the frame object is at offset 32 and
+      // the stack is guaranteed to be 16-byte aligned, then we know that the
+      // object is 16-byte aligned.
+      unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment();
+      unsigned Align = MinAlign(ObjectOffset, StackAlign);
+
+      // Finally, the frame object itself may have a known alignment.  Factor
+      // the alignment + offset into a new alignment.  For example, if we know
+      // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
+      // 4-byte alignment of the resultant pointer.  Likewise align 4 + 4-byte
+      // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
+      return std::max(Align, FIInfoAlign);
+    }
+    return FIInfoAlign;
+  }
+
+  return 0;
+}
+
 void SelectionDAG::dump() const {
   errs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
 
@@ -5882,6 +6126,9 @@ void SelectionDAG::dump() const {
   errs() << "\n\n";
 }
 
+void SelectionDAG::NodeOrdering::dump() const {
+}
+
 void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
   print_types(OS, G);
   print_details(OS, G);
@@ -6022,4 +6269,3 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
       return false;
   return true;
 }
-
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 57d8903..7568384 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -583,6 +583,9 @@ void SelectionDAGBuilder::visit(Instruction &I) {
 }
 
 void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
+  // Tell the DAG that we're processing a new instruction.
+  DAG.NewInst();
+
   // Note: this doesn't use InstVisitor, because it has to work with
   // ConstantExpr's in addition to instructions.
   switch (Opcode) {
@@ -2108,7 +2111,7 @@ void SelectionDAGBuilder::visitSelect(User &I) {
 
     for (unsigned i = 0; i != NumValues; ++i)
       Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
-                              TrueVal.getValueType(), Cond,
+                              TrueVal.getNode()->getValueType(i), Cond,
                               SDValue(TrueVal.getNode(), TrueVal.getResNo() + i),
                               SDValue(FalseVal.getNode(), FalseVal.getResNo() + i));
 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c39437f..a640c7d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -60,8 +60,6 @@
 using namespace llvm;
 
 static cl::opt<bool>
-DisableLegalizeTypes("disable-legalize-types", cl::Hidden);
-static cl::opt<bool>
 EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
           cl::desc("Enable verbose messages in the \"fast\" "
                    "instruction selector"));
@@ -362,6 +360,39 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
   return true;
 }
 
+/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is
+/// attached with this instruction.
+static void SetDebugLoc(unsigned MDDbgKind,
+                        MetadataContext &TheMetadata,
+                        Instruction *I,
+                        SelectionDAGBuilder *SDB,
+                        FastISel *FastIS,
+                        MachineFunction *MF) {
+  if (!isa<DbgInfoIntrinsic>(I)) 
+    if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
+      DILocation DILoc(Dbg);
+      DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+
+      SDB->setCurDebugLoc(Loc);
+
+      if (FastIS)
+        FastIS->setCurDebugLoc(Loc);
+
+      // If the function doesn't have a default debug location yet, set
+      // it. This is kind of a hack.
+      if (MF->getDefaultDebugLoc().isUnknown())
+        MF->setDefaultDebugLoc(Loc);
+    }
+}
+
+/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown.
+static void ResetDebugLoc(SelectionDAGBuilder *SDB,
+                          FastISel *FastIS) {
+  SDB->setCurDebugLoc(DebugLoc::getUnknownLoc());
+  if (FastIS)
+    FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc());
+}
+
 void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
                                         BasicBlock::iterator Begin,
                                         BasicBlock::iterator End,
@@ -373,20 +404,16 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
   // Lower all of the non-terminator instructions. If a call is emitted
   // as a tail call, cease emitting nodes for this block.
   for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
-    if (MDDbgKind) {
-      // Update DebugLoc if debug information is attached with this
-      // instruction.
-      if (!isa<DbgInfoIntrinsic>(I)) 
-        if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
-          DILocation DILoc(Dbg);
-          DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
-          SDB->setCurDebugLoc(Loc);
-          if (MF->getDefaultDebugLoc().isUnknown())
-            MF->setDefaultDebugLoc(Loc);
-        }
-    }
-    if (!isa<TerminatorInst>(I))
+    if (MDDbgKind)
+      SetDebugLoc(MDDbgKind, TheMetadata, I, SDB, 0, MF);
+
+    if (!isa<TerminatorInst>(I)) {
       SDB->visit(*I);
+
+      // Set the current debug location back to "unknown" so that it doesn't
+      // spuriously apply to subsequent instructions.
+      ResetDebugLoc(SDB, 0);
+    }
   }
 
   if (!SDB->HasTailCall) {
@@ -401,7 +428,9 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
       HandlePHINodesInSuccessorBlocks(LLVMBB);
 
       // Lower the terminator after the copies are emitted.
+      SetDebugLoc(MDDbgKind, TheMetadata, LLVMBB->getTerminator(), SDB, 0, MF);
       SDB->visit(*LLVMBB->getTerminator());
+      ResetDebugLoc(SDB, 0);
     }
   }
 
@@ -498,75 +527,73 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
 
   // Second step, hack on the DAG until it only uses operations and types that
   // the target supports.
-  if (!DisableLegalizeTypes) {
-    if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
-                                                 BlockName);
+  if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+                                               BlockName);
+
+  bool Changed;
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("Type Legalization", GroupName);
+    Changed = CurDAG->LegalizeTypes();
+  } else {
+    Changed = CurDAG->LegalizeTypes();
+  }
+
+  DEBUG(errs() << "Type-legalized selection DAG:\n");
+  DEBUG(CurDAG->dump());
 
-    bool Changed;
+  if (Changed) {
+    if (ViewDAGCombineLT)
+      CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+    // Run the DAG combiner in post-type-legalize mode.
     if (TimePassesIsEnabled) {
-      NamedRegionTimer T("Type Legalization", GroupName);
-      Changed = CurDAG->LegalizeTypes();
+      NamedRegionTimer T("DAG Combining after legalize types", GroupName);
+      CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
     } else {
-      Changed = CurDAG->LegalizeTypes();
+      CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
     }
 
-    DEBUG(errs() << "Type-legalized selection DAG:\n");
+    DEBUG(errs() << "Optimized type-legalized selection DAG:\n");
     DEBUG(CurDAG->dump());
+  }
 
-    if (Changed) {
-      if (ViewDAGCombineLT)
-        CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
-
-      // Run the DAG combiner in post-type-legalize mode.
-      if (TimePassesIsEnabled) {
-        NamedRegionTimer T("DAG Combining after legalize types", GroupName);
-        CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
-      } else {
-        CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
-      }
-
-      DEBUG(errs() << "Optimized type-legalized selection DAG:\n");
-      DEBUG(CurDAG->dump());
-    }
+  if (TimePassesIsEnabled) {
+    NamedRegionTimer T("Vector Legalization", GroupName);
+    Changed = CurDAG->LegalizeVectors();
+  } else {
+    Changed = CurDAG->LegalizeVectors();
+  }
 
+  if (Changed) {
     if (TimePassesIsEnabled) {
-      NamedRegionTimer T("Vector Legalization", GroupName);
-      Changed = CurDAG->LegalizeVectors();
+      NamedRegionTimer T("Type Legalization 2", GroupName);
+      Changed = CurDAG->LegalizeTypes();
     } else {
-      Changed = CurDAG->LegalizeVectors();
+      Changed = CurDAG->LegalizeTypes();
     }
 
-    if (Changed) {
-      if (TimePassesIsEnabled) {
-        NamedRegionTimer T("Type Legalization 2", GroupName);
-        Changed = CurDAG->LegalizeTypes();
-      } else {
-        Changed = CurDAG->LegalizeTypes();
-      }
-
-      if (ViewDAGCombineLT)
-        CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+    if (ViewDAGCombineLT)
+      CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
 
-      // Run the DAG combiner in post-type-legalize mode.
-      if (TimePassesIsEnabled) {
-        NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
-        CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
-      } else {
-        CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
-      }
-
-      DEBUG(errs() << "Optimized vector-legalized selection DAG:\n");
-      DEBUG(CurDAG->dump());
+    // Run the DAG combiner in post-type-legalize mode.
+    if (TimePassesIsEnabled) {
+      NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
+      CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+    } else {
+      CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
     }
+
+    DEBUG(errs() << "Optimized vector-legalized selection DAG:\n");
+    DEBUG(CurDAG->dump());
   }
 
   if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
 
   if (TimePassesIsEnabled) {
     NamedRegionTimer T("DAG Legalization", GroupName);
-    CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
+    CurDAG->Legalize(OptLevel);
   } else {
-    CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
+    CurDAG->Legalize(OptLevel);
   }
 
   DEBUG(errs() << "Legalized selection DAG:\n");
@@ -738,24 +765,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
       FastIS->startNewBlock(BB);
       // Do FastISel on as many instructions as possible.
       for (; BI != End; ++BI) {
-        if (MDDbgKind) {
-          // Update DebugLoc if debug information is attached with this
-          // instruction.
-          if (!isa<DbgInfoIntrinsic>(BI)) 
-            if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
-              DILocation DILoc(Dbg);
-              DebugLoc Loc = ExtractDebugLocation(DILoc,
-                                                  MF.getDebugLocInfo());
-              FastIS->setCurDebugLoc(Loc);
-              if (MF.getDefaultDebugLoc().isUnknown())
-                MF.setDefaultDebugLoc(Loc);
-            }
-        }
-
         // Just before the terminator instruction, insert instructions to
         // feed PHI nodes in successor blocks.
         if (isa<TerminatorInst>(BI))
           if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+            ResetDebugLoc(SDB, FastIS);
             if (EnableFastISelVerbose || EnableFastISelAbort) {
               errs() << "FastISel miss: ";
               BI->dump();
@@ -765,13 +779,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
             break;
           }
 
+        if (MDDbgKind)
+          SetDebugLoc(MDDbgKind, TheMetadata, BI, SDB, FastIS, &MF);
+
         // First try normal tablegen-generated "fast" selection.
-        if (FastIS->SelectInstruction(BI))
+        if (FastIS->SelectInstruction(BI)) {
+          ResetDebugLoc(SDB, FastIS);
           continue;
+        }
 
-        // Next, try calling the target to attempt to handle the instruction.
-        if (FastIS->TargetSelectInstruction(BI))
-          continue;
+        // Clear out the debug location so that it doesn't carry over to
+        // unrelated instructions.
+        ResetDebugLoc(SDB, FastIS);
 
         // Then handle certain instructions as single-LLVM-Instruction blocks.
         if (isa<CallInst>(BI)) {
@@ -786,10 +805,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
               R = FuncInfo->CreateRegForValue(BI);
           }
 
-          SDB->setCurDebugLoc(FastIS->getCurDebugLoc());
-
           bool HadTailCall = false;
-          SelectBasicBlock(LLVMBB, BI, next(BI), HadTailCall);
+          SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall);
 
           // If the call was emitted as a tail call, we're done with the block.
           if (HadTailCall) {
@@ -823,9 +840,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
     // not handled by FastISel. If FastISel is not run, this is the entire
     // block.
     if (BI != End) {
-      // If FastISel is run and it has known DebugLoc then use it.
-      if (FastIS && !FastIS->getCurDebugLoc().isUnknown())
-        SDB->setCurDebugLoc(FastIS->getCurDebugLoc());
       bool HadTailCall;
       SelectBasicBlock(LLVMBB, BI, End, HadTailCall);
     }
@@ -1313,14 +1327,6 @@ SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) {
                               N.getValueType());
 }
 
-SDNode *SelectionDAGISel::Select_DBG_LABEL(const SDValue &N) {
-  SDValue Chain = N.getOperand(0);
-  unsigned C = cast<LabelSDNode>(N)->getLabelID();
-  SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
-  return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::DBG_LABEL,
-                              MVT::Other, Tmp, Chain);
-}
-
 SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) {
   SDValue Chain = N.getOperand(0);
   unsigned C = cast<LabelSDNode>(N)->getLabelID();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index c5adc50..83fa5a8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -29,14 +29,14 @@
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Config/config.h"
-#include <fstream>
 using namespace llvm;
 
 namespace llvm {
   template<>
   struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
 
-    DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+    explicit DOTGraphTraits(bool isSimple=false) :
+      DefaultDOTGraphTraits(isSimple) {}
 
     static bool hasEdgeDestLabels() {
       return true;
@@ -50,6 +50,11 @@ namespace llvm {
       return ((const SDNode *) Node)->getValueType(i).getEVTString();
     }
 
+    template<typename EdgeIter>
+    static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+      return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+    }
+
     /// edgeTargetsEdgeSource - This method returns true if this outgoing edge
     /// should actually target another edge source, not a node.  If this method
     /// is implemented, getEdgeTarget should be implemented.
@@ -76,12 +81,12 @@ namespace llvm {
     static bool renderGraphFromBottomUp() {
       return true;
     }
-    
+
     static bool hasNodeAddressLabel(const SDNode *Node,
                                     const SelectionDAG *Graph) {
       return true;
     }
-    
+
     /// If you want to override the dot attributes printed for a particular
     /// edge, override this method.
     template<typename EdgeIter>
@@ -94,7 +99,7 @@ namespace llvm {
         return "color=blue,style=dashed";
       return "";
     }
-    
+
 
     static std::string getSimpleNodeLabel(const SDNode *Node,
                                           const SelectionDAG *G) {
@@ -132,7 +137,7 @@ namespace llvm {
 
 std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
                                                         const SelectionDAG *G) {
-  return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel (Node, G);
+  return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
 }
 
 
@@ -142,7 +147,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
 void SelectionDAG::viewGraph(const std::string &Title) {
 // This code is only for debugging!
 #ifndef NDEBUG
-  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(), 
+  ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
             false, Title);
 #else
   errs() << "SelectionDAG::viewGraph is only available in debug builds on "
@@ -186,7 +191,7 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
 #ifndef NDEBUG
   std::map<const SDNode *, std::string>::const_iterator I =
     NodeGraphAttrs.find(N);
-    
+
   if (I != NodeGraphAttrs.end())
     return I->second;
   else
@@ -252,8 +257,7 @@ void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
     // Visually mark that we hit the limit
     if (strcmp(Color, "red") == 0) {
       setSubgraphColorHelper(N, "blue", visited, 0, printed);
-    }
-    else if (strcmp(Color, "yellow") == 0) {
+    } else if (strcmp(Color, "yellow") == 0) {
       setSubgraphColorHelper(N, "green", visited, 0, printed);
     }
   }
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 68bc2d6..1026169 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -911,7 +911,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const {
   unsigned BitWidth = DemandedMask.getBitWidth();
-  assert(Op.getValueSizeInBits() == BitWidth &&
+  assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
          "Mask size mismatches value type size!");
   APInt NewMask = DemandedMask;
   DebugLoc dl = Op.getDebugLoc();
@@ -1240,7 +1240,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       // demand the input sign bit.
       APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
       if (HighBits.intersects(NewMask))
-        InDemandedMask |= APInt::getSignBit(VT.getSizeInBits());
+        InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
       
       if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
                                KnownZero, KnownOne, TLO, Depth+1))
@@ -2184,48 +2184,6 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
 }
 
 
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a 
-/// location that is 'Dist' units away from the location that the 'Base' load 
-/// is loading from.
-bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, 
-                                       unsigned Bytes, int Dist, 
-                                       const MachineFrameInfo *MFI) const {
-  if (LD->getChain() != Base->getChain())
-    return false;
-  EVT VT = LD->getValueType(0);
-  if (VT.getSizeInBits() / 8 != Bytes)
-    return false;
-
-  SDValue Loc = LD->getOperand(1);
-  SDValue BaseLoc = Base->getOperand(1);
-  if (Loc.getOpcode() == ISD::FrameIndex) {
-    if (BaseLoc.getOpcode() != ISD::FrameIndex)
-      return false;
-    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
-    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
-    int FS  = MFI->getObjectSize(FI);
-    int BFS = MFI->getObjectSize(BFI);
-    if (FS != BFS || FS != (int)Bytes) return false;
-    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
-  }
-  if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
-    ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
-    if (V && (V->getSExtValue() == Dist*Bytes))
-      return true;
-  }
-
-  GlobalValue *GV1 = NULL;
-  GlobalValue *GV2 = NULL;
-  int64_t Offset1 = 0;
-  int64_t Offset2 = 0;
-  bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1);
-  bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
-  if (isGA1 && isGA2 && GV1 == GV2)
-    return Offset1 == (Offset2 + Dist*Bytes);
-  return false;
-}
-
-
 SDValue TargetLowering::
 PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
   // Default implementation: no optimization.
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 5876371..ed407eb 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -130,7 +130,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
   // See PR3149:
   // 172     %ECX<def> = MOV32rr %reg1039<kill>
   // 180     INLINEASM <es:subl $5,$1
-  //         sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, %EAX<kill>,
+  //         sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
+  //         %EAX<kill>,
   // 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
   // 188     %EAX<def> = MOV32rr %EAX<kill>
   // 196     %ECX<def> = MOV32rr %ECX<kill>
@@ -281,12 +282,12 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
   }
 }
 
-/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA
-/// being the source and IntB being the dest, thus this defines a value number
-/// in IntB.  If the source value number (in IntA) is defined by a commutable
-/// instruction and its other operand is coalesced to the copy dest register,
-/// see if we can transform the copy into a noop by commuting the definition. For
-/// example,
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB.  If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
 ///
 ///  A3 = op A2 B0<kill>
 ///    ...
@@ -508,7 +509,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
     if (BHasSubRegs) {
       for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
         LiveInterval &SRLI = li_->getInterval(*SR);
-        SRLI.MergeInClobberRange(*li_, AI->start, End, li_->getVNInfoAllocator());
+        SRLI.MergeInClobberRange(*li_, AI->start, End,
+                                 li_->getVNInfoAllocator());
       }
     }
   }
@@ -708,7 +710,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
       checkForDeadDef = true;
     }
 
-  MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
+  MachineBasicBlock::iterator MII =
+    llvm::next(MachineBasicBlock::iterator(CopyMI));
   tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_);
   MachineInstr *NewMI = prior(MII);
 
@@ -1314,7 +1317,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
                       "coalesced to another register.\n");
       return false;  // Not coalescable.
     }
-  } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
+  } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+    if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) {
+      // e.g. %reg16404:1<def> = MOV8rr %reg16412:2<kill>
+      Again = true;
+      return false;  // Not coalescable.
+    }
+  } else {
     llvm_unreachable("Unrecognized copy instruction!");
   }
 
@@ -1611,9 +1620,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
           }
         }
       } else {
-        // If the virtual register live interval is long but it has low use desity,
-        // do not join them, instead mark the physical register as its allocation
-        // preference.
+        // If the virtual register live interval is long but it has low use
+        // density, do not join them, instead mark the physical register as its
+        // allocation preference.
         LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
         unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
         unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
@@ -1938,6 +1947,10 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
     if (Overlaps) {
       // If we haven't already recorded that this value # is safe, check it.
       if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
+        // If it's re-defined by an early clobber somewhere in the live range,
+        // then conservatively abort coalescing.
+        if (LHSIt->valno->hasRedefByEC())
+          return false;
         // Copy from the RHS?
         if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
           return false;    // Nope, bail out.
@@ -1977,6 +1990,10 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
           // if coalescing succeeds.  Just skip the liverange.
           if (++LHSIt == LHSEnd) break;
         } else {
+          // If it's re-defined by an early clobber somewhere in the live range,
+          // then conservatively abort coalescing.
+          if (LHSIt->valno->hasRedefByEC())
+            return false;
           // Otherwise, if this is a copy from the RHS, mark it as being merged
           // in.
           if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
@@ -2316,6 +2333,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
       if (LHSValNoAssignments[I->valno->id] !=
           RHSValNoAssignments[J->valno->id])
         return false;
+      // If it's re-defined by an early clobber somewhere in the live range,
+      // then conservatively abort coalescing.
+      if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+        return false;
     }
 
     if (I->end < J->end) {
@@ -2401,9 +2422,15 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
 
     // If this isn't a copy nor a extract_subreg, we can't join intervals.
     unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+    bool isInsUndef = false;
     if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
       DstReg = Inst->getOperand(0).getReg();
       SrcReg = Inst->getOperand(1).getReg();
+    } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+      DstReg = Inst->getOperand(0).getReg();
+      SrcReg = Inst->getOperand(2).getReg();
+      if (Inst->getOperand(1).isUndef())
+        isInsUndef = true;
     } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
                Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
       DstReg = Inst->getOperand(0).getReg();
@@ -2413,7 +2440,8 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
 
     bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
     bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
-    if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+    if (isInsUndef ||
+        (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()))
       ImpDefCopies.push_back(CopyRec(Inst, 0));
     else if (SrcIsPhys || DstIsPhys)
       PhysCopies.push_back(CopyRec(Inst, 0));
@@ -2421,9 +2449,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
       VirtCopies.push_back(CopyRec(Inst, 0));
   }
 
-  // Try coalescing implicit copies first, followed by copies to / from
-  // physical registers, then finally copies from virtual registers to
-  // virtual registers.
+  // Try coalescing implicit copies and insert_subreg <undef> first,
+  // followed by copies to / from physical registers, then finally copies
+  // from virtual registers to virtual registers.
   for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
     CopyRec &TheCopy = ImpDefCopies[i];
     bool Again = false;
@@ -2594,114 +2622,6 @@ void SimpleRegisterCoalescing::releaseMemory() {
   ReMatDefs.clear();
 }
 
-/// Returns true if the given live interval is zero length.
-static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {
-  for (LiveInterval::Ranges::const_iterator
-         i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
-    if (i->end.getPrevIndex() > i->start)
-      return false;
-  return true;
-}
-
-
-void SimpleRegisterCoalescing::CalculateSpillWeights() {
-  SmallSet<unsigned, 4> Processed;
-  for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
-       mbbi != mbbe; ++mbbi) {
-    MachineBasicBlock* MBB = mbbi;
-    SlotIndex MBBEnd = li_->getMBBEndIdx(MBB);
-    MachineLoop* loop = loopInfo->getLoopFor(MBB);
-    unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
-    bool isExiting = loop ? loop->isLoopExiting(MBB) : false;
-
-    for (MachineBasicBlock::const_iterator mii = MBB->begin(), mie = MBB->end();
-         mii != mie; ++mii) {
-      const MachineInstr *MI = mii;
-      if (tii_->isIdentityCopy(*MI))
-        continue;
-
-      if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
-        continue;
-
-      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
-        const MachineOperand &mopi = MI->getOperand(i);
-        if (!mopi.isReg() || mopi.getReg() == 0)
-          continue;
-        unsigned Reg = mopi.getReg();
-        if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
-          continue;
-        // Multiple uses of reg by the same instruction. It should not
-        // contribute to spill weight again.
-        if (!Processed.insert(Reg))
-          continue;
-
-        bool HasDef = mopi.isDef();
-        bool HasUse = !HasDef;
-        for (unsigned j = i+1; j != e; ++j) {
-          const MachineOperand &mopj = MI->getOperand(j);
-          if (!mopj.isReg() || mopj.getReg() != Reg)
-            continue;
-          HasDef |= mopj.isDef();
-          HasUse |= mopj.isUse();
-          if (HasDef && HasUse)
-            break;
-        }
-
-        LiveInterval &RegInt = li_->getInterval(Reg);
-        float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth);
-        if (HasDef && isExiting) {
-          // Looks like this is a loop count variable update.
-          SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
-          const LiveRange *DLR =
-            li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
-          if (DLR->end > MBBEnd)
-            Weight *= 3.0F;
-        }
-        RegInt.weight += Weight;
-      }
-      Processed.clear();
-    }
-  }
-
-  for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
-    LiveInterval &LI = *I->second;
-    if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
-      // If the live interval length is essentially zero, i.e. in every live
-      // range the use follows def immediately, it doesn't make sense to spill
-      // it and hope it will be easier to allocate for this li.
-      if (isZeroLengthInterval(&LI, li_)) {
-        LI.weight = HUGE_VALF;
-        continue;
-      }
-
-      bool isLoad = false;
-      SmallVector<LiveInterval*, 4> SpillIs;
-      if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
-        // If all of the definitions of the interval are re-materializable,
-        // it is a preferred candidate for spilling. If non of the defs are
-        // loads, then it's potentially very cheap to re-materialize.
-        // FIXME: this gets much more complicated once we support non-trivial
-        // re-materialization.
-        if (isLoad)
-          LI.weight *= 0.9F;
-        else
-          LI.weight *= 0.5F;
-      }
-
-      // Slightly prefer live interval that has been assigned a preferred reg.
-      std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
-      if (Hint.first || Hint.second)
-        LI.weight *= 1.01F;
-
-      // Divide the weight of the interval by its size.  This encourages
-      // spilling of intervals that are large and have few uses, and
-      // discourages spilling of small intervals with many uses.
-      LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
-    }
-  }
-}
-
-
 bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
   mf_ = &fn;
   mri_ = &fn.getRegInfo();
@@ -2727,7 +2647,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
     joinIntervals();
     DEBUG({
         errs() << "********** INTERVALS POST JOINING **********\n";
-        for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){
+        for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+             I != E; ++I){
           I->second->print(errs(), tri_);
           errs() << "\n";
         }
@@ -2768,7 +2689,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
           DoDelete = true;
         }
         if (!DoDelete)
-          mii = next(mii);
+          mii = llvm::next(mii);
         else {
           li_->RemoveMachineInstrFromMaps(MI);
           mii = mbbi->erase(mii);
@@ -2831,8 +2752,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
     }
   }
 
-  CalculateSpillWeights();
-
   DEBUG(dump());
   return true;
 }
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 78f8a9a..605a740 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -244,10 +244,6 @@ namespace llvm {
     MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
                                     unsigned Reg, SlotIndex &LastUseIdx) const;
 
-    /// CalculateSpillWeights - Compute spill weights for all virtual register
-    /// live intervals.
-    void CalculateSpillWeights();
-
     void printRegName(unsigned reg) const;
   };
 
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 237d0b5..bc246c1 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -20,22 +20,25 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include <set>
 
 using namespace llvm;
 
 namespace {
-  enum SpillerName { trivial, standard };
+  enum SpillerName { trivial, standard, splitting };
 }
 
 static cl::opt<SpillerName>
 spillerOpt("spiller",
            cl::desc("Spiller to use: (default: standard)"),
            cl::Prefix,
-           cl::values(clEnumVal(trivial, "trivial spiller"),
-                      clEnumVal(standard, "default spiller"),
+           cl::values(clEnumVal(trivial,   "trivial spiller"),
+                      clEnumVal(standard,  "default spiller"),
+                      clEnumVal(splitting, "splitting spiller"),
                       clEnumValEnd),
            cl::init(standard));
 
+// Spiller virtual destructor implementation.
 Spiller::~Spiller() {}
 
 namespace {
@@ -140,9 +143,9 @@ protected:
 
       // Insert store if necessary.
       if (hasDef) {
-        tii->storeRegToStackSlot(*mi->getParent(), next(miItr), newVReg, true,
+        tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, true,
                                  ss, trc);
-        MachineInstr *storeInstr(next(miItr));
+        MachineInstr *storeInstr(llvm::next(miItr));
         SlotIndex storeIndex =
           lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
         SlotIndex beginIndex = storeIndex.getPrevIndex();
@@ -170,7 +173,8 @@ public:
     : SpillerBase(mf, lis, vrm) {}
 
   std::vector<LiveInterval*> spill(LiveInterval *li,
-                                   SmallVectorImpl<LiveInterval*> &spillIs) {
+                                   SmallVectorImpl<LiveInterval*> &spillIs,
+                                   SlotIndex*) {
     // Ignore spillIs - we don't use it.
     return trivialSpillEverywhere(li);
   }
@@ -179,23 +183,336 @@ public:
 
 /// Falls back on LiveIntervals::addIntervalsForSpills.
 class StandardSpiller : public Spiller {
-private:
+protected:
   LiveIntervals *lis;
   const MachineLoopInfo *loopInfo;
   VirtRegMap *vrm;
 public:
-  StandardSpiller(MachineFunction *mf, LiveIntervals *lis,
-                  const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
+  StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo,
+                  VirtRegMap *vrm)
     : lis(lis), loopInfo(loopInfo), vrm(vrm) {}
 
   /// Falls back on LiveIntervals::addIntervalsForSpills.
   std::vector<LiveInterval*> spill(LiveInterval *li,
-                                   SmallVectorImpl<LiveInterval*> &spillIs) {
+                                   SmallVectorImpl<LiveInterval*> &spillIs,
+                                   SlotIndex*) {
     return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
   }
 
 };
 
+/// When a call to spill is placed this spiller will first try to break the
+/// interval up into its component values (one new interval per value).
+/// If this fails, or if a call is placed to spill a previously split interval
+/// then the spiller falls back on the standard spilling mechanism. 
+class SplittingSpiller : public StandardSpiller {
+public:
+  SplittingSpiller(MachineFunction *mf, LiveIntervals *lis,
+                   const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
+    : StandardSpiller(lis, loopInfo, vrm) {
+
+    mri = &mf->getRegInfo();
+    tii = mf->getTarget().getInstrInfo();
+    tri = mf->getTarget().getRegisterInfo();
+  }
+
+  std::vector<LiveInterval*> spill(LiveInterval *li,
+                                   SmallVectorImpl<LiveInterval*> &spillIs,
+                                   SlotIndex *earliestStart) {
+    
+    if (worthTryingToSplit(li)) {
+      return tryVNISplit(li, earliestStart);
+    }
+    // else
+    return StandardSpiller::spill(li, spillIs, earliestStart);
+  }
+
+private:
+
+  MachineRegisterInfo *mri;
+  const TargetInstrInfo *tii;
+  const TargetRegisterInfo *tri;  
+  DenseSet<LiveInterval*> alreadySplit;
+
+  bool worthTryingToSplit(LiveInterval *li) const {
+    return (!alreadySplit.count(li) && li->getNumValNums() > 1);
+  }
+
+  /// Try to break a LiveInterval into its component values.
+  std::vector<LiveInterval*> tryVNISplit(LiveInterval *li,
+                                         SlotIndex *earliestStart) {
+
+    DEBUG(errs() << "Trying VNI split of %reg" << *li << "\n");
+
+    std::vector<LiveInterval*> added;
+    SmallVector<VNInfo*, 4> vnis;
+
+    std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
+   
+    for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
+         vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
+      VNInfo *vni = *vniItr;
+      
+      // Skip unused VNIs, or VNIs with no kills.
+      if (vni->isUnused() || vni->kills.empty())
+        continue;
+
+      DEBUG(errs() << "  Extracted Val #" << vni->id << " as ");
+      LiveInterval *splitInterval = extractVNI(li, vni);
+      
+      if (splitInterval != 0) {
+        DEBUG(errs() << *splitInterval << "\n");
+        added.push_back(splitInterval);
+        alreadySplit.insert(splitInterval);
+        if (earliestStart != 0) {
+          if (splitInterval->beginIndex() < *earliestStart)
+            *earliestStart = splitInterval->beginIndex();
+        }
+      } else {
+        DEBUG(errs() << "0\n");
+      }
+    } 
+
+    DEBUG(errs() << "Original LI: " << *li << "\n");
+
+    // If there original interval still contains some live ranges
+    // add it to added and alreadySplit.    
+    if (!li->empty()) {
+      added.push_back(li);
+      alreadySplit.insert(li);
+      if (earliestStart != 0) {
+        if (li->beginIndex() < *earliestStart)
+          *earliestStart = li->beginIndex();
+      }
+    }
+
+    return added;
+  }
+
+  /// Extract the given value number from the interval.
+  LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
+    assert(vni->isDefAccurate() || vni->isPHIDef());
+    assert(!vni->kills.empty());
+
+    // Create a new vreg and live interval, copy VNI kills & ranges over.                                                                                                                                                     
+    const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+    unsigned newVReg = mri->createVirtualRegister(trc);
+    vrm->grow();
+    LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+    VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
+
+    // Start by copying all live ranges in the VN to the new interval.                                                                                                                                                        
+    for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
+         rItr != rEnd; ++rItr) {
+      if (rItr->valno == vni) {
+        newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI));
+      }
+    }
+
+    // Erase the old VNI & ranges.                                                                                                                                                                                            
+    li->removeValNo(vni);
+
+    // Collect all current uses of the register belonging to the given VNI.
+    // We'll use this to rename the register after we've dealt with the def.
+    std::set<MachineInstr*> uses;
+    for (MachineRegisterInfo::use_iterator
+         useItr = mri->use_begin(li->reg), useEnd = mri->use_end();
+         useItr != useEnd; ++useItr) {
+      uses.insert(&*useItr);
+    }
+
+    // Process the def instruction for this VNI.
+    if (newVNI->isPHIDef()) {
+      // Insert a copy at the start of the MBB. The range proceeding the
+      // copy will be attached to the original LiveInterval.
+      MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
+      tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc);
+      MachineInstr *copyMI = defMBB->begin();
+      copyMI->addRegisterKilled(li->reg, tri);
+      SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+      VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
+                                           0, false, lis->getVNInfoAllocator());
+      phiDefVNI->setIsPHIDef(true);
+      phiDefVNI->addKill(copyIdx.getDefIndex());
+      li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
+      LiveRange *oldPHIDefRange =
+        newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
+
+      // If the old phi def starts in the middle of the range chop it up.
+      if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) {
+        LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end,
+                                  oldPHIDefRange->valno);
+        oldPHIDefRange->end = lis->getMBBStartIdx(defMBB);
+        newLI->addRange(oldPHIDefRange2);
+      } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) {
+        // Otherwise if it's at the start of the range just trim it.
+        oldPHIDefRange->start = copyIdx.getDefIndex();
+      } else {
+        assert(false && "PHI def range doesn't cover PHI def?");
+      }
+
+      newVNI->def = copyIdx.getDefIndex();
+      newVNI->setCopy(copyMI);
+      newVNI->setIsPHIDef(false); // not a PHI def anymore.
+      newVNI->setIsDefAccurate(true);
+    } else {
+      // non-PHI def. Rename the def. If it's two-addr that means renaming the use
+      // and inserting a new copy too.
+      MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
+      // We'll rename this now, so we can remove it from uses.
+      uses.erase(defInst);
+      unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg);
+      bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx),
+        twoAddrUseIsUndef = false;
+
+      for (unsigned i = 0; i < defInst->getNumOperands(); ++i) {
+        MachineOperand &mo = defInst->getOperand(i);
+        if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) {
+          mo.setReg(newVReg);
+          if (isTwoAddr && mo.isUse() && mo.isUndef())
+            twoAddrUseIsUndef = true;
+        }
+      }
+    
+      SlotIndex defIdx = lis->getInstructionIndex(defInst);
+      newVNI->def = defIdx.getDefIndex();
+
+      if (isTwoAddr && !twoAddrUseIsUndef) {
+        MachineBasicBlock *defMBB = defInst->getParent();
+        tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc);
+        MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst));
+        SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+        copyMI->addRegisterKilled(li->reg, tri);
+        LiveRange *origUseRange =
+          li->getLiveRangeContaining(newVNI->def.getUseIndex());
+        VNInfo *origUseVNI = origUseRange->valno;
+        origUseRange->end = copyIdx.getDefIndex();
+        bool updatedKills = false;
+        for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) {
+          if (origUseVNI->kills[k] == defIdx.getDefIndex()) {
+            origUseVNI->kills[k] = copyIdx.getDefIndex();
+            updatedKills = true;
+            break;
+          }
+        }
+        assert(updatedKills && "Failed to update VNI kill list.");
+        VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
+                                              true, lis->getVNInfoAllocator());
+        copyVNI->addKill(defIdx.getDefIndex());
+        LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI);
+        newLI->addRange(copyRange);
+      }    
+    }
+    
+    for (std::set<MachineInstr*>::iterator
+         usesItr = uses.begin(), usesEnd = uses.end();
+         usesItr != usesEnd; ++usesItr) {
+      MachineInstr *useInst = *usesItr;
+      SlotIndex useIdx = lis->getInstructionIndex(useInst);
+      LiveRange *useRange =
+        newLI->getLiveRangeContaining(useIdx.getUseIndex());
+
+      // If this use doesn't belong to the new interval skip it.
+      if (useRange == 0)
+        continue;
+
+      // This use doesn't belong to the VNI, skip it.
+      if (useRange->valno != newVNI)
+        continue;
+
+      // Check if this instr is two address.
+      unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
+      bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
+      
+      // Rename uses (and defs for two-address instrs).
+      for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
+        MachineOperand &mo = useInst->getOperand(i);
+        if (mo.isReg() && (mo.isUse() || isTwoAddress) &&
+            (mo.getReg() == li->reg)) {
+          mo.setReg(newVReg);
+        }
+      }
+
+      // If this is a two address instruction we've got some extra work to do.
+      if (isTwoAddress) {
+        // We modified the def operand, so we need to copy back to the original
+        // reg.
+        MachineBasicBlock *useMBB = useInst->getParent();
+        MachineBasicBlock::iterator useItr(useInst);
+        tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc);
+        MachineInstr *copyMI = next(useItr);
+        copyMI->addRegisterKilled(newVReg, tri);
+        SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+
+        // Change the old two-address defined range & vni to start at
+        // (and be defined by) the copy.
+        LiveRange *origDefRange =
+          li->getLiveRangeContaining(useIdx.getDefIndex());
+        origDefRange->start = copyIdx.getDefIndex();
+        origDefRange->valno->def = copyIdx.getDefIndex();
+        origDefRange->valno->setCopy(copyMI);
+
+        // Insert a new range & vni for the two-address-to-copy value. This
+        // will be attached to the new live interval.
+        VNInfo *copyVNI =
+          newLI->getNextValue(useIdx.getDefIndex(), 0, true,
+                              lis->getVNInfoAllocator());
+        copyVNI->addKill(copyIdx.getDefIndex());
+        LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
+        newLI->addRange(copyRange);
+      }
+    }
+    
+    // Iterate over any PHI kills - we'll need to insert new copies for them.
+    for (VNInfo::KillSet::iterator
+         killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end();
+         killItr != killEnd; ++killItr) {
+      SlotIndex killIdx(*killItr);
+      if (killItr->isPHI()) {
+        MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
+        LiveRange *oldKillRange =
+          newLI->getLiveRangeContaining(killIdx);
+
+        assert(oldKillRange != 0 && "No kill range?");
+
+        tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
+                          li->reg, newVReg, trc, trc);
+        MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
+        copyMI->addRegisterKilled(newVReg, tri);
+        SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+
+        // Save the current end. We may need it to add a new range if the
+        // current range runs of the end of the MBB.
+        SlotIndex newKillRangeEnd = oldKillRange->end;
+        oldKillRange->end = copyIdx.getDefIndex();
+
+        if (newKillRangeEnd != lis->getMBBEndIdx(killMBB).getNextSlot()) {
+          assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB).getNextSlot() &&
+                 "PHI kill range doesn't reach kill-block end. Not sane.");
+          newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB).getNextSlot(),
+                                    newKillRangeEnd, newVNI));
+        }
+
+        *killItr = oldKillRange->end;
+        VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
+                                              copyMI, true,
+                                              lis->getVNInfoAllocator());
+        newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB));
+        newKillVNI->setHasPHIKill(true);
+        li->addRange(LiveRange(copyIdx.getDefIndex(),
+                               lis->getMBBEndIdx(killMBB).getNextSlot(),
+                               newKillVNI));
+      }
+
+    }
+
+    newVNI->setHasPHIKill(false);
+
+    return newLI;
+  }
+
+};
+
 }
 
 llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
@@ -203,7 +520,8 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
                                    VirtRegMap *vrm) {
   switch (spillerOpt) {
     case trivial: return new TrivialSpiller(mf, lis, vrm); break;
-    case standard: return new StandardSpiller(mf, lis, loopInfo, vrm); break;
+    case standard: return new StandardSpiller(lis, loopInfo, vrm); break;
+    case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break;
     default: llvm_unreachable("Unreachable!"); break;
   }
 }
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index c6bd985..dda52e8 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -21,6 +21,7 @@ namespace llvm {
   class MachineFunction;
   class MachineInstr;
   class MachineLoopInfo;
+  class SlotIndex;
   class VirtRegMap;
   class VNInfo;
 
@@ -35,7 +36,8 @@ namespace llvm {
     /// Spill the given live range. The method used will depend on the Spiller
     /// implementation selected.
     virtual std::vector<LiveInterval*> spill(LiveInterval *li,
-                                   SmallVectorImpl<LiveInterval*> &spillIs) = 0;
+					     SmallVectorImpl<LiveInterval*> &spillIs,
+                                             SlotIndex *earliestIndex = 0) = 0;
 
   };
 
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index c299192..fd25a37 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -668,7 +668,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
     if (DCELimit != -1 && (int)NumDead >= DCELimit)
       break;
     
-    MachineBasicBlock::iterator NextMI = next(I);
+    MachineBasicBlock::iterator NextMI = llvm::next(I);
     if (NextMI == MBB->end()) continue;
     
     int FirstSS, SecondSS;
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 9c0b596..bf58902 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -17,15 +17,19 @@
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
 #include "llvm/Target/TargetInstrInfo.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 using namespace llvm;
 
+STATISTIC(NumTails     , "Number of tails duplicated");
 STATISTIC(NumTailDups  , "Number of tail duplicated blocks");
 STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
@@ -36,34 +40,71 @@ TailDuplicateSize("tail-dup-size",
                   cl::desc("Maximum instructions to consider tail duplicating"),
                   cl::init(2), cl::Hidden);
 
+static cl::opt<bool>
+TailDupVerify("tail-dup-verify",
+              cl::desc("Verify sanity of PHI instructions during taildup"),
+              cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned>
+TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
+
+typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
+
 namespace {
   /// TailDuplicatePass - Perform tail duplication.
   class TailDuplicatePass : public MachineFunctionPass {
+    bool PreRegAlloc;
     const TargetInstrInfo *TII;
     MachineModuleInfo *MMI;
+    MachineRegisterInfo *MRI;
+
+    // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+    SmallVector<unsigned, 16> SSAUpdateVRs;
+
+    // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
+    // source virtual registers.
+    DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
 
   public:
     static char ID;
-    explicit TailDuplicatePass() : MachineFunctionPass(&ID) {}
+    explicit TailDuplicatePass(bool PreRA) :
+      MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
 
     virtual bool runOnMachineFunction(MachineFunction &MF);
     virtual const char *getPassName() const { return "Tail Duplication"; }
 
   private:
+    void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+                           MachineBasicBlock *BB);
+    void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
+                    MachineBasicBlock *PredBB,
+                    DenseMap<unsigned, unsigned> &LocalVRMap,
+                    SmallVector<std::pair<unsigned,unsigned>, 4> &Copies);
+    void DuplicateInstruction(MachineInstr *MI,
+                              MachineBasicBlock *TailBB,
+                              MachineBasicBlock *PredBB,
+                              MachineFunction &MF,
+                              DenseMap<unsigned, unsigned> &LocalVRMap);
+    void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+                              SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                              SmallSetVector<MachineBasicBlock*, 8> &Succs);
     bool TailDuplicateBlocks(MachineFunction &MF);
-    bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
+    bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+                       SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                       SmallVector<MachineInstr*, 16> &Copies);
     void RemoveDeadBlock(MachineBasicBlock *MBB);
   };
 
   char TailDuplicatePass::ID = 0;
 }
 
-FunctionPass *llvm::createTailDuplicatePass() {
-  return new TailDuplicatePass();
+FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
+  return new TailDuplicatePass(PreRegAlloc);
 }
 
 bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
   TII = MF.getTarget().getInstrInfo();
+  MRI = &MF.getRegInfo();
   MMI = getAnalysisIfAvailable<MachineModuleInfo>();
 
   bool MadeChange = false;
@@ -77,36 +118,325 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
   return MadeChange;
 }
 
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+  for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+    MachineBasicBlock *MBB = I;
+    SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
+                                                MBB->pred_end());
+    MachineBasicBlock::iterator MI = MBB->begin();
+    while (MI != MBB->end()) {
+      if (MI->getOpcode() != TargetInstrInfo::PHI)
+        break;
+      for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+             PE = Preds.end(); PI != PE; ++PI) {
+        MachineBasicBlock *PredBB = *PI;
+        bool Found = false;
+        for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+          MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+          if (PHIBB == PredBB) {
+            Found = true;
+            break;
+          }
+        }
+        if (!Found) {
+          errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+          errs() << "  missing input from predecessor BB#"
+                 << PredBB->getNumber() << '\n';
+          llvm_unreachable(0);
+        }
+      }
+
+      for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+        MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+        if (CheckExtra && !Preds.count(PHIBB)) {
+          // This is not a hard error.
+          errs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+                 << ": " << *MI;
+          errs() << "  extra input from predecessor BB#"
+                 << PHIBB->getNumber() << '\n';
+        }
+        if (PHIBB->getNumber() < 0) {
+          errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+          errs() << "  non-existing BB#" << PHIBB->getNumber() << '\n';
+          llvm_unreachable(0);
+        }
+      }
+      ++MI;
+    }
+  }
+}
+
 /// TailDuplicateBlocks - Look for small blocks that are unconditionally
 /// branched to and do not fall through. Tail-duplicate their instructions
 /// into their predecessors to eliminate (dynamic) branches.
 bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
   bool MadeChange = false;
 
+  if (PreRegAlloc && TailDupVerify) {
+    DEBUG(errs() << "\n*** Before tail-duplicating\n");
+    VerifyPHIs(MF, true);
+  }
+
+  SmallVector<MachineInstr*, 8> NewPHIs;
+  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
     MachineBasicBlock *MBB = I++;
 
+    if (NumTails == TailDupLimit)
+      break;
+
     // Only duplicate blocks that end with unconditional branches.
     if (MBB->canFallThrough())
       continue;
 
-    MadeChange |= TailDuplicate(MBB, MF);
-
-    // If it is dead, remove it.
-    if (MBB->pred_empty()) {
-      NumInstrDups -= MBB->size();
-      RemoveDeadBlock(MBB);
+    // Save the successors list.
+    SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+                                                MBB->succ_end());
+
+    SmallVector<MachineBasicBlock*, 8> TDBBs;
+    SmallVector<MachineInstr*, 16> Copies;
+    if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
+      ++NumTails;
+
+      // TailBB's immediate successors are now successors of those predecessors
+      // which duplicated TailBB. Add the predecessors as sources to the PHI
+      // instructions.
+      bool isDead = MBB->pred_empty();
+      if (PreRegAlloc)
+        UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+      // If it is dead, remove it.
+      if (isDead) {
+        NumInstrDups -= MBB->size();
+        RemoveDeadBlock(MBB);
+        ++NumDeadBlocks;
+      }
+
+      // Update SSA form.
+      if (!SSAUpdateVRs.empty()) {
+        for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+          unsigned VReg = SSAUpdateVRs[i];
+          SSAUpdate.Initialize(VReg);
+
+          // If the original definition is still around, add it as an available
+          // value.
+          MachineInstr *DefMI = MRI->getVRegDef(VReg);
+          MachineBasicBlock *DefBB = 0;
+          if (DefMI) {
+            DefBB = DefMI->getParent();
+            SSAUpdate.AddAvailableValue(DefBB, VReg);
+          }
+
+          // Add the new vregs as available values.
+          DenseMap<unsigned, AvailableValsTy>::iterator LI =
+            SSAUpdateVals.find(VReg);  
+          for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+            MachineBasicBlock *SrcBB = LI->second[j].first;
+            unsigned SrcReg = LI->second[j].second;
+            SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+          }
+
+          // Rewrite uses that are outside of the original def's block.
+          MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+          while (UI != MRI->use_end()) {
+            MachineOperand &UseMO = UI.getOperand();
+            MachineInstr *UseMI = &*UI;
+            ++UI;
+            if (UseMI->getParent() == DefBB)
+              continue;
+            SSAUpdate.RewriteUse(UseMO);
+          }
+        }
+
+        SSAUpdateVRs.clear();
+        SSAUpdateVals.clear();
+      }
+
+      // Eliminate some of the copies inserted tail duplication to maintain
+      // SSA form.
+      for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+        MachineInstr *Copy = Copies[i];
+        unsigned Src, Dst, SrcSR, DstSR;
+        if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) {
+          MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+          if (++UI == MRI->use_end()) {
+            // Copy is the only use. Do trivial copy propagation here.
+            MRI->replaceRegWith(Dst, Src);
+            Copy->eraseFromParent();
+          }
+        }
+      }
+
+      if (PreRegAlloc && TailDupVerify)
+        VerifyPHIs(MF, false);
       MadeChange = true;
-      ++NumDeadBlocks;
     }
   }
+
   return MadeChange;
 }
 
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+                         const MachineRegisterInfo *MRI) {
+  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+         UE = MRI->use_end(); UI != UE; ++UI) {
+    MachineInstr *UseMI = &*UI;
+    if (UseMI->getParent() != BB)
+      return true;
+  }
+  return false;
+}
+
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+  for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+    if (MI->getOperand(i+1).getMBB() == SrcBB)
+      return i;
+  return 0;
+}
+
+/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
+/// SSA update.
+void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+                                          MachineBasicBlock *BB) {
+  DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
+  if (LI != SSAUpdateVals.end())
+    LI->second.push_back(std::make_pair(BB, NewReg));
+  else {
+    AvailableValsTy Vals;
+    Vals.push_back(std::make_pair(BB, NewReg));
+    SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+    SSAUpdateVRs.push_back(OrigReg);
+  }
+}
+
+/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
+/// Remember the source register that's contributed by PredBB and update SSA
+/// update map.
+void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
+                                   MachineBasicBlock *TailBB,
+                                   MachineBasicBlock *PredBB,
+                                   DenseMap<unsigned, unsigned> &LocalVRMap,
+                         SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) {
+  unsigned DefReg = MI->getOperand(0).getReg();
+  unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+  assert(SrcOpIdx && "Unable to find matching PHI source?");
+  unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+  const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+  LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
+
+  // Insert a copy from source to the end of the block. The def register is the
+  // available value liveout of the block.
+  unsigned NewDef = MRI->createVirtualRegister(RC);
+  Copies.push_back(std::make_pair(NewDef, SrcReg));
+  if (isDefLiveOut(DefReg, TailBB, MRI))
+    AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+  // Remove PredBB from the PHI node.
+  MI->RemoveOperand(SrcOpIdx+1);
+  MI->RemoveOperand(SrcOpIdx);
+  if (MI->getNumOperands() == 1)
+    MI->eraseFromParent();
+}
+
+/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+                                     MachineBasicBlock *TailBB,
+                                     MachineBasicBlock *PredBB,
+                                     MachineFunction &MF,
+                                     DenseMap<unsigned, unsigned> &LocalVRMap) {
+  MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+  for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+    MachineOperand &MO = NewMI->getOperand(i);
+    if (!MO.isReg())
+      continue;
+    unsigned Reg = MO.getReg();
+    if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+    if (MO.isDef()) {
+      const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+      unsigned NewReg = MRI->createVirtualRegister(RC);
+      MO.setReg(NewReg);
+      LocalVRMap.insert(std::make_pair(Reg, NewReg));
+      if (isDefLiveOut(Reg, TailBB, MRI))
+        AddSSAUpdateEntry(Reg, NewReg, PredBB);
+    } else {
+      DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+      if (VI != LocalVRMap.end())
+        MO.setReg(VI->second);
+    }
+  }
+  PredBB->insert(PredBB->end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+                                  SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                  SmallSetVector<MachineBasicBlock*,8> &Succs) {
+  for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+         SE = Succs.end(); SI != SE; ++SI) {
+    MachineBasicBlock *SuccBB = *SI;
+    for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+         II != EE; ++II) {
+      if (II->getOpcode() != TargetInstrInfo::PHI)
+        break;
+      unsigned Idx = 0;
+      for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+        MachineOperand &MO = II->getOperand(i+1);
+        if (MO.getMBB() == FromBB) {
+          Idx = i;
+          break;
+        }
+      }
+
+      assert(Idx != 0);
+      MachineOperand &MO0 = II->getOperand(Idx);
+      unsigned Reg = MO0.getReg();
+      if (isDead) {
+        // Folded into the previous BB.
+        // There could be duplicate phi source entries. FIXME: Should sdisel
+        // or earlier pass fixed this?
+        for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+          MachineOperand &MO = II->getOperand(i+1);
+          if (MO.getMBB() == FromBB) {
+            II->RemoveOperand(i+1);
+            II->RemoveOperand(i);
+          }
+        }
+        II->RemoveOperand(Idx+1);
+        II->RemoveOperand(Idx);
+      }
+      DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
+      if (LI != SSAUpdateVals.end()) {
+        // This register is defined in the tail block.
+        for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+          MachineBasicBlock *SrcBB = LI->second[j].first;
+          unsigned SrcReg = LI->second[j].second;
+          II->addOperand(MachineOperand::CreateReg(SrcReg, false));
+          II->addOperand(MachineOperand::CreateMBB(SrcBB));
+        }
+      } else {
+        // Live in tail block, must also be live in predecessors.
+        for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+          MachineBasicBlock *SrcBB = TDBBs[j];
+          II->addOperand(MachineOperand::CreateReg(Reg, false));
+          II->addOperand(MachineOperand::CreateMBB(SrcBB));
+        }
+      }
+    }
+  }
+}
+
 /// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
 /// of its predecessors.
-bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
-                                        MachineFunction &MF) {
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+                                 SmallVector<MachineBasicBlock*, 8> &TDBBs,
+                                 SmallVector<MachineInstr*, 16> &Copies) {
   // Don't try to tail-duplicate single-block loops.
   if (TailBB->isSuccessor(TailBB))
     return false;
@@ -129,28 +459,36 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
 
   // Check the instructions in the block to determine whether tail-duplication
   // is invalid or unlikely to be profitable.
-  unsigned i = 0;
+  unsigned InstrCount = 0;
   bool HasCall = false;
   for (MachineBasicBlock::iterator I = TailBB->begin();
-       I != TailBB->end(); ++I, ++i) {
+       I != TailBB->end(); ++I) {
     // Non-duplicable things shouldn't be tail-duplicated.
     if (I->getDesc().isNotDuplicable()) return false;
+    // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+    // A return may expand into a lot more instructions (e.g. reload of callee
+    // saved registers) after PEI.
+    if (PreRegAlloc && I->getDesc().isReturn()) return false;
     // Don't duplicate more than the threshold.
-    if (i == MaxDuplicateCount) return false;
+    if (InstrCount == MaxDuplicateCount) return false;
     // Remember if we saw a call.
     if (I->getDesc().isCall()) HasCall = true;
+    if (I->getOpcode() != TargetInstrInfo::PHI)
+      InstrCount += 1;
   }
   // Heuristically, don't tail-duplicate calls if it would expand code size,
   // as it's less likely to be worth the extra cost.
-  if (i > 1 && HasCall)
+  if (InstrCount > 1 && HasCall)
     return false;
 
+  DEBUG(errs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
   // Iterate through all the unique predecessors and tail-duplicate this
   // block into them, if possible. Copying the list ahead of time also
   // avoids trouble with the predecessor list reallocating.
   bool Changed = false;
-  SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
-                                               TailBB->pred_end());
+  SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+                                              TailBB->pred_end());
   for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
        PE = Preds.end(); PI != PE; ++PI) {
     MachineBasicBlock *PredBB = *PI;
@@ -175,13 +513,35 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
     DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
                  << "From Succ: " << *TailBB);
 
+    TDBBs.push_back(PredBB);
+
     // Remove PredBB's unconditional branch.
     TII->RemoveBranch(*PredBB);
+
     // Clone the contents of TailBB into PredBB.
-    for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
-         I != E; ++I) {
-      MachineInstr *NewMI = MF.CloneMachineInstr(I);
-      PredBB->insert(PredBB->end(), NewMI);
+    DenseMap<unsigned, unsigned> LocalVRMap;
+    SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+    MachineBasicBlock::iterator I = TailBB->begin();
+    while (I != TailBB->end()) {
+      MachineInstr *MI = &*I;
+      ++I;
+      if (MI->getOpcode() == TargetInstrInfo::PHI) {
+        // Replace the uses of the def of the PHI with the register coming
+        // from PredBB.
+        ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos);
+      } else {
+        // Replace def of virtual registers with new registers, and update
+        // uses with PHI source register or the new registers.
+        DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap);
+      }
+    }
+    MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+    for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+      const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
+      TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first,
+                        CopyInfos[i].second, RC,RC);
+      MachineInstr *CopyMI = prior(Loc);
+      Copies.push_back(CopyMI);
     }
     NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
 
@@ -190,8 +550,8 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
     assert(PredBB->succ_empty() &&
            "TailDuplicate called on block with multiple successors!");
     for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
-         E = TailBB->succ_end(); I != E; ++I)
-       PredBB->addSuccessor(*I);
+           E = TailBB->succ_end(); I != E; ++I)
+      PredBB->addSuccessor(*I);
 
     Changed = true;
     ++NumTailDups;
@@ -200,22 +560,56 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
   // If TailBB was duplicated into all its predecessors except for the prior
   // block, which falls through unconditionally, move the contents of this
   // block into the prior block.
-  MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
+  MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
   MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
   SmallVector<MachineOperand, 4> PriorCond;
   bool PriorUnAnalyzable =
-    TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+    TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true);
   // This has to check PrevBB->succ_size() because EH edges are ignored by
   // AnalyzeBranch.
   if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
-      TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
+      TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 &&
       !TailBB->hasAddressTaken()) {
-    DEBUG(errs() << "\nMerging into block: " << PrevBB
+    DEBUG(errs() << "\nMerging into block: " << *PrevBB
           << "From MBB: " << *TailBB);
-    PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
-    PrevBB.removeSuccessor(PrevBB.succ_begin());;
-    assert(PrevBB.succ_empty());
-    PrevBB.transferSuccessors(TailBB);
+    if (PreRegAlloc) {
+      DenseMap<unsigned, unsigned> LocalVRMap;
+      SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+      MachineBasicBlock::iterator I = TailBB->begin();
+      // Process PHI instructions first.
+      while (I != TailBB->end() && I->getOpcode() == TargetInstrInfo::PHI) {
+        // Replace the uses of the def of the PHI with the register coming
+        // from PredBB.
+        MachineInstr *MI = &*I++;
+        ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos);
+        if (MI->getParent())
+          MI->eraseFromParent();
+      }
+
+      // Now copy the non-PHI instructions.
+      while (I != TailBB->end()) {
+        // Replace def of virtual registers with new registers, and update
+        // uses with PHI source register or the new registers.
+        MachineInstr *MI = &*I++;
+        DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap);
+        MI->eraseFromParent();
+      }
+      MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
+      for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+        const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
+        TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first,
+                          CopyInfos[i].second, RC, RC);
+        MachineInstr *CopyMI = prior(Loc);
+        Copies.push_back(CopyMI);
+      }
+    } else {
+      // No PHIs to worry about, just splice the instructions over.
+      PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+    }
+    PrevBB->removeSuccessor(PrevBB->succ_begin());
+    assert(PrevBB->succ_empty());
+    PrevBB->transferSuccessors(TailBB);
+    TDBBs.push_back(PrevBB);
     Changed = true;
   }
 
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 102e2a3..393e315 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -329,7 +329,7 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr *
       return false;
 
     // For the def, it should be the only def of that register.
-    if (MO.isDef() && (next(MRI.def_begin(Reg)) != MRI.def_end() ||
+    if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() ||
                        MRI.isLiveIn(Reg)))
       return false;
 
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 5fa690b..98b95ac 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -211,7 +211,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
   ++KillPos;
 
   unsigned NumVisited = 0;
-  for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
+  for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
     MachineInstr *OtherMI = I;
     if (NumVisited > 30)  // FIXME: Arbitrary limit to reduce compile time cost.
       return false;
@@ -412,7 +412,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
     MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
     // If there are multiple defs, we can't do a simple analysis, so just
     // go with what the kill flag says.
-    if (next(Begin) != MRI->def_end())
+    if (llvm::next(Begin) != MRI->def_end())
       return true;
     DefMI = &*Begin;
     bool IsSrcPhys, IsDstPhys;
@@ -643,7 +643,7 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
     if (!Sunk) {
       DistanceMap.insert(std::make_pair(NewMI, Dist));
       mi = NewMI;
-      nmi = next(mi);
+      nmi = llvm::next(mi);
     }
     return true;
   }
@@ -923,7 +923,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
     Processed.clear();
     for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
          mi != me; ) {
-      MachineBasicBlock::iterator nmi = next(mi);
+      MachineBasicBlock::iterator nmi = llvm::next(mi);
       const TargetInstrDesc &TID = mi->getDesc();
       bool FirstTied = true;
 
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 10c8066..054c3b6 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -754,7 +754,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
     }
 
     // Skip over the same register.
-    std::multimap<unsigned, int>::iterator NI = next(I);
+    std::multimap<unsigned, int>::iterator NI = llvm::next(I);
     while (NI != E && NI->first == Reg) {
       ++I;
       ++NI;
@@ -1133,7 +1133,7 @@ private:
                          std::vector<MachineOperand*> &KillOps,
                          VirtRegMap &VRM) {
 
-    MachineBasicBlock::iterator NextMII = next(MII);
+    MachineBasicBlock::iterator NextMII = llvm::next(MII);
     if (NextMII == MBB.end())
       return false;
 
@@ -1186,7 +1186,7 @@ private:
     // Unfold next instructions that fold the same SS.
     do {
       MachineInstr &NextMI = *NextMII;
-      NextMII = next(NextMII);
+      NextMII = llvm::next(NextMII);
       NewMIs.clear();
       if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
         llvm_unreachable("Unable unfold the load / store folding instruction!");
@@ -1463,8 +1463,8 @@ private:
                            std::vector<MachineOperand*> &KillOps,
                            VirtRegMap &VRM) {
 
-    MachineBasicBlock::iterator oldNextMII = next(MII);
-    TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC);
+    MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+    TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
     MachineInstr *StoreMI = prior(oldNextMII);
     VRM.addSpillSlotUse(StackSlot, StoreMI);
     DEBUG(errs() << "Store:\t" << *StoreMI);
@@ -1626,14 +1626,14 @@ private:
     DistanceMap.clear();
     for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
          MII != E; ) {
-      MachineBasicBlock::iterator NextMII = next(MII);
+      MachineBasicBlock::iterator NextMII = llvm::next(MII);
 
       VirtRegMap::MI2VirtMapTy::const_iterator I, End;
       bool Erased = false;
       bool BackTracked = false;
       if (OptimizeByUnfold(MBB, MII,
                            MaybeDeadStores, Spills, RegKills, KillOps, VRM))
-        NextMII = next(MII);
+        NextMII = llvm::next(MII);
 
       MachineInstr &MI = *MII;
 
@@ -1657,7 +1657,7 @@ private:
 
           // Back-schedule reloads and remats.
           MachineBasicBlock::iterator InsertLoc =
-            ComputeReloadLoc(next(MII), MBB.begin(), PhysReg, TRI, false,
+            ComputeReloadLoc(llvm::next(MII), MBB.begin(), PhysReg, TRI, false,
                              SS, TII, MF);
 
           TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC);
@@ -1667,7 +1667,7 @@ private:
           ++NumPSpills;
           DistanceMap.insert(std::make_pair(LoadMI, Dist++));
         }
-        NextMII = next(MII);
+        NextMII = llvm::next(MII);
       }
 
       // Insert restores here if asked to.
@@ -1785,14 +1785,14 @@ private:
           const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
           unsigned Phys = VRM.getPhys(VirtReg);
           int StackSlot = VRM.getStackSlot(VirtReg);
-          MachineBasicBlock::iterator oldNextMII = next(MII);
-          TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC);
+          MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+          TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC);
           MachineInstr *StoreMI = prior(oldNextMII);
           VRM.addSpillSlotUse(StackSlot, StoreMI);
           DEBUG(errs() << "Store:\t" << *StoreMI);
           VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
         }
-        NextMII = next(MII);
+        NextMII = llvm::next(MII);
       }
 
       /// ReusedOperands - Keep track of operand reuse in case we need to undo
@@ -2265,7 +2265,7 @@ private:
 
               if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
                                       Spills, RegKills, KillOps, TRI, VRM)) {
-                NextMII = next(MII);
+                NextMII = llvm::next(MII);
                 BackTracked = true;
                 goto ProcessNextInst;
               }
@@ -2381,7 +2381,7 @@ private:
           MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
           SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true,
                             LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM);
-          NextMII = next(MII);
+          NextMII = llvm::next(MII);
 
           // Check to see if this is a noop copy.  If so, eliminate the
           // instruction before considering the dest reg to be changed.
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 6d781c7..26afa54 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -208,7 +208,7 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
                                 JITMemoryManager *JMM,
                                 CodeGenOpt::Level OptLevel,
                                 bool GVsWithCode,
-				CodeModel::Model CMM) {
+                                CodeModel::Model CMM) {
   // Make sure we can resolve symbols in the program as well. The zero arg
   // to the function tells DynamicLibrary to load the program, not a library.
   if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
@@ -681,7 +681,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
   if (Ptr) return Ptr;
 
   // If the global is external, just remember the address.
-  if (GV->isDeclaration()) {
+  if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) {
 #if HAVE___DSO_HANDLE
     if (GV->getName() == "__dso_handle")
       return (void*)&__dso_handle;
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index f2b28ad..0193486 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -175,7 +175,6 @@ struct KeyInfo {
   static inline unsigned getTombstoneKey() { return -2U; }
   static unsigned getHashValue(const unsigned &Key) { return Key; }
   static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
-  static bool isPod() { return true; }
 };
 
 /// ActionEntry - Structure describing an entry in the actions table.
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
index b45c71f..52a8f71 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -69,24 +69,18 @@ OProfileJITEventListener::~OProfileJITEventListener() {
 }
 
 class FilenameCache {
-  // Holds the filename of each Scope, so that we can pass the
-  // pointer into oprofile.  These char*s are freed in the destructor.
-  DenseMap<MDNode*, char*> Filenames;
+  // Holds the filename of each Scope, so that we can pass a null-terminated
+  // string into oprofile.
+  DenseMap<MDNode*, std::string> Filenames;
 
  public:
   const char *getFilename(MDNode *Scope) {
-    char *&Filename = Filenames[Scope];
-    if (Filename == NULL) {
+    std::string &Filename = Filenames[Scope];
+    if (Filename.empty()) {
       DIScope S(Scope);
-      Filename = strdup(S.getFilename());
-    }
-    return Filename;
-  }
-  ~FilenameCache() {
-    for (DenseMap<MDNode*, char*>::iterator
-             I = Filenames.begin(), E = Filenames.end(); I != E; ++I) {
-      free(I->second);
+      Filename = S.getFilename();
     }
+    return Filename.c_str();
   }
 };
 
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index cd355ff..ac736dc 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMSupport
   CommandLine.cpp
   ConstantRange.cpp
   Debug.cpp
+  DeltaAlgorithm.cpp
   Dwarf.cpp
   ErrorHandling.cpp
   FileUtilities.cpp
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 9cf9c89..b6c0e08 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -778,9 +778,10 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
       free(*i);
   }
 
-  DEBUG(errs() << "\nArgs: ";
+  DEBUG(errs() << "Args: ";
         for (int i = 0; i < argc; ++i)
           errs() << argv[i] << ' ';
+        errs() << '\n';
        );
 
   // If we had an error processing our arguments, don't let the program execute
diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp
new file mode 100644
index 0000000..d176548
--- /dev/null
+++ b/lib/Support/DeltaAlgorithm.cpp
@@ -0,0 +1,114 @@
+//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include <algorithm>
+#include <iterator>
+using namespace llvm;
+
+DeltaAlgorithm::~DeltaAlgorithm() {
+}
+
+bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) {
+  if (FailedTestsCache.count(Changes))
+    return false;
+
+  bool Result = ExecuteOneTest(Changes);
+  if (!Result)
+    FailedTestsCache.insert(Changes);
+
+  return Result;
+}
+
+void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) {
+  // FIXME: Allow clients to provide heuristics for improved splitting.
+
+  // FIXME: This is really slow.
+  changeset_ty LHS, RHS;
+  unsigned idx = 0;
+  for (changeset_ty::const_iterator it = S.begin(),
+         ie = S.end(); it != ie; ++it, ++idx)
+    ((idx & 1) ? LHS : RHS).insert(*it);
+  if (!LHS.empty())
+    Res.push_back(LHS);
+  if (!RHS.empty())
+    Res.push_back(RHS);
+}
+
+DeltaAlgorithm::changeset_ty
+DeltaAlgorithm::Delta(const changeset_ty &Changes,
+                      const changesetlist_ty &Sets) {
+  // Invariant: union(Res) == Changes
+  UpdatedSearchState(Changes, Sets);
+
+  // If there is nothing left we can remove, we are done.
+  if (Sets.size() <= 1)
+    return Changes;
+
+  // Look for a passing subset.
+  changeset_ty Res;
+  if (Search(Changes, Sets, Res))
+    return Res;
+
+  // Otherwise, partition the sets if possible; if not we are done.
+  changesetlist_ty SplitSets;
+  for (changesetlist_ty::const_iterator it = Sets.begin(),
+         ie = Sets.end(); it != ie; ++it)
+    Split(*it, SplitSets);
+  if (SplitSets.size() == Sets.size())
+    return Changes;
+
+  return Delta(Changes, SplitSets);
+}
+
+bool DeltaAlgorithm::Search(const changeset_ty &Changes,
+                            const changesetlist_ty &Sets,
+                            changeset_ty &Res) {
+  // FIXME: Parallelize.
+  for (changesetlist_ty::const_iterator it = Sets.begin(),
+         ie = Sets.end(); it != ie; ++it) {
+    // If the test passes on this subset alone, recurse.
+    if (GetTestResult(*it)) {
+      changesetlist_ty Sets;
+      Split(*it, Sets);
+      Res = Delta(*it, Sets);
+      return true;
+    }
+
+    // Otherwise, if we have more than two sets, see if test passes on the
+    // complement.
+    if (Sets.size() > 2) {
+      // FIXME: This is really slow.
+      changeset_ty Complement;
+      std::set_difference(
+        Changes.begin(), Changes.end(), it->begin(), it->end(),
+        std::insert_iterator<changeset_ty>(Complement, Complement.begin()));
+      if (GetTestResult(Complement)) {
+        changesetlist_ty ComplementSets;
+        ComplementSets.insert(ComplementSets.end(), Sets.begin(), it);
+        ComplementSets.insert(ComplementSets.end(), it + 1, Sets.end());
+        Res = Delta(Complement, ComplementSets);
+        return true;
+      }
+    }
+  }
+
+  return false;
+}
+
+DeltaAlgorithm::changeset_ty DeltaAlgorithm::Run(const changeset_ty &Changes) {
+  // Check empty set first to quickly find poor test functions.
+  if (GetTestResult(changeset_ty()))
+    return changeset_ty();
+
+  // Otherwise run the real delta algorithm.
+  changesetlist_ty Sets;
+  Split(Changes, Sets);
+
+  return Delta(Changes, Sets);
+}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index b04864a..df1aa6a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -176,7 +176,7 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
 #endif
   int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags);
   if (FD == -1) {
-    if (ErrStr) *ErrStr = "could not open file";
+    if (ErrStr) *ErrStr = strerror(errno);
     return 0;
   }
   
@@ -186,7 +186,7 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
     struct stat FileInfo;
     // TODO: This should use fstat64 when available.
     if (fstat(FD, &FileInfo) == -1) {
-      if (ErrStr) *ErrStr = "could not get file length";
+      if (ErrStr) *ErrStr = strerror(errno);
       ::close(FD);
       return 0;
     }
@@ -230,8 +230,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
       // try again
     } else {
       // error reading.
+      if (ErrStr) *ErrStr = strerror(errno);
       close(FD);
-      if (ErrStr) *ErrStr = "error reading file data";
       return 0;
     }
   }
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 31451cc..0c90e77 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -209,8 +209,7 @@ raw_ostream &raw_ostream::operator<<(const void *P) {
 }
 
 raw_ostream &raw_ostream::operator<<(double N) {
-  this->operator<<(ftostr(N));
-  return *this;
+  return this->operator<<(ftostr(N));
 }
 
 
diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp
index f9b55a1..7ba8b77 100644
--- a/lib/System/Atomic.cpp
+++ b/lib/System/Atomic.cpp
@@ -85,7 +85,7 @@ sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
 #elif defined(__GNUC__)
   return __sync_add_and_fetch(ptr, val);
 #elif defined(_MSC_VER)
-  return InterlockedAdd(ptr, val);
+  return InterlockedExchangeAdd(ptr, val) + val;
 #else
 #  error No atomic add implementation for your platform!
 #endif
diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp
index e112698..79897e4 100644
--- a/lib/System/Host.cpp
+++ b/lib/System/Host.cpp
@@ -103,11 +103,8 @@ static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, unsigned &Model
     Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
   }
 }
-#endif
-
 
 std::string sys::getHostCPUName() {
-#if defined(__x86_64__) || defined(__i386__)
   unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
   if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
     return "generic";
@@ -295,7 +292,10 @@ std::string sys::getHostCPUName() {
       return "generic";
     }
   }
-#endif
-
   return "generic";
 }
+#else
+std::string sys::getHostCPUName() {
+  return "generic";
+}
+#endif
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index ff1497a..33b26f7 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -77,7 +77,7 @@ inline bool lastIsSlash(const std::string& path) {
 namespace llvm {
 using namespace sys;
 
-extern const char sys::PathSeparator = ':';
+const char sys::PathSeparator = ':';
 
 Path::Path(const std::string& p)
   : path(p) {}
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ff1980d..21445ad 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -109,7 +109,6 @@ FunctionPass *createNEONPreAllocPass();
 FunctionPass *createNEONMoveFixPass();
 FunctionPass *createThumb2ITBlockPass();
 FunctionPass *createThumb2SizeReductionPass();
-FunctionPass *createARMMaxStackAlignmentCalculatorPass();
 
 extern Target TheARMTarget, TheThumbTarget;
 
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c95d4c8..1aae369 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -418,11 +418,13 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
   return true;
 }
 
-/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+DISABLE_INLINE
 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
-                                unsigned JTI) DISABLE_INLINE;
+                                unsigned JTI);
 static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
                                 unsigned JTI) {
+  assert(JTI < JT.size());
   return JT[JTI].MBBs.size();
 }
 
@@ -467,6 +469,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
       return MI->getOperand(2).getImm();
     case ARM::Int_eh_sjlj_setjmp:
       return 24;
+    case ARM::tInt_eh_sjlj_setjmp:
+      return 22;
     case ARM::t2Int_eh_sjlj_setjmp:
       return 22;
     case ARM::BR_JTr:
@@ -755,7 +759,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
     assert((RC == ARM::QPRRegisterClass ||
             RC == ARM::QPR_VFP2RegisterClass ||
             RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
-    // FIXME: Neon instructions should support predicates
     if (Align >= 16
         && (getRegisterInfo().needsStackRealignment(MF))) {
       AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 282e30c..78d9135 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -92,6 +92,8 @@ namespace ARMII {
     StMiscFrm     = 9  << FormShift,
     LdStMulFrm    = 10 << FormShift,
 
+    LdStExFrm     = 28 << FormShift,
+
     // Miscellaneous arithmetic instructions
     ArithMiscFrm  = 11 << FormShift,
 
@@ -190,9 +192,6 @@ public:
   // if there is not such an opcode.
   virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
 
-  // Return true if the block does not fall through.
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0;
-
   virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
                                               MachineBasicBlock::iterator &MBBI,
                                               LiveVariables *LV) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 653328d..9b5f79f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -471,21 +471,6 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
   }
 }
 
-static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
-  unsigned MaxAlign = 0;
-
-  for (int i = FFI->getObjectIndexBegin(),
-         e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
-      continue;
-
-    unsigned Align = FFI->getObjectAlignment(i);
-    MaxAlign = std::max(MaxAlign, Align);
-  }
-
-  return MaxAlign;
-}
-
 /// hasFP - Return true if the specified function should have a dedicated frame
 /// pointer register.  This is true if the function has variable sized allocas
 /// or if frame pointer elimination is disabled.
@@ -585,16 +570,21 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
   SmallVector<unsigned, 4> UnspilledCS2GPRs;
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
 
-  MachineFrameInfo *MFI = MF.getFrameInfo();
 
   // Calculate and set max stack object alignment early, so we can decide
   // whether we will need stack realignment (and thus FP).
   if (RealignStack) {
-    unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
-                                 calculateMaxStackAlignment(MFI));
-    MFI->setMaxAlignment(MaxAlign);
+    MachineFrameInfo *MFI = MF.getFrameInfo();
+    MFI->calculateMaxStackAlignment();
   }
 
+  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
+  // scratch register.
+  // FIXME: It will be better just to find spare register here.
+  if (needsStackRealignment(MF) &&
+      AFI->isThumb2Function())
+    MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
   // Don't spill FP if the frame can be eliminated. This is determined
   // by scanning the callee-save registers to see if any is used.
   const unsigned *CSRegs = getCalleeSavedRegs();
@@ -1368,14 +1358,30 @@ emitPrologue(MachineFunction &MF) const {
 
   // If we need dynamic stack realignment, do it here.
   if (needsStackRealignment(MF)) {
-    unsigned Opc;
     unsigned MaxAlign = MFI->getMaxAlignment();
     assert (!AFI->isThumb1OnlyFunction());
-    Opc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
-
-    AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), ARM::SP)
+    if (!AFI->isThumbFunction()) {
+      // Emit bic sp, sp, MaxAlign
+      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+                                          TII.get(ARM::BICri), ARM::SP)
                                   .addReg(ARM::SP, RegState::Kill)
                                   .addImm(MaxAlign-1)));
+    } else {
+      // We cannot use sp as source/dest register here, thus we're emitting the
+      // following sequence:
+      // mov r4, sp
+      // bic r4, r4, MaxAlign
+      // mov sp, r4
+      // FIXME: It will be better just to find spare register here.
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R4)
+        .addReg(ARM::SP, RegState::Kill);
+      AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+                                          TII.get(ARM::t2BICri), ARM::R4)
+                                  .addReg(ARM::R4, RegState::Kill)
+                                  .addImm(MaxAlign-1)));
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+        .addReg(ARM::R4, RegState::Kill);
+    }
   }
 }
 
@@ -1479,48 +1485,4 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
     emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
 }
 
-namespace {
-  struct MaximalStackAlignmentCalculator : public MachineFunctionPass {
-    static char ID;
-    MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF) {
-      MachineFrameInfo *FFI = MF.getFrameInfo();
-      MachineRegisterInfo &RI = MF.getRegInfo();
-
-      // Calculate max stack alignment of all already allocated stack objects.
-      unsigned MaxAlign = calculateMaxStackAlignment(FFI);
-
-      // Be over-conservative: scan over all vreg defs and find, whether vector
-      // registers are used. If yes - there is probability, that vector register
-      // will be spilled and thus stack needs to be aligned properly.
-      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
-           RegNum < RI.getLastVirtReg(); ++RegNum)
-        MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
-
-      if (FFI->getMaxAlignment() == MaxAlign)
-        return false;
-
-      FFI->setMaxAlignment(MaxAlign);
-      return true;
-    }
-
-    virtual const char *getPassName() const {
-      return "ARM Stack Required Alignment Auto-Detector";
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-  };
-
-  char MaximalStackAlignmentCalculator::ID = 0;
-}
-
-FunctionPass*
-llvm::createARMMaxStackAlignmentCalculatorPass() {
-  return new MaximalStackAlignmentCalculator();
-}
-
 #include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index e59a315..acd30d2 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -418,10 +418,10 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
 static bool BBHasFallthrough(MachineBasicBlock *MBB) {
   // Get the next machine basic block in the function.
   MachineFunction::iterator MBBI = MBB;
-  if (next(MBBI) == MBB->getParent()->end())  // Can't fall off end of function.
+  if (llvm::next(MBBI) == MBB->getParent()->end())  // Can't fall off end of function.
     return false;
 
-  MachineBasicBlock *NextBB = next(MBBI);
+  MachineBasicBlock *NextBB = llvm::next(MBBI);
   for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
        E = MBB->succ_end(); I != E; ++I)
     if (*I == NextBB)
@@ -760,7 +760,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
                      CompareMBBNumbers);
   MachineBasicBlock* WaterBB = *IP;
   if (WaterBB == OrigBB)
-    WaterList.insert(next(IP), NewBB);
+    WaterList.insert(llvm::next(IP), NewBB);
   else
     WaterList.insert(IP, OrigBB);
   NewWaterList.insert(OrigBB);
@@ -887,7 +887,7 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
 
 void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
                                               int delta) {
-  MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
+  MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
   for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
       i < e; ++i) {
     BBOffsets[i] += delta;
@@ -929,7 +929,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
       if (delta==0)
         return;
     }
-    MBBI = next(MBBI);
+    MBBI = llvm::next(MBBI);
   }
 }
 
@@ -1096,7 +1096,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
     DEBUG(errs() << "Split at end of block\n");
     if (&UserMBB->back() == UserMI)
       assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
-    NewMBB = next(MachineFunction::iterator(UserMBB));
+    NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
     // Add an unconditional branch from UserMBB to fallthrough block.
     // Record it for branch lengthening; this new branch will not get out of
     // range, but if the preceding conditional branch is out of range, the
@@ -1144,7 +1144,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
     for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
          Offset < BaseInsertOffset;
          Offset += TII->GetInstSizeInBytes(MI),
-            MI = next(MI)) {
+            MI = llvm::next(MI)) {
       if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
         CPUser &U = CPUsers[CPUIndex];
         if (!OffsetIsInRange(Offset, EndInsertOffset,
@@ -1204,7 +1204,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
       NewWaterList.insert(NewIsland);
     }
     // The new CPE goes before the following block (NewMBB).
-    NewMBB = next(MachineFunction::iterator(WaterBB));
+    NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
 
   } else {
     // No water found.
@@ -1406,7 +1406,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
 
   NumCBrFixed++;
   if (BMI != MI) {
-    if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+    if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
         BMI->getOpcode() == Br.UncondBr) {
       // Last MI in the BB is an unconditional branch. Can we simply invert the
       // condition and swap destinations:
@@ -1433,12 +1433,12 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
     // branch to the destination.
     int delta = TII->GetInstSizeInBytes(&MBB->back());
     BBSizes[MBB->getNumber()] -= delta;
-    MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
+    MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
     AdjustBBOffsetsAfter(SplitBB, -delta);
     MBB->back().eraseFromParent();
     // BBOffsets[SplitBB] is wrong temporarily, fixed below
   }
-  MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+  MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
 
   DEBUG(errs() << "  Insert B to BB#" << DestBB->getNumber()
                << " also invert condition and change dest. to BB#"
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c929c54..1b8727d 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -48,7 +48,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   while (MBBI != E) {
     MachineInstr &MI = *MBBI;
-    MachineBasicBlock::iterator NMBBI = next(MBBI);
+    MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
 
     unsigned Opcode = MI.getOpcode();
     switch (Opcode) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c839fc6..655c762 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -42,6 +42,7 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
 #include <sstream>
 using namespace llvm;
 
@@ -377,7 +378,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
   else
     setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
-  setOperationAction(ISD::MEMBARRIER,         MVT::Other, Expand);
+  setOperationAction(ISD::MEMBARRIER,         MVT::Other, Custom);
 
   if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
     setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -500,6 +501,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 
   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
 
+  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
+  case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";
+
   case ARMISD::VCEQ:          return "ARMISD::VCEQ";
   case ARMISD::VCGE:          return "ARMISD::VCGE";
   case ARMISD::VCGEU:         return "ARMISD::VCGEU";
@@ -1470,6 +1474,28 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
   }
 }
 
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+                          const ARMSubtarget *Subtarget) {
+  DebugLoc dl = Op.getDebugLoc();
+  SDValue Op5 = Op.getOperand(5);
+  SDValue Res;
+  unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
+  if (isDeviceBarrier) {
+    if (Subtarget->hasV7Ops())
+      Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
+    else
+      Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
+                        DAG.getConstant(0, MVT::i32));
+  } else {
+    if (Subtarget->hasV7Ops())
+      Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+    else
+      Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+                        DAG.getConstant(0, MVT::i32));
+  }
+  return Res;
+}
+
 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
                             unsigned VarArgsFrameIndex) {
   // vastart just stores the address of the VarArgsFrameIndex slot into the
@@ -2528,6 +2554,25 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
   return true;
 }
 
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned i = 0; i < NumElts; i += 2) {
+    if ((unsigned) M[i] != i + WhichResult ||
+        (unsigned) M[i+1] != i + WhichResult)
+      return false;
+  }
+  return true;
+}
+
 static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
                        unsigned &WhichResult) {
   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -2548,6 +2593,33 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
   return true;
 }
 
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned Half = VT.getVectorNumElements() / 2;
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  for (unsigned j = 0; j != 2; ++j) {
+    unsigned Idx = WhichResult;
+    for (unsigned i = 0; i != Half; ++i) {
+      if ((unsigned) M[i + j * Half] != Idx)
+        return false;
+      Idx += 2;
+    }
+  }
+
+  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
 static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
                        unsigned &WhichResult) {
   unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -2571,6 +2643,33 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
   return true;
 }
 
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+                                unsigned &WhichResult) {
+  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+  if (EltSz == 64)
+    return false;
+
+  unsigned NumElts = VT.getVectorNumElements();
+  WhichResult = (M[0] == 0 ? 0 : 1);
+  unsigned Idx = WhichResult * NumElts / 2;
+  for (unsigned i = 0; i != NumElts; i += 2) {
+    if ((unsigned) M[i] != Idx ||
+        (unsigned) M[i+1] != Idx)
+      return false;
+    Idx += 1;
+  }
+
+  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+  if (VT.is64BitVector() && EltSz == 32)
+    return false;
+
+  return true;
+}
+
+
 static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
   // Canonicalize all-zeros and all-ones vectors.
   ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
@@ -2683,7 +2782,10 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
           isVEXTMask(M, VT, ReverseVEXT, Imm) ||
           isVTRNMask(M, VT, WhichResult) ||
           isVUZPMask(M, VT, WhichResult) ||
-          isVZIPMask(M, VT, WhichResult));
+          isVZIPMask(M, VT, WhichResult) ||
+          isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+          isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+          isVZIP_v_undef_Mask(M, VT, WhichResult));
 }
 
 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -2815,6 +2917,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
     return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
                        V1, V2).getValue(WhichResult);
 
+  if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
+  if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
+  if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+    return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+                       V1, V1).getValue(WhichResult);
+
   // If the shuffle is not directly supported and it has 4 elements, use
   // the PerfectShuffle-generated table to synthesize it from other shuffles.
   if (VT.getVectorNumElements() == 4 &&
@@ -2886,6 +2998,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
   case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
   case ISD::VASTART:       return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
   case ISD::SINT_TO_FP:
   case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
   case ISD::FP_TO_SINT:
@@ -2938,14 +3051,233 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
 //===----------------------------------------------------------------------===//
 
 MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+                                     MachineBasicBlock *BB,
+                                     unsigned Size) const {
+  unsigned dest    = MI->getOperand(0).getReg();
+  unsigned ptr     = MI->getOperand(1).getReg();
+  unsigned oldval  = MI->getOperand(2).getReg();
+  unsigned newval  = MI->getOperand(3).getReg();
+  unsigned scratch = BB->getParent()->getRegInfo()
+    .createVirtualRegister(ARM::GPRRegisterClass);
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  bool isThumb2 = Subtarget->isThumb2();
+
+  unsigned ldrOpc, strOpc;
+  switch (Size) {
+  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+  case 1:
+    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+    strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+    break;
+  case 2:
+    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+    break;
+  case 4:
+    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+    break;
+  }
+
+  MachineFunction *MF = BB->getParent();
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator It = BB;
+  ++It; // insert the new blocks after the current block
+
+  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+  MF->insert(It, loop1MBB);
+  MF->insert(It, loop2MBB);
+  MF->insert(It, exitMBB);
+  exitMBB->transferSuccessors(BB);
+
+  //  thisMBB:
+  //   ...
+  //   fallthrough --> loop1MBB
+  BB->addSuccessor(loop1MBB);
+
+  // loop1MBB:
+  //   ldrex dest, [ptr]
+  //   cmp dest, oldval
+  //   bne exitMBB
+  BB = loop1MBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+                 .addReg(dest).addReg(oldval));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+  BB->addSuccessor(loop2MBB);
+  BB->addSuccessor(exitMBB);
+
+  // loop2MBB:
+  //   strex scratch, newval, [ptr]
+  //   cmp scratch, #0
+  //   bne loop1MBB
+  BB = loop2MBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
+                 .addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                 .addReg(scratch).addImm(0));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+  BB->addSuccessor(loop1MBB);
+  BB->addSuccessor(exitMBB);
+
+  //  exitMBB:
+  //   ...
+  BB = exitMBB;
+  return BB;
+}
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+                                    unsigned Size, unsigned BinOpcode) const {
+  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction *F = BB->getParent();
+  MachineFunction::iterator It = BB;
+  ++It;
+
+  unsigned dest = MI->getOperand(0).getReg();
+  unsigned ptr = MI->getOperand(1).getReg();
+  unsigned incr = MI->getOperand(2).getReg();
+  DebugLoc dl = MI->getDebugLoc();
+  bool isThumb2 = Subtarget->isThumb2();
+  unsigned ldrOpc, strOpc;
+  switch (Size) {
+  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+  case 1:
+    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+    strOpc = isThumb2 ? ARM::t2LDREXB : ARM::STREXB;
+    break;
+  case 2:
+    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+    break;
+  case 4:
+    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+    break;
+  }
+
+  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+  F->insert(It, loopMBB);
+  F->insert(It, exitMBB);
+  exitMBB->transferSuccessors(BB);
+
+  MachineRegisterInfo &RegInfo = F->getRegInfo();
+  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+  unsigned scratch2 = (!BinOpcode) ? incr :
+    RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+  //  thisMBB:
+  //   ...
+  //   fallthrough --> loopMBB
+  BB->addSuccessor(loopMBB);
+
+  //  loopMBB:
+  //   ldrex dest, ptr
+  //   <binop> scratch2, dest, incr
+  //   strex scratch, scratch2, ptr
+  //   cmp scratch, #0
+  //   bne- loopMBB
+  //   fallthrough --> exitMBB
+  BB = loopMBB;
+  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+  if (BinOpcode) {
+    // operand order needs to go the other way for NAND
+    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+                     addReg(incr).addReg(dest)).addReg(0);
+    else
+      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+                     addReg(dest).addReg(incr)).addReg(0);
+  }
+
+  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+                 .addReg(ptr));
+  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                 .addReg(scratch).addImm(0));
+  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+  BB->addSuccessor(loopMBB);
+  BB->addSuccessor(exitMBB);
+
+  //  exitMBB:
+  //   ...
+  BB = exitMBB;
+  return BB;
+}
+
+MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB,
                    DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc dl = MI->getDebugLoc();
+  bool isThumb2 = Subtarget->isThumb2();
   switch (MI->getOpcode()) {
   default:
+    MI->dump();
     llvm_unreachable("Unexpected instr type to insert");
+
+  case ARM::ATOMIC_LOAD_ADD_I8:
+     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+  case ARM::ATOMIC_LOAD_ADD_I16:
+     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+  case ARM::ATOMIC_LOAD_ADD_I32:
+     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+  case ARM::ATOMIC_LOAD_AND_I8:
+     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+  case ARM::ATOMIC_LOAD_AND_I16:
+     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+  case ARM::ATOMIC_LOAD_AND_I32:
+     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+  case ARM::ATOMIC_LOAD_OR_I8:
+     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+  case ARM::ATOMIC_LOAD_OR_I16:
+     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+  case ARM::ATOMIC_LOAD_OR_I32:
+     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+  case ARM::ATOMIC_LOAD_XOR_I8:
+     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+  case ARM::ATOMIC_LOAD_XOR_I16:
+     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+  case ARM::ATOMIC_LOAD_XOR_I32:
+     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+  case ARM::ATOMIC_LOAD_NAND_I8:
+     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+  case ARM::ATOMIC_LOAD_NAND_I16:
+     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+  case ARM::ATOMIC_LOAD_NAND_I32:
+     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+  case ARM::ATOMIC_LOAD_SUB_I8:
+     return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+  case ARM::ATOMIC_LOAD_SUB_I16:
+     return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+  case ARM::ATOMIC_LOAD_SUB_I32:
+     return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+  case ARM::ATOMIC_SWAP_I8:  return EmitAtomicBinary(MI, BB, 1, 0);
+  case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+  case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+
+  case ARM::ATOMIC_CMP_SWAP_I8:  return EmitAtomicCmpSwap(MI, BB, 1);
+  case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
+  case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
   case ARM::tMOVCCr_pseudo: {
     // To "insert" a SELECT_CC instruction, we actually have to insert the
     // diamond control-flow pattern.  The incoming instruction knows the
@@ -3935,6 +4267,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
         return std::make_pair(0U, ARM::SPRRegisterClass);
       if (VT == MVT::f64)
         return std::make_pair(0U, ARM::DPRRegisterClass);
+      if (VT.getSizeInBits() == 128)
+        return std::make_pair(0U, ARM::QPRRegisterClass);
       break;
     }
   }
@@ -3973,6 +4307,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                    ARM::D4, ARM::D5, ARM::D6, ARM::D7,
                                    ARM::D8, ARM::D9, ARM::D10,ARM::D11,
                                    ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
+    if (VT.getSizeInBits() == 128)
+      return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+                                   ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
       break;
   }
 
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 4f31f8a..e1b3348 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -72,6 +72,9 @@ namespace llvm {
 
       DYN_ALLOC,    // Dynamic allocation on the stack.
 
+      MEMBARRIER,   // Memory barrier
+      SYNCBARRIER,  // Memory sync barrier
+
       VCEQ,         // Vector compare equal.
       VCGE,         // Vector compare greater than or equal.
       VCGEU,        // Vector compare unsigned greater than or equal.
@@ -328,6 +331,15 @@ namespace llvm {
 
     SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                       SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl);
+
+    MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+                                         MachineBasicBlock *BB,
+                                         unsigned Size) const;
+    MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+                                        MachineBasicBlock *BB,
+                                        unsigned Size,
+                                        unsigned BinOpcode) const;
+
   };
 }
 
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index e76e93c..cf0edff 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -33,6 +33,8 @@ def LdMiscFrm     : Format<8>;
 def StMiscFrm     : Format<9>;
 def LdStMulFrm    : Format<10>;
 
+def LdStExFrm     : Format<28>;
+
 def ArithMiscFrm  : Format<11>;
 def ExtFrm        : Format<12>;
 
@@ -199,6 +201,19 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   let Pattern = pattern;
   list<Predicate> Predicates = [IsARM];
 }
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+        IndexMode im, Format f, InstrItinClass itin, 
+        string opc, string asm, string cstr,
+        list<dag> pattern>
+  : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString   = !strconcat(opc, asm);
+  let Pattern = pattern;
+  let isPredicable = 0;
+  list<Predicate> Predicates = [IsARM];
+}
 
 // Same as I except it can optionally modify CPSR. Note it's modeled as
 // an input operand since by default it's a zero register. It will
@@ -239,6 +254,10 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
           string asm, list<dag> pattern>
   : XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
        asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+         string opc, string asm, list<dag> pattern>
+  : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+      opc, asm, "", pattern>;
 
 // Ctrl flow instructions
 class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -264,6 +283,28 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
   : XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
        asm, "", pattern>;
 
+
+// Atomic load/store instructions
+
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+              string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+      opc, asm, "", pattern> {
+  let Inst{27-23} = 0b00011;
+  let Inst{22-21} = opcod;
+  let Inst{20} = 1;
+  let Inst{11-0}  = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+              string opc, string asm, list<dag> pattern>
+  : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+      opc, asm, "", pattern> {
+  let Inst{27-23} = 0b00011;
+  let Inst{22-21} = opcod;
+  let Inst{20} = 0;
+  let Inst{11-4}  = 0b11111001;
+}
+
 // addrmode1 instructions
 class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
           string opc, string asm, list<dag> pattern>
@@ -967,6 +1008,17 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
   list<Predicate> Predicates = [IsThumb2];
 }
 
+class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+               InstrItinClass itin,
+               string asm, string cstr, list<dag> pattern>
+  : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+  let OutOperandList = oops;
+  let InOperandList = iops;
+  let AsmString   = asm;
+  let Pattern = pattern;
+  list<Predicate> Predicates = [IsThumb1Only];
+}
+
 class T2I<dag oops, dag iops, InstrItinClass itin,
           string opc, string asm, list<dag> pattern>
   : Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 87bb12b..85f6b40 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -60,25 +60,6 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
   return 0;
 }
 
-bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-
-  switch (MBB.back().getOpcode()) {
-  case ARM::BX_RET:   // Return.
-  case ARM::LDM_RET:
-  case ARM::B:
-  case ARM::BRIND:
-  case ARM::BR_JTr:   // Jumptable branch.
-  case ARM::BR_JTm:   // Jumptable branch through mem.
-  case ARM::BR_JTadd: // Jumptable branch add to pc.
-    return true;
-  default:
-    break;
-  }
-
-  return false;
-}
-
 void ARMInstrInfo::
 reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
               unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 4319577..d4199d1 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -32,9 +32,6 @@ public:
   // if there is not such an opcode.
   unsigned getUnindexedOpcode(unsigned Opc) const;
 
-  // Return true if the block does not fall through.
-  bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
   void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                      unsigned DestReg, unsigned SubIdx,
                      const MachineInstr *Orig,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 7516d3c..e14696a 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -46,6 +46,11 @@ def SDT_ARMPICAdd  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
 def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
 def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
 
+def SDT_ARMMEMBARRIERV7  : SDTypeProfile<0, 0, []>;
+def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
+def SDT_ARMMEMBARRIERV6  : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
 // Node definitions.
 def ARMWrapper       : SDNode<"ARMISD::Wrapper",     SDTIntUnaryOp>;
 def ARMWrapperJT     : SDNode<"ARMISD::WrapperJT",   SDTIntBinOp>;
@@ -93,6 +98,15 @@ def ARMrrx           : SDNode<"ARMISD::RRX"     , SDTIntUnaryOp, [SDNPInFlag ]>;
 def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
 def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
 
+def ARMMemBarrierV7  : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
+                              [SDNPHasChain]>;
+def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
+                              [SDNPHasChain]>;
+def ARMMemBarrierV6  : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
+                              [SDNPHasChain]>;
+def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
+                              [SDNPHasChain]>;
+
 //===----------------------------------------------------------------------===//
 // ARM Instruction Predicate Definitions.
 //
@@ -772,6 +786,7 @@ let isBranch = 1, isTerminator = 1 in {
   def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
                     IIC_Br, "mov\tpc, $target \n$jt",
                     [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
+    let Inst{11-4}  = 0b00000000;
     let Inst{15-12} = 0b1111;
     let Inst{20}    = 0; // S Bit
     let Inst{24-21} = 0b1101;
@@ -1561,6 +1576,189 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
   let Inst{25} = 1;
 }
 
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def Int_MemBarrierV7 : AInoP<(outs), (ins),
+                        Pseudo, NoItinerary,
+                        "dmb", "",
+                        [(ARMMemBarrierV7)]>,
+                        Requires<[IsARM, HasV7]> {
+  let Inst{31-4} = 0xf57ff05;
+  // FIXME: add support for options other than a full system DMB
+  let Inst{3-0} = 0b1111;
+}
+
+def Int_SyncBarrierV7 : AInoP<(outs), (ins),
+                        Pseudo, NoItinerary,
+                        "dsb", "",
+                        [(ARMSyncBarrierV7)]>,
+                        Requires<[IsARM, HasV7]> {
+  let Inst{31-4} = 0xf57ff04;
+  // FIXME: add support for options other than a full system DSB
+  let Inst{3-0} = 0b1111;
+}
+
+def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
+                       Pseudo, NoItinerary,
+                       "mcr", "\tp15, 0, $zero, c7, c10, 5",
+                       [(ARMMemBarrierV6 GPR:$zero)]>,
+                       Requires<[IsARM, HasV6]> {
+  // FIXME: add support for options other than a full system DMB
+  // FIXME: add encoding
+}
+
+def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
+                        Pseudo, NoItinerary,
+                        "mcr", "\tp15, 0, $zero, c7, c10, 4",
+                        [(ARMSyncBarrierV6 GPR:$zero)]>,
+                        Requires<[IsARM, HasV6]> {
+  // FIXME: add support for options other than a full system DSB
+  // FIXME: add encoding
+}
+}
+
+let usesCustomInserter = 1 in {
+  let Uses = [CPSR] in {
+    def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_AND_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_OR_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_AND_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_OR_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_AND_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_OR_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+    def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+      "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
+      [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+
+    def ATOMIC_SWAP_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+      "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
+      [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+    def ATOMIC_SWAP_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+      "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
+      [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+    def ATOMIC_SWAP_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+      "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+      [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+    def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+      "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
+      [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+    def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+      "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
+      [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+    def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+      (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+      "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+      [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+                    "ldrexb", "\t$dest, [$ptr]",
+                    []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+                    "ldrexh", "\t$dest, [$ptr]",
+                    []>;
+def LDREX  : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+                    "ldrex", "\t$dest, [$ptr]",
+                    []>;
+def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+                    NoItinerary,
+                    "ldrexd", "\t$dest, $dest2, [$ptr]",
+                    []>;
+}
+
+let mayStore = 1 in {
+def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+                    NoItinerary,
+                    "strexb", "\t$success, $src, [$ptr]",
+                    []>;
+def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+                    NoItinerary,
+                    "strexh", "\t$success, $src, [$ptr]",
+                    []>;
+def STREX  : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+                    NoItinerary,
+                    "strex", "\t$success, $src, [$ptr]",
+                    []>;
+def STREXD : AIstrex<0b01, (outs GPR:$success),
+                    (ins GPR:$src, GPR:$src2, GPR:$ptr),
+                    NoItinerary,
+                    "strexd", "\t$success, $src, $src2, [$ptr]",
+                    []>;
+}
 
 //===----------------------------------------------------------------------===//
 // TLS Instructions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3166931..61b7705 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -152,7 +152,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
   let Inst{24}    = 0; // P bit
   let Inst{23}    = 1; // U bit
   let Inst{20}    = 1;
-  let Inst{11-9}  = 0b101;
+  let Inst{11-8}  = 0b1011;
 }
 
 // Use vstmia to store a Q register as a D register pair.
@@ -164,7 +164,7 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
   let Inst{24}    = 0; // P bit
   let Inst{23}    = 1; // U bit
   let Inst{20}    = 0;
-  let Inst{11-9}  = 0b101;
+  let Inst{11-8}  = 0b1011;
 }
 
 //   VLD1     : Vector Load (multiple single elements)
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index b5956a3..9306bdb 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -669,6 +669,35 @@ let isCall = 1,
                [(set R0, ARMthread_pointer)]>;
 }
 
+// SJLJ Exception handling intrinsics
+//   eh_sjlj_setjmp() is an instruction sequence to store the return
+//   address and save #0 in R0 for the non-longjmp case.
+//   Since by its nature we may be coming from some other function to get
+//   here, and we're using the stack frame for the containing function to
+//   save/restore registers, we can't keep anything live in regs across
+//   the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+//   when we get here from a longjmp(). We force everthing out of registers
+//   except for our own input by listing the relevant registers in Defs. By
+//   doing so, we also cause the prologue/epilogue code to actively preserve
+//   all of the callee-saved resgisters, which is exactly what we want.
+let Defs =
+  [ R0,  R1,  R2,  R3,  R4,  R5,  R6,  R7, R12 ] in {
+  def tInt_eh_sjlj_setjmp : ThumbXI<(outs), (ins GPR:$src),
+                              AddrModeNone, SizeSpecial, NoItinerary,
+                              "mov\tr12, r1\t@ begin eh.setjmp\n"
+                              "\tmov\tr1, sp\n"
+                              "\tstr\tr1, [$src, #8]\n"
+                              "\tadr\tr1, 0f\n"
+                              "\tadds\tr1, #1\n"
+                              "\tstr\tr1, [$src, #4]\n"
+                              "\tmov\tr1, r12\n"
+                              "\tmovs\tr0, #0\n"
+                              "\tb\t1f\n"
+                              ".align 2\n"
+                              "0:\tmovs\tr0, #1\t@ end eh.setjmp\n"
+                              "1:", "",
+                              [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
+}
 //===----------------------------------------------------------------------===//
 // Non-Instruction Patterns
 //
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 9489815..949ce73 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1065,6 +1065,68 @@ def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
                    RegConstraint<"$false = $dst">;
 
 //===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
+                        Pseudo, NoItinerary,
+                        "dmb", "",
+                        [(ARMMemBarrierV7)]>,
+                        Requires<[IsThumb2]> {
+  // FIXME: add support for options other than a full system DMB
+}
+
+def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
+                        Pseudo, NoItinerary,
+                        "dsb", "",
+                        [(ARMSyncBarrierV7)]>,
+                        Requires<[IsThumb2]> {
+  // FIXME: add support for options other than a full system DSB
+}
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+                      Size4Bytes, NoItinerary,
+                      "ldrexb", "\t$dest, [$ptr]", "",
+                      []>;
+def t2LDREXH : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+                      Size4Bytes, NoItinerary,
+                      "ldrexh", "\t$dest, [$ptr]", "",
+                      []>;
+def t2LDREX  : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+                      Size4Bytes, NoItinerary,
+                      "ldrex", "\t$dest, [$ptr]", "",
+                      []>;
+def t2LDREXD : Thumb2I<(outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+                      AddrModeNone, Size4Bytes, NoItinerary,
+                      "ldrexd", "\t$dest, $dest2, [$ptr]", "",
+                      []>;
+}
+
+let mayStore = 1 in {
+def t2STREXB : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+                      AddrModeNone, Size4Bytes, NoItinerary,
+                      "strexb", "\t$success, $src, [$ptr]", "",
+                      []>;
+def t2STREXH : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+                      AddrModeNone, Size4Bytes, NoItinerary,
+                      "strexh", "\t$success, $src, [$ptr]", "",
+                      []>;
+def t2STREX  : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+                      AddrModeNone, Size4Bytes, NoItinerary,
+                      "strex", "\t$success, $src, [$ptr]", "",
+                      []>;
+def t2STREXD : Thumb2I<(outs GPR:$success),
+                      (ins GPR:$src, GPR:$src2, GPR:$ptr),
+                      AddrModeNone, Size4Bytes, NoItinerary,
+                      "strexd", "\t$success, $src, $src2, [$ptr]", "",
+                      []>;
+}
+
+//===----------------------------------------------------------------------===//
 // TLS Instructions
 //
 
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 304d0ef..22bd80e 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -449,7 +449,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
     }
 
     if (MBBI != MBB.end()) {
-      MachineBasicBlock::iterator NextMBBI = next(MBBI);
+      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
       if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
           isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
         MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
@@ -494,7 +494,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
     }
 
     if (MBBI != MBB.end()) {
-      MachineBasicBlock::iterator NextMBBI = next(MBBI);
+      MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
       if (Mode == ARM_AM::ia &&
           isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
         MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
@@ -604,7 +604,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
   }
 
   if (!DoMerge && MBBI != MBB.end()) {
-    MachineBasicBlock::iterator NextMBBI = next(MBBI);
+    MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
     if (!isAM5 &&
         isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
       DoMerge = true;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 2564ed9..1c6fca7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -95,7 +95,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
 
   // Calculate and set max stack object alignment early, so we can decide
   // whether we will need stack realignment (and thus FP).
-  PM.add(createARMMaxStackAlignmentCalculatorPass());
+  PM.add(createMaxStackAlignmentCalculatorPass());
 
   // FIXME: temporarily disabling load / store optimization pass for Thumb1.
   if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 692bb19..362bbf1 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -1045,6 +1045,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
       printNoHashImmediate(MI, OpNum);
       return false;
     case 'P': // Print a VFP double precision register.
+    case 'q': // Print a NEON quad precision register.
       printOperand(MI, OpNum);
       return false;
     case 'Q':
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 50abcf4..3c0414d 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -51,7 +51,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
   MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
   MachineBasicBlock::iterator NextMII;
   for (; MII != E; MII = NextMII) {
-    NextMII = next(MII);
+    NextMII = llvm::next(MII);
     MachineInstr *MI = &*MII;
 
     if (MI->getOpcode() == ARM::VMOVD &&
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 206677b..d9942c8 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -338,7 +338,7 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
     if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
       continue;
 
-    MachineBasicBlock::iterator NextI = next(MBBI);
+    MachineBasicBlock::iterator NextI = llvm::next(MBBI);
     for (unsigned R = 0; R < NumRegs; ++R) {
       MachineOperand &MO = MI->getOperand(FirstOpnd + R);
       assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 7602b6d..66d3b83 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -32,25 +32,6 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
   return 0;
 }
 
-bool
-Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-
-  switch (MBB.back().getOpcode()) {
-  case ARM::tBX_RET:
-  case ARM::tBX_RET_vararg:
-  case ARM::tPOP_RET:
-  case ARM::tB:
-  case ARM::tBRIND:
-  case ARM::tBR_JTr:
-    return true;
-  default:
-    break;
-  }
-
-  return false;
-}
-
 bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index b28229d..516ddf1 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -31,9 +31,6 @@ public:
   // if there is not such an opcode.
   unsigned getUnindexedOpcode(unsigned Opc) const;
 
-  // Return true if the block does not fall through.
-  bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
   /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info.  As
   /// such, whenever a client has an instance of instruction info, it should
   /// always be able to get register info as well (through this method).
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 37adf37..9f3816a 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -528,7 +528,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
         MI.getOperand(i+1).ChangeToImmediate(Mask);
       }
       Offset = (Offset - Mask * Scale);
-      MachineBasicBlock::iterator NII = next(II);
+      MachineBasicBlock::iterator NII = llvm::next(II);
       emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
                                 *this, dl);
     } else {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 16c1e6f..f4a8c27 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -36,30 +36,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
 }
 
 bool
-Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-
-  switch (MBB.back().getOpcode()) {
-  case ARM::t2LDM_RET:
-  case ARM::t2B:        // Uncond branch.
-  case ARM::t2BR_JT:    // Jumptable branch.
-  case ARM::t2TBB:      // Table branch byte.
-  case ARM::t2TBH:      // Table branch halfword.
-  case ARM::tBR_JTr:    // Jumptable branch (16-bit version).
-  case ARM::tBX_RET:
-  case ARM::tBX_RET_vararg:
-  case ARM::tPOP_RET:
-  case ARM::tB:
-  case ARM::tBRIND:
-    return true;
-  default:
-    break;
-  }
-
-  return false;
-}
-
-bool
 Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I,
                               unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 663a60b..a0f89a6 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -31,9 +31,6 @@ public:
   // if there is not such an opcode.
   unsigned getUnindexedOpcode(unsigned Opc) const;
 
-  // Return true if the block does not fall through.
-  bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
   bool copyRegToReg(MachineBasicBlock &MBB,
                     MachineBasicBlock::iterator I,
                     unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index b2fd7b3..35359aa 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -649,7 +649,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
   MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
   MachineBasicBlock::iterator NextMII;
   for (; MII != E; MII = NextMII) {
-    NextMII = next(MII);
+    NextMII = llvm::next(MII);
 
     MachineInstr *MI = &*MII;
     LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 86173ff..39f0749 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -392,18 +392,6 @@ void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
     .addReg(Alpha::R31);
 }
 
-bool AlphaInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-  
-  switch (MBB.back().getOpcode()) {
-  case Alpha::RETDAG: // Return.
-  case Alpha::RETDAGp:
-  case Alpha::BR:     // Uncond branch.
-  case Alpha::JMP:  // Indirect branch.
-    return true;
-  default: return false;
-  }
-}
 bool AlphaInstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index 274f452..c3b6044 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -78,7 +78,6 @@ public:
   unsigned RemoveBranch(MachineBasicBlock &MBB) const;
   void insertNoop(MachineBasicBlock &MBB, 
                   MachineBasicBlock::iterator MI) const;
-  bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
   /// getGlobalBaseReg - Return a virtual register initialized with the
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 9e4fe27..a52ca79 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2573,7 +2573,7 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
     BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
     printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
     printBranchToBlock(SI.getParent(), Succ, 2);
-    if (Function::iterator(Succ) == next(Function::iterator(SI.getParent())))
+    if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
       Out << "    break;\n";
   }
   Out << "  }\n";
@@ -2593,7 +2593,7 @@ bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
   /// FIXME: This should be reenabled, but loop reordering safe!!
   return true;
 
-  if (next(Function::iterator(From)) != Function::iterator(To))
+  if (llvm::next(Function::iterator(From)) != Function::iterator(To))
     return true;  // Not the direct successor, we need a goto.
 
   //isa<SwitchInst>(From->getTerminator())
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index ecce8e3..2306665 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -580,10 +580,6 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   }
 }
 
-bool
-SPUInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  return (!MBB.empty() && isUncondBranch(&MBB.back()));
-}
 //! Reverses a branch's condition, returning false on success.
 bool
 SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index c644a11..42677fc 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -79,9 +79,6 @@ namespace llvm {
     bool canFoldMemoryOperand(const MachineInstr *MI,
                               const SmallVectorImpl<unsigned> &Ops) const;
 
-    //! Return true if the specified block does not fall through
-    virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
     //! Reverses a branch's condition, returning false on success.
     virtual
     bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 4bae6c7..a872fbd 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -976,21 +976,20 @@ namespace {
     nl(Out);
     printType(GV->getType());
     if (GV->hasInitializer()) {
-      Constant* Init = GV->getInitializer();
+      Constant *Init = GV->getInitializer();
       printType(Init->getType());
-      if (Function* F = dyn_cast<Function>(Init)) {
+      if (Function *F = dyn_cast<Function>(Init)) {
         nl(Out)<< "/ Function Declarations"; nl(Out);
         printFunctionHead(F);
       } else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
         nl(Out) << "// Global Variable Declarations"; nl(Out);
         printVariableHead(gv);
-      } else  {
-        nl(Out) << "// Constant Definitions"; nl(Out);
-        printConstant(gv);
-      }
-      if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+        
         nl(Out) << "// Global Variable Definitions"; nl(Out);
         printVariableBody(gv);
+      } else  {
+        nl(Out) << "// Constant Definitions"; nl(Out);
+        printConstant(Init);
       }
     }
   }
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index beccb2c..4edf422 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -86,10 +86,10 @@ namespace {
 
     void dump() {
       errs() << "MSP430ISelAddressMode " << this << '\n';
-      if (Base.Reg.getNode() != 0) {
+      if (BaseType == RegBase && Base.Reg.getNode() != 0) {
         errs() << "Base.Reg ";
         Base.Reg.getNode()->dump();
-      } else {
+      } else if (BaseType == FrameIndexBase) {
         errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
       }
       errs() << " Disp " << Disp << '\n';
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 29cc370..5fe9b20 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -15,6 +15,7 @@
 
 #include "MSP430ISelLowering.h"
 #include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
 #include "MSP430TargetMachine.h"
 #include "MSP430Subtarget.h"
 #include "llvm/DerivedTypes.h"
@@ -32,16 +33,38 @@
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/CodeGen/ValueTypes.h"
 #include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/ADT/VectorExtras.h"
 using namespace llvm;
 
+typedef enum {
+  NoHWMult,
+  HWMultIntr,
+  HWMultNoIntr
+} HWMultUseMode;
+
+static cl::opt<HWMultUseMode>
+HWMultMode("msp430-hwmult-mode",
+           cl::desc("Hardware multiplier use mode"),
+           cl::init(HWMultNoIntr),
+           cl::values(
+             clEnumValN(NoHWMult, "no",
+                "Do not use hardware multiplier"),
+             clEnumValN(HWMultIntr, "interrupts",
+                "Assume hardware multiplier can be used inside interrupts"),
+             clEnumValN(HWMultNoIntr, "use",
+                "Assume hardware multiplier cannot be used inside interrupts"),
+             clEnumValEnd));
+
 MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
   TargetLowering(tm, new TargetLoweringObjectFileELF()),
   Subtarget(*tm.getSubtargetImpl()), TM(tm) {
 
+  TD = getTargetData();
+
   // Set up the register classes.
   addRegisterClass(MVT::i8,  MSP430::GR8RegisterClass);
   addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
@@ -92,8 +115,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
   setOperationAction(ISD::BR_CC,            MVT::i8,    Custom);
   setOperationAction(ISD::BR_CC,            MVT::i16,   Custom);
   setOperationAction(ISD::BRCOND,           MVT::Other, Expand);
-  setOperationAction(ISD::SETCC,            MVT::i8,    Expand);
-  setOperationAction(ISD::SETCC,            MVT::i16,   Expand);
+  setOperationAction(ISD::SETCC,            MVT::i8,    Custom);
+  setOperationAction(ISD::SETCC,            MVT::i16,   Custom);
   setOperationAction(ISD::SELECT,           MVT::i8,    Expand);
   setOperationAction(ISD::SELECT,           MVT::i16,   Expand);
   setOperationAction(ISD::SELECT_CC,        MVT::i8,    Custom);
@@ -142,6 +165,15 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
   setOperationAction(ISD::SDIV,             MVT::i16,   Expand);
   setOperationAction(ISD::SDIVREM,          MVT::i16,   Expand);
   setOperationAction(ISD::SREM,             MVT::i16,   Expand);
+
+  // Libcalls names.
+  if (HWMultMode == HWMultIntr) {
+    setLibcallName(RTLIB::MUL_I8,  "__mulqi3hw");
+    setLibcallName(RTLIB::MUL_I16, "__mulhi3hw");
+  } else if (HWMultMode == HWMultNoIntr) {
+    setLibcallName(RTLIB::MUL_I8,  "__mulqi3hw_noint");
+    setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint");
+  }
 }
 
 SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
@@ -151,9 +183,12 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
   case ISD::SRA:              return LowerShifts(Op, DAG);
   case ISD::GlobalAddress:    return LowerGlobalAddress(Op, DAG);
   case ISD::ExternalSymbol:   return LowerExternalSymbol(Op, DAG);
+  case ISD::SETCC:            return LowerSETCC(Op, DAG);
   case ISD::BR_CC:            return LowerBR_CC(Op, DAG);
   case ISD::SELECT_CC:        return LowerSELECT_CC(Op, DAG);
   case ISD::SIGN_EXTEND:      return LowerSIGN_EXTEND(Op, DAG);
+  case ISD::RETURNADDR:       return LowerRETURNADDR(Op, DAG);
+  case ISD::FRAMEADDR:        return LowerFRAMEADDR(Op, DAG);
   default:
     llvm_unreachable("unimplemented operand");
     return SDValue();
@@ -225,6 +260,13 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
   case CallingConv::C:
   case CallingConv::Fast:
     return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+  case CallingConv::MSP430_INTR:
+   if (Ins.empty())
+     return Chain;
+   else {
+    llvm_report_error("ISRs cannot have arguments");
+    return SDValue();
+   }
   }
 }
 
@@ -244,6 +286,9 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
   case CallingConv::C:
     return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
                           Outs, Ins, dl, DAG, InVals);
+  case CallingConv::MSP430_INTR:
+    llvm_report_error("ISRs cannot be called directly");
+    return SDValue();
   }
 }
 
@@ -340,6 +385,12 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
   // CCValAssign - represent the assignment of the return value to a location
   SmallVector<CCValAssign, 16> RVLocs;
 
+  // ISRs cannot return any value.
+  if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
+    llvm_report_error("ISRs cannot return any value");
+    return SDValue();
+  }
+
   // CCState - Info about the registers and stack slot.
   CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                  RVLocs, *DAG.getContext());
@@ -370,11 +421,14 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
     Flag = Chain.getValue(1);
   }
 
+  unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
+                  MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+
   if (Flag.getNode())
-    return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+    return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
 
   // Return Void
-  return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain);
+  return DAG.getNode(Opc, dl, MVT::Other, Chain);
 }
 
 /// LowerCCCCallTo - functions arguments are copied from virtual regs to
@@ -538,9 +592,21 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
   EVT VT = Op.getValueType();
   DebugLoc dl = N->getDebugLoc();
 
-  // We currently only lower shifts of constant argument.
+  // Expand non-constant shifts to loops:
   if (!isa<ConstantSDNode>(N->getOperand(1)))
-    return SDValue();
+    switch (Opc) {
+    default:
+      assert(0 && "Invalid shift opcode!");
+    case ISD::SHL:
+      return DAG.getNode(MSP430ISD::SHL, dl,
+                         VT, N->getOperand(0), N->getOperand(1));
+    case ISD::SRA:
+      return DAG.getNode(MSP430ISD::SRA, dl,
+                         VT, N->getOperand(0), N->getOperand(1));
+    case ISD::SRL:
+      return DAG.getNode(MSP430ISD::SRL, dl,
+                         VT, N->getOperand(0), N->getOperand(1));
+    }
 
   uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
 
@@ -648,6 +714,88 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
                      Chain, Dest, TargetCC, Flag);
 }
 
+
+SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+  SDValue LHS   = Op.getOperand(0);
+  SDValue RHS   = Op.getOperand(1);
+  DebugLoc dl   = Op.getDebugLoc();
+
+  // If we are doing an AND and testing against zero, then the CMP
+  // will not be generated.  The AND (or BIT) will generate the condition codes,
+  // but they are different from CMP.
+  bool andCC = false;
+  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+    if (RHSC->isNullValue() && LHS.hasOneUse() &&
+        (LHS.getOpcode() == ISD::AND ||
+         (LHS.getOpcode() == ISD::TRUNCATE &&
+          LHS.getOperand(0).getOpcode() == ISD::AND))) {
+      andCC = true;
+    }
+  }
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+  SDValue TargetCC;
+  SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+  // Get the condition codes directly from the status register, if its easy.
+  // Otherwise a branch will be generated.  Note that the AND and BIT
+  // instructions generate different flags than CMP, the carry bit can be used
+  // for NE/EQ.
+  bool Invert = false;
+  bool Shift = false;
+  bool Convert = true;
+  switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) {
+   default:
+    Convert = false;
+    break;
+   case MSP430CC::COND_HS:
+     // Res = SRW & 1, no processing is required
+     break;
+    case MSP430CC::COND_LO:
+     // Res = ~(SRW & 1)
+     Invert = true;
+     break;
+    case MSP430CC::COND_NE:
+     if (andCC) {
+       // C = ~Z, thus Res = SRW & 1, no processing is required
+     } else {
+       // Res = (SRW >> 1) & 1
+       Shift = true;
+     }
+     break;
+    case MSP430CC::COND_E:
+     if (andCC) {
+       // C = ~Z, thus Res = ~(SRW & 1)
+     } else {
+       // Res = ~((SRW >> 1) & 1)
+       Shift = true;
+     }
+     Invert = true;
+     break;
+  }
+  EVT VT = Op.getValueType();
+  SDValue One  = DAG.getConstant(1, VT);
+  if (Convert) {
+    SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+                                     MVT::i16, Flag);
+    if (Shift)
+      // FIXME: somewhere this is turned into a SRL, lower it MSP specific?
+      SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One);
+    SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One);
+    if (Invert)
+      SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
+    return SR;
+  } else {
+    SDValue Zero = DAG.getConstant(0, VT);
+    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+    SmallVector<SDValue, 4> Ops;
+    Ops.push_back(One);
+    Ops.push_back(Zero);
+    Ops.push_back(TargetCC);
+    Ops.push_back(Flag);
+    return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+  }
+}
+
 SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
   SDValue LHS    = Op.getOperand(0);
   SDValue RHS    = Op.getOperand(1);
@@ -682,6 +830,55 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
                      DAG.getValueType(Val.getValueType()));
 }
 
+SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+  MachineFunction &MF = DAG.getMachineFunction();
+  MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+  int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+  if (ReturnAddrIndex == 0) {
+    // Set up a frame object for the return address.
+    uint64_t SlotSize = TD->getPointerSize();
+    ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+                                                           true, false);
+    FuncInfo->setRAIndex(ReturnAddrIndex);
+  }
+
+  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  DebugLoc dl = Op.getDebugLoc();
+
+  if (Depth > 0) {
+    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+    SDValue Offset =
+      DAG.getConstant(TD->getPointerSize(), MVT::i16);
+    return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                       DAG.getNode(ISD::ADD, dl, getPointerTy(),
+                                   FrameAddr, Offset),
+                       NULL, 0);
+  }
+
+  // Just load the return address.
+  SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+                     RetAddrFI, NULL, 0);
+}
+
+SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+  MFI->setFrameAddressIsTaken(true);
+  EVT VT = Op.getValueType();
+  DebugLoc dl = Op.getDebugLoc();  // FIXME probably not meaningful
+  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+                                         MSP430::FPW, VT);
+  while (Depth--)
+    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+  return FrameAddr;
+}
+
 /// getPostIndexedAddressParts - returns true by value, base pointer and
 /// offset pointer and addressing mode by reference if this node can be
 /// combined with a load / store to form a post-indexed load / store.
@@ -722,6 +919,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch (Opcode) {
   default: return NULL;
   case MSP430ISD::RET_FLAG:           return "MSP430ISD::RET_FLAG";
+  case MSP430ISD::RETI_FLAG:          return "MSP430ISD::RETI_FLAG";
   case MSP430ISD::RRA:                return "MSP430ISD::RRA";
   case MSP430ISD::RLA:                return "MSP430ISD::RLA";
   case MSP430ISD::RRC:                return "MSP430ISD::RRC";
@@ -730,6 +928,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case MSP430ISD::BR_CC:              return "MSP430ISD::BR_CC";
   case MSP430ISD::CMP:                return "MSP430ISD::CMP";
   case MSP430ISD::SELECT_CC:          return "MSP430ISD::SELECT_CC";
+  case MSP430ISD::SHL:                return "MSP430ISD::SHL";
+  case MSP430ISD::SRA:                return "MSP430ISD::SRA";
   }
 }
 
@@ -738,13 +938,131 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
 //===----------------------------------------------------------------------===//
 
 MachineBasicBlock*
+MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
+                                     MachineBasicBlock *BB,
+                   DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+  MachineFunction *F = BB->getParent();
+  MachineRegisterInfo &RI = F->getRegInfo();
+  DebugLoc dl = MI->getDebugLoc();
+  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+
+  unsigned Opc;
+  const TargetRegisterClass * RC;
+  switch (MI->getOpcode()) {
+  default:
+    assert(0 && "Invalid shift opcode!");
+  case MSP430::Shl8:
+   Opc = MSP430::SHL8r1;
+   RC = MSP430::GR8RegisterClass;
+   break;
+  case MSP430::Shl16:
+   Opc = MSP430::SHL16r1;
+   RC = MSP430::GR16RegisterClass;
+   break;
+  case MSP430::Sra8:
+   Opc = MSP430::SAR8r1;
+   RC = MSP430::GR8RegisterClass;
+   break;
+  case MSP430::Sra16:
+   Opc = MSP430::SAR16r1;
+   RC = MSP430::GR16RegisterClass;
+   break;
+  case MSP430::Srl8:
+   Opc = MSP430::SAR8r1c;
+   RC = MSP430::GR8RegisterClass;
+   break;
+  case MSP430::Srl16:
+   Opc = MSP430::SAR16r1c;
+   RC = MSP430::GR16RegisterClass;
+   break;
+  }
+
+  const BasicBlock *LLVM_BB = BB->getBasicBlock();
+  MachineFunction::iterator I = BB;
+  ++I;
+
+  // Create loop block
+  MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+  MachineBasicBlock *RemBB  = F->CreateMachineBasicBlock(LLVM_BB);
+
+  F->insert(I, LoopBB);
+  F->insert(I, RemBB);
+
+  // Update machine-CFG edges by transferring all successors of the current
+  // block to the block containing instructions after shift.
+  RemBB->transferSuccessors(BB);
+
+  // Inform sdisel of the edge changes.
+  for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+         SE = BB->succ_end(); SI != SE; ++SI)
+    EM->insert(std::make_pair(*SI, RemBB));
+
+  // Add adges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+  BB->addSuccessor(LoopBB);
+  BB->addSuccessor(RemBB);
+  LoopBB->addSuccessor(RemBB);
+  LoopBB->addSuccessor(LoopBB);
+
+  unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+  unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+  unsigned ShiftReg = RI.createVirtualRegister(RC);
+  unsigned ShiftReg2 = RI.createVirtualRegister(RC);
+  unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
+  unsigned SrcReg = MI->getOperand(1).getReg();
+  unsigned DstReg = MI->getOperand(0).getReg();
+
+  // BB:
+  // cmp 0, N
+  // je RemBB
+  BuildMI(BB, dl, TII.get(MSP430::CMP8ir))
+    .addImm(0).addReg(ShiftAmtSrcReg);
+  BuildMI(BB, dl, TII.get(MSP430::JCC))
+    .addMBB(RemBB)
+    .addImm(MSP430CC::COND_E);
+
+  // LoopBB:
+  // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+  // ShiftAmt = phi [%N, BB],      [%ShiftAmt2, LoopBB]
+  // ShiftReg2 = shift ShiftReg
+  // ShiftAmt2 = ShiftAmt - 1;
+  BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg)
+    .addReg(SrcReg).addMBB(BB)
+    .addReg(ShiftReg2).addMBB(LoopBB);
+  BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
+    .addReg(ShiftAmtSrcReg).addMBB(BB)
+    .addReg(ShiftAmtReg2).addMBB(LoopBB);
+  BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+    .addReg(ShiftReg);
+  BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
+    .addReg(ShiftAmtReg).addImm(1);
+  BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
+    .addMBB(LoopBB)
+    .addImm(MSP430CC::COND_NE);
+
+  // RemBB:
+  // DestReg = phi [%SrcReg, BB], [%ShiftReg, LoopBB]
+  BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg)
+    .addReg(SrcReg).addMBB(BB)
+    .addReg(ShiftReg2).addMBB(LoopBB);
+
+  return RemBB;
+}
+
+MachineBasicBlock*
 MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                   MachineBasicBlock *BB,
                    DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+  unsigned Opc = MI->getOpcode();
+
+  if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+      Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+      Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+    return EmitShiftInstr(MI, BB, EM);
+
   const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
   DebugLoc dl = MI->getDebugLoc();
-  assert((MI->getOpcode() == MSP430::Select16 ||
-          MI->getOpcode() == MSP430::Select8) &&
+
+  assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
          "Unexpected instr type to insert");
 
   // To "insert" a SELECT instruction, we actually have to insert the diamond
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index d413ccb..4921500 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -27,6 +27,9 @@ namespace llvm {
       /// Return with a flag operand. Operand 0 is the chain operand.
       RET_FLAG,
 
+      /// Same as RET_FLAG, but used for returning from ISRs.
+      RETI_FLAG,
+
       /// Y = R{R,L}A X, rotate right (left) arithmetically
       RRA, RLA,
 
@@ -44,7 +47,7 @@ namespace llvm {
       /// CMP - Compare instruction.
       CMP,
 
-      /// SetCC. Operand 0 is condition code, and operand 1 is the flag
+      /// SetCC - Operand 0 is condition code, and operand 1 is the flag
       /// operand produced by a CMP instruction.
       SETCC,
 
@@ -54,9 +57,12 @@ namespace llvm {
       /// instruction.
       BR_CC,
 
-      /// SELECT_CC. Operand 0 and operand 1 are selection variable, operand 3
+      /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3
       /// is condition code and operand 4 is flag operand.
-      SELECT_CC
+      SELECT_CC,
+
+      /// SHL, SRA, SRL - Non-constant shifts.
+      SHL, SRA, SRL
     };
   }
 
@@ -81,8 +87,12 @@ namespace llvm {
     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
     SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
     SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
+    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
 
     TargetLowering::ConstraintType
     getConstraintType(const std::string &Constraint) const;
@@ -92,6 +102,9 @@ namespace llvm {
     MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
                                                    MachineBasicBlock *BB,
                     DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+    MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
+                                      MachineBasicBlock *BB,
+                    DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
 
   private:
     SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
@@ -144,6 +157,7 @@ namespace llvm {
 
     const MSP430Subtarget &Subtarget;
     const MSP430TargetMachine &TM;
+    const TargetData *TD;
   };
 } // namespace llvm
 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index b2f09c7..2ae6759 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -219,17 +219,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   return false;
 }
 
-bool MSP430InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{
-  if (MBB.empty()) return false;
-
-  switch (MBB.back().getOpcode()) {
-  case MSP430::RET:   // Return.
-  case MSP430::JMP:   // Uncond branch.
-    return true;
-  default: return false;
-  }
-}
-
 bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
   const TargetInstrDesc &TID = MI->getDesc();
   if (!TID.isTerminator()) return false;
@@ -270,8 +259,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       }
 
       // If the block has any instructions after a JMP, delete them.
-      while (next(I) != MBB.end())
-        next(I)->eraseFromParent();
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
       Cond.clear();
       FBB = 0;
 
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 35e35db..e4ceeb9 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -61,7 +61,6 @@ public:
 
   // Branch folding goodness
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-  bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
   bool isUnpredicatedTerminator(const MachineInstr *MI) const;
   bool AnalyzeBranch(MachineBasicBlock &MBB,
                      MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 7a26f6c..d67ba90 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -31,12 +31,15 @@ def SDT_MSP430BrCC         : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
                                                   SDTCisVT<1, i8>]>;
 def SDT_MSP430SelectCC     : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, 
                                                   SDTCisVT<3, i8>]>;
+def SDT_MSP430Shift        : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>;
 
 //===----------------------------------------------------------------------===//
 // MSP430 Specific Node Definitions.
 //===----------------------------------------------------------------------===//
-def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
-                     [SDNPHasChain, SDNPOptInFlag]>;
+def MSP430retflag  : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+                       [SDNPHasChain, SDNPOptInFlag]>;
+def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
+                       [SDNPHasChain, SDNPOptInFlag]>;
 
 def MSP430rra     : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
 def MSP430rla     : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
@@ -54,6 +57,9 @@ def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
 def MSP430cmp     : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
 def MSP430brcc    : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>;
 def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>;
+def MSP430shl     : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
+def MSP430sra     : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
+def MSP430srl     : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
 
 //===----------------------------------------------------------------------===//
 // MSP430 Operand Definitions.
@@ -90,7 +96,9 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
 // Pattern Fragments
 def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
 def  extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
-
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+  return N->hasOneUse();
+}]>;
 //===----------------------------------------------------------------------===//
 // Instruction list..
 
@@ -117,6 +125,27 @@ let usesCustomInserter = 1 in {
                         "# Select16 PSEUDO",
                         [(set GR16:$dst,
                           (MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>;
+  let Defs = [SRW] in {
+  def Shl8     : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+                        "# Shl8 PSEUDO",
+                        [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+  def Shl16    : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+                        "# Shl16 PSEUDO",
+                        [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+  def Sra8     : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+                        "# Sra8 PSEUDO",
+                        [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+  def Sra16    : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+                        "# Sra16 PSEUDO",
+                        [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+  def Srl8     : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+                        "# Srl8 PSEUDO",
+                        [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+  def Srl16    : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+                        "# Srl16 PSEUDO",
+                        [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
+
+  }
 }
 
 let neverHasSideEffects = 1 in
@@ -128,7 +157,8 @@ def NOP : Pseudo<(outs), (ins), "nop", []>;
 
 // FIXME: Provide proper encoding!
 let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
-  def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
+  def RET  : Pseudo<(outs), (ins), "ret",  [(MSP430retflag)]>;
+  def RETI : Pseudo<(outs), (ins), "reti", [(MSP430retiflag)]>;
 }
 
 let isBranch = 1, isTerminator = 1 in {
@@ -823,6 +853,65 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
                 "cmp.w\t{$src1, $src2}",
                 [(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>;
 
+
+// BIT TESTS, just sets condition codes
+// Note that the C condition is set differently than when using CMP.
+let isCommutable = 1 in {
+def BIT8rr  : Pseudo<(outs), (ins GR8:$src1, GR8:$src2),
+                     "bit.b\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su GR8:$src1, GR8:$src2)),
+                      (implicit SRW)]>;
+def BIT16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2),
+                     "bit.w\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su GR16:$src1, GR16:$src2)),
+                      (implicit SRW)]>;
+}
+def BIT8ri  : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2),
+                     "bit.b\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su GR8:$src1, imm:$src2)),
+                      (implicit SRW)]>;
+def BIT16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2),
+                     "bit.w\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su GR16:$src1, imm:$src2)),
+                      (implicit SRW)]>;
+
+def BIT8rm  : Pseudo<(outs), (ins GR8:$src1, memdst:$src2),
+                     "bit.b\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su GR8:$src1,  (load addr:$src2))),
+                      (implicit SRW)]>;
+def BIT16rm : Pseudo<(outs), (ins GR16:$src1, memdst:$src2),
+                     "bit.w\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su GR16:$src1,  (load addr:$src2))),
+                      (implicit SRW)]>;
+
+def BIT8mr  : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2),
+                     "bit.b\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su (load addr:$src1), GR8:$src2)),
+                      (implicit SRW)]>;
+def BIT16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
+                     "bit.w\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su (load addr:$src1), GR16:$src2)),
+                      (implicit SRW)]>;
+
+def BIT8mi  : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2),
+                     "bit.b\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su (load addr:$src1), (i8 imm:$src2))),
+                      (implicit SRW)]>;
+def BIT16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2),
+                     "bit.w\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su (load addr:$src1), (i16 imm:$src2))),
+                      (implicit SRW)]>;
+
+def BIT8mm  : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2),
+                     "bit.b\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su (i8 (load addr:$src1)), 
+                                            (load addr:$src2))),
+                      (implicit SRW)]>;
+def BIT16mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2),
+                     "bit.w\t{$src2, $src1}",
+                     [(MSP430cmp 0, (and_su (i16 (load addr:$src1)),
+                                            (load addr:$src2))),
+                      (implicit SRW)]>;
 } // Defs = [SRW]
 
 //===----------------------------------------------------------------------===//
@@ -905,3 +994,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
 
 // peephole patterns
 def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
+def : Pat<(MSP430cmp 0, (trunc (and_su GR16:$src1, GR16:$src2))),
+          (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit),
+                  (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 1d26ae3..383fd2e 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -25,14 +25,20 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
   /// stack frame in bytes.
   unsigned CalleeSavedFrameSize;
 
+  /// ReturnAddrIndex - FrameIndex for return slot.
+  int ReturnAddrIndex;
+
 public:
   MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
 
   explicit MSP430MachineFunctionInfo(MachineFunction &MF)
-    : CalleeSavedFrameSize(0) {}
+    : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
 
   unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
   void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+  int getRAIndex() const { return ReturnAddrIndex; }
+  void setRAIndex(int Index) { ReturnAddrIndex = Index; }
 };
 
 } // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 92baad9..e85c7a2 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -17,6 +17,7 @@
 #include "MSP430MachineFunctionInfo.h"
 #include "MSP430RegisterInfo.h"
 #include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -37,17 +38,26 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
 
 const unsigned*
 MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+  const Function* F = MF->getFunction();
   static const unsigned CalleeSavedRegs[] = {
     MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
     MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
     0
   };
+  static const unsigned CalleeSavedRegsIntr[] = {
+    MSP430::FPW,  MSP430::R5W,  MSP430::R6W,  MSP430::R7W,
+    MSP430::R8W,  MSP430::R9W,  MSP430::R10W, MSP430::R11W,
+    MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+    0
+  };
 
-  return CalleeSavedRegs;
+  return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+          CalleeSavedRegsIntr : CalleeSavedRegs);
 }
 
 const TargetRegisterClass *const *
 MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+  const Function* F = MF->getFunction();
   static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
     &MSP430::GR16RegClass, &MSP430::GR16RegClass,
     &MSP430::GR16RegClass, &MSP430::GR16RegClass,
@@ -55,8 +65,18 @@ MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
     &MSP430::GR16RegClass, &MSP430::GR16RegClass,
     0
   };
+  static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = {
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+    0
+  };
 
-  return CalleeSavedRegClasses;
+  return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+          CalleeSavedRegClassesIntr : CalleeSavedRegClasses);
 }
 
 BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -193,10 +213,10 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
     // We need to materialize the offset via add instruction.
     unsigned DstReg = MI.getOperand(0).getReg();
     if (Offset < 0)
-      BuildMI(MBB, next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+      BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
         .addReg(DstReg).addImm(-Offset);
     else
-      BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+      BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
         .addReg(DstReg).addImm(Offset);
 
     return 0;
@@ -251,7 +271,7 @@ void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const {
       .addReg(MSP430::SPW);
 
     // Mark the FramePtr as live-in in every block except the entry.
-    for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
          I != E; ++I)
       I->addLiveIn(MSP430::FPW);
 
@@ -292,7 +312,8 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
   DebugLoc DL = MBBI->getDebugLoc();
 
   switch (RetOpcode) {
-  case MSP430::RET: break;  // These are ok
+  case MSP430::RET:
+  case MSP430::RETI: break;  // These are ok
   default:
     llvm_unreachable("Can only insert epilog into returning blocks");
   }
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 6d8e160..48b9bdf 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -590,22 +590,6 @@ RemoveBranch(MachineBasicBlock &MBB) const
   return 2;
 }
 
-/// BlockHasNoFallThrough - Analyze if MachineBasicBlock does not
-/// fall-through into its successor block.
-bool MipsInstrInfo::
-BlockHasNoFallThrough(const MachineBasicBlock &MBB) const 
-{
-  if (MBB.empty()) return false;
-  
-  switch (MBB.back().getOpcode()) {
-  case Mips::RET:     // Return.
-  case Mips::JR:      // Indirect branch.
-  case Mips::J:       // Uncond branch.
-    return true;
-  default: return false;
-  }
-}
-
 /// ReverseBranchCondition - Return the inverse opcode of the 
 /// specified Branch instruction.
 bool MipsInstrInfo::
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 249d3de..ab8dc59 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -232,7 +232,6 @@ public:
     return 0;
   }
   
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0083598..af7d812 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -740,18 +740,6 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
 }
 
 
-bool PPCInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-  
-  switch (MBB.back().getOpcode()) {
-  case PPC::BLR:   // Return.
-  case PPC::B:     // Uncond branch.
-  case PPC::BCTR:  // Indirect branch.
-    return true;
-  default: return false;
-  }
-}
-
 bool PPCInstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index bb0dc15a..57facac 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -143,7 +143,6 @@ public:
   virtual bool canFoldMemoryOperand(const MachineInstr *MI,
                                     const SmallVectorImpl<unsigned> &Ops) const;
   
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
   
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 2d8a687..e1772c2 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -801,8 +801,21 @@ void bar(unsigned n) {
     true();
 }
 
-I think this basically amounts to a dag combine to simplify comparisons against
-multiply hi's into a comparison against the mullo.
+This is equivalent to the following, where 2863311531 is the multiplicative
+inverse of 3, and 1431655766 is ((2^32)-1)/3+1:
+void bar(unsigned n) {
+  if (n * 2863311531U < 1431655766U)
+    true();
+}
+
+The same transformation can work with an even modulo with the addition of a
+rotate: rotate the result of the multiply to the right by the number of bits
+which need to be zero for the condition to be true, and shrink the compare RHS
+by the same amount.  Unless the target supports rotates, though, that
+transformation probably isn't worthwhile.
+
+The transformation can also easily be made to work with non-zero equality
+comparisons: just transform, for example, "n % 3 == 1" to "(n-1) % 3 == 0".
 
 //===---------------------------------------------------------------------===//
 
@@ -823,20 +836,6 @@ int main() {
 
 //===---------------------------------------------------------------------===//
 
-Instcombine will merge comparisons like (x >= 10) && (x < 20) by producing (x -
-10) u< 10, but only when the comparisons have matching sign.
-
-This could be converted with a similiar technique. (PR1941)
-
-define i1 @test(i8 %x) {
-  %A = icmp uge i8 %x, 5
-  %B = icmp slt i8 %x, 20
-  %C = and i1 %A, %B
-  ret i1 %C
-}
-
-//===---------------------------------------------------------------------===//
-
 These functions perform the same computation, but produce different assembly.
 
 define i8 @select(i8 %x) readnone nounwind {
@@ -884,18 +883,6 @@ The expression should optimize to something like
 
 //===---------------------------------------------------------------------===//
 
-From GCC Bug 15241:
-unsigned int
-foo (unsigned int a, unsigned int b)
-{
- if (a <= 7 && b <= 7)
-   baz ();
-}
-Should combine to "(a|b) <= 7".  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
 From GCC Bug 3756:
 int
 pn (int n)
@@ -907,19 +894,6 @@ Should combine to (n >> 31) | 1.  Currently not optimized with "clang
 
 //===---------------------------------------------------------------------===//
 
-From GCC Bug 28685:
-int test(int a, int b)
-{
- int lt = a < b;
- int eq = a == b;
-
- return (lt || eq);
-}
-Should combine to "a <= b".  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts | llc".
-
-//===---------------------------------------------------------------------===//
-
 void a(int variable)
 {
  if (variable == 4 || variable == 6)
@@ -993,12 +967,6 @@ Should combine to 0.  Currently not optimized with "clang
 
 //===---------------------------------------------------------------------===//
 
-int a(unsigned char* b) {return *b > 99;}
-There's an unnecessary zext in the generated code with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
 int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;}
 Should be combined to  "((b >> 1) | b) & 1".  Currently not optimized
 with "clang -emit-llvm-bc | opt -std-compile-opts".
@@ -1011,12 +979,6 @@ Should combine to "x | (y & 3)".  Currently not optimized with "clang
 
 //===---------------------------------------------------------------------===//
 
-unsigned a(unsigned a) {return ((a | 1) & 3) | (a & -4);}
-Should combine to "a | 1".  Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
 int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);}
 Should fold to "(~a & c) | (a & b)".  Currently not optimized with
 "clang -emit-llvm-bc | opt -std-compile-opts".
@@ -1704,3 +1666,50 @@ need all but the bottom two bits from %A, and if we gave that mask to SDB it
 would delete the or instruction for us.
 
 //===---------------------------------------------------------------------===//
+
+FunctionAttrs is not marking this function as readnone (just readonly):
+$ clang t.c -emit-llvm -S -o - -O0 | opt -mem2reg -S -functionattrs
+
+int t(int a, int b, int c) {
+ int *p;
+ if (a)
+   p = &a;
+ else
+   p = &c;
+ return *p;
+}
+
+This is because we codegen this to:
+
+define i32 @t(i32 %a, i32 %b, i32 %c) nounwind readonly ssp {
+entry:
+  %a.addr = alloca i32                            ; <i32*> [#uses=3]
+  %c.addr = alloca i32                            ; <i32*> [#uses=2]
+...
+
+if.end:
+  %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ]
+  %tmp2 = load i32* %p.0                          ; <i32> [#uses=1]
+  ret i32 %tmp2
+}
+
+And functionattrs doesn't realize that the p.0 load points to function local
+memory.
+
+Also, functionattrs doesn't know about memcpy/memset.  This function should be
+marked readnone, since it only twiddles local memory, but functionattrs doesn't
+handle memset/memcpy/memmove aggressively:
+
+struct X { int *p; int *q; };
+int foo() {
+ int i = 0, j = 1;
+ struct X x, y;
+ int **p;
+ y.p = &i;
+ x.q = &j;
+ p = __builtin_memcpy (&x, &y, sizeof (int *));
+ return **p;
+}
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index d82d928..5fa7e8c 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -402,18 +402,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   return false;
 }
 
-bool SystemZInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{
-  if (MBB.empty()) return false;
-
-  switch (MBB.back().getOpcode()) {
-  case SystemZ::RET:   // Return.
-  case SystemZ::JMP:   // Uncond branch.
-  case SystemZ::JMPr:  // Indirect branch.
-    return true;
-  default: return false;
-  }
-}
-
 bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
   const TargetInstrDesc &TID = MI->getDesc();
   if (!TID.isTerminator()) return false;
@@ -454,8 +442,8 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       }
 
       // If the block has any instructions after a JMP, delete them.
-      while (next(I) != MBB.end())
-        next(I)->eraseFromParent();
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
       Cond.clear();
       FBB = 0;
 
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index e16d704..ef3b39e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -89,7 +89,6 @@ public:
                                  const std::vector<CalleeSavedInfo> &CSI) const;
 
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
   virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
   virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
                              MachineBasicBlock *&TBB,
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 4d1c01f..1318195 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -247,7 +247,7 @@ void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const {
       .addReg(SystemZ::R15D);
 
     // Mark the FramePtr as live-in in every block except the entry.
-    for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
          I != E; ++I)
       I->addLiveIn(SystemZ::R11D);
 
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index fc71bc3..9434a19 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -117,14 +117,6 @@ TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
           && TypeBitWidth == rhs.TypeBitWidth);
 }
 
-std::ostream &
-TargetAlignElem::dump(std::ostream &os) const {
-  return os << AlignType
-            << TypeBitWidth
-            << ":" << (int) (ABIAlign * 8)
-            << ":" << (int) (PrefAlign * 8);
-}
-
 const TargetAlignElem TargetData::InvalidAlignmentElem =
                 TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
 
@@ -323,11 +315,10 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
                  : Alignments[BestMatchIdx].PrefAlign;
 }
 
-typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
-
-namespace llvm {
+namespace {
 
 class StructLayoutMap : public AbstractTypeUser {
+  typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
   LayoutInfoTy LayoutInfo;
 
   /// refineAbstractType - The callback method invoked when an abstract type is
@@ -336,19 +327,11 @@ class StructLayoutMap : public AbstractTypeUser {
   ///
   virtual void refineAbstractType(const DerivedType *OldTy,
                                   const Type *) {
-    const StructType *STy = dyn_cast<const StructType>(OldTy);
-    if (!STy) {
-      OldTy->removeAbstractTypeUser(this);
-      return;
-    }
-
-    StructLayout *SL = LayoutInfo[STy];
-    if (SL) {
-      SL->~StructLayout();
-      free(SL);
-      LayoutInfo[STy] = NULL;
-    }
-
+    const StructType *STy = cast<const StructType>(OldTy);
+    LayoutInfoTy::iterator Iter = LayoutInfo.find(STy);
+    Iter->second->~StructLayout();
+    free(Iter->second);
+    LayoutInfo.erase(Iter);
     OldTy->removeAbstractTypeUser(this);
   }
 
@@ -358,70 +341,43 @@ class StructLayoutMap : public AbstractTypeUser {
   /// This method notifies ATU's when this occurs for a type.
   ///
   virtual void typeBecameConcrete(const DerivedType *AbsTy) {
-    const StructType *STy = dyn_cast<const StructType>(AbsTy);
-    if (!STy) {
-      AbsTy->removeAbstractTypeUser(this);
-      return;
-    }
-
-    StructLayout *SL = LayoutInfo[STy];
-    if (SL) {
-      SL->~StructLayout();
-      free(SL);
-      LayoutInfo[STy] = NULL;
-    }
-
+    const StructType *STy = cast<const StructType>(AbsTy);
+    LayoutInfoTy::iterator Iter = LayoutInfo.find(STy);
+    Iter->second->~StructLayout();
+    free(Iter->second);
+    LayoutInfo.erase(Iter);
     AbsTy->removeAbstractTypeUser(this);
   }
 
-  bool insert(const Type *Ty) {
-    if (Ty->isAbstract())
-      Ty->addAbstractTypeUser(this);
-    return true;
-  }
-
 public:
   virtual ~StructLayoutMap() {
     // Remove any layouts.
     for (LayoutInfoTy::iterator
-           I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I)
-      if (StructLayout *SL = I->second) {
-        SL->~StructLayout();
-        free(SL);
-      }
-  }
+           I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) {
+      const Type *Key = I->first;
+      StructLayout *Value = I->second;
 
-  inline LayoutInfoTy::iterator begin() {
-    return LayoutInfo.begin();
-  }
-  inline LayoutInfoTy::iterator end() {
-    return LayoutInfo.end();
-  }
-  inline LayoutInfoTy::const_iterator begin() const {
-    return LayoutInfo.begin();
-  }
-  inline LayoutInfoTy::const_iterator end() const {
-    return LayoutInfo.end();
-  }
+      if (Key->isAbstract())
+        Key->removeAbstractTypeUser(this);
 
-  LayoutInfoTy::iterator find(const StructType *&Val) {
-    return LayoutInfo.find(Val);
-  }
-  LayoutInfoTy::const_iterator find(const StructType *&Val) const {
-    return LayoutInfo.find(Val);
+      Value->~StructLayout();
+      free(Value);
+    }
   }
 
-  bool erase(const StructType *&Val) {
-    return LayoutInfo.erase(Val);
-  }
-  bool erase(LayoutInfoTy::iterator I) {
-    return LayoutInfo.erase(I);
+  void InvalidateEntry(const StructType *Ty) {
+    LayoutInfoTy::iterator I = LayoutInfo.find(Ty);
+    if (I == LayoutInfo.end()) return;
+
+    I->second->~StructLayout();
+    free(I->second);
+    LayoutInfo.erase(I);
+
+    if (Ty->isAbstract())
+      Ty->removeAbstractTypeUser(this);
   }
 
-  StructLayout *&operator[](const Type *Key) {
-    const StructType *STy = dyn_cast<const StructType>(Key);
-    assert(STy && "Trying to access the struct layout map with a non-struct!");
-    insert(STy);
+  StructLayout *&operator[](const StructType *STy) {
     return LayoutInfo[STy];
   }
 
@@ -429,17 +385,18 @@ public:
   virtual void dump() const {}
 };
 
-} // end namespace llvm
+} // end anonymous namespace
 
 TargetData::~TargetData() {
-  delete LayoutMap;
+  delete static_cast<StructLayoutMap*>(LayoutMap);
 }
 
 const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
   if (!LayoutMap)
     LayoutMap = new StructLayoutMap();
   
-  StructLayout *&SL = (*LayoutMap)[Ty];
+  StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+  StructLayout *&SL = (*STM)[Ty];
   if (SL) return SL;
 
   // Otherwise, create the struct layout.  Because it is variable length, we 
@@ -453,6 +410,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
   SL = L;
   
   new (L) StructLayout(Ty, *this);
+
+  if (Ty->isAbstract())
+    Ty->addAbstractTypeUser(STM);
+
   return L;
 }
 
@@ -463,15 +424,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
 void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
   if (!LayoutMap) return;  // No cache.
   
-  DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty);
-  if (I == LayoutMap->end()) return;
-  
-  I->second->~StructLayout();
-  free(I->second);
-  LayoutMap->erase(I);
+  StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+  STM->InvalidateEntry(Ty);
 }
 
-
 std::string TargetData::getStringRepresentation() const {
   std::string Result;
   raw_string_ostream OS(Result);
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index a167118..684c61f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -62,11 +62,6 @@ MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM);
 ///
 FunctionPass *createEmitX86CodeToMemory();
 
-/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass
-/// which calculates maximal stack alignment required for function
-///
-FunctionPass *createX86MaxStackAlignmentCalculatorPass();
-
 extern Target TheX86_32Target, TheX86_64Target;
 
 } // End llvm namespace
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index afd5525..5017af2 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -16,6 +16,7 @@
 
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/ADT/StringSet.h"
+#include "X86MachineFunctionInfo.h"
 
 namespace llvm {
   class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index d77f039..12d3d04 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -64,11 +64,18 @@ def RetCC_X86_32_C : CallingConv<[
 // X86-32 FastCC return-value convention.
 def RetCC_X86_32_Fast : CallingConv<[
   // The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
-  // SSE2, otherwise it is the the C calling conventions.
+  // SSE2.
   // This can happen when a float, 2 x float, or 3 x float vector is split by
   // target lowering, and is returned in 1-3 sse regs.
   CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
   CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+  // For integers, ECX can be used as an extra return register
+  CCIfType<[i8],  CCAssignToReg<[AL, DL, CL]>>,
+  CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+  CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+  // Otherwise, it is the same as the common X86 calling convention.
   CCDelegateTo<RetCC_X86Common>
 ]>;
 
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index a2fe9b0..044bd4b 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -289,7 +289,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
         while (Start != BB.begin() && prior(Start) != PrevI) --Start;
         errs() << "Inserted instructions:\n\t";
         Start->print(errs(), &MF.getTarget());
-        while (++Start != next(I)) {}
+        while (++Start != llvm::next(I)) {}
       }
       dumpStack();
     );
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d80b8ec..0517b56 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -595,6 +595,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
     setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+    setOperationAction(ISD::TRUNCATE,  (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::SIGN_EXTEND,  (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::ZERO_EXTEND,  (MVT::SimpleValueType)VT, Expand);
+    setOperationAction(ISD::ANY_EXTEND,  (MVT::SimpleValueType)VT, Expand);
+    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+      setTruncStoreAction((MVT::SimpleValueType)VT,
+                          (MVT::SimpleValueType)InnerVT, Expand);
+    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
   }
 
   // FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -671,8 +683,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 
     setOperationAction(ISD::INSERT_VECTOR_ELT,  MVT::v4i16, Custom);
 
-    setTruncStoreAction(MVT::v8i16,             MVT::v8i8, Expand);
-    setOperationAction(ISD::TRUNCATE,           MVT::v8i8, Expand);
     setOperationAction(ISD::SELECT,             MVT::v8i8, Promote);
     setOperationAction(ISD::SELECT,             MVT::v4i16, Promote);
     setOperationAction(ISD::SELECT,             MVT::v2i32, Promote);
@@ -3344,6 +3354,82 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
 }
 
 SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+                                          SelectionDAG &DAG) {
+  
+  // Check if the scalar load can be widened into a vector load. And if
+  // the address is "base + cst" see if the cst can be "absorbed" into
+  // the shuffle mask.
+  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+    SDValue Ptr = LD->getBasePtr();
+    if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+      return SDValue();
+    EVT PVT = LD->getValueType(0);
+    if (PVT != MVT::i32 && PVT != MVT::f32)
+      return SDValue();
+
+    int FI = -1;
+    int64_t Offset = 0;
+    if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+      FI = FINode->getIndex();
+      Offset = 0;
+    } else if (Ptr.getOpcode() == ISD::ADD &&
+               isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+               isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+      FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+      Offset = Ptr.getConstantOperandVal(1);
+      Ptr = Ptr.getOperand(0);
+    } else {
+      return SDValue();
+    }
+
+    SDValue Chain = LD->getChain();
+    // Make sure the stack object alignment is at least 16.
+    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+    if (DAG.InferPtrAlignment(Ptr) < 16) {
+      if (MFI->isFixedObjectIndex(FI)) {
+        // Can't change the alignment. Reference stack + offset explicitly
+        // if stack pointer is at least 16-byte aligned.
+        unsigned StackAlign = Subtarget->getStackAlignment();
+        if (StackAlign < 16)
+          return SDValue();
+        Offset = MFI->getObjectOffset(FI) + Offset;
+        SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+                                              getPointerTy());
+        Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+                          DAG.getConstant(Offset & ~15, getPointerTy()));
+        Offset %= 16;
+      } else {
+        MFI->setObjectAlignment(FI, 16);
+      }
+    }
+
+    // (Offset % 16) must be multiple of 4. Then address is then
+    // Ptr + (Offset & ~15).
+    if (Offset < 0)
+      return SDValue();
+    if ((Offset % 16) & 3)
+      return SDValue();
+    int64_t StartOffset = Offset & ~15;
+    if (StartOffset)
+      Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+                        Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+    int EltNo = (Offset - StartOffset) >> 2;
+    int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+    EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
+    SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0);
+    // Canonicalize it to a v4i32 shuffle.
+    V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
+    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+                       DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+                                            DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+  }
+
+  return SDValue();
+}
+
+SDValue
 X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   DebugLoc dl = Op.getDebugLoc();
   // All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3486,8 +3572,19 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
   }
 
   // Splat is obviously ok. Let legalizer expand it to a shuffle.
-  if (Values.size() == 1)
+  if (Values.size() == 1) {
+    if (EVTBits == 32) {
+      // Instead of a shuffle like this:
+      // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+      // Check if it's possible to issue this instead.
+      // shuffle (vload ptr)), undef, <1, 1, 1, 1>
+      unsigned Idx = CountTrailingZeros_32(NonZeros);
+      SDValue Item = Op.getOperand(Idx);
+      if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+        return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+    }
     return SDValue();
+  }
 
   // A vector full of immediates; various special cases are already
   // handled, so this is best done with a single constant-pool load.
@@ -4278,7 +4375,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
   unsigned ShAmt = 0;
   SDValue ShVal;
   bool isShift = getSubtarget()->hasSSE2() &&
-  isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+    isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
   if (isShift && ShVal.hasOneUse()) {
     // If the shifted value has multiple uses, it may be cheaper to use
     // v_set0 + movlhps or movhlps, etc.
@@ -4815,6 +4912,7 @@ static SDValue
 GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
            SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
            unsigned char OperandFlags) {
+  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
   DebugLoc dl = GA->getDebugLoc();
   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
@@ -4828,6 +4926,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
     SDValue Ops[]  = { Chain, TGA };
     Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
   }
+
+  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
+  MFI->setHasCalls(true);
+
   SDValue Flag = Chain.getValue(1);
   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
 }
@@ -5648,6 +5750,17 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
     return SDValue();
 
   SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+
+  // Use sbb x, x to materialize carry bit into a GPR.
+  // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently
+  // miscompiling ARMISelDAGToDAG.cpp.
+  if (0 && !isFP && X86CC == X86::COND_B) {
+    return DAG.getNode(ISD::AND, dl, MVT::i8,
+                       DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
+                                   DAG.getConstant(X86CC, MVT::i8), Cond),
+                       DAG.getConstant(1, MVT::i8));
+  }
+
   return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
                      DAG.getConstant(X86CC, MVT::i8), Cond);
 }
@@ -5800,9 +5913,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
       Cond = NewCond;
   }
 
+  // Look pass (and (setcc_carry (cmp ...)), 1).
+  if (Cond.getOpcode() == ISD::AND &&
+      Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+    if (C && C->getAPIntValue() == 1) 
+      Cond = Cond.getOperand(0);
+  }
+
   // If condition flag is set by a X86ISD::CMP, then use it as the condition
   // setting operand in place of the X86ISD::SETCC.
-  if (Cond.getOpcode() == X86ISD::SETCC) {
+  if (Cond.getOpcode() == X86ISD::SETCC ||
+      Cond.getOpcode() == X86ISD::SETCC_CARRY) {
     CC = Cond.getOperand(0);
 
     SDValue Cmp = Cond.getOperand(1);
@@ -5885,9 +6007,18 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
     Cond = LowerXALUO(Cond, DAG);
 #endif
 
+  // Look pass (and (setcc_carry (cmp ...)), 1).
+  if (Cond.getOpcode() == ISD::AND &&
+      Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+    if (C && C->getAPIntValue() == 1) 
+      Cond = Cond.getOperand(0);
+  }
+
   // If condition flag is set by a X86ISD::CMP, then use it as the condition
   // setting operand in place of the X86ISD::SETCC.
-  if (Cond.getOpcode() == X86ISD::SETCC) {
+  if (Cond.getOpcode() == X86ISD::SETCC ||
+      Cond.getOpcode() == X86ISD::SETCC_CARRY) {
     CC = Cond.getOperand(0);
 
     SDValue Cmp = Cond.getOperand(1);
@@ -7274,6 +7405,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
   case X86ISD::COMI:               return "X86ISD::COMI";
   case X86ISD::UCOMI:              return "X86ISD::UCOMI";
   case X86ISD::SETCC:              return "X86ISD::SETCC";
+  case X86ISD::SETCC_CARRY:        return "X86ISD::SETCC_CARRY";
   case X86ISD::CMOV:               return "X86ISD::CMOV";
   case X86ISD::BRCOND:             return "X86ISD::BRCOND";
   case X86ISD::RET_FLAG:           return "X86ISD::RET_FLAG";
@@ -8327,16 +8459,6 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
   return TargetLowering::isGAPlusOffset(N, GA, Offset);
 }
 
-static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
-                               const TargetLowering &TLI) {
-  GlobalValue *GV;
-  int64_t Offset = 0;
-  if (TLI.isGAPlusOffset(Base, GV, Offset))
-    return (GV->getAlignment() >= N && (Offset % N) == 0);
-  // DAG combine handles the stack object case.
-  return false;
-}
-
 static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
                                      EVT EltVT, LoadSDNode *&LDBase,
                                      unsigned &LastLoadedElt,
@@ -8366,7 +8488,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
       continue;
 
     LoadSDNode *LD = cast<LoadSDNode>(Elt);
-    if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
+    if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
       return false;
     LastLoadedElt = i;
   }
@@ -8399,7 +8521,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
     return SDValue();
 
   if (LastLoadedElt == NumElems - 1) {
-    if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
+    if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
       return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
                          LD->getSrcValue(), LD->getSrcValueOffset(),
                          LD->isVolatile());
@@ -8858,11 +8980,42 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+  EVT VT = N0.getValueType();
+
+  // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+  // since the result of setcc_c is all zero's or all ones.
+  if (N1C && N0.getOpcode() == ISD::AND &&
+      N0.getOperand(1).getOpcode() == ISD::Constant) {
+    SDValue N00 = N0.getOperand(0);
+    if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+        ((N00.getOpcode() == ISD::ANY_EXTEND ||
+          N00.getOpcode() == ISD::ZERO_EXTEND) &&
+         N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+      APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+      APInt ShAmt = N1C->getAPIntValue();
+      Mask = Mask.shl(ShAmt);
+      if (Mask != 0)
+        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+                           N00, DAG.getConstant(Mask, VT));
+    }
+  }
+
+  return SDValue();
+}
 
 /// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
 ///                       when possible.
 static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
                                    const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector() && VT.isInteger() &&
+      N->getOpcode() == ISD::SHL)
+    return PerformSHLCombine(N, DAG);
+
   // On X86 with SSE2 support, we can transform this to a vector shift if
   // all elements are shifted by the same amount.  We can't do this in legalize
   // because the a constant vector is typically transformed to a constant pool
@@ -8870,7 +9023,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
   if (!Subtarget->hasSSE2())
     return SDValue();
 
-  EVT VT = N->getValueType(0);
   if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
     return SDValue();
 
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7b4ab62..64bc70c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -118,6 +118,10 @@ namespace llvm {
       /// operand produced by a CMP instruction.
       SETCC,
 
+      // Same as SETCC except it's materialized with a sbb and the value is all
+      // one's or all zero's.
+      SETCC_CARRY,
+
       /// X86 conditional moves. Operand 0 and operand 1 are the two values
       /// to select from. Operand 2 is the condition code, and operand 3 is the
       /// flag operand produced by a CMP or TEST instruction. It also writes a
@@ -626,7 +630,9 @@ namespace llvm {
 
     std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                                bool isSigned);
-    
+
+    SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+                                   SelectionDAG &DAG);
     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index b5fa862..b6a2c05 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1333,6 +1333,15 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem,       // if !overflow, GR64 = [mem64]
                                      X86_COND_NO, EFLAGS))]>, TB;
 } // isTwoAddress
 
+// Use sbb to materialize carry flag into a GPR.
+let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins),
+                  "sbb{q}\t$dst, $dst",
+                 [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+
+def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+          (SETB_C64r)>;
+
 //===----------------------------------------------------------------------===//
 //  Conversion Instructions...
 //
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a37013d..1947d35 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -783,12 +783,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
     if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    return hasLoadFromStackSlot(MI, FrameIndex);
+    const MachineMemOperand *Dummy;
+    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
   }
   return 0;
 }
 
 bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+                                        const MachineMemOperand *&MMO,
                                         int &FrameIndex) const {
   for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
          oe = MI->memoperands_end();
@@ -798,6 +800,7 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
       if (const FixedStackPseudoSourceValue *Value =
           dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
         FrameIndex = Value->getFrameIndex();
+        MMO = *o;
         return true;
       }
   }
@@ -819,12 +822,14 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
     if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
       return Reg;
     // Check for post-frame index elimination operations
-    return hasStoreToStackSlot(MI, FrameIndex);
+    const MachineMemOperand *Dummy;
+    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
   }
   return 0;
 }
 
 bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+                                       const MachineMemOperand *&MMO,
                                        int &FrameIndex) const {
   for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
          oe = MI->memoperands_end();
@@ -834,6 +839,7 @@ bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
       if (const FixedStackPseudoSourceValue *Value =
           dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
         FrameIndex = Value->getFrameIndex();
+        MMO = *o;
         return true;
       }
   }
@@ -1052,6 +1058,112 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) {
   return false;
 }
 
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled, use 32-bit LEA to form 3-address code by promoting
+/// to a 32-bit superregister and then truncating back down to a 16-bit
+/// subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+                                           MachineFunction::iterator &MFI,
+                                           MachineBasicBlock::iterator &MBBI,
+                                           LiveVariables *LV) const {
+  MachineInstr *MI = MBBI;
+  unsigned Dest = MI->getOperand(0).getReg();
+  unsigned Src = MI->getOperand(1).getReg();
+  bool isDead = MI->getOperand(0).isDead();
+  bool isKill = MI->getOperand(1).isKill();
+
+  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+    ? X86::LEA64_32r : X86::LEA32r;
+  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+            
+  // Build and insert into an implicit UNDEF value. This is OK because
+  // well be shifting and then extracting the lower 16-bits. 
+  // This has the potential to cause partial register stall. e.g.
+  //   movw    (%rbp,%rcx,2), %dx
+  //   leal    -65(%rdx), %esi
+  // But testing has shown this *does* help performance in 64-bit mode (at
+  // least on modern x86 machines).
+  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+  MachineInstr *InsMI =
+    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+    .addReg(leaInReg)
+    .addReg(Src, getKillRegState(isKill))
+    .addImm(X86::SUBREG_16BIT);
+
+  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+                                    get(Opc), leaOutReg);
+  switch (MIOpc) {
+  default:
+    llvm_unreachable(0);
+    break;
+  case X86::SHL16ri: {
+    unsigned ShAmt = MI->getOperand(2).getImm();
+    MIB.addReg(0).addImm(1 << ShAmt)
+       .addReg(leaInReg, RegState::Kill).addImm(0);
+    break;
+  }
+  case X86::INC16r:
+  case X86::INC64_16r:
+    addLeaRegOffset(MIB, leaInReg, true, 1);
+    break;
+  case X86::DEC16r:
+  case X86::DEC64_16r:
+    addLeaRegOffset(MIB, leaInReg, true, -1);
+    break;
+  case X86::ADD16ri:
+  case X86::ADD16ri8:
+    addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());    
+    break;
+  case X86::ADD16rr: {
+    unsigned Src2 = MI->getOperand(2).getReg();
+    bool isKill2 = MI->getOperand(2).isKill();
+    unsigned leaInReg2 = 0;
+    MachineInstr *InsMI2 = 0;
+    if (Src == Src2) {
+      // ADD16rr %reg1028<kill>, %reg1028
+      // just a single insert_subreg.
+      addRegReg(MIB, leaInReg, true, leaInReg, false);
+    } else {
+      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+      // Build and insert into an implicit UNDEF value. This is OK because
+      // well be shifting and then extracting the lower 16-bits. 
+      BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+      InsMI2 =
+        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
+        .addReg(leaInReg2)
+        .addReg(Src2, getKillRegState(isKill2))
+        .addImm(X86::SUBREG_16BIT);
+      addRegReg(MIB, leaInReg, true, leaInReg2, true);
+    }
+    if (LV && isKill2 && InsMI2)
+      LV->replaceKillInstruction(Src2, MI, InsMI2);
+    break;
+  }
+  }
+
+  MachineInstr *NewMI = MIB;
+  MachineInstr *ExtMI =
+    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+    .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+    .addReg(leaOutReg, RegState::Kill)
+    .addImm(X86::SUBREG_16BIT);
+
+  if (LV) {
+    // Update live variables
+    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+    if (isKill)
+      LV->replaceKillInstruction(Src, MI, InsMI);
+    if (isDead)
+      LV->replaceKillInstruction(Dest, MI, ExtMI);
+  }
+
+  return ExtMI;
+}
+
 /// convertToThreeAddress - This method must be implemented by targets that
 /// set the M_CONVERTIBLE_TO_3_ADDR flag.  When this flag is set, the target
 /// may be able to convert a two-address instruction into a true
@@ -1077,7 +1189,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
   MachineInstr *NewMI = NULL;
   // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's.  When
   // we have better subtarget support, enable the 16-bit LEA generation here.
+  // 16-bit LEA is also slow on Core2.
   bool DisableLEA16 = true;
+  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
 
   unsigned MIOpc = MI->getOpcode();
   switch (MIOpc) {
@@ -1116,8 +1230,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = MI->getOperand(2).getImm();
     if (ShAmt == 0 || ShAmt >= 4) return 0;
 
-    unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
-      X86::LEA64_32r : X86::LEA32r;
+    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
     NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
       .addReg(Dest, RegState::Define | getDeadRegState(isDead))
       .addReg(0).addImm(1 << ShAmt)
@@ -1131,51 +1244,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     unsigned ShAmt = MI->getOperand(2).getImm();
     if (ShAmt == 0 || ShAmt >= 4) return 0;
 
-    if (DisableLEA16) {
-      // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
-      MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
-      unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
-        ? X86::LEA64_32r : X86::LEA32r;
-      unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-      unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-            
-      // Build and insert into an implicit UNDEF value. This is OK because
-      // well be shifting and then extracting the lower 16-bits. 
-      BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
-      MachineInstr *InsMI =
-        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
-        .addReg(leaInReg)
-        .addReg(Src, getKillRegState(isKill))
-        .addImm(X86::SUBREG_16BIT);
-      
-      NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
-        .addReg(0).addImm(1 << ShAmt)
-        .addReg(leaInReg, RegState::Kill)
-        .addImm(0);
-      
-      MachineInstr *ExtMI =
-        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
-        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-        .addReg(leaOutReg, RegState::Kill)
-        .addImm(X86::SUBREG_16BIT);
-
-      if (LV) {
-        // Update live variables
-        LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
-        LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
-        if (isKill)
-          LV->replaceKillInstruction(Src, MI, InsMI);
-        if (isDead)
-          LV->replaceKillInstruction(Dest, MI, ExtMI);
-      }
-      return ExtMI;
-    } else {
-      NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
-        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
-        .addReg(0).addImm(1 << ShAmt)
-        .addReg(Src, getKillRegState(isKill))
-        .addImm(0);
-    }
+    if (DisableLEA16)
+      return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+      .addReg(0).addImm(1 << ShAmt)
+      .addReg(Src, getKillRegState(isKill))
+      .addImm(0);
     break;
   }
   default: {
@@ -1185,7 +1260,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     if (hasLiveCondCodeDef(MI))
       return 0;
 
-    bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
     switch (MIOpc) {
     default: return 0;
     case X86::INC64r:
@@ -1202,7 +1276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     }
     case X86::INC16r:
     case X86::INC64_16r:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                            .addReg(Dest, RegState::Define |
@@ -1223,7 +1298,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     }
     case X86::DEC16r:
     case X86::DEC64_16r:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
       NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                            .addReg(Dest, RegState::Define |
@@ -1246,7 +1322,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
       break;
     }
     case X86::ADD16rr: {
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
       unsigned Src2 = MI->getOperand(2).getReg();
       bool isKill2 = MI->getOperand(2).isKill();
@@ -1261,56 +1338,32 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
     case X86::ADD64ri32:
     case X86::ADD64ri8:
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm())
-        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
-                                .addReg(Dest, RegState::Define |
-                                        getDeadRegState(isDead)),
-                                Src, isKill, MI->getOperand(2).getImm());
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
+                              Src, isKill, MI->getOperand(2).getImm());
       break;
     case X86::ADD32ri:
-    case X86::ADD32ri8:
+    case X86::ADD32ri8: {
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm()) {
-        unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
-        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                                .addReg(Dest, RegState::Define |
-                                        getDeadRegState(isDead)),
+      unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
                                 Src, isKill, MI->getOperand(2).getImm());
-      }
       break;
+    }
     case X86::ADD16ri:
     case X86::ADD16ri8:
-      if (DisableLEA16) return 0;
+      if (DisableLEA16)
+        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
       assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
-      if (MI->getOperand(2).isImm())
-        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
-                             .addReg(Dest, RegState::Define |
-                                     getDeadRegState(isDead)),
-                             Src, isKill, MI->getOperand(2).getImm());
-      break;
-    case X86::SHL16ri:
-      if (DisableLEA16) return 0;
-    case X86::SHL32ri:
-    case X86::SHL64ri: {
-      assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
-             "Unknown shl instruction!");
-      unsigned ShAmt = MI->getOperand(2).getImm();
-      if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
-        X86AddressMode AM;
-        AM.Scale = 1 << ShAmt;
-        AM.IndexReg = Src;
-        unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
-          : (MIOpc == X86::SHL32ri
-             ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
-        NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
-                               .addReg(Dest, RegState::Define |
-                                       getDeadRegState(isDead)), AM);
-        if (isKill)
-          NewMI->getOperand(3).setIsKill(true);
-      }
+      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+                              .addReg(Dest, RegState::Define |
+                                      getDeadRegState(isDead)),
+                              Src, isKill, MI->getOperand(2).getImm());
       break;
     }
-    }
   }
   }
 
@@ -1571,14 +1624,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
   MachineBasicBlock::iterator I = MBB.end();
   while (I != MBB.begin()) {
     --I;
-    // Working from the bottom, when we see a non-terminator
-    // instruction, we're done.
+
+    // Working from the bottom, when we see a non-terminator instruction, we're
+    // done.
     if (!isBrAnalysisUnpredicatedTerminator(I, *this))
       break;
-    // A terminator that isn't a branch can't easily be handled
-    // by this analysis.
+
+    // A terminator that isn't a branch can't easily be handled by this
+    // analysis.
     if (!I->getDesc().isBranch())
       return true;
+
     // Handle unconditional branches.
     if (I->getOpcode() == X86::JMP) {
       if (!AllowModify) {
@@ -1587,10 +1643,12 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       }
 
       // If the block has any instructions after a JMP, delete them.
-      while (next(I) != MBB.end())
-        next(I)->eraseFromParent();
+      while (llvm::next(I) != MBB.end())
+        llvm::next(I)->eraseFromParent();
+
       Cond.clear();
       FBB = 0;
+
       // Delete the JMP if it's equivalent to a fall-through.
       if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
         TBB = 0;
@@ -1598,14 +1656,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
         I = MBB.end();
         continue;
       }
+
       // TBB is used to indicate the unconditinal destination.
       TBB = I->getOperand(0).getMBB();
       continue;
     }
+
     // Handle conditional branches.
     X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
     if (BranchCode == X86::COND_INVALID)
       return true;  // Can't handle indirect branch.
+
     // Working from the bottom, handle the first conditional branch.
     if (Cond.empty()) {
       FBB = TBB;
@@ -1613,24 +1674,26 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       Cond.push_back(MachineOperand::CreateImm(BranchCode));
       continue;
     }
-    // Handle subsequent conditional branches. Only handle the case
-    // where all conditional branches branch to the same destination
-    // and their condition opcodes fit one of the special
-    // multi-branch idioms.
+
+    // Handle subsequent conditional branches. Only handle the case where all
+    // conditional branches branch to the same destination and their condition
+    // opcodes fit one of the special multi-branch idioms.
     assert(Cond.size() == 1);
     assert(TBB);
-    // Only handle the case where all conditional branches branch to
-    // the same destination.
+
+    // Only handle the case where all conditional branches branch to the same
+    // destination.
     if (TBB != I->getOperand(0).getMBB())
       return true;
-    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+
     // If the conditions are the same, we can leave them alone.
+    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
     if (OldBranchCode == BranchCode)
       continue;
-    // If they differ, see if they fit one of the known patterns.
-    // Theoretically we could handle more patterns here, but
-    // we shouldn't expect to see them if instruction selection
-    // has done a reasonable job.
+
+    // If they differ, see if they fit one of the known patterns. Theoretically,
+    // we could handle more patterns here, but we shouldn't expect to see them
+    // if instruction selection has done a reasonable job.
     if ((OldBranchCode == X86::COND_NP &&
          BranchCode == X86::COND_E) ||
         (OldBranchCode == X86::COND_E &&
@@ -1643,6 +1706,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
       BranchCode = X86::COND_NE_OR_P;
     else
       return true;
+
     // Update the MachineOperand.
     Cond[0].setImm(BranchCode);
   }
@@ -2713,27 +2777,6 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
   return I->second.first;
 }
 
-bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
-  if (MBB.empty()) return false;
-  
-  switch (MBB.back().getOpcode()) {
-  case X86::TCRETURNri:
-  case X86::TCRETURNdi:
-  case X86::RET:     // Return.
-  case X86::RETI:
-  case X86::TAILJMPd:
-  case X86::TAILJMPr:
-  case X86::TAILJMPm:
-  case X86::JMP:     // Uncond branch.
-  case X86::JMP32r:  // Indirect branch.
-  case X86::JMP64r:  // Indirect branch (64-bit).
-  case X86::JMP32m:  // Indirect branch through mem.
-  case X86::JMP64m:  // Indirect branch through mem (64-bit).
-    return true;
-  default: return false;
-  }
-}
-
 bool X86InstrInfo::
 ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
   assert(Cond.size() == 1 && "Invalid X86 branch condition!");
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index c6daa25..b83441d 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -457,11 +457,14 @@ public:
 
   /// hasLoadFromStackSlot - If the specified machine instruction has
   /// a load from a stack slot, return true along with the FrameIndex
-  /// of the loaded stack slot.  If not, return false.  Unlike
+  /// of the loaded stack slot and the machine mem operand containing
+  /// the reference.  If not, return false.  Unlike
   /// isLoadFromStackSlot, this returns true for any instructions that
   /// loads from the stack.  This is a hint only and may not catch all
   /// cases.
-  bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+  bool hasLoadFromStackSlot(const MachineInstr *MI,
+                            const MachineMemOperand *&MMO,
+                            int &FrameIndex) const;
 
   unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
   /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
@@ -472,11 +475,13 @@ public:
 
   /// hasStoreToStackSlot - If the specified machine instruction has a
   /// store to a stack slot, return true along with the FrameIndex of
-  /// the loaded stack slot.  If not, return false.  Unlike
-  /// isStoreToStackSlot, this returns true for any instructions that
-  /// loads from the stack.  This is a hint only and may not catch all
-  /// cases.
-  bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+  /// the loaded stack slot and the machine mem operand containing the
+  /// reference.  If not, return false.  Unlike isStoreToStackSlot,
+  /// this returns true for any instructions that loads from the
+  /// stack.  This is a hint only and may not catch all cases.
+  bool hasStoreToStackSlot(const MachineInstr *MI,
+                           const MachineMemOperand *&MMO,
+                           int &FrameIndex) const;
 
   bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                          AliasAnalysis *AA) const;
@@ -595,7 +600,6 @@ public:
                                       bool UnfoldLoad, bool UnfoldStore,
                                       unsigned *LoadRegIndex = 0) const;
   
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
   virtual
   bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
 
@@ -633,6 +637,11 @@ public:
   unsigned getGlobalBaseReg(MachineFunction *MF) const;
 
 private:
+  MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+                                              MachineFunction::iterator &MFI,
+                                              MachineBasicBlock::iterator &MBBI,
+                                              LiveVariables *LV) const;
+
   MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr* MI,
                                      unsigned OpNum,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 90ef1f4..3cc1853 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -87,6 +87,7 @@ def X86cmov    : SDNode<"X86ISD::CMOV",     SDTX86Cmov>;
 def X86brcond  : SDNode<"X86ISD::BRCOND",   SDTX86BrCond,
                         [SDNPHasChain]>;
 def X86setcc   : SDNode<"X86ISD::SETCC",    SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>;
 
 def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
                         [SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
@@ -816,7 +817,7 @@ def BSR32rm  : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
 
 let neverHasSideEffects = 1 in
 def LEA16r   : I<0x8D, MRMSrcMem,
-                 (outs GR16:$dst), (ins i32mem:$src),
+                 (outs GR16:$dst), (ins lea32mem:$src),
                  "lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
 let isReMaterializable = 1 in
 def LEA32r   : I<0x8D, MRMSrcMem,
@@ -3059,6 +3060,21 @@ let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
 def LAHF     : I<0x9F, RawFrm, (outs),  (ins), "lahf", []>;  // AH = flags
 
 let Uses = [EFLAGS] in {
+// Use sbb to materialize carry bit.
+
+let Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins),
+                 "sbb{b}\t$dst, $dst",
+                 [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins),
+                  "sbb{w}\t$dst, $dst",
+                 [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>,
+                OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins),
+                  "sbb{l}\t$dst, $dst",
+                 [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+} // isCodeGenOnly
+
 def SETEr    : I<0x94, MRM0r, 
                  (outs GR8   :$dst), (ins),
                  "sete\t$dst",
@@ -4169,6 +4185,12 @@ def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
                        GR16:$src2, (i8 imm:$amt2)), addr:$dst),
           (SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
 
+// (anyext (setcc_carry)) -> (zext (setcc_carry))
+def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+          (SETB_C16r)>;
+def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+          (SETB_C32r)>;
+
 //===----------------------------------------------------------------------===//
 // EFLAGS-defining Patterns
 //===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index dfdd4ce..62841f8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2083,7 +2083,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
                      (outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
                      "pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                      [(set VR128:$dst, (v4i32 (pshufd:$src2
-                                             (bc_v4i32(memopv2i64 addr:$src1)),
+                                             (bc_v4i32 (memopv2i64 addr:$src1)),
                                              (undef))))]>;
 }
 
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 33852bd..d96aafd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -423,21 +423,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 // Stack Frame Processing methods
 //===----------------------------------------------------------------------===//
 
-static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
-  unsigned MaxAlign = 0;
-
-  for (int i = FFI->getObjectIndexBegin(),
-         e = FFI->getObjectIndexEnd(); i != e; ++i) {
-    if (FFI->isDeadObjectIndex(i))
-      continue;
-
-    unsigned Align = FFI->getObjectAlignment(i);
-    MaxAlign = std::max(MaxAlign, Align);
-  }
-
-  return MaxAlign;
-}
-
 /// hasFP - Return true if the specified function should have a dedicated frame
 /// pointer register.  This is true if the function has variable sized allocas
 /// or if frame pointer elimination is disabled.
@@ -638,10 +623,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
 
   // Calculate and set max stack object alignment early, so we can decide
   // whether we will need stack realignment (and thus FP).
-  unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
-                               calculateMaxStackAlignment(MFI));
-
-  MFI->setMaxAlignment(MaxAlign);
+  MFI->calculateMaxStackAlignment();
 
   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -741,7 +723,7 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
 
   if (MBBI == MBB.end()) return;
 
-  MachineBasicBlock::iterator NI = next(MBBI);
+  MachineBasicBlock::iterator NI = llvm::next(MBBI);
   if (NI == MBB.end()) return;
 
   unsigned Opc = NI->getOpcode();
@@ -775,7 +757,7 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
     return 0;
 
   MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
-  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
+  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
   unsigned Opc = PI->getOpcode();
   int Offset = 0;
 
@@ -1001,7 +983,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     }
 
     // Mark the FramePtr as live-in in every block except the entry.
-    for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
          I != E; ++I)
       I->addLiveIn(FramePtr);
 
@@ -1482,45 +1464,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
 }
 
 #include "X86GenRegisterInfo.inc"
-
-namespace {
-  struct MSAC : public MachineFunctionPass {
-    static char ID;
-    MSAC() : MachineFunctionPass(&ID) {}
-
-    virtual bool runOnMachineFunction(MachineFunction &MF) {
-      MachineFrameInfo *FFI = MF.getFrameInfo();
-      MachineRegisterInfo &RI = MF.getRegInfo();
-
-      // Calculate max stack alignment of all already allocated stack objects.
-      unsigned MaxAlign = calculateMaxStackAlignment(FFI);
-
-      // Be over-conservative: scan over all vreg defs and find, whether vector
-      // registers are used. If yes - there is probability, that vector register
-      // will be spilled and thus stack needs to be aligned properly.
-      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
-           RegNum < RI.getLastVirtReg(); ++RegNum)
-        MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
-
-      if (FFI->getMaxAlignment() == MaxAlign)
-        return false;
-
-      FFI->setMaxAlignment(MaxAlign);
-      return true;
-    }
-
-    virtual const char *getPassName() const {
-      return "X86 Maximal Stack Alignment Calculator";
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesCFG();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
-  };
-
-  char MSAC::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 661f560..75cdbad 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -367,5 +367,5 @@ bool X86Subtarget::enablePostRAScheduler(
             RegClassVector& CriticalPathRCs) const {
   Mode = TargetSubtarget::ANTIDEP_CRITICAL;
   CriticalPathRCs.clear();
-  return OptLevel >= CodeGenOpt::Default;
+  return OptLevel >= CodeGenOpt::Aggressive;
 }
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 0cda8bc..0152121 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -163,7 +163,7 @@ bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
                                       CodeGenOpt::Level OptLevel) {
   // Calculate and set max stack object alignment early, so we can decide
   // whether we will need stack realignment (and thus FP).
-  PM.add(createX86MaxStackAlignmentCalculatorPass());
+  PM.add(createMaxStackAlignmentCalculatorPass());
   return false;  // -print-machineinstr shouldn't print after this.
 }
 
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index e616fe6..5a54844 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -453,26 +453,6 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
   return true;
 }
 
-/// BlockHasNoFallThrough - Analyse if MachineBasicBlock does not
-/// fall-through into its successor block.
-bool XCoreInstrInfo::
-BlockHasNoFallThrough(const MachineBasicBlock &MBB) const 
-{
-  if (MBB.empty()) return false;
-  
-  switch (MBB.back().getOpcode()) {
-  case XCore::RETSP_u6:     // Return.
-  case XCore::RETSP_lu6:
-  case XCore::BAU_1r:       // Indirect branch.
-  case XCore::BRFU_u6:      // Uncond branch.
-  case XCore::BRFU_lu6:
-  case XCore::BRBU_u6:
-  case XCore::BRBU_lu6:
-    return true;
-  default: return false;
-  }
-}
-
 /// ReverseBranchCondition - Return the inverse opcode of the 
 /// specified Branch instruction.
 bool XCoreInstrInfo::
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 24230ac..3e0a765 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -87,8 +87,6 @@ public:
                                          MachineBasicBlock::iterator MI,
                                const std::vector<CalleeSavedInfo> &CSI) const;
 
-  virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
   virtual bool ReverseBranchCondition(
                             SmallVectorImpl<MachineOperand> &Cond) const;
 };
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 4635d0e..1793bbf 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2493,29 +2493,28 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
       Changed = true;
     }
 
-    // If the aliasee has internal linkage, give it the name and linkage
-    // of the alias, and delete the alias.  This turns:
-    //   define internal ... @f(...)
-    //   @a = alias ... @f
-    // into:
-    //   define ... @a(...)
-    if (!Target->hasLocalLinkage())
-      continue;
-
-    // The transform is only useful if the alias does not have internal linkage.
-    if (J->hasLocalLinkage())
-      continue;
+    // If the alias is externally visible, we may still be able to simplify it.
+    if (!J->hasLocalLinkage()) {
+      // If the aliasee has internal linkage, give it the name and linkage
+      // of the alias, and delete the alias.  This turns:
+      //   define internal ... @f(...)
+      //   @a = alias ... @f
+      // into:
+      //   define ... @a(...)
+      if (!Target->hasLocalLinkage())
+        continue;
 
-    // Do not perform the transform if multiple aliases potentially target the
-    // aliasee.  This check also ensures that it is safe to replace the section
-    // and other attributes of the aliasee with those of the alias.
-    if (!hasOneUse)
-      continue;
+      // Do not perform the transform if multiple aliases potentially target the
+      // aliasee.  This check also ensures that it is safe to replace the section
+      // and other attributes of the aliasee with those of the alias.
+      if (!hasOneUse)
+        continue;
 
-    // Give the aliasee the name, linkage and other attributes of the alias.
-    Target->takeName(J);
-    Target->setLinkage(J->getLinkage());
-    Target->GlobalValue::copyAttributesFrom(J);
+      // Give the aliasee the name, linkage and other attributes of the alias.
+      Target->takeName(J);
+      Target->setLinkage(J->getLinkage());
+      Target->GlobalValue::copyAttributesFrom(J);
+    }
 
     // Delete the alias.
     M.getAliasList().erase(J);
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index 2951dbc..829da6b 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -15,6 +15,7 @@
 #ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
 #define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
 
+#include "llvm/BasicBlock.h"
 #include "llvm/ADT/EquivalenceClasses.h"
 #include <vector>
 #include <algorithm>
@@ -33,6 +34,18 @@ namespace llvm {
                       typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
         if (X.second > Y.second) return true;
         if (X.second < Y.second) return false;
+        if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
+          if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
+            if (BBX->size() > BBY->size()) return true;
+            if (BBX->size() < BBY->size()) return false;
+          }
+        }
+        if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
+          if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
+            if (BBX->size() > BBY->size()) return true;
+            if (BBX->size() < BBY->size()) return false;
+          }
+        }
         return false;
       }
     };
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 9ca90c3..e4c4ae5 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -21,7 +21,6 @@
 #include "llvm/InlineAsm.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Pass.h"
 #include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/Target/TargetData.h"
@@ -563,7 +562,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
   return false;
 }
 
-/// OptimizeMemoryInst - Load and Store Instructions have often have
+/// OptimizeMemoryInst - Load and Store Instructions often have
 /// addressing modes that can do significant amounts of computation.  As such,
 /// instruction selection will try to get the load or store to do as much
 /// computation as possible for the program.  The problem is that isel can only
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index b0988b5..1cfde8f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -85,9 +85,14 @@ static bool doesClobberMemory(Instruction *I) {
     return true;
   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
     switch (II->getIntrinsicID()) {
-    default: return false;
-    case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy:
-    case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true;
+    default:
+      return false;
+    case Intrinsic::memset:
+    case Intrinsic::memmove:
+    case Intrinsic::memcpy:
+    case Intrinsic::init_trampoline:
+    case Intrinsic::lifetime_end:
+      return true;
     }
   }
   return false;
@@ -111,14 +116,13 @@ static Value *getPointerOperand(Instruction *I) {
     return SI->getPointerOperand();
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
     return MI->getOperand(1);
-  IntrinsicInst *II = cast<IntrinsicInst>(I);
-  switch (II->getIntrinsicID()) {
-    default:
-      assert(false && "Unexpected intrinsic!");
-    case Intrinsic::init_trampoline:
-      return II->getOperand(1);
-    case Intrinsic::lifetime_end:
-      return II->getOperand(2);
+  
+  switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+  default: assert(false && "Unexpected intrinsic!");
+  case Intrinsic::init_trampoline:
+    return I->getOperand(1);
+  case Intrinsic::lifetime_end:
+    return I->getOperand(2);
   }
 }
 
@@ -135,15 +139,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
     Len = MI->getLength();
   } else {
-    IntrinsicInst *II = cast<IntrinsicInst>(I);
-    switch (II->getIntrinsicID()) {
-      default:
-        assert(false && "Unexpected intrinsic!");
-      case Intrinsic::init_trampoline:
-        return -1u;
-      case Intrinsic::lifetime_end:
-        Len = II->getOperand(1);
-        break;
+    switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+    default: assert(false && "Unexpected intrinsic!");
+    case Intrinsic::init_trampoline:
+      return -1u;
+    case Intrinsic::lifetime_end:
+      Len = I->getOperand(1);
+      break;
     }
   }
   if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 6f1c32c..222792b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -31,15 +31,18 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PHITransAddr.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -187,8 +190,11 @@ template <> struct DenseMapInfo<Expression> {
   static bool isEqual(const Expression &LHS, const Expression &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return true; }
 };
+  
+template <>
+struct isPodLike<Expression> { static const bool value = true; };
+
 }
 
 //===----------------------------------------------------------------------===//
@@ -488,21 +494,21 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
     // Check to see if we have a single dominating call instruction that is
     // identical to C.
     for (unsigned i = 0, e = deps.size(); i != e; ++i) {
-      const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
+      const NonLocalDepEntry *I = &deps[i];
       // Ignore non-local dependencies.
-      if (I->second.isNonLocal())
+      if (I->getResult().isNonLocal())
         continue;
 
       // We don't handle non-depedencies.  If we already have a call, reject
       // instruction dependencies.
-      if (I->second.isClobber() || cdep != 0) {
+      if (I->getResult().isClobber() || cdep != 0) {
         cdep = 0;
         break;
       }
 
-      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
+      CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
       // FIXME: All duplicated with non-local case.
-      if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
+      if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
         cdep = NonLocalDepCall;
         continue;
       }
@@ -987,27 +993,27 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
 }
 
 
-/// AnalyzeLoadFromClobberingStore - This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store.  This means
-/// that the store *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias.  Check this case to see if there is
-/// anything more we can do before we give up.  This returns -1 if we have to
-/// give up, or a byte number in the stored value of the piece that feeds the
-/// load.
-static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
+/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering memory write (store,
+/// memset, memcpy, memmove).  This means that the write *may* provide bits used
+/// by the load but we can't be sure because the pointers don't mustalias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up.  This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
+                                          Value *WritePtr,
+                                          uint64_t WriteSizeInBits,
                                           const TargetData &TD) {
   // If the loaded or stored value is an first class array or struct, don't try
   // to transform them.  We need to be able to bitcast to integer.
-  if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) ||
-      isa<StructType>(DepSI->getOperand(0)->getType()) ||
-      isa<ArrayType>(DepSI->getOperand(0)->getType()))
+  if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy))
     return -1;
   
   int64_t StoreOffset = 0, LoadOffset = 0;
-  Value *StoreBase = 
-    GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
+  Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
   Value *LoadBase = 
-    GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
+    GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
   if (StoreBase != LoadBase)
     return -1;
   
@@ -1018,12 +1024,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
 #if 0
     errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
     << "Base       = " << *StoreBase << "\n"
-    << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n"
-    << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
-    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
-    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
-    errs() << "'" << L->getParent()->getParent()->getName() << "'"
-    << *L->getParent();
+    << "Store Ptr  = " << *WritePtr << "\n"
+    << "Store Offs = " << StoreOffset << "\n"
+    << "Load Ptr   = " << *LoadPtr << "\n";
+    abort();
 #endif
     return -1;
   }
@@ -1033,12 +1037,11 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
   // must have gotten confused.
   // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
   // remove this check, as it is duplicated with what we have below.
-  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
-  uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
+  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
   
-  if ((StoreSize & 7) | (LoadSize & 7))
+  if ((WriteSizeInBits & 7) | (LoadSize & 7))
     return -1;
-  StoreSize >>= 3;  // Convert to bytes.
+  uint64_t StoreSize = WriteSizeInBits >> 3;  // Convert to bytes.
   LoadSize >>= 3;
   
   
@@ -1052,12 +1055,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
 #if 0
     errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
     << "Base       = " << *StoreBase << "\n"
-    << "Store Ptr  = " << *DepSI->getPointerOperand() << "\n"
-    << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
-    << "Load Ptr   = " << *L->getPointerOperand() << "\n"
-    << "Load Offs  = " << LoadOffset << " - " << *L << "\n\n";
-    errs() << "'" << L->getParent()->getParent()->getName() << "'"
-    << *L->getParent();
+    << "Store Ptr  = " << *WritePtr << "\n"
+    << "Store Offs = " << StoreOffset << "\n"
+    << "Load Ptr   = " << *LoadPtr << "\n";
+    abort();
 #endif
     return -1;
   }
@@ -1075,6 +1076,66 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
   return LoadOffset-StoreOffset;
 }  
 
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
+                                          StoreInst *DepSI,
+                                          const TargetData &TD) {
+  // Cannot handle reading from store of first-class aggregate yet.
+  if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
+      isa<ArrayType>(DepSI->getOperand(0)->getType()))
+    return -1;
+
+  Value *StorePtr = DepSI->getPointerOperand();
+  uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+  return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+                                        StorePtr, StoreSize, TD);
+}
+
+static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
+                                            MemIntrinsic *MI,
+                                            const TargetData &TD) {
+  // If the mem operation is a non-constant size, we can't handle it.
+  ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+  if (SizeCst == 0) return -1;
+  uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
+
+  // If this is memset, we just need to see if the offset is valid in the size
+  // of the memset..
+  if (MI->getIntrinsicID() == Intrinsic::memset)
+    return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+                                          MemSizeInBits, TD);
+  
+  // If we have a memcpy/memmove, the only case we can handle is if this is a
+  // copy from constant memory.  In that case, we can read directly from the
+  // constant memory.
+  MemTransferInst *MTI = cast<MemTransferInst>(MI);
+  
+  Constant *Src = dyn_cast<Constant>(MTI->getSource());
+  if (Src == 0) return -1;
+  
+  GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+  if (GV == 0 || !GV->isConstant()) return -1;
+  
+  // See if the access is within the bounds of the transfer.
+  int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+                                              MI->getDest(), MemSizeInBits, TD);
+  if (Offset == -1)
+    return Offset;
+  
+  // Otherwise, see if we can constant fold a load from the constant with the
+  // offset applied as appropriate.
+  Src = ConstantExpr::getBitCast(Src,
+                                 llvm::Type::getInt8PtrTy(Src->getContext()));
+  Constant *OffsetCst = 
+    ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+  if (ConstantFoldLoadFromConstPtr(Src, &TD))
+    return Offset;
+  return -1;
+}
+                                            
 
 /// GetStoreValueForLoad - This function is called when we have a
 /// memdep query of a load that ends up being a clobbering store.  This means
@@ -1089,50 +1150,134 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
   uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
   uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
   
+  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
   
   // Compute which bits of the stored value are being used by the load.  Convert
   // to an integer type to start with.
   if (isa<PointerType>(SrcVal->getType()))
-    SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
+    SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
   if (!isa<IntegerType>(SrcVal->getType()))
-    SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
-                             "tmp", InsertPt);
+    SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
+                                   "tmp");
   
   // Shift the bits to the least significant depending on endianness.
   unsigned ShiftAmt;
-  if (TD.isLittleEndian()) {
+  if (TD.isLittleEndian())
     ShiftAmt = Offset*8;
-  } else {
+  else
     ShiftAmt = (StoreSize-LoadSize-Offset)*8;
-  }
   
   if (ShiftAmt)
-    SrcVal = BinaryOperator::CreateLShr(SrcVal,
-                ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
+    SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
   
   if (LoadSize != StoreSize)
-    SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
-                           "tmp", InsertPt);
+    SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+                                 "tmp");
   
   return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
 }
 
+/// GetMemInstValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+                                     const Type *LoadTy, Instruction *InsertPt,
+                                     const TargetData &TD){
+  LLVMContext &Ctx = LoadTy->getContext();
+  uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+
+  IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+  
+  // We know that this method is only called when the mem transfer fully
+  // provides the bits for the load.
+  if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+    // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+    // independently of what the offset is.
+    Value *Val = MSI->getValue();
+    if (LoadSize != 1)
+      Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
+    
+    Value *OneElt = Val;
+    
+    // Splat the value out to the right number of bits.
+    for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
+      // If we can double the number of bytes set, do it.
+      if (NumBytesSet*2 <= LoadSize) {
+        Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
+        Val = Builder.CreateOr(Val, ShVal);
+        NumBytesSet <<= 1;
+        continue;
+      }
+      
+      // Otherwise insert one byte at a time.
+      Value *ShVal = Builder.CreateShl(Val, 1*8);
+      Val = Builder.CreateOr(OneElt, ShVal);
+      ++NumBytesSet;
+    }
+    
+    return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+  }
+ 
+  // Otherwise, this is a memcpy/memmove from a constant global.
+  MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+  Constant *Src = cast<Constant>(MTI->getSource());
+
+  // Otherwise, see if we can constant fold a load from the constant with the
+  // offset applied as appropriate.
+  Src = ConstantExpr::getBitCast(Src,
+                                 llvm::Type::getInt8PtrTy(Src->getContext()));
+  Constant *OffsetCst = 
+  ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+  Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+  Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+  return ConstantFoldLoadFromConstPtr(Src, &TD);
+}
+
+
+
 struct AvailableValueInBlock {
   /// BB - The basic block in question.
   BasicBlock *BB;
+  enum ValType {
+    SimpleVal,  // A simple offsetted value that is accessed.
+    MemIntrin   // A memory intrinsic which is loaded from.
+  };
+  
   /// V - The value that is live out of the block.
-  Value *V;
-  /// Offset - The byte offset in V that is interesting for the load query.
+  PointerIntPair<Value *, 1, ValType> Val;
+  
+  /// Offset - The byte offset in Val that is interesting for the load query.
   unsigned Offset;
   
   static AvailableValueInBlock get(BasicBlock *BB, Value *V,
                                    unsigned Offset = 0) {
     AvailableValueInBlock Res;
     Res.BB = BB;
-    Res.V = V;
+    Res.Val.setPointer(V);
+    Res.Val.setInt(SimpleVal);
+    Res.Offset = Offset;
+    return Res;
+  }
+
+  static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+                                     unsigned Offset = 0) {
+    AvailableValueInBlock Res;
+    Res.BB = BB;
+    Res.Val.setPointer(MI);
+    Res.Val.setInt(MemIntrin);
     Res.Offset = Offset;
     return Res;
   }
+  
+  bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+  Value *getSimpleValue() const {
+    assert(isSimpleValue() && "Wrong accessor");
+    return Val.getPointer();
+  }
+  
+  MemIntrinsic *getMemIntrinValue() const {
+    assert(!isSimpleValue() && "Wrong accessor");
+    return cast<MemIntrinsic>(Val.getPointer());
+  }
 };
 
 /// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
@@ -1149,30 +1294,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   const Type *LoadTy = LI->getType();
   
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
-    BasicBlock *BB = ValuesPerBlock[i].BB;
-    Value *AvailableVal = ValuesPerBlock[i].V;
-    unsigned Offset = ValuesPerBlock[i].Offset;
+    const AvailableValueInBlock &AV = ValuesPerBlock[i];
+    BasicBlock *BB = AV.BB;
     
     if (SSAUpdate.HasValueForBlock(BB))
       continue;
-    
-    if (AvailableVal->getType() != LoadTy) {
-      assert(TD && "Need target data to handle type mismatch case");
-      AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
-                                          BB->getTerminator(), *TD);
-      
-      if (Offset) {
-        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
-              << *ValuesPerBlock[i].V << '\n'
+
+    unsigned Offset = AV.Offset;
+
+    Value *AvailableVal;
+    if (AV.isSimpleValue()) {
+      AvailableVal = AV.getSimpleValue();
+      if (AvailableVal->getType() != LoadTy) {
+        assert(TD && "Need target data to handle type mismatch case");
+        AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
+                                            BB->getTerminator(), *TD);
+        
+        DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << "  "
+              << *AV.getSimpleValue() << '\n'
               << *AvailableVal << '\n' << "\n\n\n");
       }
-      
-      
-      DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
-            << *ValuesPerBlock[i].V << '\n'
+    } else {
+      AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
+                                            LoadTy, BB->getTerminator(), *TD);
+      DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+            << "  " << *AV.getMemIntrinValue() << '\n'
             << *AvailableVal << '\n' << "\n\n\n");
     }
-    
     SSAUpdate.AddAvailableValue(BB, AvailableVal);
   }
   
@@ -1187,12 +1335,18 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
   return V;
 }
 
+static bool isLifetimeStart(Instruction *Inst) {
+  if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+    return II->getIntrinsicID() == Intrinsic::lifetime_start;
+  return false;
+}
+
 /// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
 /// non-local by performing PHI construction.
 bool GVN::processNonLocalLoad(LoadInst *LI,
                               SmallVectorImpl<Instruction*> &toErase) {
   // Find the non-local dependencies of the load.
-  SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
+  SmallVector<NonLocalDepEntry, 64> Deps;
   MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
                                    Deps);
   //DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
@@ -1206,11 +1360,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
 
   // If we had a phi translation failure, we'll have a single entry which is a
   // clobber in the current block.  Reject this early.
-  if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+  if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
     DEBUG(
       errs() << "GVN: non-local load ";
       WriteAsOperand(errs(), LI);
-      errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
+      errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
     );
     return false;
   }
@@ -1225,18 +1379,24 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   const TargetData *TD = 0;
   
   for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
-    BasicBlock *DepBB = Deps[i].first;
-    MemDepResult DepInfo = Deps[i].second;
+    BasicBlock *DepBB = Deps[i].getBB();
+    MemDepResult DepInfo = Deps[i].getResult();
 
     if (DepInfo.isClobber()) {
+      // The address being loaded in this non-local block may not be the same as
+      // the pointer operand of the load if PHI translation occurs.  Make sure
+      // to consider the right address.
+      Value *Address = Deps[i].getAddress();
+      
       // If the dependence is to a store that writes to a superset of the bits
       // read by the load, we can extract the bits we need for the load from the
       // stored value.
       if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
         if (TD == 0)
           TD = getAnalysisIfAvailable<TargetData>();
-        if (TD) {
-          int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
+        if (TD && Address) {
+          int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
+                                                      DepSI, *TD);
           if (Offset != -1) {
             ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                                            DepSI->getOperand(0),
@@ -1245,8 +1405,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
           }
         }
       }
+
+      // If the clobbering value is a memset/memcpy/memmove, see if we can
+      // forward a value on from it.
+      if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+        if (TD == 0)
+          TD = getAnalysisIfAvailable<TargetData>();
+        if (TD && Address) {
+          int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+                                                        DepMI, *TD);
+          if (Offset != -1) {
+            ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+                                                                  Offset));
+            continue;
+          }            
+        }
+      }
       
-      // FIXME: Handle memset/memcpy.
       UnavailableBlocks.push_back(DepBB);
       continue;
     }
@@ -1254,21 +1429,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
     Instruction *DepInst = DepInfo.getInst();
 
     // Loading the allocation -> undef.
-    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
+    if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
+        // Loading immediately after lifetime begin -> undef.
+        isLifetimeStart(DepInst)) {
       ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
                                              UndefValue::get(LI->getType())));
       continue;
     }
     
-    // Loading immediately after lifetime begin or end -> undef.
-    if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-          II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
-                                             UndefValue::get(LI->getType())));
-      }
-    }
-
     if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
       // Reject loads and stores that are to the same address but are of
       // different types if we have to.
@@ -1378,19 +1546,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   // to eliminate LI even if we insert uses in the other predecessors, we will
   // end up increasing code size.  Reject this by scanning for LI.
   for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-    if (ValuesPerBlock[i].V == LI)
+    if (ValuesPerBlock[i].isSimpleValue() &&
+        ValuesPerBlock[i].getSimpleValue() == LI)
       return false;
 
+  // FIXME: It is extremely unclear what this loop is doing, other than
+  // artificially restricting loadpre.
   if (isSinglePred) {
     bool isHot = false;
-    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
-      if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
+    for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+      const AvailableValueInBlock &AV = ValuesPerBlock[i];
+      if (AV.isSimpleValue())
         // "Hot" Instruction is in some loop (because it dominates its dep.
         // instruction).
-        if (DT->dominates(LI, I)) {
-          isHot = true;
-          break;
-        }
+        if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
+          if (DT->dominates(LI, I)) {
+            isHot = true;
+            break;
+          }
+    }
 
     // We are interested only in "hot" instructions. We don't want to do any
     // mis-optimizations here.
@@ -1435,29 +1609,43 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
   // Do PHI translation to get its value in the predecessor if necessary.  The
   // returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
   //
-  // FIXME: This may insert a computation, but we don't tell scalar GVN
-  // optimization stuff about it.  How do we do this?
   SmallVector<Instruction*, 8> NewInsts;
-  Value *LoadPtr = 0;
   
   // If all preds have a single successor, then we know it is safe to insert the
   // load on the pred (?!?), so we can insert code to materialize the pointer if
   // it is not available.
+  PHITransAddr Address(LI->getOperand(0), TD);
+  Value *LoadPtr = 0;
   if (allSingleSucc) {
-    LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB,
-                                             UnavailablePred, TD, *DT,NewInsts);
+    LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+                                                *DT, NewInsts);
   } else {
-    LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB,
-                                                 UnavailablePred, TD, *DT);
-  }
+    Address.PHITranslateValue(LoadBB, UnavailablePred);
+    LoadPtr = Address.getAddr();
     
+    // Make sure the value is live in the predecessor.
+    if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr))
+      if (!DT->dominates(Inst->getParent(), UnavailablePred))
+        LoadPtr = 0;
+  }
+
   // If we couldn't find or insert a computation of this phi translated value,
   // we fail PRE.
   if (LoadPtr == 0) {
+    assert(NewInsts.empty() && "Shouldn't insert insts on failure");
     DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
                  << *LI->getOperand(0) << "\n");
     return false;
   }
+
+  // Assign value numbers to these new instructions.
+  for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+    // FIXME: We really _ought_ to insert these value numbers into their 
+    // parent's availability map.  However, in doing so, we risk getting into
+    // ordering issues.  If a block hasn't been processed yet, we would be
+    // marking a value as AVAIL-IN, which isn't what we intend.
+    VN.lookup_or_add(NewInsts[i]);
+  }
   
   // Make sure it is valid to move this load here.  We have to watch out for:
   //  @1 = getelementptr (i8* p, ...
@@ -1517,11 +1705,6 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
 
   // If the value isn't available, don't do anything!
   if (Dep.isClobber()) {
-    // FIXME: We should handle memset/memcpy/memmove as dependent instructions
-    // to forward the value if available.
-    //if (isa<MemIntrinsic>(Dep.getInst()))
-    //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n";
-    
     // Check to see if we have something like this:
     //   store i32 123, i32* %P
     //   %A = bitcast i32* %P to i8*
@@ -1532,25 +1715,42 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     // a common base + constant offset, and if the previous store (or memset)
     // completely covers this load.  This sort of thing can happen in bitfield
     // access code.
+    Value *AvailVal = 0;
     if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
       if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
-        int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
-        if (Offset != -1) {
-          Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
-                                                 L->getType(), L, *TD);
-          DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
-                       << *AvailVal << '\n' << *L << "\n\n\n");
-    
-          // Replace the load!
-          L->replaceAllUsesWith(AvailVal);
-          if (isa<PointerType>(AvailVal->getType()))
-            MD->invalidateCachedPointerInfo(AvailVal);
-          toErase.push_back(L);
-          NumGVNLoad++;
-          return true;
-        }
+        int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+                                                    L->getPointerOperand(),
+                                                    DepSI, *TD);
+        if (Offset != -1)
+          AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+                                          L->getType(), L, *TD);
       }
     
+    // If the clobbering value is a memset/memcpy/memmove, see if we can forward
+    // a value on from it.
+    if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+      if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+        int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+                                                      L->getPointerOperand(),
+                                                      DepMI, *TD);
+        if (Offset != -1)
+          AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
+      }
+    }
+        
+    if (AvailVal) {
+      DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+            << *AvailVal << '\n' << *L << "\n\n\n");
+      
+      // Replace the load!
+      L->replaceAllUsesWith(AvailVal);
+      if (isa<PointerType>(AvailVal->getType()))
+        MD->invalidateCachedPointerInfo(AvailVal);
+      toErase.push_back(L);
+      NumGVNLoad++;
+      return true;
+    }
+        
     DEBUG(
       // fast print dep, using operator<< on instruction would be too slow
       errs() << "GVN: load ";
@@ -1635,11 +1835,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
     return true;
   }
   
-  // If this load occurs either right after a lifetime begin or a lifetime end,
+  // If this load occurs either right after a lifetime begin,
   // then the loaded value is undefined.
   if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
-    if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-        II->getIntrinsicID() == Intrinsic::lifetime_end) {
+    if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
       L->replaceAllUsesWith(UndefValue::get(L->getType()));
       toErase.push_back(L);
       NumGVNLoad++;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index d12ad81..b9c536f 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -8585,25 +8585,36 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
   if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
     if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
       uint32_t BitWidth = ITy->getBitWidth();
-      if (BitWidth > 1) {
-        Value *LHS = ICI->getOperand(0);
-        Value *RHS = ICI->getOperand(1);
-
-        APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
-        APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
-        APInt TypeMask(APInt::getHighBitsSet(BitWidth, BitWidth-1));
-        ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
-        ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
-
-        if (KnownZeroLHS.countLeadingOnes() == BitWidth-1 &&
-            KnownZeroRHS.countLeadingOnes() == BitWidth-1) {
+      Value *LHS = ICI->getOperand(0);
+      Value *RHS = ICI->getOperand(1);
+
+      APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+      APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+      APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+      ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
+      ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+
+      if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
+        APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+        APInt UnknownBit = ~KnownBits;
+        if (UnknownBit.countPopulation() == 1) {
           if (!DoXform) return ICI;
 
-          Value *Xor = Builder->CreateXor(LHS, RHS);
+          Value *Result = Builder->CreateXor(LHS, RHS);
+
+          // Mask off any bits that are set and won't be shifted away.
+          if (KnownOneLHS.uge(UnknownBit))
+            Result = Builder->CreateAnd(Result,
+                                        ConstantInt::get(ITy, UnknownBit));
+
+          // Shift the bit we're testing down to the lsb.
+          Result = Builder->CreateLShr(
+               Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
+
           if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
-            Xor = Builder->CreateXor(Xor, ConstantInt::get(ITy, 1));
-          Xor->takeName(ICI);
-          return ReplaceInstUsesWith(CI, Xor);
+            Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+          Result->takeName(ICI);
+          return ReplaceInstUsesWith(CI, Result);
         }
       }
     }
@@ -11189,8 +11200,9 @@ namespace llvm {
       return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
              LHS.Width == RHS.Width;
     }
-    static bool isPod() { return true; }
   };
+  template <>
+  struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
 }
 
 
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 1b93f34..d58b9c9 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -718,6 +718,11 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
       if (PredSI->getSuccessor(PredCase) != DestBB &&
           DestSI->getSuccessor(i) != DestBB)
         continue;
+      
+      // Do not forward this if it already goes to this destination, this would
+      // be an infinite loop.
+      if (PredSI->getSuccessor(PredCase) == DestSucc)
+        continue;
 
       // Otherwise, we're safe to make the change.  Make sure that the edge from
       // DestSI to DestSucc is not critical and has no PHI nodes.
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 5511387..42a8fdc 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -160,16 +160,17 @@ namespace {
 
       // Because the exit block is not in the loop, we know we have to get _at
       // least_ its immediate dominator.
-      do {
-        // Get next Immediate Dominator.
-        IDom = IDom->getIDom();
-
+      IDom = IDom->getIDom();
+      
+      while (IDom && IDom != BlockInLoopNode) {
         // If we have got to the header of the loop, then the instructions block
         // did not dominate the exit node, so we can't hoist it.
         if (IDom->getBlock() == LoopHeader)
           return false;
 
-      } while (IDom != BlockInLoopNode);
+        // Get next Immediate Dominator.
+        IDom = IDom->getIDom();
+      };
 
       return true;
     }
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 564c7ac..85cc712 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -24,18 +24,14 @@
 #include "llvm/Constants.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/Type.h"
 #include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/IVUsers.h"
-#include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CFG.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ValueHandle.h"
@@ -85,8 +81,6 @@ namespace {
 
   class LoopStrengthReduce : public LoopPass {
     IVUsers *IU;
-    LoopInfo *LI;
-    DominatorTree *DT;
     ScalarEvolution *SE;
     bool Changed;
 
@@ -94,10 +88,6 @@ namespace {
     /// particular stride.
     std::map<const SCEV *, IVsOfOneStride> IVsByStride;
 
-    /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
-    /// reused (nor should they be rewritten to reuse other strides).
-    SmallSet<const SCEV *, 4> StrideNoReuse;
-
     /// DeadInsts - Keep track of instructions we may have made dead, so that
     /// we can remove them after we are done working.
     SmallVector<WeakVH, 16> DeadInsts;
@@ -109,8 +99,7 @@ namespace {
   public:
     static char ID; // Pass ID, replacement for typeid
     explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
-      LoopPass(&ID), TLI(tli) {
-    }
+      LoopPass(&ID), TLI(tli) {}
 
     bool runOnLoop(Loop *L, LPPassManager &LPM);
 
@@ -118,13 +107,11 @@ namespace {
       // We split critical edges, so we change the CFG.  However, we do update
       // many analyses if they are around.
       AU.addPreservedID(LoopSimplifyID);
-      AU.addPreserved<LoopInfo>();
-      AU.addPreserved<DominanceFrontier>();
-      AU.addPreserved<DominatorTree>();
+      AU.addPreserved("loops");
+      AU.addPreserved("domfrontier");
+      AU.addPreserved("domtree");
 
       AU.addRequiredID(LoopSimplifyID);
-      AU.addRequired<LoopInfo>();
-      AU.addRequired<DominatorTree>();
       AU.addRequired<ScalarEvolution>();
       AU.addPreserved<ScalarEvolution>();
       AU.addRequired<IVUsers>();
@@ -228,19 +215,17 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
   if (DeadInsts.empty()) return;
 
   while (!DeadInsts.empty()) {
-    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
-    DeadInsts.pop_back();
+    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
 
     if (I == 0 || !isInstructionTriviallyDead(I))
       continue;
 
-    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
+    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
       if (Instruction *U = dyn_cast<Instruction>(*OI)) {
         *OI = 0;
         if (U->use_empty())
           DeadInsts.push_back(U);
       }
-    }
 
     I->eraseFromParent();
     Changed = true;
@@ -265,7 +250,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
       if (newLoop == L)
         return false;
       // if newLoop is an outer loop of L, this is OK.
-      if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
+      if (newLoop->contains(L->getHeader()))
         return false;
     }
     return true;
@@ -338,9 +323,6 @@ namespace {
   /// BasedUser - For a particular base value, keep information about how we've
   /// partitioned the expression so far.
   struct BasedUser {
-    /// SE - The current ScalarEvolution object.
-    ScalarEvolution *SE;
-
     /// Base - The Base value for the PHI node that needs to be inserted for
     /// this use.  As the use is processed, information gets moved from this
     /// field to the Imm field (below).  BasedUser values are sorted by this
@@ -372,9 +354,9 @@ namespace {
     bool isUseOfPostIncrementedValue;
 
     BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
-      : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
+      : Base(IVSU.getOffset()), Inst(IVSU.getUser()),
         OperandValToReplace(IVSU.getOperandValToReplace()),
-        Imm(SE->getIntegerSCEV(0, Base->getType())),
+        Imm(se->getIntegerSCEV(0, Base->getType())),
         isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
 
     // Once we rewrite the code to insert the new IVs we want, update the
@@ -383,14 +365,14 @@ namespace {
     void RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
                                         Instruction *InsertPt,
                                        SCEVExpander &Rewriter, Loop *L, Pass *P,
-                                        LoopInfo &LI,
-                                        SmallVectorImpl<WeakVH> &DeadInsts);
+                                        SmallVectorImpl<WeakVH> &DeadInsts,
+                                        ScalarEvolution *SE);
 
     Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
                                        const Type *Ty,
                                        SCEVExpander &Rewriter,
-                                       Instruction *IP, Loop *L,
-                                       LoopInfo &LI);
+                                       Instruction *IP,
+                                       ScalarEvolution *SE);
     void dump() const;
   };
 }
@@ -404,27 +386,12 @@ void BasedUser::dump() const {
 Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
                                               const Type *Ty,
                                               SCEVExpander &Rewriter,
-                                              Instruction *IP, Loop *L,
-                                              LoopInfo &LI) {
-  // Figure out where we *really* want to insert this code.  In particular, if
-  // the user is inside of a loop that is nested inside of L, we really don't
-  // want to insert this expression before the user, we'd rather pull it out as
-  // many loops as possible.
-  Instruction *BaseInsertPt = IP;
-
-  // Figure out the most-nested loop that IP is in.
-  Loop *InsertLoop = LI.getLoopFor(IP->getParent());
-
-  // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
-  // the preheader of the outer-most loop where NewBase is not loop invariant.
-  if (L->contains(IP->getParent()))
-    while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) {
-      BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
-      InsertLoop = InsertLoop->getParentLoop();
-    }
-
-  Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
+                                              Instruction *IP,
+                                              ScalarEvolution *SE) {
+  Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP);
 
+  // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to
+  // re-analyze it.
   const SCEV *NewValSCEV = SE->getUnknown(Base);
 
   // Always emit the immediate into the same block as the user.
@@ -443,8 +410,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
 void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
                                                Instruction *NewBasePt,
                                       SCEVExpander &Rewriter, Loop *L, Pass *P,
-                                      LoopInfo &LI,
-                                      SmallVectorImpl<WeakVH> &DeadInsts) {
+                                      SmallVectorImpl<WeakVH> &DeadInsts,
+                                      ScalarEvolution *SE) {
   if (!isa<PHINode>(Inst)) {
     // By default, insert code at the user instruction.
     BasicBlock::iterator InsertPt = Inst;
@@ -473,7 +440,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
     }
     Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
                                                 OperandValToReplace->getType(),
-                                                Rewriter, InsertPt, L, LI);
+                                                Rewriter, InsertPt, SE);
     // Replace the use of the operand Value with the new Phi we just created.
     Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
 
@@ -535,7 +502,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
                                 PHIPred->getTerminator() :
                                 OldLoc->getParent()->getTerminator();
         Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
-                                           Rewriter, InsertPt, L, LI);
+                                           Rewriter, InsertPt, SE);
 
         DEBUG(errs() << "      Changing PHI use to ");
         DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false));
@@ -1011,17 +978,13 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
                                 const SCEV *const &Stride,
                                 IVExpr &IV, const Type *Ty,
                                 const std::vector<BasedUser>& UsersToProcess) {
-  if (StrideNoReuse.count(Stride))
-    return SE->getIntegerSCEV(0, Stride->getType());
-
   if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
     int64_t SInt = SC->getValue()->getSExtValue();
     for (unsigned NewStride = 0, e = IU->StrideOrder.size();
          NewStride != e; ++NewStride) {
       std::map<const SCEV *, IVsOfOneStride>::iterator SI =
                 IVsByStride.find(IU->StrideOrder[NewStride]);
-      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
-          StrideNoReuse.count(SI->first))
+      if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
         continue;
       // The other stride has no uses, don't reuse it.
       std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
@@ -1780,8 +1743,8 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
         RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
 
       User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
-                                          Rewriter, L, this, *LI,
-                                          DeadInsts);
+                                          Rewriter, L, this,
+                                          DeadInsts, SE);
 
       // Mark old value we replaced as possibly dead, so that it is eliminated
       // if we just replaced the last use of that value.
@@ -2745,8 +2708,6 @@ bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
 
 bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
   IU = &getAnalysis<IVUsers>();
-  LI = &getAnalysis<LoopInfo>();
-  DT = &getAnalysis<DominatorTree>();
   SE = &getAnalysis<ScalarEvolution>();
   Changed = false;
 
@@ -2792,15 +2753,14 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
     // After all sharing is done, see if we can adjust the loop to test against
     // zero instead of counting up to a maximum.  This is usually faster.
     OptimizeLoopCountIV(L);
-  }
 
-  // We're done analyzing this loop; release all the state we built up for it.
-  IVsByStride.clear();
-  StrideNoReuse.clear();
+    // We're done analyzing this loop; release all the state we built up for it.
+    IVsByStride.clear();
 
-  // Clean up after ourselves
-  if (!DeadInsts.empty())
-    DeleteTriviallyDeadInstructions();
+    // Clean up after ourselves
+    if (!DeadInsts.empty())
+      DeleteTriviallyDeadInstructions();
+  }
 
   // At this point, it is worth checking to see if any recurrence PHIs are also
   // dead, so that we can remove them as well.
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 38d267a..b7adfdc 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -404,12 +404,13 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
 bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
 
   initLoopData();
-  Function *F = loopHeader->getParent();
 
   // If LoopSimplify was unable to form a preheader, don't do any unswitching.
   if (!loopPreheader)
     return false;
 
+  Function *F = loopHeader->getParent();
+
   // If the condition is trivial, always unswitch.  There is no code growth for
   // this case.
   if (!IsTrivialUnswitchCondition(LoopCond)) {
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index 001267a..dbc82e1 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -19,7 +19,6 @@
 #include "llvm/Constants.h"
 #include "llvm/DerivedTypes.h"
 #include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Operator.h"
 #include "llvm/Value.h"
 #include "llvm/ADT/DenseMap.h"
@@ -155,8 +154,10 @@ template <> struct DenseMapInfo<Expression> {
   static bool isEqual(const Expression &LHS, const Expression &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return true; }
 };
+template <>
+struct isPodLike<Expression> { static const bool value = true; };
+
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index ae6ad74..b040a27 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -105,7 +105,7 @@ namespace {
     void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
                                AllocaInfo &Info);
     void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
-                         AllocaInfo &Info);
+                          AllocaInfo &Info);
     void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
                                         unsigned OpNo, AllocaInfo &Info);
     void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
@@ -362,7 +362,6 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
 
   // Now that we have created the alloca instructions that we want to use,
   // expand the getelementptr instructions to use them.
-  //
   while (!AI->use_empty()) {
     Instruction *User = cast<Instruction>(AI->use_back());
     if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
@@ -450,11 +449,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
   NumReplaced++;
 }
 
-
 /// isSafeElementUse - Check to see if this use is an allowed use for a
 /// getelementptr instruction of an array aggregate allocation.  isFirstElt
 /// indicates whether Ptr is known to the start of the aggregate.
-///
 void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
                             AllocaInfo &Info) {
   for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
@@ -503,7 +500,6 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
         }
       }
       
-      
       isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
       if (Info.isUnsafe) return;
       break;
@@ -543,9 +539,8 @@ static bool AllUsersAreLoads(Value *Ptr) {
   return true;
 }
 
-/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
+/// isSafeUseOfAllocation - Check if this user is an allowed use for an
 /// aggregate allocation.
-///
 void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
                                  AllocaInfo &Info) {
   if (BitCastInst *C = dyn_cast<BitCastInst>(User))
@@ -614,7 +609,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
       // integer. Specifically, consider A[0][i]. We cannot know that the user
       // isn't doing invalid things like allowing i to index an out-of-range
       // subscript that accesses A[1].  Because of this, we have to reject SROA
-      // of any accesses into structs where any of the components are variables. 
+      // of any accesses into structs where any of the components are variables.
       if (IdxVal->getZExtValue() >= AT->getNumElements())
         return MarkUnsafe(Info);
     } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
@@ -628,7 +623,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
   return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
 }
 
-/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory
+/// isSafeMemIntrinsicOnAllocation - Check if the specified memory
 /// intrinsic can be promoted by SROA.  At this point, we know that the operand
 /// of the memintrinsic is a pointer to the beginning of the allocation.
 void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
@@ -656,8 +651,8 @@ void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
   }
 }
 
-/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast
-/// are 
+/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast
+/// from an alloca are safe for SROA of that alloca.
 void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI,
                                           AllocaInfo &Info) {
   for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
@@ -773,6 +768,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
       OtherPtr = MTI->getRawDest();
     }
   }
+
+  // Keep track of the other intrinsic argument, so it can be removed if it
+  // is dead when the intrinsic is replaced.
+  Value *PossiblyDead = OtherPtr;
   
   // If there is an other pointer, we want to convert it to the same pointer
   // type as AI has, so we can GEP through it safely.
@@ -926,9 +925,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
     }
   }
   MI->eraseFromParent();
+  if (PossiblyDead)
+    RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead);
 }
 
-/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
 /// overwrites the entire allocation.  Extract out the pieces of the stored
 /// integer and store them individually.
 void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
@@ -1052,7 +1053,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
   SI->eraseFromParent();
 }
 
-/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to
+/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
 /// an integer.  Load the individual pieces to form the aggregate value.
 void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts) {
@@ -1186,7 +1187,6 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
 /// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
 /// an aggregate can be broken down into elements.  Return 0 if not, 3 if safe,
 /// or 1 if safe after canonicalization has been performed.
-///
 int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
   // Loop over the use list of the alloca.  We can only transform it if all of
   // the users are safe to transform.
@@ -1215,7 +1215,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
   return Info.needsCleanup ? 1 : 3;
 }
 
-/// CleanupGEP - GEP is used by an Alloca, which can be prompted after the GEP
+/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP
 /// is canonicalized here.
 void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
   gep_type_iterator I = gep_type_begin(GEPI);
@@ -1347,7 +1347,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
 }
 
 /// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
-/// its accesses to use a to single vector type, return true, and set VecTy to
+/// its accesses to a single vector type, return true and set VecTy to
 /// the new type.  If we could convert the alloca into a single promotable
 /// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
 /// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
@@ -1355,7 +1355,6 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
 ///
 /// If we see at least one access to the value that is as a vector type, set the
 /// SawVec flag.
-///
 bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                               bool &SawVec, uint64_t Offset,
                               unsigned AllocaSize) {
@@ -1438,7 +1437,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
   return true;
 }
 
-
 /// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
 /// directly.  This happens when we are converting an "integer union" to a
 /// single integer scalar, or when we are converting a "vector union" to a
@@ -1481,7 +1479,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
     if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
       assert(SI->getOperand(0) != Ptr && "Consistency error!");
       // FIXME: Remove once builder has Twine API.
-      Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
+      Value *Old = Builder.CreateLoad(NewAI,
+                                      (NewAI->getName()+".in").str().c_str());
       Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                              Builder);
       Builder.CreateStore(New, NewAI);
@@ -1506,7 +1505,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
             APVal |= APVal << 8;
         
         // FIXME: Remove once builder has Twine API.
-        Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
+        Value *Old = Builder.CreateLoad(NewAI,
+                                        (NewAI->getName()+".in").str().c_str());
         Value *New = ConvertScalar_InsertValue(
                                     ConstantInt::get(User->getContext(), APVal),
                                                Old, Offset, Builder);
@@ -1679,7 +1679,6 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
   return FromVal;
 }
 
-
 /// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
 /// or vector value "Old" at the offset specified by Offset.
 ///
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 13077fe..5acd6aa 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -88,7 +88,7 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
     if (!isa<ReturnInst>(Body->getTerminator()))
       break;
 
-    Instruction *NextInst = next(BasicBlock::iterator(Call));
+    Instruction *NextInst = llvm::next(BasicBlock::iterator(Call));
 
     // Inline the call, taking care of what code ends up where.
     NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index f9b929c..6fd884b 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -128,8 +128,7 @@ public:
 
 /// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
 Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) {
-  return
-        B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");
+  return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");
 }
 
 /// EmitStrLen - Emit a call to the strlen function to the builder, for the
@@ -157,27 +156,25 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
 Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
                                        unsigned Align, IRBuilder<> &B) {
   Module *M = Caller->getParent();
-  Intrinsic::ID IID = Intrinsic::memcpy;
-  const Type *Tys[1];
-  Tys[0] = Len->getType();
-  Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1);
-  return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len,
+  const Type *Ty = Len->getType();
+  Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1);
+  Dst = CastToCStr(Dst, B);
+  Src = CastToCStr(Src, B);
+  return B.CreateCall4(MemCpy, Dst, Src, Len,
                        ConstantInt::get(Type::getInt32Ty(*Context), Align));
 }
 
-/// EmitMemMOve - Emit a call to the memmove function to the builder.  This
+/// EmitMemMove - Emit a call to the memmove function to the builder.  This
 /// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
 Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len,
 					unsigned Align, IRBuilder<> &B) {
   Module *M = Caller->getParent();
-  Intrinsic::ID IID = Intrinsic::memmove;
-  const Type *Tys[1];
-  Tys[0] = TD->getIntPtrType(*Context);
-  Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
-  Value *D = CastToCStr(Dst, B);
-  Value *S = CastToCStr(Src, B);
+  const Type *Ty = TD->getIntPtrType(*Context);
+  Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+  Dst = CastToCStr(Dst, B);
+  Src = CastToCStr(Src, B);
   Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align);
-  return B.CreateCall4(MemMove, D, S, Len, A);
+  return B.CreateCall4(MemMove, Dst, Src, Len, A);
 }
 
 /// EmitMemChr - Emit a call to the memchr function.  This assumes that Ptr is
@@ -2647,10 +2644,11 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
 //   * strcspn("",a) -> 0
 //   * strcspn(s,"") -> strlen(a)
 //
-// strstr:
+// strstr: (PR5783)
 //   * strstr(x,x)  -> x
-//   * strstr(s1,s2) -> offset_of_s2_in(s1)
-//       (if s1 and s2 are constant strings)
+//   * strstr(x, "") -> x
+//   * strstr(x, "a") -> strchr(x, 'a')
+//   * strstr(s1,s2) -> result   (if s1 and s2 are constant strings)
 //
 // tan, tanf, tanl:
 //   * tan(atan(x)) -> x
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2974592..2962e84 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -16,7 +16,6 @@
 #include "llvm/Function.h"
 #include "llvm/Instructions.h"
 #include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
 #include "llvm/Constant.h"
 #include "llvm/Type.h"
 #include "llvm/Analysis/AliasAnalysis.h"
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index aef0f5f..7a37aa3 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -603,3 +603,65 @@ bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I,
   return true;
 }
 
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+  bool Changed = false;
+
+  // This implementation doesn't currently consider undef operands
+  // specially. Theroetically, two phis which are identical except for
+  // one having an undef where the other doesn't could be collapsed.
+
+  // Map from PHI hash values to PHI nodes. If multiple PHIs have
+  // the same hash value, the element is the first PHI in the
+  // linked list in CollisionMap.
+  DenseMap<uintptr_t, PHINode *> HashMap;
+
+  // Maintain linked lists of PHI nodes with common hash values.
+  DenseMap<PHINode *, PHINode *> CollisionMap;
+
+  // Examine each PHI.
+  for (BasicBlock::iterator I = BB->begin();
+       PHINode *PN = dyn_cast<PHINode>(I++); ) {
+    // Compute a hash value on the operands. Instcombine will likely have sorted
+    // them, which helps expose duplicates, but we have to check all the
+    // operands to be safe in case instcombine hasn't run.
+    uintptr_t Hash = 0;
+    for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+      // This hash algorithm is quite weak as hash functions go, but it seems
+      // to do a good enough job for this particular purpose, and is very quick.
+      Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+    }
+    // If we've never seen this hash value before, it's a unique PHI.
+    std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+      HashMap.insert(std::make_pair(Hash, PN));
+    if (Pair.second) continue;
+    // Otherwise it's either a duplicate or a hash collision.
+    for (PHINode *OtherPN = Pair.first->second; ; ) {
+      if (OtherPN->isIdenticalTo(PN)) {
+        // A duplicate. Replace this PHI with its duplicate.
+        PN->replaceAllUsesWith(OtherPN);
+        PN->eraseFromParent();
+        Changed = true;
+        break;
+      }
+      // A non-duplicate hash collision.
+      DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+      if (I == CollisionMap.end()) {
+        // Set this PHI to be the head of the linked list of colliding PHIs.
+        PHINode *Old = Pair.first->second;
+        Pair.first->second = PN;
+        CollisionMap[PN] = Old;
+        break;
+      }
+      // Procede to the next PHI in the list.
+      OtherPN = I->second;
+    }
+  }
+
+  return Changed;
+}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 8c18b59..743bb6e 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -244,7 +244,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
 
   // Merge case into clusters
   if (Cases.size()>=2)
-    for (CaseItr I=Cases.begin(), J=next(Cases.begin()); J!=Cases.end(); ) {
+    for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
       int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
       int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
       BasicBlock* nextBB = J->BB;
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index e25f9e2..846e432 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -55,7 +55,6 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
   static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return true; }
 };
 }
 
@@ -102,7 +101,7 @@ namespace {
   public:
     typedef std::vector<Value *> ValVector;
     
-    RenamePassData() {}
+    RenamePassData() : BB(NULL), Pred(NULL), Values() {}
     RenamePassData(BasicBlock *B, BasicBlock *P,
                    const ValVector &V) : BB(B), Pred(P), Values(V) {}
     BasicBlock *BB;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 8a07c35..ba41bf9 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -295,10 +295,14 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
       InsertedVal = SingularValue;
     }
 
+    // Either path through the 'if' should have set insertedVal -> SingularVal.
+    assert((InsertedVal == SingularValue || isa<UndefValue>(InsertedVal)) &&
+           "RAUW didn't change InsertedVal to be SingularVal");
+
     // Drop the entries we added in IncomingPredInfo to restore the stack.
     IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
                            IncomingPredInfo.end());
-    return InsertedVal;
+    return SingularValue;
   }
 
   // Otherwise, we do need a PHI: insert one now if we don't already have one.
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 89b0bd9..d7ca45e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1589,69 +1589,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
   return true;
 }
 
-/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
-/// nodes in this block. This doesn't try to be clever about PHI nodes
-/// which differ only in the order of the incoming values, but instcombine
-/// orders them so it usually won't matter.
-///
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
-  bool Changed = false;
-  
-  // This implementation doesn't currently consider undef operands
-  // specially. Theroetically, two phis which are identical except for
-  // one having an undef where the other doesn't could be collapsed.
-
-  // Map from PHI hash values to PHI nodes. If multiple PHIs have
-  // the same hash value, the element is the first PHI in the
-  // linked list in CollisionMap.
-  DenseMap<uintptr_t, PHINode *> HashMap;
-
-  // Maintain linked lists of PHI nodes with common hash values.
-  DenseMap<PHINode *, PHINode *> CollisionMap;
-
-  // Examine each PHI.
-  for (BasicBlock::iterator I = BB->begin();
-       PHINode *PN = dyn_cast<PHINode>(I++); ) {
-    // Compute a hash value on the operands. Instcombine will likely have sorted
-    // them, which helps expose duplicates, but we have to check all the
-    // operands to be safe in case instcombine hasn't run.
-    uintptr_t Hash = 0;
-    for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
-      // This hash algorithm is quite weak as hash functions go, but it seems
-      // to do a good enough job for this particular purpose, and is very quick.
-      Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
-      Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
-    }
-    // If we've never seen this hash value before, it's a unique PHI.
-    std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
-      HashMap.insert(std::make_pair(Hash, PN));
-    if (Pair.second) continue;
-    // Otherwise it's either a duplicate or a hash collision.
-    for (PHINode *OtherPN = Pair.first->second; ; ) {
-      if (OtherPN->isIdenticalTo(PN)) {
-        // A duplicate. Replace this PHI with its duplicate.
-        PN->replaceAllUsesWith(OtherPN);
-        PN->eraseFromParent();
-        Changed = true;
-        break;
-      }
-      // A non-duplicate hash collision.
-      DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
-      if (I == CollisionMap.end()) {
-        // Set this PHI to be the head of the linked list of colliding PHIs.
-        PHINode *Old = Pair.first->second;
-        Pair.first->second = PN;
-        CollisionMap[PN] = Old;
-        break;
-      }
-      // Procede to the next PHI in the list.
-      OtherPN = I->second;
-    }
-  }
-
-  return Changed;
-}
-
 /// SimplifyCFG - This function is used to do simplification of a CFG.  For
 /// example, it adjusts branches to branches to eliminate the extra hop, it
 /// eliminates unreachable basic blocks, and does other "peephole" optimization
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 82d7914..c765d96 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -813,6 +813,11 @@ void SlotTracker::CreateFunctionSlot(const Value *V) {
 void SlotTracker::CreateMetadataSlot(const MDNode *N) {
   assert(N && "Can't insert a null Value into SlotTracker!");
 
+  // Don't insert if N contains an instruction.
+  for (unsigned i = 0, e = N->getNumElements(); i != e; ++i)
+    if (N->getElement(i) && isa<Instruction>(N->getElement(i)))
+      return;
+
   ValueMap::iterator I = mdnMap.find(N);
   if (I != mdnMap.end())
     return;
@@ -1227,6 +1232,25 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
   }
 
   if (const MDNode *N = dyn_cast<MDNode>(V)) {
+    if (Machine->getMetadataSlot(N) == -1) {
+      // Print metadata inline, not via slot reference number.
+      Out << "!{";
+      for (unsigned mi = 0, me = N->getNumElements(); mi != me; ++mi) {
+        const Value *Val = N->getElement(mi);
+        if (!Val)
+          Out << "null";
+        else {
+          TypePrinter->print(N->getElement(0)->getType(), Out);
+          Out << ' ';
+          WriteAsOperandInternal(Out, N->getElement(0), TypePrinter, Machine);
+        }
+        if (mi + 1 != me)
+          Out << ", ";
+      }
+      Out << '}';
+      return;
+    }
+  
     Out << '!' << Machine->getMetadataSlot(N);
     return;
   }
@@ -1636,6 +1660,7 @@ void AssemblyWriter::printFunction(const Function *F) {
   case CallingConv::ARM_APCS:     Out << "arm_apcscc "; break;
   case CallingConv::ARM_AAPCS:    Out << "arm_aapcscc "; break;
   case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
+  case CallingConv::MSP430_INTR:  Out << "msp430_intrcc "; break;
   default: Out << "cc" << F->getCallingConv() << " "; break;
   }
 
@@ -1903,6 +1928,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     case CallingConv::ARM_APCS:     Out << " arm_apcscc "; break;
     case CallingConv::ARM_AAPCS:    Out << " arm_aapcscc "; break;
     case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
+    case CallingConv::MSP430_INTR:  Out << " msp430_intrcc "; break;
     default: Out << " cc" << CI->getCallingConv(); break;
     }
 
@@ -1953,6 +1979,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
     case CallingConv::ARM_APCS:     Out << " arm_apcscc "; break;
     case CallingConv::ARM_AAPCS:    Out << " arm_aapcscc "; break;
     case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
+    case CallingConv::MSP430_INTR:  Out << " msp430_intrcc "; break;
     default: Out << " cc" << II->getCallingConv(); break;
     }
 
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 23d0557..c7f7f53 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -262,7 +262,7 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
   assert(I != InstList.end() &&
          "Trying to get me to create degenerate basic block!");
 
-  BasicBlock *InsertBefore = next(Function::iterator(this))
+  BasicBlock *InsertBefore = llvm::next(Function::iterator(this))
                                .getNodePtrUnchecked();
   BasicBlock *New = BasicBlock::Create(getContext(), BBName,
                                        getParent(), InsertBefore);
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index c622558..a62f75b 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -1560,7 +1560,7 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
 
 Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
                                                    Constant *C,
-                                                   Value* const *Idxs,
+                                                   Value *const *Idxs,
                                                    unsigned NumIdx) {
   assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
                                            Idxs+NumIdx) ==
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 6cf2c81..88e1fe8 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -77,6 +77,13 @@ bool Argument::hasByValAttr() const {
   return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
 }
 
+/// hasNestAttr - Return true if this argument has the nest attribute on
+/// it in its containing function.
+bool Argument::hasNestAttr() const {
+  if (!isa<PointerType>(getType())) return false;
+  return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest);
+}
+
 /// hasNoAliasAttr - Return true if this argument has the noalias attribute on
 /// it in its containing function.
 bool Argument::hasNoAliasAttr() const {
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 1c3244b..8a2378e 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -62,7 +62,6 @@ struct DenseMapAPIntKeyInfo {
   static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return false; }
 };
 
 struct DenseMapAPFloatKeyInfo {
@@ -89,7 +88,6 @@ struct DenseMapAPFloatKeyInfo {
   static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
     return LHS == RHS;
   }
-  static bool isPod() { return false; }
 };
 
 class LLVMContextImpl {
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 1232fe2..6bea7a8 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -41,6 +41,10 @@ Pass::~Pass() {
 // Force out-of-line virtual method.
 ModulePass::~ModulePass() { }
 
+PassManagerType ModulePass::getPotentialPassManagerType() const {
+  return PMT_ModulePassManager;
+}
+
 bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const {
   return Resolver->getAnalysisIfAvailable(AnalysisID, true) != 0;
 }
@@ -60,6 +64,27 @@ const char *Pass::getPassName() const {
   return "Unnamed pass: implement Pass::getPassName()";
 }
 
+void Pass::preparePassManager(PMStack &) {
+  // By default, don't do anything.
+}
+
+PassManagerType Pass::getPotentialPassManagerType() const {
+  // Default implementation.
+  return PMT_Unknown; 
+}
+
+void Pass::getAnalysisUsage(AnalysisUsage &) const {
+  // By default, no analysis results are used, all are invalidated.
+}
+
+void Pass::releaseMemory() {
+  // By default, don't do anything.
+}
+
+void Pass::verifyAnalysis() const {
+  // By default, don't do anything.
+}
+
 // print - Print out the internal state of the pass.  This is called by Analyze
 // to print out the contents of an analysis.  Otherwise it is not necessary to
 // implement this method.
@@ -79,6 +104,10 @@ void Pass::dump() const {
 // Force out-of-line virtual method.
 ImmutablePass::~ImmutablePass() { }
 
+void ImmutablePass::initializePass() {
+  // By default, don't do anything.
+}
+
 //===----------------------------------------------------------------------===//
 // FunctionPass Implementation
 //
@@ -107,6 +136,20 @@ bool FunctionPass::run(Function &F) {
   return Changed | doFinalization(*F.getParent());
 }
 
+bool FunctionPass::doInitialization(Module &) {
+  // By default, don't do anything.
+  return false;
+}
+
+bool FunctionPass::doFinalization(Module &) {
+  // By default, don't do anything.
+  return false;
+}
+
+PassManagerType FunctionPass::getPotentialPassManagerType() const {
+  return PMT_FunctionPassManager;
+}
+
 //===----------------------------------------------------------------------===//
 // BasicBlockPass Implementation
 //
@@ -121,6 +164,30 @@ bool BasicBlockPass::runOnFunction(Function &F) {
   return Changed | doFinalization(F);
 }
 
+bool BasicBlockPass::doInitialization(Module &) {
+  // By default, don't do anything.
+  return false;
+}
+
+bool BasicBlockPass::doInitialization(Function &) {
+  // By default, don't do anything.
+  return false;
+}
+
+bool BasicBlockPass::doFinalization(Function &) {
+  // By default, don't do anything.
+  return false;
+}
+
+bool BasicBlockPass::doFinalization(Module &) {
+  // By default, don't do anything.
+  return false;
+}
+
+PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
+  return PMT_BasicBlockPassManager; 
+}
+
 //===----------------------------------------------------------------------===//
 // Pass Registration mechanism
 //
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index ae418a0..52e8a82 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -738,9 +738,15 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
       std::map<AnalysisID, Pass *>::iterator Info = I++;
       if (!dynamic_cast<ImmutablePass*>(Info->second) &&
           std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == 
-             PreservedSet.end())
+             PreservedSet.end()) {
         // Remove this analysis
+        if (PassDebugging >= Details) {
+          Pass *S = Info->second;
+          errs() << " -- '" <<  P->getPassName() << "' is not preserving '";
+          errs() << S->getPassName() << "'\n";
+        }
         InheritedAnalysis[Index]->erase(Info);
+      }
     }
   }
 }
@@ -1391,8 +1397,7 @@ MPPassManager::runOnModule(Module &M) {
   for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
     ModulePass *MP = getContainedPass(Index);
 
-    dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG,
-                 M.getModuleIdentifier().c_str());
+    dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
     dumpRequiredSet(MP);
 
     initializeAnalysisImpl(MP);
@@ -1406,13 +1411,13 @@ MPPassManager::runOnModule(Module &M) {
 
     if (Changed) 
       dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
-                   M.getModuleIdentifier().c_str());
+                   M.getModuleIdentifier());
     dumpPreservedSet(MP);
     
     verifyPreservedAnalysis(MP);
     removeNotPreservedAnalysis(MP);
     recordAvailableAnalysis(MP);
-    removeDeadPasses(MP, M.getModuleIdentifier().c_str(), ON_MODULE_MSG);
+    removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
   }
 
   // Finalize on-the-fly passes
diff --git a/test/Analysis/BasicAA/modref.ll b/test/Analysis/BasicAA/modref.ll
index 3f642cf..4a61636 100644
--- a/test/Analysis/BasicAA/modref.ll
+++ b/test/Analysis/BasicAA/modref.ll
@@ -60,8 +60,8 @@ define i8 @test2a(i8* %P) {
   call void @llvm.memset.i8(i8* %P, i8 2, i8 127, i32 0)
   %A = load i8* %P2
   ret i8 %A
-; CHECK: %A = load i8* %P2
-; CHECK: ret i8 %A
+; CHECK-NOT: load
+; CHECK: ret i8 2
 }
 
 define void @test3(i8* %P, i8 %X) {
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index d7037ab..5ad48ef 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -31,6 +31,8 @@ if(PYTHONINTERP_FOUND)
                 ${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
     COMMAND ${PYTHON_EXECUTABLE}
                 ${LLVM_SOURCE_DIR}/utils/lit/lit.py
+                --param llvm_site_config=${CMAKE_CURRENT_BINARY_DIR}/lit.site.cfg
+                --param llvm_unit_site_config=${CMAKE_CURRENT_BINARY_DIR}/Unit/lit.site.cfg
                 -sv
                 ${CMAKE_CURRENT_BINARY_DIR}
                 DEPENDS
diff --git a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll b/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
deleted file mode 100644
index 35ca7b4..0000000
--- a/test/CodeGen/ARM/2008-11-19-ScavengerAssert.ll
+++ /dev/null
@@ -1,414 +0,0 @@
-; RUN: llc < %s -mtriple=arm-apple-darwin9 -stats |& grep asm-printer | grep 154
-
-	%"struct.Adv5::Ekin<3>" = type <{ i8 }>
-	%"struct.Adv5::X::Energyflux<3>" = type { double }
-	%"struct.BinaryNode<OpAdd,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > >" = type { %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >" }
-	%"struct.BinaryNode<OpAdd,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > >" = type { %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >" }
-	%"struct.BinaryNode<OpMultiply,Scalar<double>,BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >" = type { %"struct.Adv5::X::Energyflux<3>", %"struct.BinaryNode<OpAdd,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > >" }
-	%"struct.BinaryNode<OpMultiply,Scalar<double>,BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > >" = type { %"struct.Adv5::X::Energyflux<3>", %"struct.BinaryNode<OpAdd,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > >" }
-	%"struct.Centering<3>" = type { i32, i32, %"struct.std::vector<Loc<3>,std::allocator<Loc<3> > >", %"struct.std::vector<Vector<3, double, Full>,std::allocator<Vector<3, double, Full> > >" }
-	%"struct.ContextMapper<1>" = type { i32 (...)** }
-	%"struct.DataBlockController<double>" = type { %"struct.RefBlockController<double>", %"struct.Adv5::Ekin<3>"*, i8, %"struct.SingleObservable<int>", i32 }
-	%"struct.DataBlockPtr<double,false>" = type { %"struct.RefCountedBlockPtr<double,false,DataBlockController<double> >" }
-	%"struct.Domain<1,DomainTraits<Interval<1> > >" = type { %"struct.DomainBase<DomainTraits<Interval<1> > >" }
-	%"struct.Domain<1,DomainTraits<Loc<1> > >" = type { %"struct.DomainBase<DomainTraits<Loc<1> > >" }
-	%"struct.Domain<1,DomainTraits<Range<1> > >" = type { %"struct.DomainBase<DomainTraits<Range<1> > >" }
-	%"struct.Domain<3,DomainTraits<Interval<3> > >" = type { %"struct.DomainBase<DomainTraits<Interval<3> > >" }
-	%"struct.Domain<3,DomainTraits<Loc<3> > >" = type { %"struct.DomainBase<DomainTraits<Loc<3> > >" }
-	%"struct.Domain<3,DomainTraits<Range<3> > >" = type { %"struct.DomainBase<DomainTraits<Range<3> > >" }
-	%"struct.DomainBase<DomainTraits<Interval<1> > >" = type { [2 x i32] }
-	%"struct.DomainBase<DomainTraits<Interval<3> > >" = type { [3 x %"struct.WrapNoInit<Interval<1> >"] }
-	%"struct.DomainBase<DomainTraits<Loc<1> > >" = type { i32 }
-	%"struct.DomainBase<DomainTraits<Loc<3> > >" = type { [3 x %"struct.WrapNoInit<Loc<1> >"] }
-	%"struct.DomainBase<DomainTraits<Range<1> > >" = type { [3 x i32] }
-	%"struct.DomainBase<DomainTraits<Range<3> > >" = type { [3 x %"struct.WrapNoInit<Range<1> >"] }
-	%"struct.DomainLayout<3>" = type { %"struct.Node<Interval<3>,Interval<3> >" }
-	%"struct.DomainMap<Interval<1>,int>" = type { i32, %"struct.DomainMapNode<Interval<1>,int>"*, %"struct.DomainMapIterator<Interval<1>,int>" }
-	%"struct.DomainMapIterator<Interval<1>,int>" = type { %"struct.DomainMapNode<Interval<1>,int>"*, %"struct.std::_List_const_iterator<Interval<3> >" }
-	%"struct.DomainMapNode<Interval<1>,int>" = type { %"struct.Interval<1>", %"struct.DomainMapNode<Interval<1>,int>"*, %"struct.DomainMapNode<Interval<1>,int>"*, %"struct.DomainMapNode<Interval<1>,int>"*, %"struct.std::list<Interval<3>,std::allocator<Interval<3> > >" }
-	%"struct.Engine<3,Zero<double>,ConstantFunction>" = type { %"struct.Adv5::Ekin<3>", %"struct.Interval<3>", [3 x i32] }
-	%"struct.Engine<3,double,Brick>" = type { %"struct.Pooma::BrickBase<3>", %"struct.DataBlockPtr<double,false>", double* }
-	%"struct.Engine<3,double,BrickView>" = type { %"struct.Pooma::BrickViewBase<3>", %"struct.DataBlockPtr<double,false>", double* }
-	%"struct.Engine<3,double,ConstantFunction>" = type { double, %"struct.Interval<3>", [3 x i32] }
-	%"struct.Engine<3,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > > > >" = type { %"struct.BinaryNode<OpMultiply,Scalar<double>,BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >", %"struct.Interval<3>" }
-	%"struct.Engine<3,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >" = type { %"struct.BinaryNode<OpMultiply,Scalar<double>,BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > >", %"struct.Interval<3>" }
-	%"struct.Engine<3,double,MultiPatch<GridTag, Remote<Brick> > >" = type { %"struct.ContextMapper<1>", %"struct.GridLayout<3>", %"struct.RefCountedBlockPtr<Engine<3, double, Remote<Brick> >,false,RefBlockController<Engine<3, double, Remote<Brick> > > >", i32* }
-	%"struct.Engine<3,double,MultiPatchView<GridTag, Remote<Brick>, 3> >" = type { %"struct.GridLayoutView<3,3>", %"struct.Engine<3,double,MultiPatch<GridTag, Remote<Brick> > >" }
-	%"struct.Engine<3,double,Remote<Brick> >" = type { %"struct.Interval<3>", i32, %"struct.RefCountedPtr<Shared<Engine<3, double, Brick> > >" }
-	%"struct.Engine<3,double,Remote<BrickView> >" = type { %"struct.Interval<3>", i32, %"struct.RefCountedPtr<Shared<Engine<3, double, BrickView> > >" }
-	%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,Zero<double>,ConstantFunction>" = type { %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,Zero<double>,ConstantFunction>" }
-	%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ConstantFunction>" = type { %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ConstantFunction>" }
-	%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > > > >" = type { %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > > > >" }
-	%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >" = type { %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >" }
-	%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >" = type { %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >" }
-	%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >" = type { %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >" }
-	%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >" = type { %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >" }
-	%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,Zero<double>,ConstantFunction>" = type { i32, %"struct.Centering<3>", i32, %"struct.RefCountedBlockPtr<FieldEngineBaseData<3, Zero<double>, ConstantFunction>,false,RefBlockController<FieldEngineBaseData<3, Zero<double>, ConstantFunction> > >", %"struct.Interval<3>", %"struct.GuardLayers<3>", %"struct.UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >" }
-	%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ConstantFunction>" = type { i32, %"struct.Centering<3>", i32, %"struct.RefCountedBlockPtr<FieldEngineBaseData<3, double, ConstantFunction>,false,RefBlockController<FieldEngineBaseData<3, double, ConstantFunction> > >", %"struct.Interval<3>", %"struct.GuardLayers<3>", %"struct.UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >" }
-	%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > > > >" = type { %"struct.Engine<3,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > > > >", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"* }
-	%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >" = type { %"struct.Engine<3,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >"* }
-	%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >" = type { i32, %"struct.Centering<3>", i32, %"struct.RefCountedBlockPtr<FieldEngineBaseData<3, double, MultiPatch<GridTag, Remote<Brick> > >,false,RefBlockController<FieldEngineBaseData<3, double, MultiPatch<GridTag, Remote<Brick> > > > >", %"struct.Interval<3>", %"struct.GuardLayers<3>", %"struct.UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >" }
-	%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >" = type { i32, %"struct.Centering<3>", i32, %"struct.RefCountedBlockPtr<FieldEngineBaseData<3, double, MultiPatchView<GridTag, Remote<Brick>, 3> >,false,RefBlockController<FieldEngineBaseData<3, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >", %"struct.Interval<3>", %"struct.GuardLayers<3>", %"struct.UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >" }
-	%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >" = type { i32, %"struct.Centering<3>", i32, %"struct.RefCountedBlockPtr<FieldEngineBaseData<3, double, Remote<BrickView> >,false,RefBlockController<FieldEngineBaseData<3, double, Remote<BrickView> > > >", %"struct.Interval<3>", %"struct.GuardLayers<3>", %"struct.UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >" }
-	%"struct.FieldEngineBaseData<3,Zero<double>,ConstantFunction>" = type { %"struct.Engine<3,Zero<double>,ConstantFunction>", %struct.RelationList }
-	%"struct.FieldEngineBaseData<3,double,ConstantFunction>" = type { %"struct.Engine<3,double,ConstantFunction>", %struct.RelationList }
-	%"struct.FieldEngineBaseData<3,double,MultiPatch<GridTag, Remote<Brick> > >" = type { %"struct.Engine<3,double,MultiPatch<GridTag, Remote<Brick> > >", %struct.RelationList }
-	%"struct.FieldEngineBaseData<3,double,MultiPatchView<GridTag, Remote<Brick>, 3> >" = type { %"struct.Engine<3,double,MultiPatchView<GridTag, Remote<Brick>, 3> >", %struct.RelationList }
-	%"struct.FieldEngineBaseData<3,double,Remote<BrickView> >" = type { %"struct.Engine<3,double,Remote<BrickView> >", %struct.RelationList }
-	%struct.GlobalIDDataBase = type { %"struct.std::vector<GlobalIDDataBase::Pack,std::allocator<GlobalIDDataBase::Pack> >", %"struct.std::map<int,InformStream*,std::less<int>,std::allocator<std::pair<const int, InformStream*> > >" }
-	%"struct.GlobalIDDataBase::Pack" = type { i32, i32, i32, i32 }
-	%"struct.GridLayout<3>" = type { %"struct.ContextMapper<1>", %"struct.LayoutBase<3,GridLayoutData<3> >", %"struct.Observable<GridLayout<3> >" }
-	%"struct.GridLayoutData<3>" = type { %"struct.LayoutBaseData<3>", %struct.RefCounted, [21 x i8], i8, [3 x i32], [3 x %"struct.DomainMap<Interval<1>,int>"], [3 x %"struct.DomainMap<Interval<1>,int>"] }
-	%"struct.GridLayoutView<3,3>" = type { %"struct.LayoutBaseView<3,3,GridLayoutViewData<3, 3> >" }
-	%"struct.GridLayoutViewData<3,3>" = type { %"struct.LayoutBaseViewData<3,3,GridLayout<3> >", %struct.RefCounted }
-	%"struct.GuardLayers<3>" = type { [3 x i32], [3 x i32] }
-	%"struct.INode<3>" = type { %"struct.Interval<3>", %struct.GlobalIDDataBase*, i32 }
-	%"struct.Interval<1>" = type { %"struct.Domain<1,DomainTraits<Interval<1> > >" }
-	%"struct.Interval<3>" = type { %"struct.Domain<3,DomainTraits<Interval<3> > >" }
-	%"struct.LayoutBase<3,GridLayoutData<3> >" = type { %"struct.RefCountedPtr<GridLayoutData<3> >" }
-	%"struct.LayoutBaseData<3>" = type { i32, %"struct.Interval<3>", %"struct.Interval<3>", %"struct.std::vector<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >", %"struct.std::vector<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >", %"struct.std::vector<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >", i8, i8, %"struct.GuardLayers<3>", %"struct.GuardLayers<3>", %"struct.std::vector<LayoutBaseData<3>::GCFillInfo,std::allocator<LayoutBaseData<3>::GCFillInfo> >", [3 x i32], [3 x i32], %"struct.Loc<3>" }
-	%"struct.LayoutBaseData<3>::GCFillInfo" = type { %"struct.Interval<3>", i32, i32, i32 }
-	%"struct.LayoutBaseView<3,3,GridLayoutViewData<3, 3> >" = type { %"struct.RefCountedPtr<GridLayoutViewData<3, 3> >" }
-	%"struct.LayoutBaseViewData<3,3,GridLayout<3> >" = type { i32, %"struct.GridLayout<3>", %"struct.GuardLayers<3>", %"struct.GuardLayers<3>", %"struct.ViewIndexer<3,3>", %"struct.std::vector<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >", %"struct.std::vector<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >", %"struct.std::vector<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >", i8 }
-	%"struct.Loc<1>" = type { %"struct.Domain<1,DomainTraits<Loc<1> > >" }
-	%"struct.Loc<3>" = type { %"struct.Domain<3,DomainTraits<Loc<3> > >" }
-	%"struct.MultiArg6<Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, ConstantFunction>,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, Zero<double>, ConstantFunction> >" = type { %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ConstantFunction>", %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,Zero<double>,ConstantFunction>" }
-	%"struct.NoMeshData<3>" = type { %struct.RefCounted, %"struct.Interval<3>", %"struct.Interval<3>", %"struct.Interval<3>", %"struct.Interval<3>" }
-	%"struct.Node<Interval<3>,Interval<3> >" = type { %"struct.Interval<3>", %"struct.Interval<3>", i32, i32, i32, i32 }
-	%"struct.Observable<GridLayout<3> >" = type { %"struct.GridLayout<3>"*, %"struct.std::vector<Observer<GridLayout<3> >*,std::allocator<Observer<GridLayout<3> >*> >", i32, %"struct.Adv5::Ekin<3>" }
-	%"struct.Pooma::BrickBase<3>" = type { %"struct.DomainLayout<3>", [3 x i32], [3 x i32], i32, i8 }
-	%"struct.Pooma::BrickViewBase<3>" = type { %"struct.Interval<3>", [3 x i32], [3 x i32], i32, i8 }
-	%"struct.Range<1>" = type { %"struct.Domain<1,DomainTraits<Range<1> > >" }
-	%"struct.Range<3>" = type { %"struct.Domain<3,DomainTraits<Range<3> > >" }
-	%"struct.RefBlockController<Engine<3, double, Remote<Brick> > >" = type { %struct.RefCounted, %"struct.Engine<3,double,Remote<Brick> >"*, %"struct.Engine<3,double,Remote<Brick> >"*, %"struct.Engine<3,double,Remote<Brick> >"*, i8 }
-	%"struct.RefBlockController<FieldEngineBaseData<3, Zero<double>, ConstantFunction> >" = type { %struct.RefCounted, %"struct.FieldEngineBaseData<3,Zero<double>,ConstantFunction>"*, %"struct.FieldEngineBaseData<3,Zero<double>,ConstantFunction>"*, %"struct.FieldEngineBaseData<3,Zero<double>,ConstantFunction>"*, i8 }
-	%"struct.RefBlockController<FieldEngineBaseData<3, double, ConstantFunction> >" = type { %struct.RefCounted, %"struct.FieldEngineBaseData<3,double,ConstantFunction>"*, %"struct.FieldEngineBaseData<3,double,ConstantFunction>"*, %"struct.FieldEngineBaseData<3,double,ConstantFunction>"*, i8 }
-	%"struct.RefBlockController<FieldEngineBaseData<3, double, MultiPatch<GridTag, Remote<Brick> > > >" = type { %struct.RefCounted, %"struct.FieldEngineBaseData<3,double,MultiPatch<GridTag, Remote<Brick> > >"*, %"struct.FieldEngineBaseData<3,double,MultiPatch<GridTag, Remote<Brick> > >"*, %"struct.FieldEngineBaseData<3,double,MultiPatch<GridTag, Remote<Brick> > >"*, i8 }
-	%"struct.RefBlockController<FieldEngineBaseData<3, double, MultiPatchView<GridTag, Remote<Brick>, 3> > >" = type { %struct.RefCounted, %"struct.FieldEngineBaseData<3,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*, %"struct.FieldEngineBaseData<3,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*, %"struct.FieldEngineBaseData<3,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*, i8 }
-	%"struct.RefBlockController<FieldEngineBaseData<3, double, Remote<BrickView> > >" = type { %struct.RefCounted, %"struct.FieldEngineBaseData<3,double,Remote<BrickView> >"*, %"struct.FieldEngineBaseData<3,double,Remote<BrickView> >"*, %"struct.FieldEngineBaseData<3,double,Remote<BrickView> >"*, i8 }
-	%"struct.RefBlockController<double>" = type { %struct.RefCounted, double*, double*, double*, i8 }
-	%struct.RefCounted = type { i32, %"struct.Adv5::Ekin<3>" }
-	%"struct.RefCountedBlockPtr<Engine<3, double, Remote<Brick> >,false,RefBlockController<Engine<3, double, Remote<Brick> > > >" = type { i32, %"struct.RefCountedPtr<RefBlockController<Engine<3, double, Remote<Brick> > > >" }
-	%"struct.RefCountedBlockPtr<FieldEngineBaseData<3, Zero<double>, ConstantFunction>,false,RefBlockController<FieldEngineBaseData<3, Zero<double>, ConstantFunction> > >" = type { i32, %"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, Zero<double>, ConstantFunction> > >" }
-	%"struct.RefCountedBlockPtr<FieldEngineBaseData<3, double, ConstantFunction>,false,RefBlockController<FieldEngineBaseData<3, double, ConstantFunction> > >" = type { i32, %"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, double, ConstantFunction> > >" }
-	%"struct.RefCountedBlockPtr<FieldEngineBaseData<3, double, MultiPatch<GridTag, Remote<Brick> > >,false,RefBlockController<FieldEngineBaseData<3, double, MultiPatch<GridTag, Remote<Brick> > > > >" = type { i32, %"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, double, MultiPatch<GridTag, Remote<Brick> > > > >" }
-	%"struct.RefCountedBlockPtr<FieldEngineBaseData<3, double, MultiPatchView<GridTag, Remote<Brick>, 3> >,false,RefBlockController<FieldEngineBaseData<3, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >" = type { i32, %"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >" }
-	%"struct.RefCountedBlockPtr<FieldEngineBaseData<3, double, Remote<BrickView> >,false,RefBlockController<FieldEngineBaseData<3, double, Remote<BrickView> > > >" = type { i32, %"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, double, Remote<BrickView> > > >" }
-	%"struct.RefCountedBlockPtr<double,false,DataBlockController<double> >" = type { i32, %"struct.RefCountedPtr<DataBlockController<double> >" }
-	%"struct.RefCountedPtr<DataBlockController<double> >" = type { %"struct.DataBlockController<double>"* }
-	%"struct.RefCountedPtr<GridLayoutData<3> >" = type { %"struct.GridLayoutData<3>"* }
-	%"struct.RefCountedPtr<GridLayoutViewData<3, 3> >" = type { %"struct.GridLayoutViewData<3,3>"* }
-	%"struct.RefCountedPtr<RefBlockController<Engine<3, double, Remote<Brick> > > >" = type { %"struct.RefBlockController<Engine<3, double, Remote<Brick> > >"* }
-	%"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, Zero<double>, ConstantFunction> > >" = type { %"struct.RefBlockController<FieldEngineBaseData<3, Zero<double>, ConstantFunction> >"* }
-	%"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, double, ConstantFunction> > >" = type { %"struct.RefBlockController<FieldEngineBaseData<3, double, ConstantFunction> >"* }
-	%"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, double, MultiPatch<GridTag, Remote<Brick> > > > >" = type { %"struct.RefBlockController<FieldEngineBaseData<3, double, MultiPatch<GridTag, Remote<Brick> > > >"* }
-	%"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >" = type { %"struct.RefBlockController<FieldEngineBaseData<3, double, MultiPatchView<GridTag, Remote<Brick>, 3> > >"* }
-	%"struct.RefCountedPtr<RefBlockController<FieldEngineBaseData<3, double, Remote<BrickView> > > >" = type { %"struct.RefBlockController<FieldEngineBaseData<3, double, Remote<BrickView> > >"* }
-	%"struct.RefCountedPtr<RelationListData>" = type { %struct.RelationListData* }
-	%"struct.RefCountedPtr<Shared<Engine<3, double, Brick> > >" = type { %"struct.Shared<Engine<3, double, Brick> >"* }
-	%"struct.RefCountedPtr<Shared<Engine<3, double, BrickView> > >" = type { %"struct.Shared<Engine<3, double, BrickView> >"* }
-	%"struct.RefCountedPtr<UniformRectilinearMeshData<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> > >" = type { %"struct.UniformRectilinearMeshData<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >"* }
-	%struct.RelationList = type { %"struct.RefCountedPtr<RelationListData>" }
-	%struct.RelationListData = type { %struct.RefCounted, %"struct.std::vector<RelationListItem*,std::allocator<RelationListItem*> >" }
-	%struct.RelationListItem = type { i32 (...)**, i32, i32, i8 }
-	%"struct.Shared<Engine<3, double, Brick> >" = type { %struct.RefCounted, %"struct.Engine<3,double,Brick>" }
-	%"struct.Shared<Engine<3, double, BrickView> >" = type { %struct.RefCounted, %"struct.Engine<3,double,BrickView>" }
-	%"struct.SingleObservable<int>" = type { %"struct.ContextMapper<1>"* }
-	%"struct.UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >" = type { %"struct.RefCountedPtr<UniformRectilinearMeshData<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> > >" }
-	%"struct.UniformRectilinearMeshData<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >" = type { %"struct.NoMeshData<3>", %"struct.Vector<3,double,Full>", %"struct.Vector<3,double,Full>" }
-	%"struct.Vector<3,double,Full>" = type { %"struct.VectorEngine<3,double,Full>" }
-	%"struct.VectorEngine<3,double,Full>" = type { [3 x double] }
-	%"struct.ViewIndexer<3,3>" = type { %"struct.Interval<3>", %"struct.Range<3>", [3 x i32], [3 x i32], %"struct.Loc<3>" }
-	%"struct.WrapNoInit<Interval<1> >" = type { %"struct.Interval<1>" }
-	%"struct.WrapNoInit<Loc<1> >" = type { %"struct.Loc<1>" }
-	%"struct.WrapNoInit<Range<1> >" = type { %"struct.Range<1>" }
-	%"struct.std::_List_base<Interval<3>,std::allocator<Interval<3> > >" = type { %"struct.std::_List_base<Interval<3>,std::allocator<Interval<3> > >::_List_impl" }
-	%"struct.std::_List_base<Interval<3>,std::allocator<Interval<3> > >::_List_impl" = type { %"struct.std::_List_node_base" }
-	%"struct.std::_List_const_iterator<Interval<3> >" = type { %"struct.std::_List_node_base"* }
-	%"struct.std::_List_node_base" = type { %"struct.std::_List_node_base"*, %"struct.std::_List_node_base"* }
-	%"struct.std::_Rb_tree<int,std::pair<const int, InformStream*>,std::_Select1st<std::pair<const int, InformStream*> >,std::less<int>,std::allocator<std::pair<const int, InformStream*> > >" = type { %"struct.std::_Rb_tree<int,std::pair<const int, InformStream*>,std::_Select1st<std::pair<const int, InformStream*> >,std::less<int>,std::allocator<std::pair<const int, InformStream*> > >::_Rb_tree_impl<std::less<int>,false>" }
-	%"struct.std::_Rb_tree<int,std::pair<const int, InformStream*>,std::_Select1st<std::pair<const int, InformStream*> >,std::less<int>,std::allocator<std::pair<const int, InformStream*> > >::_Rb_tree_impl<std::less<int>,false>" = type { %"struct.Adv5::Ekin<3>", %"struct.std::_Rb_tree_node_base", i32 }
-	%"struct.std::_Rb_tree_node_base" = type { i32, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"*, %"struct.std::_Rb_tree_node_base"* }
-	%"struct.std::_Vector_base<GlobalIDDataBase::Pack,std::allocator<GlobalIDDataBase::Pack> >" = type { %"struct.std::_Vector_base<GlobalIDDataBase::Pack,std::allocator<GlobalIDDataBase::Pack> >::_Vector_impl" }
-	%"struct.std::_Vector_base<GlobalIDDataBase::Pack,std::allocator<GlobalIDDataBase::Pack> >::_Vector_impl" = type { %"struct.GlobalIDDataBase::Pack"*, %"struct.GlobalIDDataBase::Pack"*, %"struct.GlobalIDDataBase::Pack"* }
-	%"struct.std::_Vector_base<LayoutBaseData<3>::GCFillInfo,std::allocator<LayoutBaseData<3>::GCFillInfo> >" = type { %"struct.std::_Vector_base<LayoutBaseData<3>::GCFillInfo,std::allocator<LayoutBaseData<3>::GCFillInfo> >::_Vector_impl" }
-	%"struct.std::_Vector_base<LayoutBaseData<3>::GCFillInfo,std::allocator<LayoutBaseData<3>::GCFillInfo> >::_Vector_impl" = type { %"struct.LayoutBaseData<3>::GCFillInfo"*, %"struct.LayoutBaseData<3>::GCFillInfo"*, %"struct.LayoutBaseData<3>::GCFillInfo"* }
-	%"struct.std::_Vector_base<Loc<3>,std::allocator<Loc<3> > >" = type { %"struct.std::_Vector_base<Loc<3>,std::allocator<Loc<3> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<Loc<3>,std::allocator<Loc<3> > >::_Vector_impl" = type { %"struct.Loc<3>"*, %"struct.Loc<3>"*, %"struct.Loc<3>"* }
-	%"struct.std::_Vector_base<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >" = type { %"struct.std::_Vector_base<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >::_Vector_impl" }
-	%"struct.std::_Vector_base<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >::_Vector_impl" = type { %"struct.Node<Interval<3>,Interval<3> >"**, %"struct.Node<Interval<3>,Interval<3> >"**, %"struct.Node<Interval<3>,Interval<3> >"** }
-	%"struct.std::_Vector_base<Observer<GridLayout<3> >*,std::allocator<Observer<GridLayout<3> >*> >" = type { %"struct.std::_Vector_base<Observer<GridLayout<3> >*,std::allocator<Observer<GridLayout<3> >*> >::_Vector_impl" }
-	%"struct.std::_Vector_base<Observer<GridLayout<3> >*,std::allocator<Observer<GridLayout<3> >*> >::_Vector_impl" = type { %"struct.ContextMapper<1>"**, %"struct.ContextMapper<1>"**, %"struct.ContextMapper<1>"** }
-	%"struct.std::_Vector_base<RelationListItem*,std::allocator<RelationListItem*> >" = type { %"struct.std::_Vector_base<RelationListItem*,std::allocator<RelationListItem*> >::_Vector_impl" }
-	%"struct.std::_Vector_base<RelationListItem*,std::allocator<RelationListItem*> >::_Vector_impl" = type { %struct.RelationListItem**, %struct.RelationListItem**, %struct.RelationListItem** }
-	%"struct.std::_Vector_base<Vector<3, double, Full>,std::allocator<Vector<3, double, Full> > >" = type { %"struct.std::_Vector_base<Vector<3, double, Full>,std::allocator<Vector<3, double, Full> > >::_Vector_impl" }
-	%"struct.std::_Vector_base<Vector<3, double, Full>,std::allocator<Vector<3, double, Full> > >::_Vector_impl" = type { %"struct.Vector<3,double,Full>"*, %"struct.Vector<3,double,Full>"*, %"struct.Vector<3,double,Full>"* }
-	%"struct.std::list<Interval<3>,std::allocator<Interval<3> > >" = type { %"struct.std::_List_base<Interval<3>,std::allocator<Interval<3> > >" }
-	%"struct.std::map<int,InformStream*,std::less<int>,std::allocator<std::pair<const int, InformStream*> > >" = type { %"struct.std::_Rb_tree<int,std::pair<const int, InformStream*>,std::_Select1st<std::pair<const int, InformStream*> >,std::less<int>,std::allocator<std::pair<const int, InformStream*> > >" }
-	%"struct.std::vector<GlobalIDDataBase::Pack,std::allocator<GlobalIDDataBase::Pack> >" = type { %"struct.std::_Vector_base<GlobalIDDataBase::Pack,std::allocator<GlobalIDDataBase::Pack> >" }
-	%"struct.std::vector<LayoutBaseData<3>::GCFillInfo,std::allocator<LayoutBaseData<3>::GCFillInfo> >" = type { %"struct.std::_Vector_base<LayoutBaseData<3>::GCFillInfo,std::allocator<LayoutBaseData<3>::GCFillInfo> >" }
-	%"struct.std::vector<Loc<3>,std::allocator<Loc<3> > >" = type { %"struct.std::_Vector_base<Loc<3>,std::allocator<Loc<3> > >" }
-	%"struct.std::vector<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >" = type { %"struct.std::_Vector_base<Node<Interval<3>, Interval<3> >*,std::allocator<Node<Interval<3>, Interval<3> >*> >" }
-	%"struct.std::vector<Observer<GridLayout<3> >*,std::allocator<Observer<GridLayout<3> >*> >" = type { %"struct.std::_Vector_base<Observer<GridLayout<3> >*,std::allocator<Observer<GridLayout<3> >*> >" }
-	%"struct.std::vector<RelationListItem*,std::allocator<RelationListItem*> >" = type { %"struct.std::_Vector_base<RelationListItem*,std::allocator<RelationListItem*> >" }
-	%"struct.std::vector<Vector<3, double, Full>,std::allocator<Vector<3, double, Full> > >" = type { %"struct.std::_Vector_base<Vector<3, double, Full>,std::allocator<Vector<3, double, Full> > >" }
-
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32) nounwind
-
-declare fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEEC1ERKSC_(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*, %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*) nounwind
-
-declare fastcc void @_ZN9CenteringILi3EEC1ERKS0_i(%"struct.Centering<3>"*, %"struct.Centering<3>"*, i32) nounwind
-
-declare fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEED1Ev(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >"*) nounwind
-
-declare fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEEC1Id14MultiPatchViewI7GridTagS6_I5BrickELi3EEEERKS_IS5_T_T0_ERK5INodeILi3EE(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >"*, %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*, %"struct.INode<3>"*) nounwind
-
-declare fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EEED1Ev(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*) nounwind
-
-declare fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EEEC1Id10MultiPatchIS7_SA_EEERKS_IS5_T_T0_ERK8IntervalILi3EE(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*, %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*, %"struct.Interval<3>"*) nounwind
-
-define fastcc void @t(double %dt, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %rh, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %T, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %v, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %pg, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %ph, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %cs, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ConstantFunction>"* %cv, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,Zero<double>,ConstantFunction>"* %dlmdlt, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ConstantFunction>"* %xmue, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %vint, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %cent, %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %fvis, double %c_nr, double %c_av, i8 zeroext %cartvis_f) nounwind {
-entry:
-	%0 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >"*> [#uses=4]
-	%s.i.i.i.i.i = alloca %"struct.Interval<3>"		; <%"struct.Interval<3>"*> [#uses=0]
-	%1 = alloca %"struct.BinaryNode<OpMultiply,Scalar<double>,BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >"		; <%"struct.BinaryNode<OpMultiply,Scalar<double>,BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >"*> [#uses=2]
-	%multiArg.i = alloca %"struct.MultiArg6<Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, ConstantFunction>,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, Zero<double>, ConstantFunction> >"		; <%"struct.MultiArg6<Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatch<GridTag, Remote<Brick> > >,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, ConstantFunction>,Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, Zero<double>, ConstantFunction> >"*> [#uses=0]
-	%2 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=6]
-	%3 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=2]
-	%4 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*> [#uses=0]
-	%5 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=2]
-	%6 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*> [#uses=0]
-	%7 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%8 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%9 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%10 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%11 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%12 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%13 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%14 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%15 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%16 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%17 = alloca %"struct.Interval<3>"		; <%"struct.Interval<3>"*> [#uses=0]
-	%18 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%19 = alloca double		; <double*> [#uses=0]
-	%20 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*> [#uses=0]
-	%21 = alloca %"struct.Interval<3>"		; <%"struct.Interval<3>"*> [#uses=0]
-	%22 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*> [#uses=0]
-	%23 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*> [#uses=0]
-	%24 = alloca %"struct.Interval<3>"		; <%"struct.Interval<3>"*> [#uses=0]
-	%25 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%26 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%27 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%28 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%29 = alloca %"struct.Interval<3>"		; <%"struct.Interval<3>"*> [#uses=0]
-	%30 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%31 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%32 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%33 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%34 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%35 = alloca %"struct.Interval<3>"		; <%"struct.Interval<3>"*> [#uses=0]
-	%36 = alloca %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"		; <%"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=0]
-	%37 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %v, i32 0, i32 0, i32 5		; <%"struct.GuardLayers<3>"*> [#uses=1]
-	%38 = bitcast %"struct.GuardLayers<3>"* %37 to i8*		; <i8*> [#uses=1]
-	br label %bb.i.i.i.i.i
-
-bb.i.i.i.i.i:		; preds = %bb.i.i.i.i.i, %entry
-	%39 = icmp eq i32* null, null		; <i1> [#uses=1]
-	br i1 %39, label %_ZN14ScalarCodeInfoILi3ELi4EEC1Ev.exit.i, label %bb.i.i.i.i.i
-
-_ZN14ScalarCodeInfoILi3ELi4EEC1Ev.exit.i:		; preds = %bb.i.i.i.i.i
-	br label %bb.i.i.i35.i.i34
-
-bb.i.i.i35.i.i34:		; preds = %bb.i.i.i35.i.i34, %_ZN14ScalarCodeInfoILi3ELi4EEC1Ev.exit.i
-	%40 = icmp eq i32* null, null		; <i1> [#uses=1]
-	br i1 %40, label %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit36.i.i37, label %bb.i.i.i35.i.i34
-
-_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit36.i.i37:		; preds = %bb.i.i.i35.i.i34
-	br label %bb.i.i.i19.i.i47
-
-bb.i.i.i19.i.i47:		; preds = %bb.i.i.i19.i.i47, %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit36.i.i37
-	%41 = icmp eq i32* null, null		; <i1> [#uses=1]
-	br i1 %41, label %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i50, label %bb.i.i.i19.i.i47
-
-_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i50:		; preds = %bb.i.i.i19.i.i47
-	%42 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %rh, i32 0, i32 0		; <%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"*> [#uses=1]
-	br label %bb.i.i.i19.i.i.i
-
-bb.i.i.i19.i.i.i:		; preds = %bb.i.i.i19.i.i.i, %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i50
-	%43 = icmp eq i32* null, null		; <i1> [#uses=1]
-	br i1 %43, label %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i.i, label %bb.i.i.i19.i.i.i
-
-_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i.i:		; preds = %bb.i.i.i19.i.i.i
-	br label %bb.i.i.i35.i.i433
-
-bb.i.i.i35.i.i433:		; preds = %bb.i.i.i35.i.i433, %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i.i
-	%44 = icmp eq i32* null, null		; <i1> [#uses=1]
-	br i1 %44, label %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit36.i.i436, label %bb.i.i.i35.i.i433
-
-_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit36.i.i436:		; preds = %bb.i.i.i35.i.i433
-	br label %bb.i.i.i19.i.i446
-
-bb.i.i.i19.i.i446:		; preds = %bb.i.i.i19.i.i446, %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit36.i.i436
-	%45 = icmp eq i32* null, null		; <i1> [#uses=1]
-	br i1 %45, label %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i449, label %bb.i.i.i19.i.i446
-
-_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i449:		; preds = %bb.i.i.i19.i.i446
-	br label %bb.i.i.i.i.i459
-
-bb.i.i.i.i.i459:		; preds = %bb.i.i.i.i.i459, %_ZNSt6vectorIbSaIbEEC1EmRKbRKS0_.exit20.i.i449
-	%46 = icmp eq i32* null, null		; <i1> [#uses=1]
-	br i1 %46, label %_ZN14ScalarCodeInfoILi3ELi6EEC1Ev.exit.i460, label %bb.i.i.i.i.i459
-
-_ZN14ScalarCodeInfoILi3ELi6EEC1Ev.exit.i460:		; preds = %bb.i.i.i.i.i459
-	%47 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %5, i32 0, i32 0, i32 1		; <%"struct.Centering<3>"*> [#uses=1]
-	%48 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %vint, i32 0, i32 0, i32 1		; <%"struct.Centering<3>"*> [#uses=2]
-	%49 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %5, i32 0, i32 0, i32 5		; <%"struct.GuardLayers<3>"*> [#uses=1]
-	%50 = bitcast %"struct.GuardLayers<3>"* %49 to i8*		; <i8*> [#uses=1]
-	%51 = bitcast %"struct.GuardLayers<3>"* null to i8*		; <i8*> [#uses=2]
-	%52 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %3, i32 0, i32 0, i32 1		; <%"struct.Centering<3>"*> [#uses=1]
-	%53 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %3, i32 0, i32 0, i32 5		; <%"struct.GuardLayers<3>"*> [#uses=1]
-	%54 = bitcast %"struct.GuardLayers<3>"* %53 to i8*		; <i8*> [#uses=1]
-	%55 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %2, i32 0, i32 0, i32 1		; <%"struct.Centering<3>"*> [#uses=1]
-	%56 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %2, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%57 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %2, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%58 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %2, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%59 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %2, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%60 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %2, i32 0, i32 0, i32 5		; <%"struct.GuardLayers<3>"*> [#uses=1]
-	%61 = bitcast %"struct.GuardLayers<3>"* %60 to i8*		; <i8*> [#uses=1]
-	%62 = getelementptr %"struct.BinaryNode<OpMultiply,Scalar<double>,BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >"* %1, i32 0, i32 1, i32 0, i32 0		; <%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*> [#uses=1]
-	%63 = getelementptr %"struct.BinaryNode<OpMultiply,Scalar<double>,BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > >"* %1, i32 0, i32 1, i32 1, i32 0		; <%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"*> [#uses=1]
-	%64 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, MultiPatchView<GridTag, Remote<Brick>, 3> > > > > >"* null, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0		; <double*> [#uses=1]
-	%65 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >"* %0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 0		; <%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >"*> [#uses=2]
-	%66 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >"* %0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0		; <i32*> [#uses=1]
-	%67 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >"* %0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	%68 = getelementptr %"struct.Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,ExpressionTag<BinaryNode<OpMultiply, Scalar<double>, BinaryNode<OpAdd, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> >, Field<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >, double, Remote<BrickView> > > > > >"* %0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0, i32 1		; <i32*> [#uses=1]
-	br label %bb15
-
-bb15:		; preds = %_Z6assignI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EES5_d13ExpressionTagI10BinaryNodeI10OpMultiply6ScalarIdESD_I5OpAdd9ReferenceI5FieldIS5_dSB_EESL_EEE8OpAssignERKSJ_IT_T0_T1_ESV_RKSJ_IT2_T3_T4_ERKT5_.exit, %_ZN14ScalarCodeInfoILi3ELi6EEC1Ev.exit.i460
-	%i.0.reg2mem.0 = phi i32 [ 0, %_ZN14ScalarCodeInfoILi3ELi6EEC1Ev.exit.i460 ], [ %indvar.next, %_Z6assignI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EES5_d13ExpressionTagI10BinaryNodeI10OpMultiply6ScalarIdESD_I5OpAdd9ReferenceI5FieldIS5_dSB_EESL_EEE8OpAssignERKSJ_IT_T0_T1_ESV_RKSJ_IT2_T3_T4_ERKT5_.exit ]		; <i32> [#uses=4]
-	call fastcc void @_ZN9CenteringILi3EEC1ERKS0_i(%"struct.Centering<3>"* %47, %"struct.Centering<3>"* %48, i32 %i.0.reg2mem.0) nounwind
-	call void @llvm.memcpy.i32(i8* %50, i8* %51, i32 24, i32 4) nounwind
-	call fastcc void @_ZN9CenteringILi3EEC1ERKS0_i(%"struct.Centering<3>"* %52, %"struct.Centering<3>"* %48, i32 %i.0.reg2mem.0) nounwind
-	call void @llvm.memcpy.i32(i8* %54, i8* %51, i32 24, i32 4) nounwind
-	br i1 false, label %bb.i940, label %bb4.i943
-
-bb.i940:		; preds = %bb15
-	br label %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit944
-
-bb4.i943:		; preds = %bb15
-	br label %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit944
-
-_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit944:		; preds = %bb4.i943, %bb.i940
-	call fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EEEC1Id10MultiPatchIS7_SA_EEERKS_IS5_T_T0_ERK8IntervalILi3EE(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"* null, %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* null, %"struct.Interval<3>"* null) nounwind
-	call fastcc void @_ZN9CenteringILi3EEC1ERKS0_i(%"struct.Centering<3>"* %55, %"struct.Centering<3>"* null, i32 %i.0.reg2mem.0) nounwind
-	call void @llvm.memcpy.i32(i8* %61, i8* %38, i32 24, i32 4) nounwind
-	%69 = load %"struct.Loc<3>"** null, align 4		; <%"struct.Loc<3>"*> [#uses=1]
-	%70 = ptrtoint %"struct.Loc<3>"* %69 to i32		; <i32> [#uses=1]
-	%.off.i911 = sub i32 0, %70		; <i32> [#uses=1]
-	%71 = icmp ult i32 %.off.i911, 12		; <i1> [#uses=1]
-	%72 = sub i32 0, 0		; <i32> [#uses=2]
-	%73 = load i32* %56, align 4		; <i32> [#uses=1]
-	%74 = add i32 %73, 0		; <i32> [#uses=1]
-	%75 = sub i32 %74, %72		; <i32> [#uses=1]
-	%76 = add i32 %75, 0		; <i32> [#uses=1]
-	%77 = load i32* null, align 8		; <i32> [#uses=2]
-	%78 = load i32* null, align 4		; <i32> [#uses=1]
-	%79 = sub i32 %77, %78		; <i32> [#uses=1]
-	%80 = load i32* %57, align 4		; <i32> [#uses=1]
-	%81 = load i32* null, align 4		; <i32> [#uses=1]
-	br i1 %71, label %bb.i912, label %bb4.i915
-
-bb.i912:		; preds = %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit944
-	br label %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit916
-
-bb4.i915:		; preds = %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit944
-	%82 = sub i32 %77, %79		; <i32> [#uses=1]
-	%83 = add i32 %82, %80		; <i32> [#uses=1]
-	%84 = add i32 %83, %81		; <i32> [#uses=1]
-	%85 = load i32* %58, align 8		; <i32> [#uses=2]
-	%86 = load i32* null, align 8		; <i32> [#uses=1]
-	%87 = sub i32 %85, %86		; <i32> [#uses=2]
-	%88 = load i32* %59, align 4		; <i32> [#uses=1]
-	%89 = load i32* null, align 4		; <i32> [#uses=1]
-	%90 = sub i32 %85, %87		; <i32> [#uses=1]
-	%91 = add i32 %90, %88		; <i32> [#uses=1]
-	%92 = add i32 %91, %89		; <i32> [#uses=1]
-	br label %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit916
-
-_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit916:		; preds = %bb4.i915, %bb.i912
-	%.0978.0.0.1.0.0.0.0.1.0 = phi i32 [ %84, %bb4.i915 ], [ 0, %bb.i912 ]		; <i32> [#uses=0]
-	%.0978.0.0.2.0.0.0.0.0.0 = phi i32 [ %87, %bb4.i915 ], [ 0, %bb.i912 ]		; <i32> [#uses=1]
-	%.0978.0.0.2.0.0.0.0.1.0 = phi i32 [ %92, %bb4.i915 ], [ 0, %bb.i912 ]		; <i32> [#uses=0]
-	store i32 %72, i32* null, align 8
-	store i32 %76, i32* null, align 4
-	store i32 %.0978.0.0.2.0.0.0.0.0.0, i32* null, align 8
-	call fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EEEC1Id10MultiPatchIS7_SA_EEERKS_IS5_T_T0_ERK8IntervalILi3EE(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"* null, %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* null, %"struct.Interval<3>"* null) nounwind
-	%93 = load i32* null, align 8		; <i32> [#uses=1]
-	%94 = icmp sgt i32 %93, 0		; <i1> [#uses=1]
-	br i1 %94, label %bb1.i, label %_Z6assignI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EES5_d13ExpressionTagI10BinaryNodeI10OpMultiply6ScalarIdESD_I5OpAdd9ReferenceI5FieldIS5_dSB_EESL_EEE8OpAssignERKSJ_IT_T0_T1_ESV_RKSJ_IT2_T3_T4_ERKT5_.exit
-
-bb1.i:		; preds = %bb3.i23.i.i, %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit916
-	call fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EEED1Ev(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"* %63) nounwind
-	call fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EEED1Ev(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"* %62) nounwind
-	br label %bb.i17.i14.i
-
-bb.i17.i14.i:		; preds = %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEE14physicalDomainEv.exit26.i.i.i.i, %bb1.i
-	%i.0.02.rec.i.i.i = phi i32 [ %.rec.i.i.i641, %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEE14physicalDomainEv.exit26.i.i.i.i ], [ 0, %bb1.i ]		; <i32> [#uses=1]
-	%95 = load double* %64, align 8		; <double> [#uses=1]
-	store double %95, double* null, align 8
-	call fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEEC1Id14MultiPatchViewI7GridTagS6_I5BrickELi3EEEERKS_IS5_T_T0_ERK5INodeILi3EE(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >"* %65, %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatchView<GridTag, Remote<Brick>, 3> >"* null, %"struct.INode<3>"* null) nounwind
-	%96 = load %"struct.Loc<3>"** null, align 4		; <%"struct.Loc<3>"*> [#uses=1]
-	%97 = ptrtoint %"struct.Loc<3>"* %96 to i32		; <i32> [#uses=1]
-	%.off.i21.i.i.i.i = sub i32 0, %97		; <i32> [#uses=1]
-	%98 = icmp ult i32 %.off.i21.i.i.i.i, 12		; <i1> [#uses=1]
-	br i1 %98, label %bb.i22.i.i.i.i, label %bb3.i25.i.i.i.i
-
-bb.i22.i.i.i.i:		; preds = %bb.i17.i14.i
-	%99 = load i32* null, align 4		; <i32> [#uses=1]
-	%100 = icmp eq i32 %99, 1		; <i1> [#uses=1]
-	%101 = load i32* null, align 4		; <i32> [#uses=1]
-	br i1 %100, label %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEE14physicalDomainEv.exit26.i.i.i.i, label %bb6.i.i24.i.i.i.i
-
-bb6.i.i24.i.i.i.i:		; preds = %bb.i22.i.i.i.i
-	br label %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEE14physicalDomainEv.exit26.i.i.i.i
-
-bb3.i25.i.i.i.i:		; preds = %bb.i17.i14.i
-	%102 = load i32* %66, align 8		; <i32> [#uses=2]
-	%103 = load i32* %67, align 4		; <i32> [#uses=1]
-	%104 = load i32* %68, align 4		; <i32> [#uses=1]
-	br label %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEE14physicalDomainEv.exit26.i.i.i.i
-
-_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEE14physicalDomainEv.exit26.i.i.i.i:		; preds = %bb3.i25.i.i.i.i, %bb6.i.i24.i.i.i.i, %bb.i22.i.i.i.i
-	%.rle1279 = phi i32 [ 0, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ 0, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%.rle1277 = phi i32 [ %102, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ 0, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%.rle1275 = phi i32 [ 0, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ 0, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%.01034.0.0.2.0.0.0.0.1.0 = phi i32 [ %104, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ 0, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%.01034.0.0.2.0.0.0.0.0.0 = phi i32 [ 0, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ 0, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%.01034.0.0.1.0.0.0.0.1.0 = phi i32 [ %103, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ 0, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%.01034.0.0.1.0.0.0.0.0.0 = phi i32 [ %102, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ 0, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%.01034.0.0.0.0.0.0.0.1.0 = phi i32 [ 0, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ %101, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%.01034.0.0.0.0.0.0.0.0.0 = phi i32 [ 0, %bb3.i25.i.i.i.i ], [ 0, %bb6.i.i24.i.i.i.i ], [ 0, %bb.i22.i.i.i.i ]		; <i32> [#uses=1]
-	%105 = sub i32 %.01034.0.0.0.0.0.0.0.0.0, %.rle1275		; <i32> [#uses=0]
-	%106 = sub i32 %.01034.0.0.1.0.0.0.0.0.0, %.rle1277		; <i32> [#uses=0]
-	%107 = sub i32 %.01034.0.0.2.0.0.0.0.0.0, %.rle1279		; <i32> [#uses=0]
-	store i32 %.01034.0.0.0.0.0.0.0.1.0, i32* null, align 4
-	store i32 %.01034.0.0.1.0.0.0.0.1.0, i32* null, align 4
-	store i32 %.01034.0.0.2.0.0.0.0.1.0, i32* null, align 4
-	call fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEED1Ev(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,Remote<BrickView> >"* %65) nounwind
-	%.rec.i.i.i641 = add i32 %i.0.02.rec.i.i.i, 1		; <i32> [#uses=1]
-	%108 = load %"struct.INode<3>"** null, align 4		; <%"struct.INode<3>"*> [#uses=1]
-	%109 = icmp eq %"struct.INode<3>"* null, %108		; <i1> [#uses=1]
-	br i1 %109, label %bb3.i23.i.i, label %bb.i17.i14.i
-
-bb3.i23.i.i:		; preds = %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd6RemoteI9BrickViewEE14physicalDomainEv.exit26.i.i.i.i
-	br label %bb1.i
-
-_Z6assignI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EES5_d13ExpressionTagI10BinaryNodeI10OpMultiply6ScalarIdESD_I5OpAdd9ReferenceI5FieldIS5_dSB_EESL_EEE8OpAssignERKSJ_IT_T0_T1_ESV_RKSJ_IT2_T3_T4_ERKT5_.exit:		; preds = %_ZNK11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEE11totalDomainEv.exit916
-	%indvar.next = add i32 %i.0.reg2mem.0, 1		; <i32> [#uses=2]
-	%exitcond = icmp eq i32 %indvar.next, 3		; <i1> [#uses=1]
-	br i1 %exitcond, label %bb18, label %bb15
-
-bb18:		; preds = %_Z6assignI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd14MultiPatchViewI7GridTag6RemoteI5BrickELi3EES5_d13ExpressionTagI10BinaryNodeI10OpMultiply6ScalarIdESD_I5OpAdd9ReferenceI5FieldIS5_dSB_EESL_EEE8OpAssignERKSJ_IT_T0_T1_ESV_RKSJ_IT2_T3_T4_ERKT5_.exit
-	call fastcc void @_ZN11FieldEngineI22UniformRectilinearMeshI10MeshTraitsILi3Ed21UniformRectilinearTag12CartesianTagLi3EEEd10MultiPatchI7GridTag6RemoteI5BrickEEEC1ERKSC_(%"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* null, %"struct.FieldEngine<UniformRectilinearMesh<MeshTraits<3, double, UniformRectilinearTag, CartesianTag, 3> >,double,MultiPatch<GridTag, Remote<Brick> > >"* %42) nounwind
-	unreachable
-}
diff --git a/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
new file mode 100644
index 0000000..a737591
--- /dev/null
+++ b/test/CodeGen/ARM/2009-12-02-vtrn-undef.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mcpu=cortex-a8 < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
+target triple = "armv7-apple-darwin10"
+
+%struct.int16x8_t = type { <8 x i16> }
+%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }
+
+define arm_apcscc void @t(%struct.int16x8x2_t* noalias nocapture sret %agg.result, <8 x i16> %tmp.0, %struct.int16x8x2_t* nocapture %dst) nounwind {
+entry:
+;CHECK: vtrn.16
+  %0 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
+  %1 = shufflevector <8 x i16> %tmp.0, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
+  %agg.result1218.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 0, i32 0 ; <<8 x i16>*>
+  store <8 x i16> %0, <8 x i16>* %agg.result1218.0, align 16
+  %agg.result12.1.0 = getelementptr %struct.int16x8x2_t* %agg.result, i32 0, i32 0, i32 1, i32 0 ; <<8 x i16>*>
+  store <8 x i16> %1, <8 x i16>* %agg.result12.1.0, align 16
+  ret void
+}
diff --git a/test/CodeGen/ARM/inlineasm3.ll b/test/CodeGen/ARM/inlineasm3.ll
new file mode 100644
index 0000000..5ebf2fb
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm3.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
+
+%struct.int32x4_t = type { <4 x i32> }
+
+define arm_apcscc void @t() nounwind {
+entry:
+; CHECK: vmov.I64 q15, #0
+; CHECK: vmov.32 d30[0], r0
+; CHECK: vmov q0, q15
+  %tmp = alloca %struct.int32x4_t, align 16
+  call void asm sideeffect "vmov.I64 q15, #0\0Avmov.32 d30[0], $1\0Avmov ${0:q}, q15\0A", "=*w,r,~{d31},~{d30}"(%struct.int32x4_t* %tmp, i32 8192) nounwind
+  ret void
+}
diff --git a/test/CodeGen/CellSPU/and_ops.ll b/test/CodeGen/CellSPU/and_ops.ll
index 139e97b..716de2e 100644
--- a/test/CodeGen/CellSPU/and_ops.ll
+++ b/test/CodeGen/CellSPU/and_ops.ll
@@ -1,9 +1,9 @@
 ; RUN: llc < %s -march=cellspu > %t1.s
-; RUN: grep and    %t1.s | count 234
+; RUN: grep and    %t1.s | count 230
 ; RUN: grep andc   %t1.s | count 85
-; RUN: grep andi   %t1.s | count 37
-; RUN: grep andhi  %t1.s | count 30
-; RUN: grep andbi  %t1.s | count 4
+; RUN: grep andi   %t1.s | count 39
+; RUN: grep andhi  %t1.s | count 28
+; RUN: grep andbi  %t1.s | count 2
 
 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128"
 target triple = "spu"
diff --git a/test/CodeGen/CellSPU/call_indirect.ll b/test/CodeGen/CellSPU/call_indirect.ll
index 639c794..f25d6b5 100644
--- a/test/CodeGen/CellSPU/call_indirect.ll
+++ b/test/CodeGen/CellSPU/call_indirect.ll
@@ -2,17 +2,17 @@
 ; RUN: llc < %s -march=cellspu -mattr=large_mem > %t2.s
 ; RUN: grep bisl    %t1.s | count 7
 ; RUN: grep ila     %t1.s | count 1
-; RUN: grep rotqby  %t1.s | count 6
+; RUN: grep rotqby  %t1.s | count 5
 ; RUN: grep lqa     %t1.s | count 1
 ; RUN: grep lqd     %t1.s | count 12
 ; RUN: grep dispatch_tab %t1.s | count 5
 ; RUN: grep bisl    %t2.s | count 7
 ; RUN: grep ilhu    %t2.s | count 2
 ; RUN: grep iohl    %t2.s | count 2
-; RUN: grep rotqby  %t2.s | count 6
+; RUN: grep rotqby  %t2.s | count 5
 ; RUN: grep lqd     %t2.s | count 13
 ; RUN: grep ilhu    %t2.s | count 2
-; RUN: grep ai      %t2.s | count 9
+; RUN: grep ai      %t2.s | count 8
 ; RUN: grep dispatch_tab %t2.s | count 6
 
 ; ModuleID = 'call_indirect.bc'
diff --git a/test/CodeGen/MSP430/bit.ll b/test/CodeGen/MSP430/bit.ll
new file mode 100644
index 0000000..2c78366
--- /dev/null
+++ b/test/CodeGen/MSP430/bit.ll
@@ -0,0 +1,166 @@
+; RUN: llvm-as < %s | llc -march=msp430 | FileCheck %s
+target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32"
+target triple = "msp430-generic-generic"
+
+@foo8 = external global i8;
+@bar8 = external global i8;
+
+define i8 @bitbrr(i8 %a, i8 %b) nounwind {
+	%t1 = and i8 %a, %b
+	%t2 = icmp ne i8 %t1, 0
+	%t3 = zext i1 %t2 to i8
+	ret i8 %t3
+}
+; CHECK: bitbrr:
+; CHECK: bit.b	r14, r15
+
+define i8 @bitbri(i8 %a) nounwind {
+	%t1 = and i8 %a, 15
+	%t2 = icmp ne i8 %t1, 0
+	%t3 = zext i1 %t2 to i8
+	ret i8 %t3
+}
+; CHECK: bitbri:
+; CHECK: bit.b	#15, r15
+
+define i8 @bitbir(i8 %a) nounwind {
+	%t1 = and i8 15, %a
+	%t2 = icmp ne i8 %t1, 0
+	%t3 = zext i1 %t2 to i8
+	ret i8 %t3
+}
+; CHECK: bitbir:
+; CHECK: bit.b	#15, r15
+
+define i8 @bitbmi() nounwind {
+	%t1 = load i8* @foo8
+	%t2 = and i8 %t1, 15
+	%t3 = icmp ne i8 %t2, 0
+	%t4 = zext i1 %t3 to i8
+	ret i8 %t4
+}
+; CHECK: bitbmi:
+; CHECK: bit.b	#15, &foo8
+
+define i8 @bitbim() nounwind {
+	%t1 = load i8* @foo8
+	%t2 = and i8 15, %t1
+	%t3 = icmp ne i8 %t2, 0
+	%t4 = zext i1 %t3 to i8
+	ret i8 %t4
+}
+; CHECK: bitbim:
+; CHECK: bit.b	#15, &foo8
+
+define i8 @bitbrm(i8 %a) nounwind {
+	%t1 = load i8* @foo8
+	%t2 = and i8 %a, %t1
+	%t3 = icmp ne i8 %t2, 0
+	%t4 = zext i1 %t3 to i8
+	ret i8 %t4
+}
+; CHECK: bitbrm:
+; CHECK: bit.b	&foo8, r15
+
+define i8 @bitbmr(i8 %a) nounwind {
+	%t1 = load i8* @foo8
+	%t2 = and i8 %t1, %a
+	%t3 = icmp ne i8 %t2, 0
+	%t4 = zext i1 %t3 to i8
+	ret i8 %t4
+}
+; CHECK: bitbmr:
+; CHECK: bit.b	r15, &foo8
+
+define i8 @bitbmm() nounwind {
+	%t1 = load i8* @foo8
+	%t2 = load i8* @bar8
+	%t3 = and i8 %t1, %t2
+	%t4 = icmp ne i8 %t3, 0
+	%t5 = zext i1 %t4 to i8
+	ret i8 %t5
+}
+; CHECK: bitbmm:
+; CHECK: bit.b	&bar8, &foo8
+
+@foo16 = external global i16;
+@bar16 = external global i16;
+
+define i16 @bitwrr(i16 %a, i16 %b) nounwind {
+	%t1 = and i16 %a, %b
+	%t2 = icmp ne i16 %t1, 0
+	%t3 = zext i1 %t2 to i16
+	ret i16 %t3
+}
+; CHECK: bitwrr:
+; CHECK: bit.w	r14, r15
+
+define i16 @bitwri(i16 %a) nounwind {
+	%t1 = and i16 %a, 4080
+	%t2 = icmp ne i16 %t1, 0
+	%t3 = zext i1 %t2 to i16
+	ret i16 %t3
+}
+; CHECK: bitwri:
+; CHECK: bit.w	#4080, r15
+
+define i16 @bitwir(i16 %a) nounwind {
+	%t1 = and i16 4080, %a
+	%t2 = icmp ne i16 %t1, 0
+	%t3 = zext i1 %t2 to i16
+	ret i16 %t3
+}
+; CHECK: bitwir:
+; CHECK: bit.w	#4080, r15
+
+define i16 @bitwmi() nounwind {
+	%t1 = load i16* @foo16
+	%t2 = and i16 %t1, 4080
+	%t3 = icmp ne i16 %t2, 0
+	%t4 = zext i1 %t3 to i16
+	ret i16 %t4
+}
+; CHECK: bitwmi:
+; CHECK: bit.w	#4080, &foo16
+
+define i16 @bitwim() nounwind {
+	%t1 = load i16* @foo16
+	%t2 = and i16 4080, %t1
+	%t3 = icmp ne i16 %t2, 0
+	%t4 = zext i1 %t3 to i16
+	ret i16 %t4
+}
+; CHECK: bitwim:
+; CHECK: bit.w	#4080, &foo16
+
+define i16 @bitwrm(i16 %a) nounwind {
+	%t1 = load i16* @foo16
+	%t2 = and i16 %a, %t1
+	%t3 = icmp ne i16 %t2, 0
+	%t4 = zext i1 %t3 to i16
+	ret i16 %t4
+}
+; CHECK: bitwrm:
+; CHECK: bit.w	&foo16, r15
+
+define i16 @bitwmr(i16 %a) nounwind {
+	%t1 = load i16* @foo16
+	%t2 = and i16 %t1, %a
+	%t3 = icmp ne i16 %t2, 0
+	%t4 = zext i1 %t3 to i16
+	ret i16 %t4
+}
+; CHECK: bitwmr:
+; CHECK: bit.w	r15, &foo16
+
+define i16 @bitwmm() nounwind {
+	%t1 = load i16* @foo16
+	%t2 = load i16* @bar16
+	%t3 = and i16 %t1, %t2
+	%t4 = icmp ne i16 %t3, 0
+	%t5 = zext i1 %t4 to i16
+	ret i16 %t5
+}
+; CHECK: bitwmm:
+; CHECK: bit.w	&bar16, &foo16
+
diff --git a/test/CodeGen/MSP430/setcc.ll b/test/CodeGen/MSP430/setcc.ll
new file mode 100644
index 0000000..971d1b5
--- /dev/null
+++ b/test/CodeGen/MSP430/setcc.ll
@@ -0,0 +1,116 @@
+; RUN: llc -march=msp430 < %s | FileCheck %s
+target datalayout = "e-p:16:16:16-i1:8:8-i8:8:8-i16:16:16-i32:16:32"
+target triple = "msp430-generic-generic"
+
+define i16 @sccweqand(i16 %a, i16 %b) nounwind {
+	%t1 = and i16 %a, %b
+	%t2 = icmp eq i16 %t1, 0
+	%t3 = zext i1 %t2 to i16
+	ret i16 %t3
+}
+; CHECK: sccweqand:
+; CHECK:	bit.w	r14, r15
+; CHECK-NEXT:	mov.w	r2, r15
+; CHECK-NEXT:	and.w	#1, r15
+; CHECK-NEXT:	xor.w	#1, r15
+
+define i16 @sccwneand(i16 %a, i16 %b) nounwind {
+	%t1 = and i16 %a, %b
+	%t2 = icmp ne i16 %t1, 0
+	%t3 = zext i1 %t2 to i16
+	ret i16 %t3
+}
+; CHECK: sccwneand:
+; CHECK: 	bit.w	r14, r15
+; CHECK-NEXT:	mov.w	r2, r15
+; CHECK-NEXT:	and.w	#1, r15
+
+define i16 @sccwne(i16 %a, i16 %b) nounwind {
+	%t1 = icmp ne i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+; CHECK:sccwne:
+; CHECK:	cmp.w	r15, r14
+; CHECK-NEXT:	mov.w	r2, r15
+; CHECK-NEXT:	rra.w	r15
+; CHECK-NEXT:	and.w	#1, r15
+
+define i16 @sccweq(i16 %a, i16 %b) nounwind {
+	%t1 = icmp eq i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+; CHECK:sccweq:
+; CHECK:	cmp.w	r15, r14
+; CHECK-NEXT:	mov.w	r2, r15
+; CHECK-NEXT:	rra.w	r15
+; CHECK-NEXT:	and.w	#1, r15
+; CHECK-NEXT:	xor.w	#1, r15
+
+define i16 @sccwugt(i16 %a, i16 %b) nounwind {
+	%t1 = icmp ugt i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+; CHECK:sccwugt:
+; CHECK:	cmp.w	r14, r15
+; CHECK-NEXT:	mov.w	r2, r15
+; CHECK-NEXT:	and.w	#1, r15
+; CHECK-NEXT:	xor.w	#1, r15
+
+define i16 @sccwuge(i16 %a, i16 %b) nounwind {
+	%t1 = icmp uge i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+; CHECK:sccwuge:
+; CHECK:	cmp.w	r15, r14
+; CHECK-NEXT:	mov.w	r2, r15
+; CHECK-NEXT:	and.w	#1, r15
+
+define i16 @sccwult(i16 %a, i16 %b) nounwind {
+	%t1 = icmp ult i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+; CHECK:sccwult:
+; CHECK:	cmp.w	r15, r14
+; CHECK-NEXT:	mov.w	r2, r15
+; CHECK-NEXT:	and.w	#1, r15
+; CHECK-NEXT:	xor.w	#1, r15
+
+define i16 @sccwule(i16 %a, i16 %b) nounwind {
+	%t1 = icmp ule i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+; CHECK:sccwule:
+; CHECK:	cmp.w	r14, r15
+; CHECK-NEXT:	mov.w	r2, r15
+; CHECK-NEXT:	and.w	#1, r15
+
+define i16 @sccwsgt(i16 %a, i16 %b) nounwind {
+	%t1 = icmp sgt i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+
+define i16 @sccwsge(i16 %a, i16 %b) nounwind {
+	%t1 = icmp sge i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+
+define i16 @sccwslt(i16 %a, i16 %b) nounwind {
+	%t1 = icmp slt i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+
+define i16 @sccwsle(i16 %a, i16 %b) nounwind {
+	%t1 = icmp sle i16 %a, %b
+	%t2 = zext i1 %t1 to i16
+	ret i16 %t2
+}
+
diff --git a/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
new file mode 100644
index 0000000..79ad0a9
--- /dev/null
+++ b/test/CodeGen/Thumb2/2009-12-01-LoopIVUsers.ll
@@ -0,0 +1,128 @@
+; RUN: opt < %s -std-compile-opts | \
+; RUN:   llc -mtriple=thumbv7-apple-darwin10 -mattr=+neon | FileCheck %s
+
+define arm_apcscc void @fred(i32 %three_by_three, i8* %in, double %dt1, i32 %x_size, i32 %y_size, i8* %bp) nounwind {
+entry:
+; -- The loop following the load should only use a single add-literation
+;    instruction.
+; CHECK: ldr.64
+; CHECK: adds r{{[0-9]+}}, #1
+; CHECK-NOT: adds r{{[0-9]+}}, #1
+; CHECK: subsections_via_symbols
+
+
+  %three_by_three_addr = alloca i32               ; <i32*> [#uses=2]
+  %in_addr = alloca i8*                           ; <i8**> [#uses=2]
+  %dt_addr = alloca float                         ; <float*> [#uses=4]
+  %x_size_addr = alloca i32                       ; <i32*> [#uses=2]
+  %y_size_addr = alloca i32                       ; <i32*> [#uses=1]
+  %bp_addr = alloca i8*                           ; <i8**> [#uses=1]
+  %tmp_image = alloca i8*                         ; <i8**> [#uses=0]
+  %out = alloca i8*                               ; <i8**> [#uses=1]
+  %cp = alloca i8*                                ; <i8**> [#uses=0]
+  %dpt = alloca i8*                               ; <i8**> [#uses=4]
+  %dp = alloca i8*                                ; <i8**> [#uses=2]
+  %ip = alloca i8*                                ; <i8**> [#uses=0]
+  %centre = alloca i32                            ; <i32*> [#uses=0]
+  %tmp = alloca i32                               ; <i32*> [#uses=0]
+  %brightness = alloca i32                        ; <i32*> [#uses=0]
+  %area = alloca i32                              ; <i32*> [#uses=0]
+  %y = alloca i32                                 ; <i32*> [#uses=0]
+  %x = alloca i32                                 ; <i32*> [#uses=2]
+  %j = alloca i32                                 ; <i32*> [#uses=6]
+  %i = alloca i32                                 ; <i32*> [#uses=1]
+  %mask_size = alloca i32                         ; <i32*> [#uses=5]
+  %increment = alloca i32                         ; <i32*> [#uses=1]
+  %n_max = alloca i32                             ; <i32*> [#uses=4]
+  %temp = alloca float                            ; <float*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  store i32 %three_by_three, i32* %three_by_three_addr
+  store i8* %in, i8** %in_addr
+  %dt = fptrunc double %dt1 to float              ; <float> [#uses=1]
+  store float %dt, float* %dt_addr
+  store i32 %x_size, i32* %x_size_addr
+  store i32 %y_size, i32* %y_size_addr
+  store i8* %bp, i8** %bp_addr
+  %0 = load i8** %in_addr, align 4                ; <i8*> [#uses=1]
+  store i8* %0, i8** %out, align 4
+  %1 = call arm_apcscc  i32 (...)* @foo() nounwind ; <i32> [#uses=1]
+  store i32 %1, i32* %i, align 4
+  %2 = load i32* %three_by_three_addr, align 4    ; <i32> [#uses=1]
+  %3 = icmp eq i32 %2, 0                          ; <i1> [#uses=1]
+  br i1 %3, label %bb, label %bb2
+
+bb:                                               ; preds = %entry
+  %4 = load float* %dt_addr, align 4              ; <float> [#uses=1]
+  %5 = fpext float %4 to double                   ; <double> [#uses=1]
+  %6 = fmul double %5, 1.500000e+00               ; <double> [#uses=1]
+  %7 = fptosi double %6 to i32                    ; <i32> [#uses=1]
+  %8 = add nsw i32 %7, 1                          ; <i32> [#uses=1]
+  store i32 %8, i32* %mask_size, align 4
+  br label %bb3
+
+bb2:                                              ; preds = %entry
+  store i32 1, i32* %mask_size, align 4
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb
+  %9 = load i32* %mask_size, align 4              ; <i32> [#uses=1]
+  %10 = mul i32 %9, 2                             ; <i32> [#uses=1]
+  %11 = add nsw i32 %10, 1                        ; <i32> [#uses=1]
+  store i32 %11, i32* %n_max, align 4
+  %12 = load i32* %x_size_addr, align 4           ; <i32> [#uses=1]
+  %13 = load i32* %n_max, align 4                 ; <i32> [#uses=1]
+  %14 = sub i32 %12, %13                          ; <i32> [#uses=1]
+  store i32 %14, i32* %increment, align 4
+  %15 = load i32* %n_max, align 4                 ; <i32> [#uses=1]
+  %16 = load i32* %n_max, align 4                 ; <i32> [#uses=1]
+  %17 = mul i32 %15, %16                          ; <i32> [#uses=1]
+  %18 = call arm_apcscc  noalias i8* @malloc(i32 %17) nounwind ; <i8*> [#uses=1]
+  store i8* %18, i8** %dp, align 4
+  %19 = load i8** %dp, align 4                    ; <i8*> [#uses=1]
+  store i8* %19, i8** %dpt, align 4
+  %20 = load float* %dt_addr, align 4             ; <float> [#uses=1]
+  %21 = load float* %dt_addr, align 4             ; <float> [#uses=1]
+  %22 = fmul float %20, %21                       ; <float> [#uses=1]
+  %23 = fsub float -0.000000e+00, %22             ; <float> [#uses=1]
+  store float %23, float* %temp, align 4
+  %24 = load i32* %mask_size, align 4             ; <i32> [#uses=1]
+  %25 = sub i32 0, %24                            ; <i32> [#uses=1]
+  store i32 %25, i32* %j, align 4
+  br label %bb5
+
+bb4:                                              ; preds = %bb5
+  %26 = load i32* %j, align 4                     ; <i32> [#uses=1]
+  %27 = load i32* %j, align 4                     ; <i32> [#uses=1]
+  %28 = mul i32 %26, %27                          ; <i32> [#uses=1]
+  %29 = sitofp i32 %28 to double                  ; <double> [#uses=1]
+  %30 = fmul double %29, 1.234000e+00             ; <double> [#uses=1]
+  %31 = fptosi double %30 to i32                  ; <i32> [#uses=1]
+  store i32 %31, i32* %x, align 4
+  %32 = load i32* %x, align 4                     ; <i32> [#uses=1]
+  %33 = trunc i32 %32 to i8                       ; <i8> [#uses=1]
+  %34 = load i8** %dpt, align 4                   ; <i8*> [#uses=1]
+  store i8 %33, i8* %34, align 1
+  %35 = load i8** %dpt, align 4                   ; <i8*> [#uses=1]
+  %36 = getelementptr inbounds i8* %35, i64 1     ; <i8*> [#uses=1]
+  store i8* %36, i8** %dpt, align 4
+  %37 = load i32* %j, align 4                     ; <i32> [#uses=1]
+  %38 = add nsw i32 %37, 1                        ; <i32> [#uses=1]
+  store i32 %38, i32* %j, align 4
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb3
+  %39 = load i32* %j, align 4                     ; <i32> [#uses=1]
+  %40 = load i32* %mask_size, align 4             ; <i32> [#uses=1]
+  %41 = icmp sle i32 %39, %40                     ; <i1> [#uses=1]
+  br i1 %41, label %bb4, label %bb6
+
+bb6:                                              ; preds = %bb5
+  br label %return
+
+return:                                           ; preds = %bb6
+  ret void
+}
+
+declare arm_apcscc i32 @foo(...)
+
+declare arm_apcscc noalias i8* @malloc(i32) nounwind
diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll
index 6f59961..da44cde 100644
--- a/test/CodeGen/Thumb2/large-stack.ll
+++ b/test/CodeGen/Thumb2/large-stack.ll
@@ -18,7 +18,7 @@ define void @test2() {
 define i32 @test3() {
 ; CHECK: test3:
 ; CHECK: sub.w sp, sp, #805306368
-; CHECK: sub sp, #24
+; CHECK: sub sp, #20
     %retval = alloca i32, align 4
     %tmp = alloca i32, align 4
     %a = alloca [805306369 x i8], align 16
diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll
index aef167b..2b08789 100644
--- a/test/CodeGen/Thumb2/thumb2-spill-q.ll
+++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll
@@ -11,7 +11,7 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
 
 define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
 ; CHECK: aaa:
-; CHECK: bic sp, sp, #15
+; CHECK: bic r4, r4, #15
 ; CHECK: vst1.64 {{.*}}sp, :128
 ; CHECK: vld1.64 {{.*}}sp, :128
 entry:
diff --git a/test/CodeGen/X86/2007-01-08-InstrSched.ll b/test/CodeGen/X86/2007-01-08-InstrSched.ll
index 81f0a1d..317ed0a 100644
--- a/test/CodeGen/X86/2007-01-08-InstrSched.ll
+++ b/test/CodeGen/X86/2007-01-08-InstrSched.ll
@@ -1,5 +1,5 @@
 ; PR1075
-; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -O3 | FileCheck %s
 
 define float @foo(float %x) nounwind {
     %tmp1 = fmul float %x, 3.000000e+00
diff --git a/test/CodeGen/X86/2009-09-10-SpillComments.ll b/test/CodeGen/X86/2009-09-10-SpillComments.ll
index 8c62f4d..1dd9990 100644
--- a/test/CodeGen/X86/2009-09-10-SpillComments.ll
+++ b/test/CodeGen/X86/2009-09-10-SpillComments.ll
@@ -1,6 +1,4 @@
-; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Spill"
-; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Folded Spill"
-; RUN: llc < %s -mtriple=x86_64-unknown-linux | grep "Reload"
+; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s
 
 	%struct..0anon = type { i32 }
 	%struct.rtvec_def = type { i32, [1 x %struct..0anon] }
@@ -12,6 +10,9 @@ declare %struct.rtx_def* @fixup_memory_subreg(%struct.rtx_def*, %struct.rtx_def*
 
 define %struct.rtx_def* @walk_fixup_memory_subreg(%struct.rtx_def* %x, %struct.rtx_def* %insn) {
 entry:
+; CHECK: Spill
+; CHECK: Folded Spill
+; CHECK: Reload
 	%tmp2 = icmp eq %struct.rtx_def* %x, null		; <i1> [#uses=1]
 	br i1 %tmp2, label %UnifiedReturnBlock, label %cond_next
 
@@ -32,7 +33,7 @@ cond_true13:		; preds = %cond_next
 	br i1 %tmp22, label %cond_true25, label %cond_next32
 
 cond_true25:		; preds = %cond_true13
-	%tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 )		; <%struct.rtx_def*> [#uses=1]
+	%tmp29 = tail call %struct.rtx_def* @fixup_memory_subreg( %struct.rtx_def* %x, %struct.rtx_def* %insn, i32 1 ) nounwind		; <%struct.rtx_def*> [#uses=1]
 	ret %struct.rtx_def* %tmp29
 
 cond_next32:		; preds = %cond_true13, %cond_next
@@ -58,7 +59,7 @@ cond_true47:		; preds = %bb
 	%tmp52 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0		; <%struct..0anon*> [#uses=1]
 	%tmp5354 = bitcast %struct..0anon* %tmp52 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
 	%tmp55 = load %struct.rtx_def** %tmp5354		; <%struct.rtx_def*> [#uses=1]
-	%tmp58 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn )		; <%struct.rtx_def*> [#uses=1]
+	%tmp58 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp55, %struct.rtx_def* %insn ) nounwind		; <%struct.rtx_def*> [#uses=1]
 	%tmp62 = getelementptr %struct.rtx_def* %x, i32 0, i32 3, i32 %i.01.0, i32 0		; <i32*> [#uses=1]
 	%tmp58.c = ptrtoint %struct.rtx_def* %tmp58 to i32		; <i32> [#uses=1]
 	store i32 %tmp58.c, i32* %tmp62
@@ -81,7 +82,7 @@ bb73:		; preds = %bb73, %bb105.preheader
 	%tmp92 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019		; <%struct..0anon*> [#uses=1]
 	%tmp9394 = bitcast %struct..0anon* %tmp92 to %struct.rtx_def**		; <%struct.rtx_def**> [#uses=1]
 	%tmp95 = load %struct.rtx_def** %tmp9394		; <%struct.rtx_def*> [#uses=1]
-	%tmp98 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn )		; <%struct.rtx_def*> [#uses=1]
+	%tmp98 = tail call  %struct.rtx_def* @walk_fixup_memory_subreg( %struct.rtx_def* %tmp95, %struct.rtx_def* %insn ) nounwind		; <%struct.rtx_def*> [#uses=1]
 	%tmp101 = getelementptr %struct.rtvec_def* %tmp81, i32 0, i32 1, i32 %j.019, i32 0		; <i32*> [#uses=1]
 	%tmp98.c = ptrtoint %struct.rtx_def* %tmp98 to i32		; <i32> [#uses=1]
 	store i32 %tmp98.c, i32* %tmp101
diff --git a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
index d372da3..8cb538b 100644
--- a/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
+++ b/test/CodeGen/X86/2009-09-19-SchedCustomLoweringBug.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -post-RA-scheduler=true | FileCheck %s
 
 ; PR4958
 
diff --git a/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
new file mode 100644
index 0000000..1e7a418
--- /dev/null
+++ b/test/CodeGen/X86/2009-12-01-EarlyClobberBug.ll
@@ -0,0 +1,38 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; pr5391
+
+define void @t() nounwind ssp {
+entry:
+; CHECK: t:
+; CHECK: movl %ecx, %eax
+; CHECK: %eax = foo (%eax, %ecx)
+  %b = alloca i32                                 ; <i32*> [#uses=2]
+  %a = alloca i32                                 ; <i32*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %1 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&{ax},%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1]
+  store i32 %asmtmp, i32* %a
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define void @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: movl %eax, %ecx
+; CHECK: %ecx = foo (%ecx, %eax)
+  %b = alloca i32                                 ; <i32*> [#uses=2]
+  %a = alloca i32                                 ; <i32*> [#uses=1]
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %1 = load i32* %b, align 4                      ; <i32> [#uses=1]
+  %asmtmp = call i32 asm "$0 = foo ($1, $2)", "=&r,%0,r,~{dirflag},~{fpsr},~{flags}"(i32 %0, i32 %1) nounwind ; <i32> [#uses=1]
+  store i32 %asmtmp, i32* %a
+  br label %return
+
+return:                                           ; preds = %entry
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
new file mode 100644
index 0000000..f7ba661
--- /dev/null
+++ b/test/CodeGen/X86/2009-12-11-TLSNoRedZone.ll
@@ -0,0 +1,63 @@
+; RUN: llc -relocation-model=pic < %s | FileCheck %s
+; PR5723
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+%0 = type { [1 x i64] }
+%link = type { %0* }
+%test = type { i32, %link }
+
+@data = global [2 x i64] zeroinitializer, align 64 ; <[2 x i64]*> [#uses=1]
+@ptr = linkonce thread_local global [1 x i64] [i64 ptrtoint ([2 x i64]* @data to i64)], align 64 ; <[1 x i64]*> [#uses=1]
+@link_ptr = linkonce thread_local global [1 x i64] zeroinitializer, align 64 ; <[1 x i64]*> [#uses=1]
+@_dm_my_pe = external global [1 x i64], align 64  ; <[1 x i64]*> [#uses=0]
+@_dm_pes_in_prog = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+@_dm_npes_div_mult = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+@_dm_npes_div_shift = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+@_dm_pe_addr_loc = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+@_dm_offset_addr_mask = external global [1 x i64], align 64 ; <[1 x i64]*> [#uses=0]
+
+define void @leaf() nounwind {
+; CHECK: leaf:
+; CHECK-NOT: -8(%rsp)
+; CHECK: leaq link_ptr@TLSGD
+; CHECK: call __tls_get_addr@PLT
+"file foo2.c, line 14, bb1":
+  %p = alloca %test*, align 8                     ; <%test**> [#uses=4]
+  br label %"file foo2.c, line 14, bb2"
+
+"file foo2.c, line 14, bb2":                      ; preds = %"file foo2.c, line 14, bb1"
+  br label %"@CFE_debug_label_0"
+
+"@CFE_debug_label_0":                             ; preds = %"file foo2.c, line 14, bb2"
+  %r = load %test** bitcast ([1 x i64]* @ptr to %test**), align 8 ; <%test*> [#uses=1]
+  store %test* %r, %test** %p, align 8
+  br label %"@CFE_debug_label_2"
+
+"@CFE_debug_label_2":                             ; preds = %"@CFE_debug_label_0"
+  %r1 = load %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8 ; <%link*> [#uses=1]
+  %r2 = load %test** %p, align 8                  ; <%test*> [#uses=1]
+  %r3 = ptrtoint %test* %r2 to i64                ; <i64> [#uses=1]
+  %r4 = inttoptr i64 %r3 to %link**               ; <%link**> [#uses=1]
+  %r5 = getelementptr %link** %r4, i64 1          ; <%link**> [#uses=1]
+  store %link* %r1, %link** %r5, align 8
+  br label %"@CFE_debug_label_3"
+
+"@CFE_debug_label_3":                             ; preds = %"@CFE_debug_label_2"
+  %r6 = load %test** %p, align 8                  ; <%test*> [#uses=1]
+  %r7 = ptrtoint %test* %r6 to i64                ; <i64> [#uses=1]
+  %r8 = inttoptr i64 %r7 to %link*                ; <%link*> [#uses=1]
+  %r9 = getelementptr %link* %r8, i64 1           ; <%link*> [#uses=1]
+  store %link* %r9, %link** bitcast ([1 x i64]* @link_ptr to %link**), align 8
+  br label %"@CFE_debug_label_4"
+
+"@CFE_debug_label_4":                             ; preds = %"@CFE_debug_label_3"
+  %r10 = load %test** %p, align 8                 ; <%test*> [#uses=1]
+  %r11 = ptrtoint %test* %r10 to i64              ; <i64> [#uses=1]
+  %r12 = inttoptr i64 %r11 to i32*                ; <i32*> [#uses=1]
+  store i32 1, i32* %r12, align 4
+  br label %"@CFE_debug_label_5"
+
+"@CFE_debug_label_5":                             ; preds = %"@CFE_debug_label_4"
+  ret void
+}
diff --git a/test/CodeGen/X86/2009-12-12-CoalescerBug.ll b/test/CodeGen/X86/2009-12-12-CoalescerBug.ll
new file mode 100644
index 0000000..4e8f5fd
--- /dev/null
+++ b/test/CodeGen/X86/2009-12-12-CoalescerBug.ll
@@ -0,0 +1,40 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
+
+define i32 @do_loop(i32* nocapture %sdp, i32* nocapture %ddp, i8* %mdp, i8* nocapture %cdp, i32 %w) nounwind readonly optsize ssp {
+entry:
+  br label %bb
+
+bb:                                               ; preds = %bb5, %entry
+  %mask.1.in = load i8* undef, align 1            ; <i8> [#uses=3]
+  %0 = icmp eq i8 %mask.1.in, 0                   ; <i1> [#uses=1]
+  br i1 %0, label %bb5, label %bb1
+
+bb1:                                              ; preds = %bb
+  br i1 undef, label %bb2, label %bb3
+
+bb2:                                              ; preds = %bb1
+; CHECK: %bb2
+; CHECK: movb %ch, %al
+  %1 = zext i8 %mask.1.in to i32                  ; <i32> [#uses=1]
+  %2 = zext i8 undef to i32                       ; <i32> [#uses=1]
+  %3 = mul i32 %2, %1                             ; <i32> [#uses=1]
+  %4 = add i32 %3, 1                              ; <i32> [#uses=1]
+  %5 = add i32 %4, 0                              ; <i32> [#uses=1]
+  %6 = lshr i32 %5, 8                             ; <i32> [#uses=1]
+  %retval12.i = trunc i32 %6 to i8                ; <i8> [#uses=1]
+  br label %bb3
+
+bb3:                                              ; preds = %bb2, %bb1
+  %mask.0.in = phi i8 [ %retval12.i, %bb2 ], [ %mask.1.in, %bb1 ] ; <i8> [#uses=1]
+  %7 = icmp eq i8 %mask.0.in, 0                   ; <i1> [#uses=1]
+  br i1 %7, label %bb5, label %bb4
+
+bb4:                                              ; preds = %bb3
+  br label %bb5
+
+bb5:                                              ; preds = %bb4, %bb3, %bb
+  br i1 undef, label %bb6, label %bb
+
+bb6:                                              ; preds = %bb5
+  ret i32 undef
+}
diff --git a/test/CodeGen/X86/3addr-16bit.ll b/test/CodeGen/X86/3addr-16bit.ll
new file mode 100644
index 0000000..c51247a
--- /dev/null
+++ b/test/CodeGen/X86/3addr-16bit.ll
@@ -0,0 +1,95 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -asm-verbose=false | FileCheck %s -check-prefix=64BIT
+; rdar://7329206
+
+; In 32-bit the partial register stall would degrade performance.
+
+define zeroext i16 @t1(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
+entry:
+; 32BIT:     t1:
+; 32BIT:     movw 20(%esp), %ax
+; 32BIT-NOT: movw %ax, %cx
+; 32BIT:     leal 1(%eax), %ecx
+
+; 64BIT:     t1:
+; 64BIT-NOT: movw %si, %ax
+; 64BIT:     leal 1(%rsi), %eax
+  %0 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
+  %1 = add i16 %k, 1                              ; <i16> [#uses=3]
+  br i1 %0, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  tail call void @foo(i16 zeroext %1) nounwind
+  ret i16 %1
+
+bb1:                                              ; preds = %entry
+  ret i16 %1
+}
+
+define zeroext i16 @t2(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
+entry:
+; 32BIT:     t2:
+; 32BIT:     movw 20(%esp), %ax
+; 32BIT-NOT: movw %ax, %cx
+; 32BIT:     leal -1(%eax), %ecx
+
+; 64BIT:     t2:
+; 64BIT-NOT: movw %si, %ax
+; 64BIT:     leal -1(%rsi), %eax
+  %0 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
+  %1 = add i16 %k, -1                             ; <i16> [#uses=3]
+  br i1 %0, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  tail call void @foo(i16 zeroext %1) nounwind
+  ret i16 %1
+
+bb1:                                              ; preds = %entry
+  ret i16 %1
+}
+
+declare void @foo(i16 zeroext)
+
+define zeroext i16 @t3(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
+entry:
+; 32BIT:     t3:
+; 32BIT:     movw 20(%esp), %ax
+; 32BIT-NOT: movw %ax, %cx
+; 32BIT:     leal 2(%eax), %ecx
+
+; 64BIT:     t3:
+; 64BIT-NOT: movw %si, %ax
+; 64BIT:     leal 2(%rsi), %eax
+  %0 = add i16 %k, 2                              ; <i16> [#uses=3]
+  %1 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  tail call void @foo(i16 zeroext %0) nounwind
+  ret i16 %0
+
+bb1:                                              ; preds = %entry
+  ret i16 %0
+}
+
+define zeroext i16 @t4(i16 zeroext %c, i16 zeroext %k) nounwind ssp {
+entry:
+; 32BIT:     t4:
+; 32BIT:     movw 16(%esp), %ax
+; 32BIT:     movw 20(%esp), %cx
+; 32BIT-NOT: movw %cx, %dx
+; 32BIT:     leal (%ecx,%eax), %edx
+
+; 64BIT:     t4:
+; 64BIT-NOT: movw %si, %ax
+; 64BIT:     leal (%rsi,%rdi), %eax
+  %0 = add i16 %k, %c                             ; <i16> [#uses=3]
+  %1 = icmp eq i16 %k, %c                         ; <i1> [#uses=1]
+  br i1 %1, label %bb, label %bb1
+
+bb:                                               ; preds = %entry
+  tail call void @foo(i16 zeroext %0) nounwind
+  ret i16 %0
+
+bb1:                                              ; preds = %entry
+  ret i16 %0
+}
diff --git a/test/CodeGen/X86/fastcc3struct.ll b/test/CodeGen/X86/fastcc3struct.ll
new file mode 100644
index 0000000..84f8ef6
--- /dev/null
+++ b/test/CodeGen/X86/fastcc3struct.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86 -o %t
+; RUN: grep "movl	.48, %ecx" %t
+; RUN: grep "movl	.24, %edx" %t
+; RUN: grep "movl	.12, %eax" %t
+
+%0 = type { i32, i32, i32 }
+
+define internal fastcc %0 @ReturnBigStruct() nounwind readnone {
+entry:
+  %0 = insertvalue %0 zeroinitializer, i32 12, 0
+  %1 = insertvalue %0 %0, i32 24, 1
+  %2 = insertvalue %0 %1, i32 48, 2
+  ret %0 %2
+}
+
diff --git a/test/CodeGen/X86/select-aggregate.ll b/test/CodeGen/X86/select-aggregate.ll
new file mode 100644
index 0000000..822e594
--- /dev/null
+++ b/test/CodeGen/X86/select-aggregate.ll
@@ -0,0 +1,15 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+; PR5757
+
+; CHECK: cmovne %rdi, %rsi
+; CHECK: movl (%rsi), %eax
+
+%0 = type { i64, i32 }
+
+define i32 @foo(%0* %p, %0* %q, i1 %r) nounwind {
+  %t0 = load %0* %p
+  %t1 = load %0* %q
+  %t4 = select i1 %r, %0 %t0, %0 %t1
+  %t5 = extractvalue %0 %t4, 1
+  ret i32 %t5
+}
diff --git a/test/CodeGen/X86/setcc.ll b/test/CodeGen/X86/setcc.ll
new file mode 100644
index 0000000..42ce4c1
--- /dev/null
+++ b/test/CodeGen/X86/setcc.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
+; XFAIL: *
+; rdar://7329206
+
+; Use sbb x, x to materialize carry bit in a GPR. The value is either
+; all 1's or all 0's.
+
+define zeroext i16 @t1(i16 zeroext %x) nounwind readnone ssp {
+entry:
+; CHECK: t1:
+; CHECK: seta %al
+; CHECK: movzbl %al, %eax
+; CHECK: shll $5, %eax
+  %0 = icmp ugt i16 %x, 26                        ; <i1> [#uses=1]
+  %iftmp.1.0 = select i1 %0, i16 32, i16 0        ; <i16> [#uses=1]
+  ret i16 %iftmp.1.0
+}
+
+define zeroext i16 @t2(i16 zeroext %x) nounwind readnone ssp {
+entry:
+; CHECK: t2:
+; CHECK: sbbl %eax, %eax
+; CHECK: andl $32, %eax
+  %0 = icmp ult i16 %x, 26                        ; <i1> [#uses=1]
+  %iftmp.0.0 = select i1 %0, i16 32, i16 0        ; <i16> [#uses=1]
+  ret i16 %iftmp.0.0
+}
+
+define i64 @t3(i64 %x) nounwind readnone ssp {
+entry:
+; CHECK: t3:
+; CHECK: sbbq %rax, %rax
+; CHECK: andq $64, %rax
+  %0 = icmp ult i64 %x, 18                        ; <i1> [#uses=1]
+  %iftmp.2.0 = select i1 %0, i64 64, i64 0        ; <i64> [#uses=1]
+  ret i64 %iftmp.2.0
+}
diff --git a/test/CodeGen/X86/sink-hoist.ll b/test/CodeGen/X86/sink-hoist.ll
index f8d542e..01d7373 100644
--- a/test/CodeGen/X86/sink-hoist.ll
+++ b/test/CodeGen/X86/sink-hoist.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+; RUN: llc < %s -march=x86-64 -asm-verbose=false -mtriple=x86_64-unknown-linux-gnu -post-RA-scheduler=true | FileCheck %s
 
 ; Currently, floating-point selects are lowered to CFG triangles.
 ; This means that one side of the select is always unconditionally
diff --git a/test/CodeGen/X86/splat-scalar-load.ll b/test/CodeGen/X86/splat-scalar-load.ll
new file mode 100644
index 0000000..32d3ab6
--- /dev/null
+++ b/test/CodeGen/X86/splat-scalar-load.ll
@@ -0,0 +1,43 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s
+; rdar://7434544
+
+define <2 x i64> @t1() nounwind ssp {
+entry:
+; CHECK: t1:
+; CHECK: pshufd	$0, (%esp), %xmm0
+  %array = alloca [8 x float], align 16
+  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 0
+  %tmp2 = load float* %arrayidx
+  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+  ret <2 x i64> %0
+}
+
+define <2 x i64> @t2() nounwind ssp {
+entry:
+; CHECK: t2:
+; CHECK: pshufd	$85, (%esp), %xmm0
+  %array = alloca [8 x float], align 4
+  %arrayidx = getelementptr inbounds [8 x float]* %array, i32 0, i32 1
+  %tmp2 = load float* %arrayidx
+  %vecinit = insertelement <4 x float> undef, float %tmp2, i32 0
+  %vecinit5 = insertelement <4 x float> %vecinit, float %tmp2, i32 1
+  %vecinit7 = insertelement <4 x float> %vecinit5, float %tmp2, i32 2
+  %vecinit9 = insertelement <4 x float> %vecinit7, float %tmp2, i32 3
+  %0 = bitcast <4 x float> %vecinit9 to <2 x i64>
+  ret <2 x i64> %0
+}
+
+define <4 x float> @t3(float %tmp1, float %tmp2, float %tmp3) nounwind readnone ssp {
+entry:
+; CHECK: t3:
+; CHECK: pshufd	$-86, (%esp), %xmm0
+  %0 = insertelement <4 x float> undef, float %tmp3, i32 0
+  %1 = insertelement <4 x float> %0, float %tmp3, i32 1
+  %2 = insertelement <4 x float> %1, float %tmp3, i32 2
+  %3 = insertelement <4 x float> %2, float %tmp3, i32 3
+  ret <4 x float> %3
+}
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll
index 58fe28b..f2b8010 100644
--- a/test/CodeGen/X86/sse2.ll
+++ b/test/CodeGen/X86/sse2.ll
@@ -1,5 +1,5 @@
 ; Tests for SSE2 and below, without SSE3+.
-; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 | FileCheck %s
+; RUN: llc < %s -mtriple=i386-apple-darwin10 -mcpu=pentium4 -O3 | FileCheck %s
 
 define void @t1(<2 x double>* %r, <2 x double>* %A, double %B) nounwind  {
 	%tmp3 = load <2 x double>* %A, align 16
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 21c1a3c..5550d26 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -1,6 +1,6 @@
 ; These are tests for SSE3 codegen.  Yonah has SSE3 and earlier but not SSSE3+.
 
-; RUN: llc < %s -march=x86-64 -mcpu=yonah -mtriple=i686-apple-darwin9\
+; RUN: llc < %s -march=x86-64 -mcpu=yonah -mtriple=i686-apple-darwin9 -O3 \
 ; RUN:              | FileCheck %s --check-prefix=X64
 
 ; Test for v8xi16 lowering where we extract the first element of the vector and
diff --git a/test/CodeGen/X86/tail-opts.ll b/test/CodeGen/X86/tail-opts.ll
index 0d86e56..c70c9fa 100644
--- a/test/CodeGen/X86/tail-opts.ll
+++ b/test/CodeGen/X86/tail-opts.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -march=x86-64 -mtriple=x86_64-unknown-linux-gnu -asm-verbose=false -post-RA-scheduler=true | FileCheck %s
 
 declare void @bar(i32)
 declare void @car(i32)
diff --git a/test/CodeGen/X86/tailcallstack64.ll b/test/CodeGen/X86/tailcallstack64.ll
index 69018aa..d05dff8 100644
--- a/test/CodeGen/X86/tailcallstack64.ll
+++ b/test/CodeGen/X86/tailcallstack64.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -tailcallopt -march=x86-64 | FileCheck %s
+; RUN: llc < %s -tailcallopt -march=x86-64 -post-RA-scheduler=true | FileCheck %s
 
 ; Check that lowered arguments on the stack do not overwrite each other.
 ; Add %in1 %p1 to a different temporary register (%eax).
diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll
index 7dddcda..7778983 100644
--- a/test/CodeGen/X86/unaligned-load.ll
+++ b/test/CodeGen/X86/unaligned-load.ll
@@ -1,4 +1,3 @@
-; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=dynamic-no-pic | not grep {movaps\t_.str3}
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -relocation-model=dynamic-no-pic | FileCheck %s
 
 @.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8
diff --git a/test/CodeGen/X86/vec-trunc-store.ll b/test/CodeGen/X86/vec-trunc-store.ll
new file mode 100644
index 0000000..ea1a151
--- /dev/null
+++ b/test/CodeGen/X86/vec-trunc-store.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=x86-64 -disable-mmx | grep punpcklwd | count 2
+
+define void @foo() nounwind {
+  %cti69 = trunc <8 x i32> undef to <8 x i16>     ; <<8 x i16>> [#uses=1]
+  store <8 x i16> %cti69, <8 x i16>* undef
+  ret void
+}
+
+define void @bar() nounwind {
+  %cti44 = trunc <4 x i32> undef to <4 x i16>     ; <<4 x i16>> [#uses=1]
+  store <4 x i16> %cti44, <4 x i16>* undef
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_compare-2.ll b/test/CodeGen/X86/vec_compare-2.ll
new file mode 100644
index 0000000..091641b
--- /dev/null
+++ b/test/CodeGen/X86/vec_compare-2.ll
@@ -0,0 +1,29 @@
+; RUN: llc < %s -march=x86 -mcpu=penryn -disable-mmx | FileCheck %s
+
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone
+
+declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
+
+declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
+
+define void @blackDespeckle_wrapper(i8** %args_list, i64* %gtid, i64 %xend) {
+entry:
+; CHECK-NOT: set
+; CHECK: pcmpgt
+; CHECK: blendvps
+  %shr.i = ashr <4 x i32> zeroinitializer, <i32 3, i32 3, i32 3, i32 3> ; <<4 x i32>> [#uses=1]
+  %cmp318.i = sext <4 x i1> zeroinitializer to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %sub322.i = sub <4 x i32> %shr.i, zeroinitializer ; <<4 x i32>> [#uses=1]
+  %cmp323.x = icmp slt <4 x i32> zeroinitializer, %sub322.i ; <<4 x i1>> [#uses=1]
+  %cmp323.i = sext <4 x i1> %cmp323.x to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %or.i = or <4 x i32> %cmp318.i, %cmp323.i       ; <<4 x i32>> [#uses=1]
+  %tmp10.i83.i = bitcast <4 x i32> %or.i to <4 x float> ; <<4 x float>> [#uses=1]
+  %0 = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> undef, <4 x float> undef, <4 x float> %tmp10.i83.i) nounwind ; <<4 x float>> [#uses=1]
+  %conv.i.i15.i = bitcast <4 x float> %0 to <4 x i32> ; <<4 x i32>> [#uses=1]
+  %swz.i.i28.i = shufflevector <4 x i32> %conv.i.i15.i, <4 x i32> undef, <2 x i32> <i32 0, i32 1> ; <<2 x i32>> [#uses=1]
+  %tmp6.i29.i = bitcast <2 x i32> %swz.i.i28.i to <4 x i16> ; <<4 x i16>> [#uses=1]
+  %swz.i30.i = shufflevector <4 x i16> %tmp6.i29.i, <4 x i16> undef, <2 x i32> <i32 0, i32 1> ; <<2 x i16>> [#uses=1]
+  store <2 x i16> %swz.i30.i, <2 x i16>* undef
+  unreachable
+  ret void
+}
diff --git a/test/CodeGen/X86/vec_ext_inreg.ll b/test/CodeGen/X86/vec_ext_inreg.ll
new file mode 100644
index 0000000..02b16a7
--- /dev/null
+++ b/test/CodeGen/X86/vec_ext_inreg.ll
@@ -0,0 +1,37 @@
+; RUN: llc < %s -march=x86-64 
+
+define <8 x i32> @a(<8 x i32> %a) nounwind {
+  %b = trunc <8 x i32> %a to <8 x i16>
+  %c = sext <8 x i16> %b to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <3 x i32> @b(<3 x i32> %a) nounwind {
+  %b = trunc <3 x i32> %a to <3 x i16>
+  %c = sext <3 x i16> %b to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <1 x i32> @c(<1 x i32> %a) nounwind {
+  %b = trunc <1 x i32> %a to <1 x i16>
+  %c = sext <1 x i16> %b to <1 x i32>
+  ret <1 x i32> %c
+}
+
+define <8 x i32> @d(<8 x i32> %a) nounwind {
+  %b = trunc <8 x i32> %a to <8 x i16>
+  %c = zext <8 x i16> %b to <8 x i32>
+  ret <8 x i32> %c
+}
+
+define <3 x i32> @e(<3 x i32> %a) nounwind {
+  %b = trunc <3 x i32> %a to <3 x i16>
+  %c = zext <3 x i16> %b to <3 x i32>
+  ret <3 x i32> %c
+}
+
+define <1 x i32> @f(<1 x i32> %a) nounwind {
+  %b = trunc <1 x i32> %a to <1 x i16>
+  %c = zext <1 x i16> %b to <1 x i32>
+  ret <1 x i32> %c
+}
diff --git a/test/CodeGen/X86/widen_arith-3.ll b/test/CodeGen/X86/widen_arith-3.ll
index a2b8b82..1f2c250 100644
--- a/test/CodeGen/X86/widen_arith-3.ll
+++ b/test/CodeGen/X86/widen_arith-3.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx | FileCheck %s
+; RUN: llc < %s -march=x86 -mattr=+sse42 -disable-mmx -post-RA-scheduler=true | FileCheck %s
 ; CHECK: paddw
 ; CHECK: pextrw
 ; CHECK: movd
diff --git a/test/CodeGen/X86/zext-shl.ll b/test/CodeGen/X86/zext-shl.ll
new file mode 100644
index 0000000..928848e
--- /dev/null
+++ b/test/CodeGen/X86/zext-shl.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -march=x86 | FileCheck %s
+
+define i32 @t1(i8 zeroext %x) nounwind readnone ssp {
+entry:
+; CHECK: t1:
+; CHECK: shll
+; CHECK-NOT: movzwl
+; CHECK: ret
+  %0 = zext i8 %x to i16
+  %1 = shl i16 %0, 5
+  %2 = zext i16 %1 to i32
+  ret i32 %2
+}
+
+define i32 @t2(i8 zeroext %x) nounwind readnone ssp {
+entry:
+; CHECK: t2:
+; CHECK: shrl
+; CHECK-NOT: movzwl
+; CHECK: ret
+  %0 = zext i8 %x to i16
+  %1 = lshr i16 %0, 3
+  %2 = zext i16 %1 to i32
+  ret i32 %2
+}
diff --git a/test/DebugInfo/2008-10-17-C++DebugCrash.ll b/test/DebugInfo/2008-10-17-C++DebugCrash.ll
deleted file mode 100644
index 7f60e95..0000000
--- a/test/DebugInfo/2008-10-17-C++DebugCrash.ll
+++ /dev/null
@@ -1,58 +0,0 @@
-; RUN: llc < %s
-; PR2885
-
-;; NOTE: This generates bad debug info in this case! But that's better than
-;; ICEing.
-
-; ModuleID = 'bug.bc'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([11 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([11 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 14, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 true, i1 true }		; <%llvm.dbg.subprogram.type*> [#uses=0]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }		; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([7 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([16 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [7 x i8] c"die.cc\00"		; <[7 x i8]*> [#uses=1]
-@.str1 = internal constant [16 x i8] c"/home/nicholas/\00"		; <[16 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5623) (LLVM build)\00"		; <[52 x i8]*> [#uses=1]
-@.str3 = internal constant [11 x i8] c"AssertFail\00"		; <[11 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram5 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([7 x i8]* @.str6, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 19, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype7 to { }*), i1 true, i1 true }		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str6 = internal constant [7 x i8] c"FooOne\00"		; <[7 x i8]*> [#uses=1]
-@llvm.dbg.basictype7 = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([9 x i8]* @.str8, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str8 = internal constant [9 x i8] c"long int\00"		; <[9 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 393473, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*), i8* getelementptr ([6 x i8]* @.str9, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 19, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype10 to { }*) }		; <%llvm.dbg.variable.type*> [#uses=0]
-@.str9 = internal constant [6 x i8] c"count\00"		; <[6 x i8]*> [#uses=1]
-@llvm.dbg.basictype10 = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([18 x i8]* @.str11, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 7 }		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str11 = internal constant [18 x i8] c"long unsigned int\00"		; <[18 x i8]*> [#uses=1]
-@llvm.dbg.subprogram12 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([7 x i8]* @.str13, i32 0, i32 0), i8* getelementptr ([7 x i8]* @.str13, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 24, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype7 to { }*), i1 true, i1 true }		; <%llvm.dbg.subprogram.type*> [#uses=0]
-@.str13 = internal constant [7 x i8] c"FooTwo\00"		; <[7 x i8]*> [#uses=1]
-@llvm.dbg.subprogram14 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([9 x i8]* @.str15, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str15, i32 0, i32 0), i8* getelementptr ([13 x i8]* @.str16, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 29, { }* null, i1 false, i1 true }		; <%llvm.dbg.subprogram.type*> [#uses=0]
-@.str15 = internal constant [9 x i8] c"FooThree\00"		; <[9 x i8]*> [#uses=1]
-@.str16 = internal constant [13 x i8] c"_Z8FooThreev\00"		; <[13 x i8]*> [#uses=1]
-
-declare void @_Z8FooThreev() nounwind
-
-define internal i32 @_ZL10AssertFailv() nounwind {
-entry:
-	unreachable
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @abort() noreturn nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
-
-declare i32 @_ZL6FooOnem(i32) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare i32 @_ZL6FooTwov() nounwind
diff --git a/test/DebugInfo/2008-11-05-InlinedFuncStart.ll b/test/DebugInfo/2008-11-05-InlinedFuncStart.ll
deleted file mode 100644
index b648718..0000000
--- a/test/DebugInfo/2008-11-05-InlinedFuncStart.ll
+++ /dev/null
@@ -1,72 +0,0 @@
-; RUN: llc < %s
-; RUN: llc %s -o - -O0
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5627) (LLVM build)\00", section "llvm.metadata"		; <[52 x i8]*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram5 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str6 = internal constant [5 x i8] c"main\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-
-define i32 @foo() nounwind alwaysinline {
-entry:
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	store i32 42, i32* %0, align 4
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %1, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	ret i32 %retval1
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
-
-define i32 @main() nounwind {
-entry:
-	%retval.i = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%1 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
-	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
-	store i32 42, i32* %0, align 4
-	%2 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %2, i32* %retval.i, align 4
-	%retval1.i = load i32* %retval.i		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
-	store i32 %retval1.i, i32* %1, align 4
-	%3 = load i32* %1, align 4		; <i32> [#uses=1]
-	store i32 %3, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
-	ret i32 %retval1
-}
diff --git a/test/DebugInfo/2009-01-15-RecordVariableCrash.ll b/test/DebugInfo/2009-01-15-RecordVariableCrash.ll
deleted file mode 100644
index cee4d72..0000000
--- a/test/DebugInfo/2009-01-15-RecordVariableCrash.ll
+++ /dev/null
@@ -1,353 +0,0 @@
-; RUN: llc %s -o /dev/null -verify-dom-info -verify-loop-info
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32, i8*, i8* }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
-	%llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, i8*, i8* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1, i8*, i8* }
-	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }*, i8*, i8* }
-	%struct._RuneCharClass = type { [14 x i8], i32 }
-	%struct._RuneEntry = type { i32, i32, i32, i32* }
-	%struct._RuneLocale = type { [8 x i8], [32 x i8], i32 (i8*, i32, i8**)*, i32 (i32, i8*, i32, i8**)*, i32, [256 x i32], [256 x i32], [256 x i32], %struct._RuneRange, %struct._RuneRange, %struct._RuneRange, i8*, i32, i32, %struct._RuneCharClass* }
-	%struct._RuneRange = type { i32, %struct._RuneEntry* }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [4 x i8] c"x.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [57 x i8] c"4.2.1 (Based on Apple Inc. build 5628) (LLVM build 9999)\00", section "llvm.metadata"		; <[57 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([57 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5, i8* null, i8* null }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str4 = internal constant [5 x i8] c"main\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str4, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 21, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str5 = internal constant [2 x i8] c"i\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([2 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 22, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@.str6 = internal constant [8 x i8] c"islower\00", section "llvm.metadata"		; <[8 x i8]*> [#uses=1]
-@llvm.dbg.subprogram9 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([8 x i8]* @.str6, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 267, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 true, i1 true, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str10 = internal constant [3 x i8] c"_c\00", section "llvm.metadata"		; <[3 x i8]*> [#uses=1]
-@llvm.dbg.variable11 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram9 to { }*), i8* getelementptr ([3 x i8]* @.str10, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 266, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@.str12 = internal constant [9 x i8] c"__istype\00", section "llvm.metadata"		; <[9 x i8]*> [#uses=1]
-@llvm.dbg.subprogram13 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([9 x i8]* @.str12, i32 0, i32 0), i8* getelementptr ([9 x i8]* @.str12, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 171, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 true, i1 true, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str14 = internal constant [19 x i8] c"__darwin_ct_rune_t\00", section "llvm.metadata"		; <[19 x i8]*> [#uses=1]
-@.str15 = internal constant [9 x i8] c"_types.h\00", section "llvm.metadata"		; <[9 x i8]*> [#uses=1]
-@.str16 = internal constant [18 x i8] c"/usr/include/i386\00", section "llvm.metadata"		; <[18 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458774, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([19 x i8]* @.str14, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 70, i64 0, i64 0, i64 0, i32 0, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i8* getelementptr ([9 x i8]* @.str15, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str16, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.variable17 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram13 to { }*), i8* getelementptr ([3 x i8]* @.str10, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 170, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@.str18 = internal constant [18 x i8] c"long unsigned int\00", section "llvm.metadata"		; <[18 x i8]*> [#uses=1]
-@llvm.dbg.basictype19 = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([18 x i8]* @.str18, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 7, i8* null, i8* null }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str20 = internal constant [3 x i8] c"_f\00", section "llvm.metadata"		; <[3 x i8]*> [#uses=1]
-@llvm.dbg.variable21 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram13 to { }*), i8* getelementptr ([3 x i8]* @.str20, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 170, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype19 to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@_DefaultRuneLocale = external global %struct._RuneLocale		; <%struct._RuneLocale*> [#uses=1]
-@.str22 = internal constant [8 x i8] c"isascii\00", section "llvm.metadata"		; <[8 x i8]*> [#uses=1]
-@llvm.dbg.subprogram23 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str22, i32 0, i32 0), i8* getelementptr ([8 x i8]* @.str22, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 153, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 true, i1 true, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.variable24 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram23 to { }*), i8* getelementptr ([3 x i8]* @.str10, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 152, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@.str25 = internal constant [8 x i8] c"toupper\00", section "llvm.metadata"		; <[8 x i8]*> [#uses=1]
-@llvm.dbg.subprogram26 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str25, i32 0, i32 0), i8* getelementptr ([8 x i8]* @.str25, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 316, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 true, i1 true, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.variable27 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram26 to { }*), i8* getelementptr ([3 x i8]* @.str10, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 315, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-
-define i32 @main() nounwind {
-entry:
-	%retval = alloca i32		; <i32*> [#uses=1]
-	%i = alloca i32		; <i32*> [#uses=16]
-	%iftmp.5 = alloca i32		; <i32*> [#uses=3]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	%0 = bitcast i32* %i to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*))
-	call void @llvm.dbg.stoppoint(i32 23, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	store i32 0, i32* %i, align 4
-	br label %bb13
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 23, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb
-
-bb:		; preds = %bb13, %1
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%2 = load i32* %i, align 4		; <i32> [#uses=1]
-	%3 = call i32 @islower(i32 %2) nounwind		; <i32> [#uses=1]
-	%4 = icmp eq i32 %3, 0		; <i1> [#uses=1]
-	br i1 %4, label %bb3, label %bb1
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb1
-
-bb1:		; preds = %5, %bb
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%6 = load i32* %i, align 4		; <i32> [#uses=1]
-	%7 = icmp sle i32 %6, 96		; <i1> [#uses=1]
-	br i1 %7, label %bb11, label %bb2
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb2
-
-bb2:		; preds = %8, %bb1
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%9 = load i32* %i, align 4		; <i32> [#uses=1]
-	%10 = icmp sgt i32 %9, 122		; <i1> [#uses=1]
-	br i1 %10, label %bb11, label %bb3
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb3
-
-bb3:		; preds = %11, %bb2, %bb
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%12 = load i32* %i, align 4		; <i32> [#uses=1]
-	%13 = call i32 @islower(i32 %12) nounwind		; <i32> [#uses=1]
-	%14 = icmp ne i32 %13, 0		; <i1> [#uses=1]
-	br i1 %14, label %bb6, label %bb4
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb4
-
-bb4:		; preds = %15, %bb3
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%16 = load i32* %i, align 4		; <i32> [#uses=1]
-	%17 = icmp sle i32 %16, 96		; <i1> [#uses=1]
-	br i1 %17, label %bb6, label %bb5
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb5
-
-bb5:		; preds = %18, %bb4
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%19 = load i32* %i, align 4		; <i32> [#uses=1]
-	%20 = icmp sle i32 %19, 122		; <i1> [#uses=1]
-	br i1 %20, label %bb11, label %bb6
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb6
-
-bb6:		; preds = %21, %bb5, %bb4, %bb3
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%22 = load i32* %i, align 4		; <i32> [#uses=1]
-	%23 = call i32 @toupper(i32 %22) nounwind		; <i32> [#uses=1]
-	%24 = load i32* %i, align 4		; <i32> [#uses=1]
-	%25 = icmp sle i32 %24, 96		; <i1> [#uses=1]
-	br i1 %25, label %bb9, label %bb7
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb7
-
-bb7:		; preds = %26, %bb6
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%27 = load i32* %i, align 4		; <i32> [#uses=1]
-	%28 = icmp sgt i32 %27, 122		; <i1> [#uses=1]
-	br i1 %28, label %bb9, label %bb8
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb8
-
-bb8:		; preds = %29, %bb7
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%30 = load i32* %i, align 4		; <i32> [#uses=1]
-	%31 = sub i32 %30, 32		; <i32> [#uses=1]
-	store i32 %31, i32* %iftmp.5, align 4
-	br label %bb10
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb9
-
-bb9:		; preds = %32, %bb7, %bb6
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%33 = load i32* %i, align 4		; <i32> [#uses=1]
-	store i32 %33, i32* %iftmp.5, align 4
-	br label %bb10
-
-bb10:		; preds = %bb9, %bb8
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%34 = load i32* %iftmp.5, align 4		; <i32> [#uses=1]
-	%35 = icmp ne i32 %23, %34		; <i1> [#uses=1]
-	br i1 %35, label %bb11, label %bb12
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 24, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb11
-
-bb11:		; preds = %36, %bb10, %bb5, %bb2, %bb1
-	call void @llvm.dbg.stoppoint(i32 26, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @exit(i32 2) noreturn nounwind
-	unreachable
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 26, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb12
-
-bb12:		; preds = %37, %bb10
-	call void @llvm.dbg.stoppoint(i32 23, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%38 = load i32* %i, align 4		; <i32> [#uses=1]
-	%39 = add i32 %38, 1		; <i32> [#uses=1]
-	store i32 %39, i32* %i, align 4
-	br label %bb13
-
-bb13:		; preds = %bb12, %entry
-	call void @llvm.dbg.stoppoint(i32 23, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%40 = load i32* %i, align 4		; <i32> [#uses=1]
-	%41 = icmp sle i32 %40, 255		; <i1> [#uses=1]
-	br i1 %41, label %bb, label %bb14
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 23, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb14
-
-bb14:		; preds = %42, %bb13
-	call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @exit(i32 0) noreturn nounwind
-	unreachable
-
-return:		; No predecessors!
-	%retval15 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	ret i32 %retval15
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-define internal i32 @islower(i32 %_c) nounwind {
-entry:
-	%_c_addr = alloca i32		; <i32*> [#uses=3]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram9 to { }*))
-	%1 = bitcast i32* %_c_addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable11 to { }*))
-	store i32 %_c, i32* %_c_addr
-	call void @llvm.dbg.stoppoint(i32 268, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%2 = load i32* %_c_addr, align 4		; <i32> [#uses=1]
-	%3 = call i32 @__istype(i32 %2, i32 4096) nounwind		; <i32> [#uses=1]
-	store i32 %3, i32* %0, align 4
-	%4 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %4, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 268, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram9 to { }*))
-	ret i32 %retval1
-}
-
-define internal i32 @toupper(i32 %_c) nounwind {
-entry:
-	%_c_addr = alloca i32		; <i32*> [#uses=3]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram26 to { }*))
-	%1 = bitcast i32* %_c_addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable27 to { }*))
-	store i32 %_c, i32* %_c_addr
-	call void @llvm.dbg.stoppoint(i32 317, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%2 = load i32* %_c_addr, align 4		; <i32> [#uses=1]
-	%3 = call i32 @__toupper(i32 %2) nounwind		; <i32> [#uses=1]
-	store i32 %3, i32* %0, align 4
-	%4 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %4, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 317, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram26 to { }*))
-	ret i32 %retval1
-}
-
-declare void @exit(i32) noreturn nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
-
-define internal i32 @__istype(i32 %_c, i32 %_f) nounwind {
-entry:
-	%_c_addr = alloca i32		; <i32*> [#uses=5]
-	%_f_addr = alloca i32		; <i32*> [#uses=4]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%iftmp.0 = alloca i32		; <i32*> [#uses=3]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram13 to { }*))
-	%1 = bitcast i32* %_c_addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable17 to { }*))
-	store i32 %_c, i32* %_c_addr
-	%2 = bitcast i32* %_f_addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %2, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable21 to { }*))
-	store i32 %_f, i32* %_f_addr
-	call void @llvm.dbg.stoppoint(i32 175, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%3 = load i32* %_c_addr, align 4		; <i32> [#uses=1]
-	%4 = call i32 @isascii(i32 %3) nounwind		; <i32> [#uses=1]
-	%5 = icmp ne i32 %4, 0		; <i1> [#uses=1]
-	br i1 %5, label %bb, label %bb1
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 175, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb
-
-bb:		; preds = %6, %entry
-	call void @llvm.dbg.stoppoint(i32 175, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%7 = load i32* %_c_addr, align 4		; <i32> [#uses=1]
-	%8 = getelementptr [256 x i32]* getelementptr (%struct._RuneLocale* @_DefaultRuneLocale, i32 0, i32 5), i32 0, i32 %7		; <i32*> [#uses=1]
-	%9 = load i32* %8, align 4		; <i32> [#uses=1]
-	%10 = load i32* %_f_addr, align 4		; <i32> [#uses=1]
-	%11 = and i32 %9, %10		; <i32> [#uses=1]
-	%12 = icmp ne i32 %11, 0		; <i1> [#uses=1]
-	%13 = zext i1 %12 to i32		; <i32> [#uses=1]
-	store i32 %13, i32* %iftmp.0, align 4
-	br label %bb2
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 175, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb1
-
-bb1:		; preds = %14, %entry
-	call void @llvm.dbg.stoppoint(i32 175, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%15 = load i32* %_c_addr, align 4		; <i32> [#uses=1]
-	%16 = load i32* %_f_addr, align 4		; <i32> [#uses=1]
-	%17 = call i32 @__maskrune(i32 %15, i32 %16) nounwind		; <i32> [#uses=1]
-	%18 = icmp ne i32 %17, 0		; <i1> [#uses=1]
-	%19 = zext i1 %18 to i32		; <i32> [#uses=1]
-	store i32 %19, i32* %iftmp.0, align 4
-	br label %bb2
-
-bb2:		; preds = %bb1, %bb
-	call void @llvm.dbg.stoppoint(i32 175, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%20 = load i32* %iftmp.0, align 4		; <i32> [#uses=1]
-	store i32 %20, i32* %0, align 4
-	%21 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %21, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %bb2
-	%retval3 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 175, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram13 to { }*))
-	ret i32 %retval3
-}
-
-define internal i32 @isascii(i32 %_c) nounwind {
-entry:
-	%_c_addr = alloca i32		; <i32*> [#uses=3]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram23 to { }*))
-	%1 = bitcast i32* %_c_addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable24 to { }*))
-	store i32 %_c, i32* %_c_addr
-	call void @llvm.dbg.stoppoint(i32 154, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%2 = load i32* %_c_addr, align 4		; <i32> [#uses=1]
-	%3 = and i32 %2, -128		; <i32> [#uses=1]
-	%4 = icmp eq i32 %3, 0		; <i1> [#uses=1]
-	%5 = zext i1 %4 to i32		; <i32> [#uses=1]
-	store i32 %5, i32* %0, align 4
-	%6 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %6, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 154, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram23 to { }*))
-	ret i32 %retval1
-}
-
-declare i32 @__maskrune(i32, i32)
-
-declare i32 @__toupper(i32)
diff --git a/test/DebugInfo/2009-02-18-DefaultScope-Crash.ll b/test/DebugInfo/2009-02-18-DefaultScope-Crash.ll
deleted file mode 100644
index 27bcef7..0000000
--- a/test/DebugInfo/2009-02-18-DefaultScope-Crash.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; RUN: llc %s -o /dev/null
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8* }
-	%llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [57 x i8] c"4.2.1 (Based on Apple Inc. build 5636) (LLVM build 2098)\00", section "llvm.metadata"		; <[57 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([57 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [1 x { }*] [ { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) ], section "llvm.metadata"		; <[1 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([1 x { }*]* @llvm.dbg.array to { }*) }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"foo\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-
-@llvm.dbg.array1 = internal constant [1 x { }*] [ { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*) ], section "llvm.metadata"
-
-define i32 @foo() nounwind ssp {
-entry:
-	ret i32 42
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
diff --git a/test/DebugInfo/2009-06-12-Inline.ll b/test/DebugInfo/2009-06-12-Inline.ll
deleted file mode 100644
index de40468..0000000
--- a/test/DebugInfo/2009-06-12-Inline.ll
+++ /dev/null
@@ -1,94 +0,0 @@
-; RUN: llc %s -o /dev/null
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
-	%llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }*, i32 }
-	%llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-	%struct._objc_cache = type opaque
-	%struct._objc_category = type { i8*, i8*, %struct._objc_method_list*, %struct._objc_method_list*, %struct._objc_protocol_list*, i32, %struct._prop_list_t* }
-	%struct._objc_class = type { %struct._objc_class*, %struct._objc_class*, i8*, i32, i32, i32, %struct._objc_ivar_list*, %struct._objc_method_list*, %struct._objc_cache*, %struct._objc_protocol_list*, i8*, %struct._objc_class_extension* }
-	%struct._objc_class_extension = type { i32, i8*, %struct._prop_list_t* }
-	%struct._objc_exception_data = type { [18 x i32], [4 x i8*] }
-	%struct._objc_ivar = type { i8*, i8*, i32 }
-	%struct._objc_ivar_list = type opaque
-	%struct._objc_method = type { %struct.objc_selector*, i8*, i8* }
-	%struct._objc_method_description = type { %struct.objc_selector*, i8* }
-	%struct._objc_method_description_list = type { i32, [0 x %struct._objc_method_description] }
-	%struct._objc_method_list = type opaque
-	%struct._objc_module = type { i32, i32, i8*, %struct._objc_symtab* }
-	%struct._objc_protocol = type { %struct._objc_protocol_extension*, i8*, %struct._objc_protocol_list*, %struct._objc_method_description_list*, %struct._objc_method_description_list* }
-	%struct._objc_protocol_extension = type { i32, %struct._objc_method_description_list*, %struct._objc_method_description_list*, %struct._prop_list_t* }
-	%struct._objc_protocol_list = type { %struct._objc_protocol_list*, i32, [0 x %struct._objc_protocol] }
-	%struct._objc_super = type <{ %struct.objc_object*, %struct.objc_class* }>
-	%struct._objc_symtab = type { i32, %struct.objc_selector*, i16, i16, [0 x i8*] }
-	%struct._prop_list_t = type { i32, i32, [0 x %struct._prop_t] }
-	%struct._prop_t = type { i8*, i8* }
-	%struct.objc_class = type opaque
-	%struct.objc_object = type opaque
-	%struct.objc_selector = type opaque
-@"\01L_OBJC_IMAGE_INFO" = internal constant [2 x i32] [i32 0, i32 16], section "__OBJC, __image_info,regular"		; <[2 x i32]*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [4 x i8] c"t.m\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [20 x i8] c"/Volumes/work/Radar\00", section "llvm.metadata"		; <[20 x i8]*> [#uses=1]
-@.str2 = internal constant [10 x i8] c"clang 1.0\00", section "llvm.metadata"		; <[10 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 16, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([20 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([10 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 1 }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str3 = internal constant [3 x i8] c"f1\00", section "llvm.metadata"		; <[3 x i8]*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str3, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* null, i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str5 = internal constant [3 x i8] c"l0\00", section "llvm.metadata"		; <[3 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), i8* getelementptr ([3 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 5, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@.str6 = internal constant [3 x i8] c"f0\00", section "llvm.metadata"		; <[3 x i8]*> [#uses=1]
-@llvm.dbg.subprogram7 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([3 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* null, i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str8 = internal constant [2 x i8] c"x\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable9 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*), i8* getelementptr ([2 x i8]* @.str8, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@"\01L_OBJC_CLASS_NAME_" = internal global [1 x i8] zeroinitializer, section "__TEXT,__cstring,cstring_literals", align 1		; <[1 x i8]*> [#uses=1]
-@"\01L_OBJC_MODULES" = internal global %struct._objc_module { i32 7, i32 16, i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), %struct._objc_symtab* null }, section "__OBJC,__module_info,regular,no_dead_strip", align 4		; <%struct._objc_module*> [#uses=1]
-@llvm.used = appending global [3 x i8*] [i8* bitcast ([2 x i32]* @"\01L_OBJC_IMAGE_INFO" to i8*), i8* getelementptr ([1 x i8]* @"\01L_OBJC_CLASS_NAME_", i32 0, i32 0), i8* bitcast (%struct._objc_module* @"\01L_OBJC_MODULES" to i8*)], section "llvm.metadata"		; <[3 x i8*]*> [#uses=0]
-
-define void @f1() nounwind {
-entry:
-	%x.addr.i = alloca i32		; <i32*> [#uses=2]
-	%l0 = alloca void (i32)*, align 4		; <void (i32)**> [#uses=2]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	call void @llvm.dbg.stoppoint(i32 4, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
-	store i32 1, i32* %x.addr.i
-	%0 = bitcast i32* %x.addr.i to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable9 to { }*))
-	call void @llvm.dbg.stoppoint(i32 2, i32 66, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.stoppoint(i32 5, i32 3, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
-	%1 = bitcast void (i32)** %l0 to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*))
-	store void (i32)* @f0, void (i32)** %l0
-	call void @llvm.dbg.stoppoint(i32 6, i32 1, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	ret void
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind readnone
-
-define internal void @f0(i32 %x) nounwind alwaysinline {
-entry:
-	%x.addr = alloca i32		; <i32*> [#uses=2]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
-	store i32 %x, i32* %x.addr
-	%0 = bitcast i32* %x.addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable9 to { }*))
-	call void @llvm.dbg.stoppoint(i32 2, i32 66, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram7 to { }*))
-	ret void
-}
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind readnone
-
-declare void @llvm.dbg.region.end({ }*) nounwind readnone
diff --git a/test/DebugInfo/2009-06-12-InlineFuncStart.ll b/test/DebugInfo/2009-06-12-InlineFuncStart.ll
deleted file mode 100644
index 03837a0..0000000
--- a/test/DebugInfo/2009-06-12-InlineFuncStart.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: llc < %s
-; RUN: llc %s -o - -O0
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5627) (LLVM build)\00", section "llvm.metadata"		; <[52 x i8]*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram5 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str6 = internal constant [5 x i8] c"main\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-
-define i32 @foo() nounwind alwaysinline {
-entry:
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	store i32 42, i32* %0, align 4
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %1, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	ret i32 %retval1
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
-
-define i32 @main() nounwind {
-entry:
-	%retval.i = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%1 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
-        br label %bb1
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
-	ret i32 %retval1
-
-bb1:
-	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
-	store i32 42, i32* %0, align 4
-	%2 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %2, i32* %retval.i, align 4
-	%retval1.i = load i32* %retval.i		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
-	store i32 %retval1.i, i32* %1, align 4
-	%3 = load i32* %1, align 4		; <i32> [#uses=1]
-	store i32 %3, i32* %retval, align 4
-	br label %return
-}
diff --git a/test/DebugInfo/2009-06-15-InlineFuncStart.ll b/test/DebugInfo/2009-06-15-InlineFuncStart.ll
deleted file mode 100644
index 43d5cd1..0000000
--- a/test/DebugInfo/2009-06-15-InlineFuncStart.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; Test inlined function handling. This test case is copied from 
-; 2009-06-12-InlineFunctStart.ll with one change. In function main, the bb1
-; does not have llvm.dbg.stoppiont intrinsic before llvm.dbg.func.start.
-; RUN: llc < %s
-; RUN: llc %s -o - -O0
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str3, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 393233, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 1, i8* getelementptr ([4 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([52 x i8]* @.str2, i32 0, i32 0) }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 393216, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [4 x i8] c"a.c\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str1 = internal constant [5 x i8] c"/tmp\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@.str2 = internal constant [52 x i8] c"4.2.1 (Based on Apple Inc. build 5627) (LLVM build)\00", section "llvm.metadata"		; <[52 x i8]*> [#uses=1]
-@.str3 = internal constant [4 x i8] c"foo\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 393252, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str4, i32 0, i32 0), { }* null, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str4 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.subprogram5 = internal constant %llvm.dbg.subprogram.type { i32 393262, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str6, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 2, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str6 = internal constant [5 x i8] c"main\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-
-define i32 @foo() nounwind alwaysinline {
-entry:
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	store i32 42, i32* %0, align 4
-	%1 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %1, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*))
-	ret i32 %retval1
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind
-
-declare void @llvm.dbg.region.end({ }*) nounwind
-
-define i32 @main() nounwind {
-entry:
-	%retval.i = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%1 = alloca i32		; <i32*> [#uses=2]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
-        br label %bb1
-
-return:		; preds = %entry
-	%retval1 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 2, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram5 to { }*))
-	ret i32 %retval1
-
-bb1:
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
-	store i32 42, i32* %0, align 4
-	%2 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %2, i32* %retval.i, align 4
-	%retval1.i = load i32* %retval.i		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 1, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*)) nounwind
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*)) nounwind
-	store i32 %retval1.i, i32* %1, align 4
-	%3 = load i32* %1, align 4		; <i32> [#uses=1]
-	store i32 %3, i32* %retval, align 4
-	br label %return
-}
diff --git a/test/DebugInfo/2009-06-15-abstract_origin.ll b/test/DebugInfo/2009-06-15-abstract_origin.ll
deleted file mode 100644
index d442a30..0000000
--- a/test/DebugInfo/2009-06-15-abstract_origin.ll
+++ /dev/null
@@ -1,275 +0,0 @@
-; RUN: llc %s -o - -asm-verbose -O0 | not grep ".long	0x0	## DW_AT_abstract_origin"
-
-	%llvm.dbg.anchor.type = type { i32, i32 }
-	%llvm.dbg.basictype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, i32 }
-	%llvm.dbg.compile_unit.type = type { i32, { }*, i32, i8*, i8*, i8*, i1, i1, i8*, i32 }
-	%llvm.dbg.composite.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }*, { }*, i32 }
-	%llvm.dbg.derivedtype.type = type { i32, { }*, i8*, { }*, i32, i64, i64, i64, i32, { }* }
-	%llvm.dbg.global_variable.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1, { }* }
-	%llvm.dbg.subprogram.type = type { i32, { }*, { }*, i8*, i8*, i8*, { }*, i32, { }*, i1, i1 }
-	%llvm.dbg.variable.type = type { i32, { }*, i8*, { }*, i32, { }* }
-	%struct.AAAAAImageParser = type { %struct.CObject* }
-	%struct.CObject = type { i32 }
-@llvm.dbg.compile_units = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 17 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str = internal constant [9 x i8] c"tcase.cc\00", section "llvm.metadata"		; <[9 x i8]*> [#uses=1]
-@.str1 = internal constant [6 x i8] c"/tmp/\00", section "llvm.metadata"		; <[6 x i8]*> [#uses=1]
-@.str2 = internal constant [55 x i8] c"4.2.1 (Based on Apple Inc. build 5646) (LLVM build 00)\00", section "llvm.metadata"		; <[55 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([9 x i8]* @.str, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 true, i1 false, i8* null, i32 0 }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str3 = internal constant [8 x i8] c"tcase.h\00", section "llvm.metadata"		; <[8 x i8]*> [#uses=1]
-@llvm.dbg.compile_unit4 = internal constant %llvm.dbg.compile_unit.type { i32 458769, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.compile_units to { }*), i32 4, i8* getelementptr ([8 x i8]* @.str3, i32 0, i32 0), i8* getelementptr ([6 x i8]* @.str1, i32 0, i32 0), i8* getelementptr ([55 x i8]* @.str2, i32 0, i32 0), i1 false, i1 false, i8* null, i32 0 }, section "llvm.metadata"		; <%llvm.dbg.compile_unit.type*> [#uses=1]
-@.str5 = internal constant [8 x i8] c"CObject\00", section "llvm.metadata"		; <[8 x i8]*> [#uses=1]
-@.str6 = internal constant [4 x i8] c"int\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.basictype = internal constant %llvm.dbg.basictype.type { i32 458788, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str6, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, i32 5 }, section "llvm.metadata"		; <%llvm.dbg.basictype.type*> [#uses=1]
-@.str7 = internal constant [2 x i8] c"d\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([2 x i8]* @.str7, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 6, i64 32, i64 32, i64 0, i32 1, { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype8 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array = internal constant [3 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype8 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite9 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprograms = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 46 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str10 = internal constant [4 x i8] c"set\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@.str11 = internal constant [18 x i8] c"_ZN7CObject3setEi\00", section "llvm.metadata"		; <[18 x i8]*> [#uses=1]
-@llvm.dbg.subprogram = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str10, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str10, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str11, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 3, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite9 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array12 = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype8 to { }*)], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite13 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array12 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str14 = internal constant [8 x i8] c"release\00", section "llvm.metadata"		; <[8 x i8]*> [#uses=1]
-@.str15 = internal constant [22 x i8] c"_ZN7CObject7releaseEv\00", section "llvm.metadata"		; <[22 x i8]*> [#uses=1]
-@llvm.dbg.subprogram16 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([8 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([22 x i8]* @.str15, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite13 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array17 = internal constant [3 x { }*] [{ }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram16 to { }*)], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite18 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str5, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 1, i64 32, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array17 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype19 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite18 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array20 = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*)], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array20 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.subprogram21 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([8 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([8 x i8]* @.str14, i32 0, i32 0), i8* getelementptr ([22 x i8]* @.str15, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.derivedtype22 = internal constant %llvm.dbg.derivedtype.type { i32 458790, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@.str23 = internal constant [5 x i8] c"this\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@llvm.dbg.variable = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram21 to { }*), i8* getelementptr ([5 x i8]* @.str23, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 4, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype22 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@llvm.dbg.array24 = internal constant [2 x { }*] [{ }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*)], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite25 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array24 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str26 = internal constant [14 x i8] c"ReleaseObject\00", section "llvm.metadata"		; <[14 x i8]*> [#uses=1]
-@.str27 = internal constant [27 x i8] c"_Z13ReleaseObjectP7CObject\00", section "llvm.metadata"		; <[27 x i8]*> [#uses=1]
-@llvm.dbg.subprogram28 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([14 x i8]* @.str26, i32 0, i32 0), i8* getelementptr ([14 x i8]* @.str26, i32 0, i32 0), i8* getelementptr ([27 x i8]* @.str27, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 10, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite25 to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str29 = internal constant [7 x i8] c"object\00", section "llvm.metadata"		; <[7 x i8]*> [#uses=1]
-@llvm.dbg.variable30 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*), i8* getelementptr ([7 x i8]* @.str29, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 10, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype22 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@.str31 = internal constant [17 x i8] c"AAAAAImageParser\00", section "llvm.metadata"		; <[17 x i8]*> [#uses=1]
-@.str33 = internal constant [13 x i8] c"mCustomWhite\00", section "llvm.metadata"		; <[13 x i8]*> [#uses=1]
-@llvm.dbg.derivedtype34 = internal constant %llvm.dbg.derivedtype.type { i32 458765, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([13 x i8]* @.str33, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 21, i64 32, i64 32, i64 0, i32 1, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.derivedtype35 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite45 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array36 = internal constant [3 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype35 to { }*), { }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite37 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array36 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str38 = internal constant [18 x i8] c"~AAAAAImageParser\00", section "llvm.metadata"		; <[18 x i8]*> [#uses=1]
-@llvm.dbg.subprogram39 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite37 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array40 = internal constant [3 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype35 to { }*), { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype19 to { }*)], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite41 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array40 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str42 = internal constant [36 x i8] c"_ZN16AAAAAImageParser3setEP7CObject\00", section "llvm.metadata"		; <[36 x i8]*> [#uses=1]
-@llvm.dbg.subprogram43 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([4 x i8]* @.str10, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str10, i32 0, i32 0), i8* getelementptr ([36 x i8]* @.str42, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 19, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite41 to { }*), i1 false, i1 false }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.array44 = internal constant [3 x { }*] [{ }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype34 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram39 to { }*), { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram43 to { }*)], section "llvm.metadata"		; <[3 x { }*]*> [#uses=1]
-@llvm.dbg.composite45 = internal constant %llvm.dbg.composite.type { i32 458771, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([17 x i8]* @.str31, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 16, i64 32, i64 32, i64 0, i32 0, { }* null, { }* bitcast ([3 x { }*]* @llvm.dbg.array44 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@llvm.dbg.derivedtype46 = internal constant %llvm.dbg.derivedtype.type { i32 458767, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite45 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.array47 = internal constant [2 x { }*] [{ }* null, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype46 to { }*)], section "llvm.metadata"		; <[2 x { }*]*> [#uses=1]
-@llvm.dbg.composite48 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([2 x { }*]* @llvm.dbg.array47 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str49 = internal constant [26 x i8] c"_ZN16AAAAAImageParserD2Ev\00", section "llvm.metadata"		; <[26 x i8]*> [#uses=1]
-@llvm.dbg.subprogram50 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([26 x i8]* @.str49, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite48 to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.derivedtype51 = internal constant %llvm.dbg.derivedtype.type { i32 458790, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 32, i64 32, i64 0, i32 0, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype46 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.derivedtype.type*> [#uses=1]
-@llvm.dbg.variable52 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*), i8* getelementptr ([5 x i8]* @.str23, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype51 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@.str53 = internal constant [26 x i8] c"_ZN16AAAAAImageParserD1Ev\00", section "llvm.metadata"		; <[26 x i8]*> [#uses=1]
-@llvm.dbg.subprogram54 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([18 x i8]* @.str38, i32 0, i32 0), i8* getelementptr ([26 x i8]* @.str53, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite48 to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@llvm.dbg.variable55 = internal constant %llvm.dbg.variable.type { i32 459009, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram54 to { }*), i8* getelementptr ([5 x i8]* @.str23, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*), i32 24, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype51 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@llvm.dbg.array56 = internal constant [1 x { }*] [{ }* bitcast (%llvm.dbg.basictype.type* @llvm.dbg.basictype to { }*)], section "llvm.metadata"		; <[1 x { }*]*> [#uses=1]
-@llvm.dbg.composite57 = internal constant %llvm.dbg.composite.type { i32 458773, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* null, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 0, i64 0, i64 0, i64 0, i32 0, { }* null, { }* bitcast ([1 x { }*]* @llvm.dbg.array56 to { }*), i32 0 }, section "llvm.metadata"		; <%llvm.dbg.composite.type*> [#uses=1]
-@.str58 = internal constant [5 x i8] c"main\00", section "llvm.metadata"		; <[5 x i8]*> [#uses=1]
-@llvm.dbg.subprogram59 = internal constant %llvm.dbg.subprogram.type { i32 458798, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.subprograms to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([5 x i8]* @.str58, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str58, i32 0, i32 0), i8* getelementptr ([5 x i8]* @.str58, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 3, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite57 to { }*), i1 false, i1 true }, section "llvm.metadata"		; <%llvm.dbg.subprogram.type*> [#uses=1]
-@.str60 = internal constant [2 x i8] c"C\00", section "llvm.metadata"		; <[2 x i8]*> [#uses=1]
-@llvm.dbg.variable61 = internal constant %llvm.dbg.variable.type { i32 459008, { }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram59 to { }*), i8* getelementptr ([2 x i8]* @.str60, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.derivedtype.type* @llvm.dbg.derivedtype46 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.variable.type*> [#uses=1]
-@_ZZ4mainE3C.0 = private constant %struct.AAAAAImageParser zeroinitializer		; <%struct.AAAAAImageParser*> [#uses=2]
-@llvm.dbg.global_variables = linkonce constant %llvm.dbg.anchor.type { i32 458752, i32 52 }, section "llvm.metadata"		; <%llvm.dbg.anchor.type*> [#uses=1]
-@.str62 = internal constant [14 x i8] c"_ZZ4mainE3C.0\00", section "llvm.metadata"		; <[14 x i8]*> [#uses=1]
-@.str63 = internal constant [4 x i8] c"C.0\00", section "llvm.metadata"		; <[4 x i8]*> [#uses=1]
-@llvm.dbg.global_variable = internal constant %llvm.dbg.global_variable.type { i32 458804, { }* bitcast (%llvm.dbg.anchor.type* @llvm.dbg.global_variables to { }*), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i8* getelementptr ([14 x i8]* @.str62, i32 0, i32 0), i8* getelementptr ([4 x i8]* @.str63, i32 0, i32 0), i8* getelementptr ([14 x i8]* @.str62, i32 0, i32 0), { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*), i32 4, { }* bitcast (%llvm.dbg.composite.type* @llvm.dbg.composite45 to { }*), i1 false, i1 true, { }* bitcast (%struct.AAAAAImageParser* @_ZZ4mainE3C.0 to { }*) }, section "llvm.metadata"		; <%llvm.dbg.global_variable.type*> [#uses=0]
-
-define void @_ZN16AAAAAImageParserD2Ev(%struct.AAAAAImageParser* %this) nounwind ssp {
-entry:
-	%object_addr.i = alloca %struct.CObject*		; <%struct.CObject**> [#uses=4]
-	%retval.i = alloca %struct.CObject*		; <%struct.CObject**> [#uses=2]
-	%0 = alloca %struct.CObject*		; <%struct.CObject**> [#uses=2]
-	%this_addr = alloca %struct.AAAAAImageParser*		; <%struct.AAAAAImageParser**> [#uses=3]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*))
-	%1 = bitcast %struct.AAAAAImageParser** %this_addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable52 to { }*))
-	store %struct.AAAAAImageParser* %this, %struct.AAAAAImageParser** %this_addr
-	call void @llvm.dbg.stoppoint(i32 26, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	%2 = load %struct.AAAAAImageParser** %this_addr, align 4		; <%struct.AAAAAImageParser*> [#uses=1]
-	%3 = getelementptr %struct.AAAAAImageParser* %2, i32 0, i32 0		; <%struct.CObject**> [#uses=1]
-	%4 = load %struct.CObject** %3, align 4		; <%struct.CObject*> [#uses=1]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*)) nounwind
-	%5 = bitcast %struct.CObject** %object_addr.i to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %5, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable30 to { }*)) nounwind
-	store %struct.CObject* %4, %struct.CObject** %object_addr.i
-	call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
-	%6 = load %struct.CObject** %object_addr.i, align 4		; <%struct.CObject*> [#uses=1]
-	%7 = icmp ne %struct.CObject* %6, null		; <i1> [#uses=1]
-	br i1 %7, label %bb.i, label %_Z13ReleaseObjectP7CObject.exit
-
-bb.i:		; preds = %entry
-	call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
-	%8 = load %struct.CObject** %object_addr.i, align 4		; <%struct.CObject*> [#uses=1]
-	call void @_ZN7CObject7releaseEv(%struct.CObject* %8) nounwind
-	br label %_Z13ReleaseObjectP7CObject.exit
-
-_Z13ReleaseObjectP7CObject.exit:		; preds = %bb.i, %entry
-	call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
-	store %struct.CObject* null, %struct.CObject** %0, align 4
-	%9 = load %struct.CObject** %0, align 4		; <%struct.CObject*> [#uses=1]
-	store %struct.CObject* %9, %struct.CObject** %retval.i, align 4
-	%retval2.i = load %struct.CObject** %retval.i		; <%struct.CObject*> [#uses=0]
-	call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
-	call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*))
-	br label %bb
-
-bb:		; preds = %_Z13ReleaseObjectP7CObject.exit
-	call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	br label %return
-
-return:		; preds = %bb
-	call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram50 to { }*))
-	ret void
-}
-
-define linkonce_odr void @_ZN7CObject7releaseEv(%struct.CObject* %this) nounwind ssp {
-entry:
-	%this_addr = alloca %struct.CObject*		; <%struct.CObject**> [#uses=3]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram21 to { }*))
-	%0 = bitcast %struct.CObject** %this_addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %0, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable to { }*))
-	store %struct.CObject* %this, %struct.CObject** %this_addr
-	call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	%1 = load %struct.CObject** %this_addr, align 4		; <%struct.CObject*> [#uses=1]
-	%2 = getelementptr %struct.CObject* %1, i32 0, i32 0		; <i32*> [#uses=1]
-	store i32 0, i32* %2, align 4
-	br label %return
-
-return:		; preds = %entry
-	call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram21 to { }*))
-	ret void
-}
-
-declare void @llvm.dbg.func.start({ }*) nounwind readnone
-
-declare void @llvm.dbg.declare({ }*, { }*) nounwind readnone
-
-declare void @llvm.dbg.stoppoint(i32, i32, { }*) nounwind readnone
-
-declare void @llvm.dbg.region.end({ }*) nounwind readnone
-
-define void @_ZN16AAAAAImageParserD1Ev(%struct.AAAAAImageParser* %this) nounwind ssp {
-entry:
-	%object_addr.i = alloca %struct.CObject*		; <%struct.CObject**> [#uses=4]
-	%retval.i = alloca %struct.CObject*		; <%struct.CObject**> [#uses=2]
-	%0 = alloca %struct.CObject*		; <%struct.CObject**> [#uses=2]
-	%this_addr = alloca %struct.AAAAAImageParser*		; <%struct.AAAAAImageParser**> [#uses=3]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram54 to { }*))
-	%1 = bitcast %struct.AAAAAImageParser** %this_addr to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %1, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable55 to { }*))
-	store %struct.AAAAAImageParser* %this, %struct.AAAAAImageParser** %this_addr
-	call void @llvm.dbg.stoppoint(i32 26, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	%2 = load %struct.AAAAAImageParser** %this_addr, align 4		; <%struct.AAAAAImageParser*> [#uses=1]
-	%3 = getelementptr %struct.AAAAAImageParser* %2, i32 0, i32 0		; <%struct.CObject**> [#uses=1]
-	%4 = load %struct.CObject** %3, align 4		; <%struct.CObject*> [#uses=1]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*)) nounwind
-	%5 = bitcast %struct.CObject** %object_addr.i to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %5, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable30 to { }*)) nounwind
-	store %struct.CObject* %4, %struct.CObject** %object_addr.i
-	call void @llvm.dbg.stoppoint(i32 11, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
-	%6 = load %struct.CObject** %object_addr.i, align 4		; <%struct.CObject*> [#uses=1]
-	%7 = icmp ne %struct.CObject* %6, null		; <i1> [#uses=1]
-	br i1 %7, label %bb.i, label %_Z13ReleaseObjectP7CObject.exit
-
-bb.i:		; preds = %entry
-	call void @llvm.dbg.stoppoint(i32 12, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
-	%8 = load %struct.CObject** %object_addr.i, align 4		; <%struct.CObject*> [#uses=1]
-	call void @_ZN7CObject7releaseEv(%struct.CObject* %8) nounwind
-	br label %_Z13ReleaseObjectP7CObject.exit
-
-_Z13ReleaseObjectP7CObject.exit:		; preds = %bb.i, %entry
-	call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
-	store %struct.CObject* null, %struct.CObject** %0, align 4
-	%9 = load %struct.CObject** %0, align 4		; <%struct.CObject*> [#uses=1]
-	store %struct.CObject* %9, %struct.CObject** %retval.i, align 4
-	%retval2.i = load %struct.CObject** %retval.i		; <%struct.CObject*> [#uses=0]
-	call void @llvm.dbg.stoppoint(i32 13, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*)) nounwind
-	call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram28 to { }*))
-	br label %bb
-
-bb:		; preds = %_Z13ReleaseObjectP7CObject.exit
-	call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	br label %return
-
-return:		; preds = %bb
-	call void @llvm.dbg.stoppoint(i32 27, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit4 to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram54 to { }*))
-	ret void
-}
-
-define i32 @main() ssp {
-entry:
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%C = alloca %struct.AAAAAImageParser*		; <%struct.AAAAAImageParser**> [#uses=3]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%C.1 = alloca %struct.AAAAAImageParser*		; <%struct.AAAAAImageParser**> [#uses=4]
-	%1 = alloca %struct.AAAAAImageParser*		; <%struct.AAAAAImageParser**> [#uses=3]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	call void @llvm.dbg.func.start({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram59 to { }*))
-	%2 = bitcast %struct.AAAAAImageParser** %C to { }*		; <{ }*> [#uses=1]
-	call void @llvm.dbg.declare({ }* %2, { }* bitcast (%llvm.dbg.variable.type* @llvm.dbg.variable61 to { }*))
-	call void @llvm.dbg.stoppoint(i32 4, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%3 = call i8* @_Znwm(i32 4)		; <i8*> [#uses=1]
-	%4 = bitcast i8* %3 to %struct.AAAAAImageParser*		; <%struct.AAAAAImageParser*> [#uses=1]
-	store %struct.AAAAAImageParser* %4, %struct.AAAAAImageParser** %1, align 4
-	%5 = load %struct.AAAAAImageParser** %1, align 4		; <%struct.AAAAAImageParser*> [#uses=1]
-	%6 = getelementptr %struct.AAAAAImageParser* %5, i32 0, i32 0		; <%struct.CObject**> [#uses=1]
-	%7 = load %struct.CObject** getelementptr (%struct.AAAAAImageParser* @_ZZ4mainE3C.0, i32 0, i32 0), align 4		; <%struct.CObject*> [#uses=1]
-	store %struct.CObject* %7, %struct.CObject** %6, align 4
-	%8 = load %struct.AAAAAImageParser** %1, align 4		; <%struct.AAAAAImageParser*> [#uses=1]
-	store %struct.AAAAAImageParser* %8, %struct.AAAAAImageParser** %C, align 4
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%9 = load %struct.AAAAAImageParser** %C, align 4		; <%struct.AAAAAImageParser*> [#uses=1]
-	store %struct.AAAAAImageParser* %9, %struct.AAAAAImageParser** %C.1, align 4
-	%10 = load %struct.AAAAAImageParser** %C.1, align 4		; <%struct.AAAAAImageParser*> [#uses=1]
-	%11 = icmp ne %struct.AAAAAImageParser* %10, null		; <i1> [#uses=1]
-	br i1 %11, label %bb, label %bb1
-		; No predecessors!
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	br label %bb
-
-bb:		; preds = %12, %entry
-	call void @llvm.dbg.stoppoint(i32 5, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	%13 = load %struct.AAAAAImageParser** %C.1, align 4		; <%struct.AAAAAImageParser*> [#uses=1]
-	call void @_ZN16AAAAAImageParserD1Ev(%struct.AAAAAImageParser* %13) nounwind
-	%14 = load %struct.AAAAAImageParser** %C.1, align 4		; <%struct.AAAAAImageParser*> [#uses=1]
-	%15 = bitcast %struct.AAAAAImageParser* %14 to i8*		; <i8*> [#uses=1]
-	call void @_ZdlPv(i8* %15) nounwind
-	br label %bb1
-
-bb1:		; preds = %bb, %entry
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	store i32 0, i32* %0, align 4
-	%16 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %16, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %bb1
-	%retval2 = load i32* %retval		; <i32> [#uses=1]
-	call void @llvm.dbg.stoppoint(i32 6, i32 0, { }* bitcast (%llvm.dbg.compile_unit.type* @llvm.dbg.compile_unit to { }*))
-	call void @llvm.dbg.region.end({ }* bitcast (%llvm.dbg.subprogram.type* @llvm.dbg.subprogram59 to { }*))
-	ret i32 %retval2
-}
-
-declare i8* @_Znwm(i32)
-
-declare void @_ZdlPv(i8*) nounwind
diff --git a/test/DebugInfo/2009-12-01-CurrentFn.ll b/test/DebugInfo/2009-12-01-CurrentFn.ll
new file mode 100644
index 0000000..d33a8f4
--- /dev/null
+++ b/test/DebugInfo/2009-12-01-CurrentFn.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s | grep "func_end1:" | count 1
+
+declare void @foo()
+
+define void @bar(i32 %i) nounwind ssp {
+entry:
+  tail call void @foo() nounwind, !dbg !0
+  ret void, !dbg !0
+}
+
+!0 = metadata !{i32 9, i32 0, metadata !1, null}
+!1 = metadata !{i32 458798, i32 0, metadata !2, metadata !"baz", metadata !"baz", metadata !"baz", metadata !2, i32 8, metadata !3, i1 true, i1 true}; [DW_TAG_subprogram ]
+!2 = metadata !{i32 458769, i32 0, i32 1, metadata !"2007-12-VarArrayDebug.c", metadata !"/Volumes/Data/ddunbar/llvm/test/FrontendC", metadata !"4.2.1 (Based on Apple Inc. build 5653) (LLVM build)", i1 true, i1 true, metadata !"", i32 0}; [DW_TAG_compile_unit ]
+!3 = metadata !{i32 458773, metadata !2, metadata !"", metadata !2, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !4, i32 0}; [DW_TAG_subroutine_type ]
+!4 = metadata !{null, metadata !5}
+!5 = metadata !{i32 458788, metadata !2, metadata !"int", metadata !2, i32 0, i64 32, i64 32, i64 0, i32 0, i32 5}; [DW_TAG_base_type ]
+
diff --git a/test/FrontendC++/2006-11-06-StackTrace.cpp b/test/FrontendC++/2006-11-06-StackTrace.cpp
index 24eeda8..3732cb9 100644
--- a/test/FrontendC++/2006-11-06-StackTrace.cpp
+++ b/test/FrontendC++/2006-11-06-StackTrace.cpp
@@ -1,6 +1,6 @@
 // This is a regression test on debug info to make sure that we can get a
 // meaningful stack trace from a C++ program.
-// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
+// RUN: %llvmgcc -S -O0 -g %s -o - | \
 // RUN:    llc --disable-fp-elim -o %t.s -O0 -relocation-model=pic
 // RUN: %compile_c %t.s -o %t.o
 // RUN: %link %t.o -o %t.exe
diff --git a/test/FrontendC++/2006-11-30-NoCompileUnit.cpp b/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
index cd8b988..242a37e 100644
--- a/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
+++ b/test/FrontendC++/2006-11-30-NoCompileUnit.cpp
@@ -1,6 +1,6 @@
 // This is a regression test on debug info to make sure we don't hit a compile 
 // unit size issue with gdb.
-// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
+// RUN: %llvmgcc -S -O0 -g %s -o - | \
 // RUN:   llc --disable-fp-elim -o NoCompileUnit.s
 // RUN: %compile_c NoCompileUnit.s -o NoCompileUnit.o
 // RUN: %link NoCompileUnit.o -o NoCompileUnit.exe
diff --git a/test/FrontendC++/2006-11-30-Pubnames.cpp b/test/FrontendC++/2006-11-30-Pubnames.cpp
index 64214a8..3a08b3c 100644
--- a/test/FrontendC++/2006-11-30-Pubnames.cpp
+++ b/test/FrontendC++/2006-11-30-Pubnames.cpp
@@ -1,6 +1,6 @@
 // This is a regression test on debug info to make sure that we can access 
 // qualified global names.
-// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
+// RUN: %llvmgcc -S -O0 -g %s -o - | \
 // RUN:   llc --disable-fp-elim -o %t.s -O0
 // RUN: %compile_c %t.s -o %t.o
 // RUN: %link %t.o -o %t.exe
diff --git a/test/FrontendC++/2009-07-15-LineNumbers.cpp b/test/FrontendC++/2009-07-15-LineNumbers.cpp
index d603aa7..a8eda77 100644
--- a/test/FrontendC++/2009-07-15-LineNumbers.cpp
+++ b/test/FrontendC++/2009-07-15-LineNumbers.cpp
@@ -1,7 +1,7 @@
 // This is a regression test on debug info to make sure that we can
 // print line numbers in asm.
-// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
-// RUN:    llc --disable-fp-elim -O0 -relocation-model=pic | grep {SrcLine 25}
+// RUN: %llvmgcc -S -O0 -g %s -o - | \
+// RUN:    llc --disable-fp-elim -O0 -relocation-model=pic | grep { 2009-07-15-LineNumbers.cpp:25$}
 
 #include <stdlib.h>
 
diff --git a/test/FrontendC++/weak-external.cpp b/test/FrontendC++/weak-external.cpp
new file mode 100644
index 0000000..94360c2
--- /dev/null
+++ b/test/FrontendC++/weak-external.cpp
@@ -0,0 +1,17 @@
+// RUN: %llvmgxx %s -S -emit-llvm -O2 -o - | not grep {_ZNSs12_S_constructIPKcEEPcT_S3_RKSaIcESt20forward_iterator_tag}
+// PR4262
+
+// The "basic_string" extern template instantiation declaration is supposed to
+// suppress the implicit instantiation of non-inline member functions. Make sure
+// that we suppress the implicit instantiation of non-inline member functions
+// defined out-of-line. That we aren't instantiating the basic_string
+// constructor when we shouldn't be. Such an instantiation forces the implicit
+// instantiation of _S_construct<const char*>. Since _S_construct is a member
+// template, it's instantiation is *not* suppressed (despite being in
+// basic_string<char>), so we would emit it as a weak definition.
+
+#include <stdexcept>
+
+void dummysymbol() {
+  throw(std::runtime_error("string"));
+}
diff --git a/test/FrontendC/2005-06-15-ExpandGotoInternalProblem.c b/test/FrontendC/2005-06-15-ExpandGotoInternalProblem.c
index d7d03ba..0f076c9 100644
--- a/test/FrontendC/2005-06-15-ExpandGotoInternalProblem.c
+++ b/test/FrontendC/2005-06-15-ExpandGotoInternalProblem.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -std=c99 %s -S -o - | llvm-as | \
+// RUN: %llvmgcc -std=c99 %s -S -o - | \
 // RUN:    opt -std-compile-opts -disable-output
 // PR580
 
diff --git a/test/FrontendC/2005-09-24-AsmUserPrefix.c b/test/FrontendC/2005-09-24-AsmUserPrefix.c
index 9b9b153..952c7b3 100644
--- a/test/FrontendC/2005-09-24-AsmUserPrefix.c
+++ b/test/FrontendC/2005-09-24-AsmUserPrefix.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -o - | llvm-as | opt -std-compile-opts | llc | \
+// RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | llc | \
 // RUN:    not grep _foo2
 
 void foo() __asm__("foo2");
diff --git a/test/FrontendC/2006-01-13-StackSave.c b/test/FrontendC/2006-01-13-StackSave.c
index dfe00fb..ae8d908 100644
--- a/test/FrontendC/2006-01-13-StackSave.c
+++ b/test/FrontendC/2006-01-13-StackSave.c
@@ -1,5 +1,5 @@
 // PR691
-// RUN: %llvmgcc %s -S -o - | llvm-as | opt -std-compile-opts | \
+// RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | \
 // RUN:    llvm-dis | grep llvm.stacksave
 
 void test(int N) {
diff --git a/test/FrontendC/2006-01-23-FileScopeAsm.c b/test/FrontendC/2006-01-23-FileScopeAsm.c
index fb55a41..80e7195 100644
--- a/test/FrontendC/2006-01-23-FileScopeAsm.c
+++ b/test/FrontendC/2006-01-23-FileScopeAsm.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -o - | llvm-as | opt -std-compile-opts | \
+// RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | \
 // RUN:    llvm-dis | grep {foo\[12345\]} | count 5
 
 __asm__ ("foo1");
diff --git a/test/FrontendC/2006-03-03-MissingInitializer.c b/test/FrontendC/2006-03-03-MissingInitializer.c
index 0d09d29..19d4bc7 100644
--- a/test/FrontendC/2006-03-03-MissingInitializer.c
+++ b/test/FrontendC/2006-03-03-MissingInitializer.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -o - | llvm-as | opt -std-compile-opts | \
+// RUN: %llvmgcc %s -S -o - | opt -std-compile-opts | \
 // RUN:    llvm-dis | grep {@nate.*internal global i32 0}
 
 struct X { int *XX; int Y;};
diff --git a/test/FrontendC/2007-01-24-InlineAsmCModifier.c b/test/FrontendC/2007-01-24-InlineAsmCModifier.c
index 5735cce..c601ccf 100644
--- a/test/FrontendC/2007-01-24-InlineAsmCModifier.c
+++ b/test/FrontendC/2007-01-24-InlineAsmCModifier.c
@@ -1,6 +1,6 @@
 // Verify that the %c modifier works and strips off any prefixes from 
 // immediates.
-// RUN: %llvmgcc -S %s -o - | llvm-as | llc | grep {pickANumber: 789514}
+// RUN: %llvmgcc -S %s -o - | llc | grep {pickANumber: 789514}
 
 void foo() {
   __asm__         volatile("/* " "pickANumber" ": %c0 */"::"i"(0xC0C0A));
diff --git a/test/FrontendC/2007-04-11-InlineAsmStruct.c b/test/FrontendC/2007-04-11-InlineAsmStruct.c
index 158a16e..49741c6 100644
--- a/test/FrontendC/2007-04-11-InlineAsmStruct.c
+++ b/test/FrontendC/2007-04-11-InlineAsmStruct.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | llvm-as | llc
+// RUN: %llvmgcc %s -S -emit-llvm -o - | llc
 
 struct V { short X, Y; };
 int bar() {
diff --git a/test/FrontendC/2007-04-11-InlineAsmUnion.c b/test/FrontendC/2007-04-11-InlineAsmUnion.c
index 0435d64..83fe7db 100644
--- a/test/FrontendC/2007-04-11-InlineAsmUnion.c
+++ b/test/FrontendC/2007-04-11-InlineAsmUnion.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | llvm-as | llc
+// RUN: %llvmgcc %s -S -emit-llvm -o - | llc
 
 union U { int x; float p; };
 void foo() {
diff --git a/test/FrontendC/2007-04-11-InlineStorageClassC89.c b/test/FrontendC/2007-04-11-InlineStorageClassC89.c
index 4631191..ab1f556 100644
--- a/test/FrontendC/2007-04-11-InlineStorageClassC89.c
+++ b/test/FrontendC/2007-04-11-InlineStorageClassC89.c
@@ -11,8 +11,9 @@
 // RUN:   grep -v linkonce | count 1
 // RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
 // RUN:   grep xstatnoWeak | grep internal | count 1
-// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep define | \
-// RUN:   grep xextnoWeak | grep available_externally | count 1
+// RUN: %llvmgcc %s -S -emit-llvm -O0 -o - | grep declare | \
+// RUN:   grep xextnoWeak | grep -v internal | grep -v weak | \
+// RUN:   grep -v linkonce | count 1
 inline int xglobWeak(int) __attribute__((weak));
 inline int xglobWeak (int i) {
   return i*2;
diff --git a/test/FrontendC/2007-04-11-InlineStorageClassC99.c b/test/FrontendC/2007-04-11-InlineStorageClassC99.c
index 3607999..f6193aa 100644
--- a/test/FrontendC/2007-04-11-InlineStorageClassC99.c
+++ b/test/FrontendC/2007-04-11-InlineStorageClassC99.c
@@ -1,13 +1,14 @@
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
-// RUN:   grep xglobWeak | grep weak | count 1
+// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep declare | \
+// RUN:   grep xglobWeak | grep extern_weak | count 1
 // RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
 // RUN:   grep xextWeak | grep weak | count 1
 // RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
 // RUN:   grep xWeaknoinline | grep weak | count 1
 // RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
 // RUN:   grep xWeakextnoinline | grep weak | count 1
-// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
-// RUN:   grep xglobnoWeak | grep available_externally | count 1
+// RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep declare | \
+// RUN:   grep xglobnoWeak | grep -v internal | grep -v weak | \
+// RUN:   grep -v linkonce | count 1
 // RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
 // RUN:   grep xstatnoWeak | grep internal | count 1
 // RUN: %llvmgcc -std=c99 %s -S -emit-llvm -O0 -o - | grep define | \
diff --git a/test/FrontendC/2007-08-01-LoadStoreAlign.c b/test/FrontendC/2007-08-01-LoadStoreAlign.c
index ef890bf..75a82c1 100644
--- a/test/FrontendC/2007-08-01-LoadStoreAlign.c
+++ b/test/FrontendC/2007-08-01-LoadStoreAlign.c
@@ -1,5 +1,5 @@
 // RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | grep {align 1} | count 2
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | llvm-as | llc
+// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | llc
 
 struct p {
   char a;
diff --git a/test/FrontendC/2007-12-VarArrayDebug.c b/test/FrontendC/2007-12-VarArrayDebug.c
index cd6cfdf..966789e 100644
--- a/test/FrontendC/2007-12-VarArrayDebug.c
+++ b/test/FrontendC/2007-12-VarArrayDebug.c
@@ -1,5 +1,5 @@
-// RUN: %llvmgcc -S -g -O %s -o - | llvm-as | llc
-// RUN: %llvmgcc -S -g %s -o - | llvm-as | llc
+// RUN: %llvmgcc -S -g -O %s -o - | llc
+// RUN: %llvmgcc -S -g %s -o - | llc
 
 extern void foo (void);
 
diff --git a/test/FrontendC/2009-02-17-BitField-dbg.c b/test/FrontendC/2009-02-17-BitField-dbg.c
index 7ab14fd..3effd02 100644
--- a/test/FrontendC/2009-02-17-BitField-dbg.c
+++ b/test/FrontendC/2009-02-17-BitField-dbg.c
@@ -1,5 +1,5 @@
 // Check bitfields.
-// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
+// RUN: %llvmgcc -S -O0 -g %s -o - | \
 // RUN: llc --disable-fp-elim -o 2009-02-17-BitField-dbg.s
 // RUN: %compile_c 2009-02-17-BitField-dbg.s -o 2009-02-17-BitField-dbg.o
 // RUN: echo {ptype mystruct} > %t2
diff --git a/test/FrontendC/2009-12-07-BitFieldAlignment.c b/test/FrontendC/2009-12-07-BitFieldAlignment.c
new file mode 100644
index 0000000..a8312a5
--- /dev/null
+++ b/test/FrontendC/2009-12-07-BitFieldAlignment.c
@@ -0,0 +1,15 @@
+// RUN: %llvmgcc -m32 %s -S -o - | FileCheck %s
+// Set alignment on bitfield accesses.
+
+struct S {
+  int a, b;
+  void *c;
+  unsigned d : 8;
+  unsigned e : 8;
+};
+
+void f0(struct S *a) {
+// CHECK: %3 = load i32* %2, align 4
+// CHECK: store i32 %4, i32* %2, align 4
+  a->e = 0;
+}
diff --git a/test/FrontendC/BasicInstrs.c b/test/FrontendC/BasicInstrs.c
index 812b49d..ceed17c 100644
--- a/test/FrontendC/BasicInstrs.c
+++ b/test/FrontendC/BasicInstrs.c
@@ -1,7 +1,7 @@
 // This file can be used to see what a native C compiler is generating for a
 // variety of interesting operations.
 //
-// RUN: %llvmgcc -S %s -o - | llvm-as | llc
+// RUN: %llvmgcc -S %s -o - | llc
 
 unsigned int udiv(unsigned int X, unsigned int Y) {
   return X/Y;
diff --git a/test/FrontendC/extern-weak.c b/test/FrontendC/extern-weak.c
index f31a6ad..4729b04 100644
--- a/test/FrontendC/extern-weak.c
+++ b/test/FrontendC/extern-weak.c
@@ -1,5 +1,5 @@
 // RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | grep extern_weak
-// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | llvm-as | llc
+// RUN: %llvmgcc -O3 -S -o - -emit-llvm %s | llc
 
 #if !defined(__linux__) && !defined(__FreeBSD__) && \
     !defined(__OpenBSD__) && !defined(__CYGWIN__) && !defined(__DragonFly__)
diff --git a/test/FrontendC/unaligned-memcpy.c b/test/FrontendC/unaligned-memcpy.c
index e5d810f..9e6ce07 100644
--- a/test/FrontendC/unaligned-memcpy.c
+++ b/test/FrontendC/unaligned-memcpy.c
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc %s -S -emit-llvm -o - | llvm-as | llc
+// RUN: %llvmgcc %s -S -emit-llvm -o - | llc
 
 void bork() {
   char Qux[33] = {0};
diff --git a/test/FrontendObjC/2009-08-17-DebugInfo.m b/test/FrontendObjC/2009-08-17-DebugInfo.m
index 0bc4e9a..2c72e95 100644
--- a/test/FrontendObjC/2009-08-17-DebugInfo.m
+++ b/test/FrontendObjC/2009-08-17-DebugInfo.m
@@ -1,6 +1,6 @@
 // This is a regression test on debug info to make sure that we can set a
 // breakpoint on a objective message.
-// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | llc -o %t.s -O0
+// RUN: %llvmgcc -S -O0 -g %s -o - | llc -o %t.s -O0
 // RUN: %compile_c %t.s -o %t.o
 // RUN: %link %t.o -o %t.exe -framework Foundation
 // RUN: echo {break randomFunc\n} > %t.in 
diff --git a/test/FrontendObjC/2009-11-30-Objc-ID.m b/test/FrontendObjC/2009-11-30-Objc-ID.m
index dadccdc..787bf72 100644
--- a/test/FrontendObjC/2009-11-30-Objc-ID.m
+++ b/test/FrontendObjC/2009-11-30-Objc-ID.m
@@ -1,4 +1,4 @@
-// RUN: %llvmgcc -S -O0 -g %s -o - | llvm-as | \
+// RUN: %llvmgcc -S -O0 -g %s -o - | \
 // RUN:     llc --disable-fp-elim -o %t.s -O0 
 // RUN: grep id %t.s | grep DW_AT_name
 @interface A
diff --git a/test/LLVMC/AppendCmdHook.td b/test/LLVMC/AppendCmdHook.td
new file mode 100644
index 0000000..4a9d391
--- /dev/null
+++ b/test/LLVMC/AppendCmdHook.td
@@ -0,0 +1,28 @@
+// Check that hooks can be invoked from 'append_cmd'.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
+
+include "llvm/CompilerDriver/Common.td"
+
+// CHECK: std::string MyHook()
+
+def OptList : OptionList<[
+(switch_option "dummy1", (help "none")),
+(switch_option "dummy2", (help "none"))
+]>;
+
+def dummy_tool : Tool<[
+(cmd_line "dummy_cmd $INFILE"),
+(in_language "dummy_lang"),
+(out_language "dummy_lang"),
+(actions (case
+         // CHECK: push_back("-arg1")
+         // CHECK: push_back("-arg2")
+         (switch_on "dummy1"), (append_cmd "-arg1 -arg2"),
+         // CHECK: push_back("-arg3")
+         // CHECK: hooks::MyHook()
+         (switch_on "dummy2"), (append_cmd "-arg3 $CALL(MyHook)")))
+]>;
+
+def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
diff --git a/test/LLVMC/EmptyCompilationGraph.td b/test/LLVMC/EmptyCompilationGraph.td
index b30f84c..934905b 100644
--- a/test/LLVMC/EmptyCompilationGraph.td
+++ b/test/LLVMC/EmptyCompilationGraph.td
@@ -1,5 +1,6 @@
 // Check that the compilation graph can be empty.
-// RUN: tblgen -I %p/../../include --gen-llvmc %s
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/EnvParentheses.td b/test/LLVMC/EnvParentheses.td
index 694468f..77aab95 100644
--- a/test/LLVMC/EnvParentheses.td
+++ b/test/LLVMC/EnvParentheses.td
@@ -2,6 +2,7 @@
 // http://llvm.org/bugs/show_bug.cgi?id=4157
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
 // RUN: not grep {)));} %t
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/ExternOptions.td b/test/LLVMC/ExternOptions.td
index 5c69af7..a05f2ca 100644
--- a/test/LLVMC/ExternOptions.td
+++ b/test/LLVMC/ExternOptions.td
@@ -1,10 +1,13 @@
 // Check that extern options work.
 // The dummy tool and graph are required to silence warnings.
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: grep {extern .* AutoGeneratedSwitch_Wall} %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
+// CHECK: extern cl::opt<bool> AutoGeneratedSwitch_Wall
+
 def OptList : OptionList<[(switch_option "Wall", (extern)),
                           (parameter_option "std", (extern)),
                           (prefix_list_option "L", (extern))]>;
diff --git a/test/LLVMC/ForwardAs.td b/test/LLVMC/ForwardAs.td
index 51bd494..ce6fbb0 100644
--- a/test/LLVMC/ForwardAs.td
+++ b/test/LLVMC/ForwardAs.td
@@ -1,7 +1,8 @@
 // Check the fix for PR4159.
 // http://llvm.org/bugs/show_bug.cgi?id=4159
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: grep unique_name %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
@@ -12,6 +13,7 @@ def dummy_tool : Tool<[
 (in_language "dummy"),
 (out_language "dummy"),
 (actions (case
+         // CHECK: vec.push_back("unique_name")
          (not_empty "dummy"), (forward_as "dummy", "unique_name")))
 ]>;
 
diff --git a/test/LLVMC/ForwardTransformedValue.td b/test/LLVMC/ForwardTransformedValue.td
new file mode 100644
index 0000000..e263453
--- /dev/null
+++ b/test/LLVMC/ForwardTransformedValue.td
@@ -0,0 +1,26 @@
+// Check that forward_transformed_value works.
+// The dummy tool and graph are required to silence warnings.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
+
+include "llvm/CompilerDriver/Common.td"
+
+def OptList : OptionList<[(parameter_option "a", (extern)),
+                          (prefix_list_option "b", (extern))]>;
+
+// CHECK: std::string HookA
+// CHECK: std::string HookB
+
+def dummy_tool : Tool<[
+(cmd_line "dummy_cmd $INFILE"),
+(in_language "dummy"),
+(out_language "dummy"),
+(actions (case
+         // CHECK: HookA(AutoGeneratedParameter_a
+         (not_empty "a"), (forward_transformed_value "a", "HookA"),
+         // CHECK: HookB(AutoGeneratedList_b
+         (not_empty "b"), (forward_transformed_value "b", "HookB")))
+]>;
+
+def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
diff --git a/test/LLVMC/ForwardValue.td b/test/LLVMC/ForwardValue.td
new file mode 100644
index 0000000..31e395e
--- /dev/null
+++ b/test/LLVMC/ForwardValue.td
@@ -0,0 +1,23 @@
+// Check that forward_value works.
+// The dummy tool and graph are required to silence warnings.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
+
+include "llvm/CompilerDriver/Common.td"
+
+def OptList : OptionList<[(parameter_option "a", (extern)),
+                          (prefix_list_option "b", (extern))]>;
+
+def dummy_tool : Tool<[
+(cmd_line "dummy_cmd $INFILE"),
+(in_language "dummy"),
+(out_language "dummy"),
+(actions (case
+         // CHECK: vec.push_back(AutoGeneratedParameter_a)
+         (not_empty "a"), (forward_value "a"),
+         // CHECK: std::copy(AutoGeneratedList_b.begin()
+         (not_empty "b"), (forward_value "b")))
+]>;
+
+def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
diff --git a/test/LLVMC/HookWithArguments.td b/test/LLVMC/HookWithArguments.td
index 3bdb3ee..ba0bbe1 100644
--- a/test/LLVMC/HookWithArguments.td
+++ b/test/LLVMC/HookWithArguments.td
@@ -1,12 +1,15 @@
 // Check that hooks with arguments work.
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: grep {Hook(const char\\* Arg0, const char\\* Arg1, const char\\* Arg2);} %t | count 1
-// RUN: grep "/path" %t | count 1
-// RUN: grep "VARIABLE" %t | count 1
-// RUN: grep "/2path" %t | count 1
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
+// CHECK: Hook(const char* Arg0, const char* Arg1, const char* Arg2);
+// CHECK: std::getenv("VARIABLE")
+// CHECK: "/2path"
+// CHECK: "/path"
+
 def dummy_tool : Tool<[
 (cmd_line "$CALL(Hook, 'Arg1',   'Arg2', 'Arg3 Arg3Cont')/path arg1 $ENV(VARIABLE)/2path arg2 $INFILE"),
 (in_language "dummy"),
diff --git a/test/LLVMC/HookWithInFile.td b/test/LLVMC/HookWithInFile.td
new file mode 100644
index 0000000..e15e43c
--- /dev/null
+++ b/test/LLVMC/HookWithInFile.td
@@ -0,0 +1,15 @@
+// Check that a hook can be given $INFILE as an argument.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
+
+include "llvm/CompilerDriver/Common.td"
+
+def dummy_tool : Tool<[
+// CHECK: Hook(inFile.c_str())
+(cmd_line "$CALL(Hook, '$INFILE')/path $INFILE"),
+(in_language "dummy"),
+(out_language "dummy")
+]>;
+
+def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
diff --git a/test/LLVMC/Init.td b/test/LLVMC/Init.td
new file mode 100644
index 0000000..355d83f
--- /dev/null
+++ b/test/LLVMC/Init.td
@@ -0,0 +1,24 @@
+// Check that (init true/false) and (init "str") work.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
+
+include "llvm/CompilerDriver/Common.td"
+
+def OptList : OptionList<[
+// CHECK: cl::init(true)
+(switch_option "dummy1", (help "none"), (init true)),
+// CHECK: cl::init("some-string")
+(parameter_option "dummy2", (help "none"), (init "some-string"))
+]>;
+
+def dummy_tool : Tool<[
+(cmd_line "dummy_cmd $INFILE"),
+(in_language "dummy_lang"),
+(out_language "dummy_lang"),
+(actions (case
+         (switch_on "dummy1"), (forward "dummy1"),
+         (not_empty "dummy2"), (forward "dummy2")))
+]>;
+
+def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
diff --git a/test/LLVMC/MultiValuedOption.td b/test/LLVMC/MultiValuedOption.td
index bd1e033..8cb1878 100644
--- a/test/LLVMC/MultiValuedOption.td
+++ b/test/LLVMC/MultiValuedOption.td
@@ -1,11 +1,13 @@
 // Check that multivalued options work.
 // The dummy tool and graph are required to silence warnings.
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: grep cl::multi_val(2) %t | count 1
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
 def OptList : OptionList<[
+    // CHECK: cl::multi_val(2)
     (prefix_list_option "foo", (multi_val 2)),
     (parameter_list_option "baz", (multi_val 2), (extern))]>;
 
diff --git a/test/LLVMC/MultipleCompilationGraphs.td b/test/LLVMC/MultipleCompilationGraphs.td
index 64dbc9b..9702248 100644
--- a/test/LLVMC/MultipleCompilationGraphs.td
+++ b/test/LLVMC/MultipleCompilationGraphs.td
@@ -1,5 +1,6 @@
 // Check that multiple compilation graphs are allowed.
-// RUN: tblgen -I %p/../../include --gen-llvmc %s
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
diff --git a/test/LLVMC/NoActions.td b/test/LLVMC/NoActions.td
index 2a4a749..9c2d45a 100644
--- a/test/LLVMC/NoActions.td
+++ b/test/LLVMC/NoActions.td
@@ -1,8 +1,11 @@
 // Check that tools without associated actions are accepted.
-// RUN: tblgen -I %p/../../include --gen-llvmc %s | grep dummy_tool
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
+// CHECK: class dummy_tool : public Tool {
 def dummy_tool : Tool<[
 (cmd_line "dummy_cmd $INFILE"),
 (in_language "dummy"),
diff --git a/test/LLVMC/NoCompilationGraph.td b/test/LLVMC/NoCompilationGraph.td
index 2eea3e9..96c1f17 100644
--- a/test/LLVMC/NoCompilationGraph.td
+++ b/test/LLVMC/NoCompilationGraph.td
@@ -1,4 +1,5 @@
 // Check that the compilation graph is not required.
-// RUN: tblgen -I %p/../../include --gen-llvmc %s
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
diff --git a/test/LLVMC/OneOrMore.td b/test/LLVMC/OneOrMore.td
index 38b7eb7..ddf7cd1 100644
--- a/test/LLVMC/OneOrMore.td
+++ b/test/LLVMC/OneOrMore.td
@@ -1,14 +1,16 @@
 // Check that (one_or_more) and (zero_or_one) properties work.
 // The dummy tool and graph are required to silence warnings.
 // RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: grep cl::ZeroOrOne %t | count 1
-// RUN: grep cl::OneOrMore %t | count 1
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
 def OptList : OptionList<[
+    // CHECK: cl::OneOrMore
     (prefix_list_option "foo", (one_or_more)),
-    (parameter_list_option "baz", (zero_or_one))]>;
+    // CHECK: cl::Optional
+    (parameter_list_option "baz", (optional))]>;
 
 def dummy_tool : Tool<[
 (cmd_line "dummy_cmd $INFILE"),
diff --git a/test/LLVMC/OptionPreprocessor.td b/test/LLVMC/OptionPreprocessor.td
index 5b9f435..5661db8 100644
--- a/test/LLVMC/OptionPreprocessor.td
+++ b/test/LLVMC/OptionPreprocessor.td
@@ -1,8 +1,7 @@
 // Test for the OptionPreprocessor and any*.
-// RUN: ignore tblgen -I %p/../../include --gen-llvmc %s -o %t
-// RUN: grep W1 %t
-// RUN: grep W2 %t
-// RUN: grep W3 %t
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
 
 include "llvm/CompilerDriver/Common.td"
 
@@ -16,10 +15,14 @@ def OptList : OptionList<[
 ]>;
 
 def Preprocess : OptionPreprocessor<
-(case (and (switch_on "foo"), (any_switch_on ["bar", "baz"])),
+(case
+      // CHECK: W1
+      (and (switch_on "foo"), (any_switch_on ["bar", "baz"])),
            (warning "W1"),
+      // CHECK: W2
       (and (switch_on ["foo", "bar"]), (any_empty ["foo_p", "bar_p"])),
            (warning "W2"),
+      // CHECK: W3
       (and (empty ["foo_p", "bar_p"]), (any_not_empty ["baz_p"])),
            (warning "W3"))
 >;
diff --git a/test/LLVMC/OutputSuffixHook.td b/test/LLVMC/OutputSuffixHook.td
new file mode 100644
index 0000000..4ecad23
--- /dev/null
+++ b/test/LLVMC/OutputSuffixHook.td
@@ -0,0 +1,24 @@
+// Check that hooks can be invoked from 'output_suffix'.
+// RUN: tblgen -I %p/../../include --gen-llvmc %s -o %t
+// RUN: FileCheck -input-file %t %s
+// RUN: %compile_cxx -fexceptions -x c++ %t
+// XFAIL: *
+
+include "llvm/CompilerDriver/Common.td"
+
+// CHECK: std::string MyHook()
+
+def OptList : OptionList<[
+(switch_option "dummy1", (help "none"))
+]>;
+
+def dummy_tool : Tool<[
+(cmd_line "dummy_cmd $INFILE"),
+(in_language "dummy_lang"),
+(out_language "dummy_lang"),
+(actions (case
+         // CHECK: hooks::MyHook()
+         (switch_on "dummy1"), (output_suffix "$CALL(MyHook)")))
+]>;
+
+def DummyGraph : CompilationGraph<[SimpleEdge<"root", "dummy_tool">]>;
diff --git a/test/LLVMC/TestWarnings.td b/test/LLVMC/TestWarnings.td
index 1a4064e..9523e24 100644
--- a/test/LLVMC/TestWarnings.td
+++ b/test/LLVMC/TestWarnings.td
@@ -1,4 +1,4 @@
-// Check that the compiler warns about unused options.
+// Check that warnings about unused options are really emitted.
 // This should fail because the output is printed on stderr.
 // RUN: ignore tblgen -I %p/../../include --gen-llvmc %s |& grep "option '-Wall' has no effect!"
 
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index edd26b8..9151d25 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -101,3 +101,12 @@ entry:
 }
 
 
+; PR5551
+@test12g = private constant [6 x i8] c"a\00b\00\00\00"
+
+define i16 @test12() {
+  %a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1) 
+  ret i16 %a
+; CHECK: @test12
+; CHECK: ret i16 98
+}
diff --git a/test/Transforms/DeadStoreElimination/const-pointers.ll b/test/Transforms/DeadStoreElimination/const-pointers.ll
index ce18c6f..ce3b24c 100644
--- a/test/Transforms/DeadStoreElimination/const-pointers.ll
+++ b/test/Transforms/DeadStoreElimination/const-pointers.ll
@@ -11,7 +11,7 @@ define void @test1(%t* noalias %pp) {
   %x = load i32* inttoptr (i32 12345 to i32*)
   store i32 %x, i32* %p
   ret void
-; CHECK define void @test1
+; CHECK: define void @test1
 ; CHECK: store
 ; CHECK-NOT: store
 ; CHECK: ret void
@@ -21,10 +21,10 @@ define void @test3() {
   store i32 1, i32* @g; <-- This is dead.
   store i32 42, i32* @g
   ret void
-;CHECK define void @test3
-;CHECK: store
-;CHECK-NOT: store
-;CHECK: ret void
+; CHECK: define void @test3
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: ret void
 }
 
 define void @test4(i32* %p) {
@@ -32,7 +32,7 @@ define void @test4(i32* %p) {
   %x = load i32* @g; <-- %p and @g could alias
   store i32 %x, i32* %p
   ret void
-; CHECK define void @test4
+; CHECK: define void @test4
 ; CHECK: store
 ; CHECK: store
 ; CHECK: ret void
diff --git a/test/Transforms/DeadStoreElimination/lifetime-simple.ll b/test/Transforms/DeadStoreElimination/lifetime-simple.ll
deleted file mode 100644
index 430e700..0000000
--- a/test/Transforms/DeadStoreElimination/lifetime-simple.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -dse -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin7"
-
-define i8 @test2(i8* %P) nounwind {
-; CHECK: @test2
-; CHECK-NOT: store i8 1
-; CHECK: ret i8 0
-entry:
-  call void @llvm.lifetime.start(i64 32, i8* %P)
-  call void @llvm.lifetime.end(i64 32, i8* %P)
-  store i8 1, i8* %P
-  ret i8 0
-}
-
-declare {}* @llvm.lifetime.start(i64 %S, i8* nocapture %P) readonly
-declare void @llvm.lifetime.end(i64 %S, i8* nocapture %P)
-\ No newline at end of file
diff --git a/test/Transforms/GVN/crash-no-aa.ll b/test/Transforms/GVN/crash-no-aa.ll
new file mode 100644
index 0000000..dae65dd
--- /dev/null
+++ b/test/Transforms/GVN/crash-no-aa.ll
@@ -0,0 +1,16 @@
+; RUN: opt -no-aa -gvn -S %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v1
+28:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-freebsd8.0"
+
+; PR5744
+define i32 @test1({i16, i32} *%P) {
+  %P2 = getelementptr {i16, i32} *%P, i32 0, i32 0
+  store i16 42, i16* %P2
+
+  %P3 = getelementptr {i16, i32} *%P, i32 0, i32 1
+  %V = load i32* %P3
+  ret i32 %V
+}
+
diff --git a/test/Transforms/GVN/crash.ll b/test/Transforms/GVN/crash.ll
index 30563cd..9167b6e 100644
--- a/test/Transforms/GVN/crash.ll
+++ b/test/Transforms/GVN/crash.ll
@@ -92,3 +92,46 @@ bb64.i:                                           ; Unreachable
 bb66.i:                                           ; Unreachable
   br label %bb69.i
 }
+
+
+
+; rdar://7438974
+
+@g = external global i64, align 8
+
+define i32* @foo() {
+do.end17.i:
+  %tmp18.i = load i7** undef
+  %tmp1 = bitcast i7* %tmp18.i to i8*
+  br i1 undef, label %do.body36.i, label %if.then21.i
+
+if.then21.i:
+  %tmp2 = bitcast i7* %tmp18.i to i8*
+  ret i32* undef
+
+do.body36.i:
+  %ivar38.i = load i64* @g 
+  %tmp3 = bitcast i7* %tmp18.i to i8*
+  %add.ptr39.sum.i = add i64 %ivar38.i, 8
+  %tmp40.i = getelementptr inbounds i8* %tmp3, i64 %add.ptr39.sum.i
+  %tmp4 = bitcast i8* %tmp40.i to i64*
+  %tmp41.i = load i64* %tmp4
+  br i1 undef, label %if.then48.i, label %do.body57.i
+
+if.then48.i:
+  %call54.i = call i32 @foo2()
+  br label %do.body57.i
+
+do.body57.i:
+  %tmp58.i = load i7** undef
+  %ivar59.i = load i64* @g
+  %tmp5 = bitcast i7* %tmp58.i to i8*
+  %add.ptr65.sum.i = add i64 %ivar59.i, 8
+  %tmp66.i = getelementptr inbounds i8* %tmp5, i64 %add.ptr65.sum.i
+  %tmp6 = bitcast i8* %tmp66.i to i64*
+  %tmp67.i = load i64* %tmp6
+  ret i32* undef
+}
+
+declare i32 @foo2()
+
diff --git a/test/Transforms/GVN/lifetime-simple.ll b/test/Transforms/GVN/lifetime-simple.ll
index 00a0c29..8139246 100644
--- a/test/Transforms/GVN/lifetime-simple.ll
+++ b/test/Transforms/GVN/lifetime-simple.ll
@@ -4,9 +4,9 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
 target triple = "i386-apple-darwin7"
 
 define i8 @test(i8* %P) nounwind {
-; CHECK: @test
+; CHECK: lifetime.start
 ; CHECK-NOT: load
-; CHECK: ret i8 undef
+; CHECK: lifetime.end
 entry:
   call void @llvm.lifetime.start(i64 32, i8* %P)
   %0 = load i8* %P
diff --git a/test/Transforms/GVN/rle-dominated.ll b/test/Transforms/GVN/rle-dominated.ll
deleted file mode 100644
index e86f592..0000000
--- a/test/Transforms/GVN/rle-dominated.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -gvn -S | grep load | count 2
-
-define i32 @main(i32** %p) {
-block1:
-  %z = load i32** %p
-	br i1 true, label %block2, label %block3
-
-block2:
- %a = load i32** %p
- br label %block4
-
-block3:
-  %b = load i32** %p
-  br label %block4
-
-block4:
-  %c = load i32** %p
-  %d = load i32* %c
-  ret i32 %d
-}
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index cdd1f4f..d419fd2 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -131,6 +131,84 @@ define i8* @coerce_mustalias7(i64 %V, i64* %P) {
 ; CHECK: ret i8*
 }
 
+; memset -> i16 forwarding.
+define signext i16 @memset_to_i16_local(i16* %A) nounwind ssp {
+entry:
+  %conv = bitcast i16* %A to i8* 
+  tail call void @llvm.memset.i64(i8* %conv, i8 1, i64 200, i32 1)
+  %arrayidx = getelementptr inbounds i16* %A, i64 42
+  %tmp2 = load i16* %arrayidx
+  ret i16 %tmp2
+; CHECK: @memset_to_i16_local
+; CHECK-NOT: load
+; CHECK: ret i16 257
+}
+
+; memset -> float forwarding.
+define float @memset_to_float_local(float* %A, i8 %Val) nounwind ssp {
+entry:
+  %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
+  tail call void @llvm.memset.i64(i8* %conv, i8 %Val, i64 400, i32 1)
+  %arrayidx = getelementptr inbounds float* %A, i64 42 ; <float*> [#uses=1]
+  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
+  ret float %tmp2
+; CHECK: @memset_to_float_local
+; CHECK-NOT: load
+; CHECK: zext
+; CHECK-NEXT: shl
+; CHECK-NEXT: or
+; CHECK-NEXT: shl
+; CHECK-NEXT: or
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret float
+}
+
+;; non-local memset -> i16 load forwarding.
+define i16 @memset_to_i16_nonlocal0(i16* %P, i1 %cond) {
+  %P3 = bitcast i16* %P to i8*
+  br i1 %cond, label %T, label %F
+T:
+  tail call void @llvm.memset.i64(i8* %P3, i8 1, i64 400, i32 1)
+  br label %Cont
+  
+F:
+  tail call void @llvm.memset.i64(i8* %P3, i8 2, i64 400, i32 1)
+  br label %Cont
+
+Cont:
+  %P2 = getelementptr i16* %P, i32 4
+  %A = load i16* %P2
+  ret i16 %A
+
+; CHECK: @memset_to_i16_nonlocal0
+; CHECK: Cont:
+; CHECK-NEXT:   %A = phi i16 [ 514, %F ], [ 257, %T ]
+; CHECK-NOT: load
+; CHECK: ret i16 %A
+}
+
+@GCst = constant {i32, float, i32 } { i32 42, float 14., i32 97 }
+
+; memset -> float forwarding.
+define float @memcpy_to_float_local(float* %A) nounwind ssp {
+entry:
+  %conv = bitcast float* %A to i8*                ; <i8*> [#uses=1]
+  tail call void @llvm.memcpy.i64(i8* %conv, i8* bitcast ({i32, float, i32 }* @GCst to i8*), i64 12, i32 1)
+  %arrayidx = getelementptr inbounds float* %A, i64 1 ; <float*> [#uses=1]
+  %tmp2 = load float* %arrayidx                   ; <float> [#uses=1]
+  ret float %tmp2
+; CHECK: @memcpy_to_float_local
+; CHECK-NOT: load
+; CHECK: ret float 1.400000e+01
+}
+
+
+declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
+declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind
+
+
+
+
 ;; non-local i32/float -> i8 load forwarding.
 define i8 @coerce_mustalias_nonlocal0(i32* %P, i1 %cond) {
   %P2 = bitcast i32* %P to float*
@@ -155,6 +233,7 @@ Cont:
 ; CHECK: ret i8 %A
 }
 
+
 ;; non-local i32/float -> i8 load forwarding.  This also tests that the "P3"
 ;; bitcast equivalence can be properly phi translated.
 define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
@@ -279,4 +358,177 @@ Cont:
 ; CHECK: ret i8 %A
 }
 
+define i32 @chained_load(i32** %p) {
+block1:
+  %z = load i32** %p
+	br i1 true, label %block2, label %block3
+
+block2:
+ %a = load i32** %p
+ br label %block4
+
+block3:
+  %b = load i32** %p
+  br label %block4
+
+block4:
+  %c = load i32** %p
+  %d = load i32* %c
+  ret i32 %d
+  
+; CHECK: @chained_load
+; CHECK: %z = load i32** %p
+; CHECK-NOT: load
+; CHECK: %d = load i32* %z
+; CHECK-NEXT: ret i32 %d
+}
+
+
+declare i1 @cond() readonly
+declare i1 @cond2() readonly
+
+define i32 @phi_trans2() {
+; CHECK: @phi_trans2
+entry:
+  %P = alloca i32, i32 400
+  br label %F1
+  
+F1:
+  %A = phi i32 [1, %entry], [2, %F]
+  %cond2 = call i1 @cond()
+  br i1 %cond2, label %T1, label %TY
+  
+T1:
+  %P2 = getelementptr i32* %P, i32 %A
+  %x = load i32* %P2
+  %cond = call i1 @cond2()
+  br i1 %cond, label %TX, label %F
+  
+F:
+  %P3 = getelementptr i32* %P, i32 2
+  store i32 17, i32* %P3
+  
+  store i32 42, i32* %P2  ; Provides "P[A]".
+  br label %F1
+
+TX:
+  ; This load should not be compiled to 'ret i32 42'.  An overly clever
+  ; implementation of GVN would see that we're returning 17 if the loop
+  ; executes once or 42 if it executes more than that, but we'd have to do
+  ; loop restructuring to expose this, and GVN shouldn't do this sort of CFG
+  ; transformation.
+  
+; CHECK: TX:
+; CHECK: ret i32 %x
+  ret i32 %x
+TY:
+  ret i32 0
+}
+
+define i32 @phi_trans3(i32* %p) {
+; CHECK: @phi_trans3
+block1:
+  br i1 true, label %block2, label %block3
+
+block2:
+ store i32 87, i32* %p
+ br label %block4
+
+block3:
+  %p2 = getelementptr i32* %p, i32 43
+  store i32 97, i32* %p2
+  br label %block4
+
+block4:
+  %A = phi i32 [-1, %block2], [42, %block3]
+  br i1 true, label %block5, label %exit
+  
+; CHECK: block4:
+; CHECK-NEXT: %D = phi i32 [ 87, %block2 ], [ 97, %block3 ]  
+; CHECK-NOT: load
+
+block5:
+  %B = add i32 %A, 1
+  br i1 true, label %block6, label %exit
+  
+block6:
+  %C = getelementptr i32* %p, i32 %B
+  br i1 true, label %block7, label %exit
+  
+block7:
+  %D = load i32* %C
+  ret i32 %D
+  
+; CHECK: block7:
+; CHECK-NEXT: ret i32 %D
+
+exit:
+  ret i32 -1
+}
+
+define i8 @phi_trans4(i8* %p) {
+; CHECK: @phi_trans4
+entry:
+  %X3 = getelementptr i8* %p, i32 192
+  store i8 192, i8* %X3
+  
+  %X = getelementptr i8* %p, i32 4
+  %Y = load i8* %X
+  br label %loop
+
+loop:
+  %i = phi i32 [4, %entry], [192, %loop]
+  %X2 = getelementptr i8* %p, i32 %i
+  %Y2 = load i8* %X2
+  
+; CHECK: loop:
+; CHECK-NEXT: %Y2 = phi i8 [ %Y, %entry ], [ 0, %loop ]
+; CHECK-NOT: load i8
+  
+  %cond = call i1 @cond2()
+
+  %Z = bitcast i8 *%X3 to i32*
+  store i32 0, i32* %Z
+  br i1 %cond, label %loop, label %out
+  
+out:
+  %R = add i8 %Y, %Y2
+  ret i8 %R
+}
+
+define i8 @phi_trans5(i8* %p) {
+; CHECK: @phi_trans5
+entry:
+  
+  %X4 = getelementptr i8* %p, i32 2
+  store i8 19, i8* %X4
+  
+  %X = getelementptr i8* %p, i32 4
+  %Y = load i8* %X
+  br label %loop
+
+loop:
+  %i = phi i32 [4, %entry], [3, %cont]
+  %X2 = getelementptr i8* %p, i32 %i
+  %Y2 = load i8* %X2  ; Ensure this load is not being incorrectly replaced.
+  %cond = call i1 @cond2()
+  br i1 %cond, label %cont, label %out
+
+cont:
+  %Z = getelementptr i8* %X2, i32 -1
+  %Z2 = bitcast i8 *%Z to i32*
+  store i32 50462976, i32* %Z2  ;; (1 << 8) | (2 << 16) | (3 << 24)
+
+
+; CHECK: store i32
+; CHECK-NEXT: getelementptr i8* %p, i32 3
+; CHECK-NEXT: load i8*
+  br label %loop
+  
+out:
+  %R = add i8 %Y, %Y2
+  ret i8 %R
+}
+
+
 
diff --git a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
index 5e639fd..a5be2b1 100644
--- a/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
+++ b/test/Transforms/GlobalOpt/2009-02-15-ResolveAlias.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -globalopt -S | grep {define void @a}
+; RUN: opt < %s -globalopt -S | FileCheck %s
 
 define internal void @f() {
+; CHECK-NOT: @f
+; CHECK: define void @a
 	ret void
 }
 
@@ -10,3 +12,13 @@ define void @g() {
 	call void()* @a()
 	ret void
 }
+
+@b = alias internal void ()* @g
+; CHECK-NOT: @b
+
+define void @h() {
+	call void()* @b()
+; CHECK: call void @g
+	ret void
+}
+
diff --git a/test/Transforms/IndVarSimplify/indirectbr.ll b/test/Transforms/IndVarSimplify/indirectbr.ll
new file mode 100644
index 0000000..b4ce153
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/indirectbr.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -indvars -S -disable-output
+; PR5758
+
+define zeroext i1 @foo() nounwind {
+entry:
+  indirectbr i8* undef, [label %"202", label %"133"]
+
+"132":                                            ; preds = %"133"
+  %0 = add i32 %1, 1                              ; <i32> [#uses=1]
+  br label %"133"
+
+"133":                                            ; preds = %"132", %entry
+  %1 = phi i32 [ %0, %"132" ], [ 0, %entry ]      ; <i32> [#uses=2]
+  %2 = icmp eq i32 %1, 4                          ; <i1> [#uses=1]
+  br i1 %2, label %"134", label %"132"
+
+"134":                                            ; preds = %"133"
+  ret i1 true
+
+"202":                                            ; preds = %entry
+  ret i1 false
+}
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index ba638ba..f8e4911 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -2,7 +2,7 @@
 ; PR5438
 
 ; TODO: This should also optimize down.
-;define i32 @bar(i32 %a, i32 %b) nounwind readnone {
+;define i32 @test1(i32 %a, i32 %b) nounwind readnone {
 ;entry:
 ;        %0 = icmp sgt i32 %a, -1        ; <i1> [#uses=1]
 ;        %1 = icmp slt i32 %b, 0         ; <i1> [#uses=1]
@@ -11,11 +11,21 @@
 ;        ret i32 %3
 ;}
 
-define i32 @qaz(i32 %a, i32 %b) nounwind readnone {
-; CHECK: @qaz
+; TODO: This optimizes partially but not all the way.
+;define i32 @test2(i32 %a, i32 %b) nounwind readnone {
+;entry:
+;        %0 = and i32 %a, 8            ;<i32>  [#uses=1]
+;        %1 = and i32 %b, 8            ;<i32>  [#uses=1]
+;        %2 = icmp eq i32 %0, %1         ;<i1>  [#uses=1]
+;        %3 = zext i1 %2 to i32          ;<i32>  [#uses=1]
+;        ret i32 %3
+;}
+
+define i32 @test3(i32 %a, i32 %b) nounwind readnone {
+; CHECK: @test3
 entry:
 ; CHECK: xor i32 %a, %b
-; CHECK; lshr i32 %0, 31
+; CHECK: lshr i32 %0, 31
 ; CHECK: xor i32 %1, 1
         %0 = lshr i32 %a, 31            ; <i32> [#uses=1]
         %1 = lshr i32 %b, 31            ; <i32> [#uses=1]
@@ -26,3 +36,23 @@ entry:
 ; CHECK-NOT: zext
 ; CHECK: ret i32 %2
 }
+
+; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
+; is one, not zero.
+define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
+; CHECK: @test3i
+entry:
+; CHECK: xor i32 %a, %b
+; CHECK: lshr i32 %0, 31
+; CHECK: xor i32 %1, 1
+        %0 = lshr i32 %a, 29            ; <i32> [#uses=1]
+        %1 = lshr i32 %b, 29            ; <i32> [#uses=1]
+        %2 = or i32 %0, 35
+        %3 = or i32 %1, 35
+        %4 = icmp eq i32 %2, %3         ; <i1> [#uses=1]
+        %5 = zext i1 %4 to i32          ; <i32> [#uses=1]
+        ret i32 %5
+; CHECK-NOT: icmp
+; CHECK-NOT: zext
+; CHECK: ret i32 %2
+}
diff --git a/test/Transforms/InstCombine/crash.ll b/test/Transforms/InstCombine/crash.ll
index 1528f6d..82ac575 100644
--- a/test/Transforms/InstCombine/crash.ll
+++ b/test/Transforms/InstCombine/crash.ll
@@ -137,3 +137,14 @@ define arm_apcscc void @test5() {
 exit:
        ret void
 }
+
+
+; PR5673
+
+@test6g = external global i32*  
+
+define arm_aapcs_vfpcc i32 @test6(i32 %argc, i8** %argv) nounwind {
+entry:
+  store i32* getelementptr (i32* bitcast (i32 (i32, i8**)* @test6 to i32*), i32 -2048), i32** @test6g, align 4
+  unreachable
+}
diff --git a/test/Transforms/InstCombine/sext-misc.ll b/test/Transforms/InstCombine/sext-misc.ll
deleted file mode 100644
index 3346ff8..0000000
--- a/test/Transforms/InstCombine/sext-misc.ll
+++ /dev/null
@@ -1,77 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep sext
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-
-declare i32 @llvm.ctpop.i32(i32)
-declare i32 @llvm.ctlz.i32(i32)
-declare i32 @llvm.cttz.i32(i32)
-
-define i64 @foo(i32 %x) {
-  %t = call i32 @llvm.ctpop.i32(i32 %x)
-  %s = sext i32 %t to i64
-  ret i64 %s
-}
-define i64 @boo(i32 %x) {
-  %t = call i32 @llvm.ctlz.i32(i32 %x)
-  %s = sext i32 %t to i64
-  ret i64 %s
-}
-define i64 @zoo(i32 %x) {
-  %t = call i32 @llvm.cttz.i32(i32 %x)
-  %s = sext i32 %t to i64
-  ret i64 %s
-}
-define i64 @coo(i32 %x) {
-  %t = udiv i32 %x, 3
-  %s = sext i32 %t to i64
-  ret i64 %s
-}
-define i64 @moo(i32 %x) {
-  %t = urem i32 %x, 30000
-  %s = sext i32 %t to i64
-  ret i64 %s
-}
-define i64 @yoo(i32 %x) {
-  %u = lshr i32 %x, 3
-  %t = mul i32 %u, 3
-  %s = sext i32 %t to i64
-  ret i64 %s
-}
-define i64 @voo(i32 %x) {
-  %t = and i32 %x, 511
-  %u = sub i32 20000, %t
-  %s = sext i32 %u to i64
-  ret i64 %s
-}
-define i32 @woo(i8 %a, i32 %f, i1 %p, i32* %z) {
-  %d = lshr i32 %f, 24
-  %e = select i1 %p, i32 %d, i32 0
-  %s = trunc i32 %e to i16
-  %n = sext i16 %s to i32
-  ret i32 %n
-}
-
-; rdar://6013816
-define i16 @test(i16 %t, i1 %cond) nounwind {
-entry:
-	br i1 %cond, label %T, label %F
-T:
-	%t2 = sext i16 %t to i32
-	br label %F
-
-F:
-	%V = phi i32 [%t2, %T], [42, %entry]
-	%W = trunc i32 %V to i16
-	ret i16 %W
-}
-
-; PR2638
-define i32 @test2(i32 %i) nounwind  {
-entry:
-        %tmp12 = trunc i32 %i to i8             ; <i8> [#uses=1]
-        %tmp16 = shl i8 %tmp12, 6               ; <i8> [#uses=1]
-        %a = ashr i8 %tmp16, 6            ; <i8> [#uses=1]
-        %b = sext i8 %a to i32            ; <i32> [#uses=1]
-        ret i32 %b
-}
-
diff --git a/test/Transforms/InstCombine/sext.ll b/test/Transforms/InstCombine/sext.ll
new file mode 100644
index 0000000..6deee1f
--- /dev/null
+++ b/test/Transforms/InstCombine/sext.ll
@@ -0,0 +1,128 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+declare i32 @llvm.ctpop.i32(i32)
+declare i32 @llvm.ctlz.i32(i32)
+declare i32 @llvm.cttz.i32(i32)
+
+define i64 @test1(i32 %x) {
+  %t = call i32 @llvm.ctpop.i32(i32 %x)
+  %s = sext i32 %t to i64
+  ret i64 %s
+  
+; CHECK: @test1
+; CHECK: zext i32 %t
+}
+
+define i64 @test2(i32 %x) {
+  %t = call i32 @llvm.ctlz.i32(i32 %x)
+  %s = sext i32 %t to i64
+  ret i64 %s
+
+; CHECK: @test2
+; CHECK: zext i32 %t
+}
+
+define i64 @test3(i32 %x) {
+  %t = call i32 @llvm.cttz.i32(i32 %x)
+  %s = sext i32 %t to i64
+  ret i64 %s
+
+; CHECK: @test3
+; CHECK: zext i32 %t
+}
+
+define i64 @test4(i32 %x) {
+  %t = udiv i32 %x, 3
+  %s = sext i32 %t to i64
+  ret i64 %s
+
+; CHECK: @test4
+; CHECK: zext i32 %t
+}
+
+define i64 @test5(i32 %x) {
+  %t = urem i32 %x, 30000
+  %s = sext i32 %t to i64
+  ret i64 %s
+; CHECK: @test5
+; CHECK: zext i32 %t
+}
+
+define i64 @test6(i32 %x) {
+  %u = lshr i32 %x, 3
+  %t = mul i32 %u, 3
+  %s = sext i32 %t to i64
+  ret i64 %s
+; CHECK: @test6
+; CHECK: zext i32 %t
+}
+
+define i64 @test7(i32 %x) {
+  %t = and i32 %x, 511
+  %u = sub i32 20000, %t
+  %s = sext i32 %u to i64
+  ret i64 %s
+; CHECK: @test7
+; CHECK: zext i32 %u to i64
+}
+
+define i32 @test8(i8 %a, i32 %f, i1 %p, i32* %z) {
+  %d = lshr i32 %f, 24
+  %e = select i1 %p, i32 %d, i32 0
+  %s = trunc i32 %e to i16
+  %n = sext i16 %s to i32
+  ret i32 %n
+; CHECK: @test8
+; CHECK: %d = lshr i32 %f, 24
+; CHECK: %n = select i1 %p, i32 %d, i32 0
+; CHECK: ret i32 %n
+}
+
+; rdar://6013816
+define i16 @test9(i16 %t, i1 %cond) nounwind {
+entry:
+	br i1 %cond, label %T, label %F
+T:
+	%t2 = sext i16 %t to i32
+	br label %F
+
+F:
+	%V = phi i32 [%t2, %T], [42, %entry]
+	%W = trunc i32 %V to i16
+	ret i16 %W
+; CHECK: @test9
+; CHECK: T:
+; CHECK-NEXT: br label %F
+; CHECK: F:
+; CHECK-NEXT: phi i16
+; CHECK-NEXT: ret i16
+}
+
+; PR2638
+define i32 @test10(i32 %i) nounwind  {
+entry:
+        %tmp12 = trunc i32 %i to i8
+        %tmp16 = shl i8 %tmp12, 6
+        %a = ashr i8 %tmp16, 6 
+        %b = sext i8 %a to i32 
+        ret i32 %b
+; CHECK: @test10
+; CHECK:  shl i32 %i, 30
+; CHECK-NEXT: ashr i32
+; CHECK-NEXT: ret i32
+}
+
+define void @test11(<2 x i16> %srcA, <2 x i16> %srcB, <2 x i16>* %dst) {
+  %cmp = icmp eq <2 x i16> %srcB, %srcA
+  %sext = sext <2 x i1> %cmp to <2 x i16>
+  %tmask = ashr <2 x i16> %sext, <i16 15, i16 15> 
+  store <2 x i16> %tmask, <2 x i16>* %dst
+  ret void                                                                                                                      
+; CHECK: @test11
+; CHECK-NEXT: icmp eq
+; CHECK-NEXT: sext <2 x i1>
+; CHECK-NEXT: store <2 x i16>
+; CHECK-NEXT: ret
+}                                                                                                                               
diff --git a/test/Transforms/InstCombine/xor-demorgans.ll b/test/Transforms/InstCombine/xor-demorgans.ll
deleted file mode 100644
index 3383845..0000000
--- a/test/Transforms/InstCombine/xor-demorgans.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep {= or}
-; PR3266
-; XFAIL: *
-
-define i1 @foo(i32 %x, i32 %y) nounwind {
-.summary:
-       %0 = icmp sgt i32 %x, 4         ; <i1> [#uses=1]
-       %1 = icmp sgt i32 %y, 0         ; <i1> [#uses=1]
-       %.demorgan = or i1 %1, %0               ; <i1> [#uses=1]
-       %2 = xor i1 %.demorgan, true            ; <i1> [#uses=1]
-       ret i1 %2
-}
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index b2b9d69..361ec6c 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -212,3 +212,25 @@ bb13:
 bb61:                                            
   ret void
 }
+
+
+; PR5698
+define void @test7(i32 %x) {
+tailrecurse:
+  switch i32 %x, label %return [
+    i32 2, label %bb2
+    i32 3, label %bb
+  ]
+
+bb:         
+  switch i32 %x, label %return [
+    i32 2, label %bb2
+    i32 3, label %tailrecurse
+  ]
+
+bb2:        
+  ret void
+
+return:     
+  ret void
+}
diff --git a/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll b/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll
new file mode 100644
index 0000000..e3cdbb3
--- /dev/null
+++ b/test/Transforms/LICM/2009-12-10-LICM-Indbr-Crash.ll
@@ -0,0 +1,21 @@
+; Test for rdar://7452967
+; RUN: opt < %s -licm -disable-output
+define void @foo (i8* %v)
+{
+  entry:
+    br i1 undef, label %preheader, label %return
+
+  preheader:
+    br i1 undef, label %loop, label %return
+
+  loop:
+    indirectbr i8* undef, [label %preheader, label %stuff]
+
+  stuff:
+    %0 = load i8* undef, align 1
+    br label %loop
+
+  return:
+    ret void
+
+}
diff --git a/test/Unit/lit.cfg b/test/Unit/lit.cfg
index 8321593..34372bb 100644
--- a/test/Unit/lit.cfg
+++ b/test/Unit/lit.cfg
@@ -32,6 +32,12 @@ if config.test_exec_root is None:
     # configuration hasn't been created by the build system, or we are in an
     # out-of-tree build situation).
 
+    # Check for 'llvm_unit_site_config' user parameter, and use that if available.
+    site_cfg = lit.params.get('llvm_unit_site_config', None)
+    if site_cfg and os.path.exists(site_cfg):
+        lit.load_config(config, site_cfg)
+        raise SystemExit
+
     # Try to detect the situation where we are using an out-of-tree build by
     # looking for 'llvm-config'.
     #
diff --git a/test/lit.cfg b/test/lit.cfg
index 1939792..246f270 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -58,6 +58,12 @@ if config.test_exec_root is None:
     # configuration hasn't been created by the build system, or we are in an
     # out-of-tree build situation).
 
+    # Check for 'llvm_site_config' user parameter, and use that if available.
+    site_cfg = lit.params.get('llvm_site_config', None)
+    if site_cfg and os.path.exists(site_cfg):
+        lit.load_config(config, site_cfg)
+        raise SystemExit
+
     # Try to detect the situation where we are using an out-of-tree build by
     # looking for 'llvm-config'.
     #
diff --git a/tools/llvmc/doc/LLVMC-Reference.rst b/tools/llvmc/doc/LLVMC-Reference.rst
index 102795e..4d80a2a 100644
--- a/tools/llvmc/doc/LLVMC-Reference.rst
+++ b/tools/llvmc/doc/LLVMC-Reference.rst
@@ -336,8 +336,8 @@ separate option groups syntactically.
      it is synonymous with ``required``. Incompatible with ``required`` and
      ``zero_or_one``.
 
-   - ``zero_or_one`` - the option can be specified zero or one times. Useful
-     only for list options in conjunction with ``multi_val``. Incompatible with
+   - ``optional`` - the option can be specified zero or one times. Useful only
+     for list options in conjunction with ``multi_val``. Incompatible with
      ``required`` and ``one_or_more``.
 
    - ``hidden`` - the description of this option will not appear in
@@ -347,19 +347,30 @@ separate option groups syntactically.
    - ``really_hidden`` - the option will not be mentioned in any help
      output.
 
+   - ``comma_separated`` - Indicates that any commas specified for an option's
+     value should be used to split the value up into multiple values for the
+     option. This property is valid only for list options. In conjunction with
+     ``forward_value`` can be used to implement option forwarding in style of
+     gcc's ``-Wa,``.
+
    - ``multi_val n`` - this option takes *n* arguments (can be useful in some
      special cases). Usage example: ``(parameter_list_option "foo", (multi_val
      3))``; the command-line syntax is '-foo a b c'. Only list options can have
-     this attribute; you can, however, use the ``one_or_more``, ``zero_or_one``
+     this attribute; you can, however, use the ``one_or_more``, ``optional``
      and ``required`` properties.
 
    - ``init`` - this option has a default value, either a string (if it is a
-     parameter), or a boolean (if it is a switch; boolean constants are called
-     ``true`` and ``false``). List options can't have this attribute. Usage
-     examples: ``(switch_option "foo", (init true))``; ``(prefix_option "bar",
-     (init "baz"))``.
+     parameter), or a boolean (if it is a switch; as in C++, boolean constants
+     are called ``true`` and ``false``). List options can't have ``init``
+     attribute.
+     Usage examples: ``(switch_option "foo", (init true))``; ``(prefix_option
+     "bar", (init "baz"))``.
+
+   - ``extern`` - this option is defined in some other plugin, see `below`__.
+
+   __ extern_
 
-   - ``extern`` - this option is defined in some other plugin, see below.
+.. _extern:
 
 External options
 ----------------
@@ -547,7 +558,11 @@ The complete list of all currently implemented tool properties follows.
 
   - ``actions`` - A single big ``case`` expression that specifies how
     this tool reacts on command-line options (described in more detail
-    below).
+    `below`__).
+
+__ actions_
+
+.. _actions:
 
 Actions
 -------
@@ -585,35 +600,42 @@ The list of all possible actions follows.
 
 * Possible actions:
 
-   - ``append_cmd`` - append a string to the tool invocation
-     command.
-     Example: ``(case (switch_on "pthread"), (append_cmd
-     "-lpthread"))``
+   - ``append_cmd`` - Append a string to the tool invocation command.
+     Example: ``(case (switch_on "pthread"), (append_cmd "-lpthread"))``.
 
-   - ``error`` - exit with error.
+   - ``error`` - Exit with error.
      Example: ``(error "Mixing -c and -S is not allowed!")``.
 
-   - ``warning`` - print a warning.
+   - ``warning`` - Print a warning.
      Example: ``(warning "Specifying both -O1 and -O2 is meaningless!")``.
 
-   - ``forward`` - forward an option unchanged.  Example: ``(forward "Wall")``.
+   - ``forward`` - Forward the option unchanged.
+     Example: ``(forward "Wall")``.
 
-   - ``forward_as`` - Change the name of an option, but forward the
-     argument unchanged.
+   - ``forward_as`` - Change the option's name, but forward the argument
+     unchanged.
      Example: ``(forward_as "O0", "--disable-optimization")``.
 
-   - ``output_suffix`` - modify the output suffix of this
-     tool.
+   - ``forward_value`` - Forward only option's value. Cannot be used with switch
+     options (since they don't have values), but works fine with lists.
+     Example: ``(forward_value "Wa,")``.
+
+   - ``forward_transformed_value`` - As above, but applies a hook to the
+     option's value before forwarding (see `below`__). When
+     ``forward_transformed_value`` is applied to a list
+     option, the hook must have signature
+     ``std::string hooks::HookName (const std::vector<std::string>&)``.
+     Example: ``(forward_transformed_value "m", "ConvertToMAttr")``.
+
+     __ hooks_
+
+   - ``output_suffix`` - Modify the output suffix of this tool.
      Example: ``(output_suffix "i")``.
 
-   - ``stop_compilation`` - stop compilation after this tool processes
-     its input. Used without arguments.
+   - ``stop_compilation`` - Stop compilation after this tool processes its
+     input. Used without arguments.
+     Example: ``(stop_compilation)``.
 
-   - ``unpack_values`` - used for for splitting and forwarding
-     comma-separated lists of options, e.g. ``-Wa,-foo=bar,-baz`` is
-     converted to ``-foo=bar -baz`` and appended to the tool invocation
-     command.
-     Example: ``(unpack_values "Wa,")``.
 
 Language map
 ============
@@ -760,6 +782,8 @@ accessible only in the C++ code (i.e. hooks). Use the following code::
     extern const char* ProgramName;
     }
 
+    namespace hooks {
+
     std::string MyHook() {
     //...
     if (strcmp(ProgramName, "mydriver") == 0) {
@@ -767,6 +791,8 @@ accessible only in the C++ code (i.e. hooks). Use the following code::
 
     }
 
+    } // end namespace hooks
+
 In general, you're encouraged not to make the behaviour dependent on the
 executable file name, and use command-line switches instead. See for example how
 the ``Base`` plugin behaves when it needs to choose the correct linker options
diff --git a/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td b/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td
index df9b99e..5e6f6cb 100644
--- a/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td
+++ b/tools/llvmc/example/mcc16/plugins/PIC16Base/PIC16Base.td
@@ -41,9 +41,9 @@ def OptionList : OptionList<[
 //    (help "Optimization level 2. (Default)")),
 // (parameter_option "pre-RA-sched",
 //    (help "Example of an option that is passed to llc")),
- (prefix_list_option "Wa,",
+ (prefix_list_option "Wa,", (comma_separated),
     (help "Pass options to native assembler")),
- (prefix_list_option "Wl,",
+ (prefix_list_option "Wl,", (comma_separated),
     (help "Pass options to native linker"))
 // (prefix_list_option "Wllc,",
 //    (help "Pass options to llc")),
@@ -58,11 +58,11 @@ class clang_based<string language, string cmd, string ext_E> : Tool<
  (output_suffix "bc"),
  (cmd_line (case
            (switch_on "E"),
-           (case 
+           (case
               (not_empty "o"), !strconcat(cmd, " -E $INFILE -o $OUTFILE"),
               (default), !strconcat(cmd, " -E $INFILE")),
            (default), !strconcat(cmd, " $INFILE -o $OUTFILE"))),
- (actions (case 
+ (actions (case
                 (and (multiple_input_files), (or (switch_on "S"), (switch_on "c"))),
               (error "cannot specify -o with -c or -S with multiple files"),
                 (switch_on "E"), [(stop_compilation), (output_suffix ext_E)],
@@ -138,7 +138,7 @@ def gpasm : Tool<[
  (actions (case
           (switch_on "c"), (stop_compilation),
           (switch_on "g"), (append_cmd "-g"),
-          (not_empty "Wa,"), (unpack_values "Wa,")))
+          (not_empty "Wa,"), (forward_value "Wa,")))
 ]>;
 
 def mplink : Tool<[
@@ -147,13 +147,13 @@ def mplink : Tool<[
  (output_suffix "cof"),
  (cmd_line "$CALL(GetBinDir)mplink.exe -k $CALL(GetStdLinkerScriptsDir) -l $CALL(GetStdLibsDir) -p 16f1937  intrinsics.lib devices.lib $INFILE -o $OUTFILE"),
  (actions (case
-          (not_empty "Wl,"), (unpack_values "Wl,"),
+          (not_empty "Wl,"), (forward_value "Wl,"),
           (not_empty "L"), (forward_as "L", "-l"),
           (not_empty "K"), (forward_as "K", "-k"),
           (not_empty "m"), (forward "m"),
 //          (not_empty "l"), [(unpack_values "l"),(append_cmd ".lib")])),
-          (not_empty "k"), (unpack_values "k"),
-          (not_empty "l"), (unpack_values "l"))),
+          (not_empty "k"), (forward_value "k"),
+          (not_empty "l"), (forward_value "l"))),
  (join)
 ]>;
 
@@ -175,13 +175,13 @@ def LanguageMap : LanguageMap<[
 def CompilationGraph : CompilationGraph<[
     Edge<"root", "clang_cc">,
     Edge<"root", "llvm_ld">,
-    OptionalEdge<"root", "llvm_ld_optimizer", (case 
+    OptionalEdge<"root", "llvm_ld_optimizer", (case
                                          (switch_on "S"), (inc_weight),
                                          (switch_on "c"), (inc_weight))>,
     Edge<"root", "gpasm">,
     Edge<"root", "mplink">,
     Edge<"clang_cc", "llvm_ld">,
-    OptionalEdge<"clang_cc", "llvm_ld_optimizer", (case 
+    OptionalEdge<"clang_cc", "llvm_ld_optimizer", (case
                                          (switch_on "S"), (inc_weight),
                                          (switch_on "c"), (inc_weight))>,
     Edge<"llvm_ld", "pic16passes">,
diff --git a/tools/llvmc/plugins/Base/Base.td.in b/tools/llvmc/plugins/Base/Base.td.in
index 125e95c..8f928cc 100644
--- a/tools/llvmc/plugins/Base/Base.td.in
+++ b/tools/llvmc/plugins/Base/Base.td.in
@@ -38,12 +38,22 @@ def OptList : OptionList<[
     (help "Compile and assemble, but do not link")),
  (switch_option "pthread",
     (help "Enable threads")),
+ (switch_option "m32",
+    (help "Generate code for a 32-bit environment"), (hidden)),
+ (switch_option "m64",
+    (help "Generate code for a 64-bit environment"), (hidden)),
  (switch_option "fPIC",
     (help "Relocation model: PIC"), (hidden)),
  (switch_option "mdynamic-no-pic",
     (help "Relocation model: dynamic-no-pic"), (hidden)),
  (parameter_option "linker",
     (help "Choose linker (possible values: gcc, g++)")),
+ (parameter_option "mtune",
+    (help "Target a specific CPU type"), (hidden)),
+ (parameter_option "march",
+    (help "A synonym for -mtune"), (hidden)),
+ (parameter_option "mcpu",
+    (help "A deprecated synonym for -mtune"), (hidden)),
  (parameter_option "MF",
     (help "Specify a file to write dependencies to"), (hidden)),
  (parameter_option "MT",
@@ -61,9 +71,9 @@ def OptList : OptionList<[
     (help "Add a directory to include path")),
  (prefix_list_option "D",
     (help "Define a macro")),
- (prefix_list_option "Wa,",
+ (prefix_list_option "Wa,", (comma_separated),
     (help "Pass options to assembler")),
- (prefix_list_option "Wllc,",
+ (prefix_list_option "Wllc,", (comma_separated),
     (help "Pass options to llc")),
  (prefix_list_option "L",
     (help "Add a directory to link path")),
@@ -71,8 +81,11 @@ def OptList : OptionList<[
     (help "Search a library when linking")),
  (prefix_list_option "Wl,",
     (help "Pass options to linker")),
- (prefix_list_option "Wo,",
-    (help "Pass options to opt"))
+ (prefix_list_option "Wo,", (comma_separated),
+    (help "Pass options to opt")),
+ (prefix_list_option "m",
+     (help "Enable or disable various extensions (-mmmx, -msse, etc.)"),
+     (hidden))
 ]>;
 
 // Option preprocessor.
@@ -119,6 +132,12 @@ class llvm_gcc_based <string cmd_prefix, string in_lang, string E_ext> : Tool<
          (not_empty "I"), (forward "I"),
          (not_empty "F"), (forward "F"),
          (not_empty "D"), (forward "D"),
+         (not_empty "march"), (forward "march"),
+         (not_empty "mtune"), (forward "mtune"),
+         (not_empty "mcpu"), (forward "mcpu"),
+         (not_empty "m"), (forward "m"),
+         (switch_on "m32"), (forward "m32"),
+         (switch_on "m64"), (forward "m64"),
          (switch_on "O1"), (forward "O1"),
          (switch_on "O2"), (forward "O2"),
          (switch_on "O3"), (forward "O3"),
@@ -140,7 +159,7 @@ def opt : Tool<
 [(in_language "llvm-bitcode"),
  (out_language "llvm-bitcode"),
  (output_suffix "bc"),
- (actions (case (not_empty "Wo,"), (unpack_values "Wo,"),
+ (actions (case (not_empty "Wo,"), (forward_value "Wo,"),
                 (switch_on "O1"), (forward "O1"),
                 (switch_on "O2"), (forward "O2"),
                 (switch_on "O3"), (forward "O3"))),
@@ -162,7 +181,7 @@ def llvm_gcc_assembler : Tool<
  (cmd_line "@LLVMGCCCOMMAND@ -c -x assembler $INFILE -o $OUTFILE"),
  (actions (case
           (switch_on "c"), (stop_compilation),
-          (not_empty "Wa,"), (unpack_values "Wa,")))
+          (not_empty "Wa,"), (forward_value "Wa,")))
 ]>;
 
 def llc : Tool<
@@ -179,7 +198,11 @@ def llc : Tool<
           (switch_on "fPIC"), (append_cmd "-relocation-model=pic"),
           (switch_on "mdynamic-no-pic"),
                      (append_cmd "-relocation-model=dynamic-no-pic"),
-          (not_empty "Wllc,"), (unpack_values "Wllc,")))
+          (not_empty "march"), (forward "mcpu"),
+          (not_empty "mtune"), (forward "mcpu"),
+          (not_empty "mcpu"), (forward "mcpu"),
+          (not_empty "m"), (forward_transformed_value "m", "ConvertToMAttr"),
+          (not_empty "Wllc,"), (forward_value "Wllc,")))
 ]>;
 
 // Base class for linkers
@@ -195,6 +218,8 @@ class llvm_gcc_based_linker <string cmd_prefix> : Tool<
           (not_empty "F"), (forward "F"),
           (not_empty "framework"), (forward "framework"),
           (not_empty "weak_framework"), (forward "weak_framework"),
+          (switch_on "m32"), (forward "m32"),
+          (switch_on "m64"), (forward "m64"),
           (not_empty "l"), (forward "l"),
           (not_empty "Wl,"), (forward "Wl,")))
 ]>;
diff --git a/tools/llvmc/plugins/Base/Hooks.cpp b/tools/llvmc/plugins/Base/Hooks.cpp
new file mode 100644
index 0000000..661a914
--- /dev/null
+++ b/tools/llvmc/plugins/Base/Hooks.cpp
@@ -0,0 +1,33 @@
+#include <string>
+#include <vector>
+
+namespace hooks {
+typedef std::vector<std::string> StrVec;
+
+/// ConvertToMAttr - Convert -m* and -mno-* to -mattr=+*,-*
+std::string ConvertToMAttr(const StrVec& Opts) {
+  std::string out("-mattr=");
+
+  bool firstIter = true;
+  for (StrVec::const_iterator B = Opts.begin(), E = Opts.end(); B!=E; ++B) {
+    const std::string& Arg = *B;
+
+    if (firstIter)
+      firstIter = false;
+    else
+      out += ",";
+
+    if (Arg.find("no-") == 0 && Arg[3] != 0) {
+      out += '-';
+      out += Arg.c_str() + 3;
+    }
+    else {
+      out += '+';
+      out += Arg;
+    }
+  }
+
+  return out;
+}
+
+}
diff --git a/tools/llvmc/plugins/Clang/Clang.td b/tools/llvmc/plugins/Clang/Clang.td
index a179c53..ac8ac15 100644
--- a/tools/llvmc/plugins/Clang/Clang.td
+++ b/tools/llvmc/plugins/Clang/Clang.td
@@ -68,7 +68,7 @@ def as : Tool<
  (out_language "object-code"),
  (output_suffix "o"),
  (cmd_line "as $INFILE -o $OUTFILE"),
- (actions (case (not_empty "Wa,"), (unpack_values "Wa,"),
+ (actions (case (not_empty "Wa,"), (forward_value "Wa,"),
                 (switch_on "c"), (stop_compilation)))
 ]>;
 
@@ -82,7 +82,7 @@ def llvm_ld : Tool<
           (switch_on "pthread"), (append_cmd "-lpthread"),
           (not_empty "L"), (forward "L"),
           (not_empty "l"), (forward "l"),
-          (not_empty "Wl,"), (unpack_values "Wl,"))),
+          (not_empty "Wl,"), (forward_value "Wl,"))),
  (join)
 ]>;
 
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index 12bb2ec..50a2e39 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -30,6 +30,7 @@
 #include "llvm/Support/ManagedStatic.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/PluginLoader.h"
+#include "llvm/Support/PrettyStackTrace.h"
 #include "llvm/Support/StandardPasses.h"
 #include "llvm/Support/SystemUtils.h"
 #include "llvm/Support/raw_ostream.h"
@@ -340,12 +341,14 @@ void AddStandardLinkPasses(PassManager &PM) {
 // main for opt
 //
 int main(int argc, char **argv) {
-  llvm_shutdown_obj X;  // Call llvm_shutdown() on exit.
+  sys::PrintStackTraceOnErrorSignal();
+  llvm::PrettyStackTraceProgram X(argc, argv);
+  
+  llvm_shutdown_obj Y;  // Call llvm_shutdown() on exit.
   LLVMContext &Context = getGlobalContext();
   
   cl::ParseCommandLineOptions(argc, argv,
     "llvm .bc -> .bc modular optimizer and analysis printer\n");
-  sys::PrintStackTraceOnErrorSignal();
 
   // Allocate a full target machine description only if necessary.
   // FIXME: The choice of target should be controllable on the command line.
diff --git a/unittests/ADT/DeltaAlgorithmTest.cpp b/unittests/ADT/DeltaAlgorithmTest.cpp
new file mode 100644
index 0000000..3628922
--- /dev/null
+++ b/unittests/ADT/DeltaAlgorithmTest.cpp
@@ -0,0 +1,96 @@
+//===- llvm/unittest/ADT/DeltaAlgorithmTest.cpp ---------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include <algorithm>
+#include <cstdarg>
+using namespace llvm;
+
+std::ostream &operator<<(std::ostream &OS,
+                         const std::set<unsigned> &S) {
+  OS << "{";
+  for (std::set<unsigned>::const_iterator it = S.begin(),
+         ie = S.end(); it != ie; ++it) {
+    if (it != S.begin())
+      OS << ",";
+    OS << *it;
+  }
+  OS << "}";
+  return OS;
+}
+
+namespace {
+
+class FixedDeltaAlgorithm : public DeltaAlgorithm {
+  changeset_ty FailingSet;
+  unsigned NumTests;
+
+protected:
+  virtual bool ExecuteOneTest(const changeset_ty &Changes) {
+    ++NumTests;
+    return std::includes(Changes.begin(), Changes.end(),
+                         FailingSet.begin(), FailingSet.end());
+  }
+
+public:
+  FixedDeltaAlgorithm(const changeset_ty &_FailingSet)
+    : FailingSet(_FailingSet),
+      NumTests(0) {}
+
+  unsigned getNumTests() const { return NumTests; }
+};
+
+std::set<unsigned> fixed_set(unsigned N, ...) {
+  std::set<unsigned> S;
+  va_list ap;
+  va_start(ap, N);
+  for (unsigned i = 0; i != N; ++i)
+    S.insert(va_arg(ap, unsigned));
+  va_end(ap);
+  return S;
+}
+
+std::set<unsigned> range(unsigned Start, unsigned End) {
+  std::set<unsigned> S;
+  while (Start != End)
+    S.insert(Start++);
+  return S;
+}
+
+std::set<unsigned> range(unsigned N) {
+  return range(0, N);
+}
+
+TEST(DeltaAlgorithmTest, Basic) {
+  // P = {3,5,7} \in S
+  //   [0, 20) should minimize to {3,5,7} in a reasonable number of tests.
+  std::set<unsigned> Fails = fixed_set(3, 3, 5, 7);
+  FixedDeltaAlgorithm FDA(Fails);
+  EXPECT_EQ(fixed_set(3, 3, 5, 7), FDA.Run(range(20)));
+  EXPECT_GE(33U, FDA.getNumTests());
+
+  // P = {3,5,7} \in S
+  //   [10, 20) should minimize to [10,20)
+  EXPECT_EQ(range(10,20), FDA.Run(range(10,20)));
+
+  // P = [0,4) \in S
+  //   [0, 4) should minimize to [0,4) in 11 tests.
+  //
+  // 11 = |{ {},
+  //         {0}, {1}, {2}, {3},
+  //         {1, 2, 3}, {0, 2, 3}, {0, 1, 3}, {0, 1, 2}, 
+  //         {0, 1}, {2, 3} }|
+  FDA = FixedDeltaAlgorithm(range(10));
+  EXPECT_EQ(range(4), FDA.Run(range(4)));
+  EXPECT_EQ(11U, FDA.getNumTests());  
+}
+
+}
+
diff --git a/unittests/ExecutionEngine/JIT/JITTest.cpp b/unittests/ExecutionEngine/JIT/JITTest.cpp
index 12c6b67..bbf3460 100644
--- a/unittests/ExecutionEngine/JIT/JITTest.cpp
+++ b/unittests/ExecutionEngine/JIT/JITTest.cpp
@@ -534,6 +534,31 @@ TEST_F(JITTest, FunctionPointersOutliveTheirCreator) {
 #endif
 }
 
+}  // anonymous namespace
+// This variable is intentionally defined differently in the statically-compiled
+// program from the IR input to the JIT to assert that the JIT doesn't use its
+// definition.
+extern "C" int32_t JITTest_AvailableExternallyGlobal;
+int32_t JITTest_AvailableExternallyGlobal = 42;
+namespace {
+
+TEST_F(JITTest, AvailableExternallyGlobalIsntEmitted) {
+  TheJIT->DisableLazyCompilation(true);
+  LoadAssembly("@JITTest_AvailableExternallyGlobal = "
+               "  available_externally global i32 7 "
+               " "
+               "define i32 @loader() { "
+               "  %result = load i32* @JITTest_AvailableExternallyGlobal "
+               "  ret i32 %result "
+               "} ");
+  Function *loaderIR = M->getFunction("loader");
+
+  int32_t (*loader)() = reinterpret_cast<int32_t(*)()>(
+    (intptr_t)TheJIT->getPointerToFunction(loaderIR));
+  EXPECT_EQ(42, loader()) << "func should return 42 from the external global,"
+                          << " not 7 from the IR version.";
+}
+
 // This code is copied from JITEventListenerTest, but it only runs once for all
 // the tests in this directory.  Everything seems fine, but that's strange
 // behavior.
diff --git a/unittests/ExecutionEngine/JIT/Makefile b/unittests/ExecutionEngine/JIT/Makefile
index 048924a..8de390b 100644
--- a/unittests/ExecutionEngine/JIT/Makefile
+++ b/unittests/ExecutionEngine/JIT/Makefile
@@ -13,3 +13,6 @@ LINK_COMPONENTS := asmparser core support jit native
 
 include $(LEVEL)/Makefile.config
 include $(LLVM_SRC_ROOT)/unittests/Makefile.unittest
+
+# Permit these tests to use the JIT's symbolic lookup.
+LD.Flags += $(RDYNAMIC)
diff --git a/utils/NewNightlyTest.pl b/utils/NewNightlyTest.pl
index a8cf8de..a306382 100755
--- a/utils/NewNightlyTest.pl
+++ b/utils/NewNightlyTest.pl
@@ -317,9 +317,9 @@ sub RunLoggedCommand {
   } else {
       if ($VERBOSE) {
           print "$Title\n";
-          print "$Command 2>&1 > $Log\n";
+          print "$Command > $Log 2>&1\n";
       }
-      system "$Command 2>&1 > $Log";
+      system "$Command > $Log 2>&1";
   }
 }
 
@@ -336,9 +336,9 @@ sub RunAppendingLoggedCommand {
   } else {
       if ($VERBOSE) {
           print "$Title\n";
-          print "$Command 2>&1 > $Log\n";
+          print "$Command >> $Log 2>&1\n";
       }
-      system "$Command 2>&1 >> $Log";
+      system "$Command >> $Log 2>&1";
   }
 }
 
@@ -393,10 +393,8 @@ sub CopyFile { #filename, newfile
 # to our central server via the post method
 #
 #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-sub SendData {
-    $host = $_[0];
-    $file = $_[1];
-    $variables = $_[2];
+sub WriteSentData {
+    $variables = $_[0];
 
     # Write out the "...-sentdata.txt" file.
 
@@ -406,6 +404,12 @@ sub SendData {
         $sentdata.= "$x  => $value\n";
     }
     WriteFile "$Prefix-sentdata.txt", $sentdata;
+}
+
+sub SendData {
+    $host = $_[0];
+    $file = $_[1];
+    $variables = $_[2];
 
     if (!($SUBMITAUX eq "")) {
         system "$SUBMITAUX \"$Prefix-sentdata.txt\"";
@@ -503,8 +507,8 @@ sub BuildLLVM {
   }
   RunAppendingLoggedCommand("(time -p $NICE $MAKECMD $MAKEOPTS)", $BuildLog, "BUILD");
 
-  if (`grep '^$MAKECMD\[^:]*: .*Error' $BuildLog | wc -l` + 0 ||
-      `grep '^$MAKECMD: \*\*\*.*Stop.' $BuildLog | wc -l` + 0) {
+  if (`grep -a '^$MAKECMD\[^:]*: .*Error' $BuildLog | wc -l` + 0 ||
+      `grep -a '^$MAKECMD: \*\*\*.*Stop.' $BuildLog | wc -l` + 0) {
     return 0;
   }
 
@@ -531,15 +535,15 @@ sub TestDirectory {
   $LLCBetaOpts = `$MAKECMD print-llcbeta-option`;
 
   my $ProgramsTable;
-  if (`grep '^$MAKECMD\[^:]: .*Error' $ProgramTestLog | wc -l` + 0) {
+  if (`grep -a '^$MAKECMD\[^:]: .*Error' $ProgramTestLog | wc -l` + 0) {
     $ProgramsTable="Error running test $SubDir\n";
     print "ERROR TESTING\n";
-  } elsif (`grep '^$MAKECMD\[^:]: .*No rule to make target' $ProgramTestLog | wc -l` + 0) {
+  } elsif (`grep -a '^$MAKECMD\[^:]: .*No rule to make target' $ProgramTestLog | wc -l` + 0) {
     $ProgramsTable="Makefile error running tests $SubDir!\n";
     print "ERROR TESTING\n";
   } else {
     # Create a list of the tests which were run...
-    system "egrep 'TEST-(PASS|FAIL)' < $ProgramTestLog ".
+    system "egrep -a 'TEST-(PASS|FAIL)' < $ProgramTestLog ".
            "| sort > $Prefix-$SubDir-Tests.txt";
   }
   $ProgramsTable = ReadFile "report.nightly.csv";
@@ -797,6 +801,9 @@ my %hash_of_data = (
   'a_file_sizes' => ""
 );
 
+# Write out the "...-sentdata.txt" file.
+WriteSentData \%hash_of_data;
+
 if ($SUBMIT || !($SUBMITAUX eq "")) {
   my $response = SendData $SUBMITSERVER,$SUBMITSCRIPT,\%hash_of_data;
   if( $VERBOSE) { print "============================\n$response"; }
diff --git a/utils/TableGen/CodeEmitterGen.cpp b/utils/TableGen/CodeEmitterGen.cpp
index 7e6c769..e9f30be 100644
--- a/utils/TableGen/CodeEmitterGen.cpp
+++ b/utils/TableGen/CodeEmitterGen.cpp
@@ -61,14 +61,11 @@ void CodeEmitterGen::reverseBits(std::vector<Record*> &Insts) {
 
 // If the VarBitInit at position 'bit' matches the specified variable then
 // return the variable bit position.  Otherwise return -1.
-int CodeEmitterGen::getVariableBit(const std::string &VarName,
+int CodeEmitterGen::getVariableBit(const Init *VarVal,
             BitsInit *BI, int bit) {
   if (VarBitInit *VBI = dynamic_cast<VarBitInit*>(BI->getBit(bit))) {
     TypedInit *TI = VBI->getVariable();
-    
-    if (VarInit *VI = dynamic_cast<VarInit*>(TI)) {
-      if (VI->getName() == VarName) return VBI->getBitNum();
-    }
+    if (TI == VarVal) return VBI->getBitNum();
   }
   
   return -1;
@@ -162,11 +159,11 @@ void CodeEmitterGen::run(raw_ostream &o) {
       if (!Vals[i].getPrefix() && !Vals[i].getValue()->isComplete()) {
         // Is the operand continuous? If so, we can just mask and OR it in
         // instead of doing it bit-by-bit, saving a lot in runtime cost.
-        const std::string &VarName = Vals[i].getName();
+        const Init *VarVal = Vals[i].getValue();
         bool gotOp = false;
         
         for (int bit = BI->getNumBits()-1; bit >= 0; ) {
-          int varBit = getVariableBit(VarName, BI, bit);
+          int varBit = getVariableBit(VarVal, BI, bit);
           
           if (varBit == -1) {
             --bit;
@@ -176,7 +173,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
             int N = 1;
             
             for (--bit; bit >= 0;) {
-              varBit = getVariableBit(VarName, BI, bit);
+              varBit = getVariableBit(VarVal, BI, bit);
               if (varBit == -1 || varBit != (beginVarBit - N)) break;
               ++N;
               --bit;
@@ -188,7 +185,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
               while (CGI.isFlatOperandNotEmitted(op))
                 ++op;
               
-              Case += "      // op: " + VarName + "\n"
+              Case += "      // op: " + Vals[i].getName() + "\n"
                    +  "      op = getMachineOpValue(MI, MI.getOperand("
                    +  utostr(op++) + "));\n";
               gotOp = true;
diff --git a/utils/TableGen/CodeEmitterGen.h b/utils/TableGen/CodeEmitterGen.h
index f0b3229..2dc34ba 100644
--- a/utils/TableGen/CodeEmitterGen.h
+++ b/utils/TableGen/CodeEmitterGen.h
@@ -23,6 +23,7 @@ namespace llvm {
 
 class RecordVal;
 class BitsInit;
+struct Init;
 
 class CodeEmitterGen : public TableGenBackend {
   RecordKeeper &Records;
@@ -35,7 +36,7 @@ private:
   void emitMachineOpEmitter(raw_ostream &o, const std::string &Namespace);
   void emitGetValueBit(raw_ostream &o, const std::string &Namespace);
   void reverseBits(std::vector<Record*> &Insts);
-  int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
+  int getVariableBit(const Init *VarVal, BitsInit *BI, int bit);
 };
 
 } // End llvm namespace
diff --git a/utils/TableGen/CodeGenDAGPatterns.h b/utils/TableGen/CodeGenDAGPatterns.h
index 398764b..c51232a 100644
--- a/utils/TableGen/CodeGenDAGPatterns.h
+++ b/utils/TableGen/CodeGenDAGPatterns.h
@@ -445,7 +445,7 @@ struct PatternToMatch {
                  const std::vector<Record*> &dstregs,
                  unsigned complexity):
     Predicates(preds), SrcPattern(src), DstPattern(dst), Dstregs(dstregs),
-    AddedComplexity(complexity) {};
+    AddedComplexity(complexity) {}
 
   ListInit        *Predicates;  // Top level predicate conditions to match.
   TreePatternNode *SrcPattern;  // Source pattern to match.
diff --git a/utils/TableGen/LLVMCConfigurationEmitter.cpp b/utils/TableGen/LLVMCConfigurationEmitter.cpp
index 8b55b81..5be9ab7 100644
--- a/utils/TableGen/LLVMCConfigurationEmitter.cpp
+++ b/utils/TableGen/LLVMCConfigurationEmitter.cpp
@@ -15,8 +15,6 @@
 #include "Record.h"
 
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringSet.h"
 #include <algorithm>
@@ -98,10 +96,13 @@ const std::string GetOperatorName(const DagInit& D) {
 
 // checkNumberOfArguments - Ensure that the number of args in d is
 // greater than or equal to min_arguments, otherwise throw an exception.
-void checkNumberOfArguments (const DagInit* d, unsigned min_arguments) {
-  if (!d || d->getNumArgs() < min_arguments)
+void checkNumberOfArguments (const DagInit* d, unsigned minArgs) {
+  if (!d || d->getNumArgs() < minArgs)
     throw GetOperatorName(d) + ": too few arguments!";
 }
+void checkNumberOfArguments (const DagInit& d, unsigned minArgs) {
+  checkNumberOfArguments(&d, minArgs);
+}
 
 // isDagEmpty - is this DAG marked with an empty marker?
 bool isDagEmpty (const DagInit* d) {
@@ -208,7 +209,8 @@ OptionType::OptionType stringToOptionType(const std::string& T) {
 namespace OptionDescriptionFlags {
   enum OptionDescriptionFlags { Required = 0x1, Hidden = 0x2,
                                 ReallyHidden = 0x4, Extern = 0x8,
-                                OneOrMore = 0x10, ZeroOrOne = 0x20 };
+                                OneOrMore = 0x10, Optional = 0x20,
+                                CommaSeparated = 0x40 };
 }
 
 /// OptionDescription - Represents data contained in a single
@@ -244,6 +246,9 @@ struct OptionDescription {
 
   bool isMultiVal() const;
 
+  bool isCommaSeparated() const;
+  void setCommaSeparated();
+
   bool isExtern() const;
   void setExtern();
 
@@ -253,8 +258,8 @@ struct OptionDescription {
   bool isOneOrMore() const;
   void setOneOrMore();
 
-  bool isZeroOrOne() const;
-  void setZeroOrOne();
+  bool isOptional() const;
+  void setOptional();
 
   bool isHidden() const;
   void setHidden();
@@ -296,6 +301,13 @@ bool OptionDescription::isMultiVal() const {
   return MultiVal > 1;
 }
 
+bool OptionDescription::isCommaSeparated() const {
+  return Flags & OptionDescriptionFlags::CommaSeparated;
+}
+void OptionDescription::setCommaSeparated() {
+  Flags |= OptionDescriptionFlags::CommaSeparated;
+}
+
 bool OptionDescription::isExtern() const {
   return Flags & OptionDescriptionFlags::Extern;
 }
@@ -317,11 +329,11 @@ void OptionDescription::setOneOrMore() {
   Flags |= OptionDescriptionFlags::OneOrMore;
 }
 
-bool OptionDescription::isZeroOrOne() const {
-  return Flags & OptionDescriptionFlags::ZeroOrOne;
+bool OptionDescription::isOptional() const {
+  return Flags & OptionDescriptionFlags::Optional;
 }
-void OptionDescription::setZeroOrOne() {
-  Flags |= OptionDescriptionFlags::ZeroOrOne;
+void OptionDescription::setOptional() {
+  Flags |= OptionDescriptionFlags::Optional;
 }
 
 bool OptionDescription::isHidden() const {
@@ -456,54 +468,64 @@ void OptionDescriptions::InsertDescription (const OptionDescription& o) {
 
 /// HandlerTable - A base class for function objects implemented as
 /// 'tables of handlers'.
-template <class T>
+template <typename Handler>
 class HandlerTable {
 protected:
   // Implementation details.
 
-  /// Handler -
-  typedef void (T::* Handler) (const DagInit*);
   /// HandlerMap - A map from property names to property handlers
   typedef StringMap<Handler> HandlerMap;
 
   static HandlerMap Handlers_;
   static bool staticMembersInitialized_;
 
-  T* childPtr;
 public:
 
-  HandlerTable(T* cp) : childPtr(cp)
-  {}
-
-  /// operator() - Just forwards to the corresponding property
-  /// handler.
-  void operator() (Init* i) {
-    const DagInit& property = InitPtrToDag(i);
-    const std::string& property_name = GetOperatorName(property);
-    typename HandlerMap::iterator method = Handlers_.find(property_name);
+  Handler GetHandler (const std::string& HandlerName) const {
+    typename HandlerMap::iterator method = Handlers_.find(HandlerName);
 
     if (method != Handlers_.end()) {
       Handler h = method->second;
-      (childPtr->*h)(&property);
+      return h;
     }
     else {
-      throw "No handler found for property " + property_name + "!";
+      throw "No handler found for property " + HandlerName + "!";
     }
   }
 
-  void AddHandler(const char* Property, Handler Handl) {
-    Handlers_[Property] = Handl;
+  void AddHandler(const char* Property, Handler H) {
+    Handlers_[Property] = H;
   }
+
 };
 
-template <class T> typename HandlerTable<T>::HandlerMap
-HandlerTable<T>::Handlers_;
-template <class T> bool HandlerTable<T>::staticMembersInitialized_ = false;
+template <class FunctionObject>
+void InvokeDagInitHandler(FunctionObject* Obj, Init* i) {
+  typedef void (FunctionObject::*Handler) (const DagInit*);
+
+  const DagInit& property = InitPtrToDag(i);
+  const std::string& property_name = GetOperatorName(property);
+  Handler h = Obj->GetHandler(property_name);
+
+  ((Obj)->*(h))(&property);
+}
+
+template <typename H>
+typename HandlerTable<H>::HandlerMap HandlerTable<H>::Handlers_;
+
+template <typename H>
+bool HandlerTable<H>::staticMembersInitialized_ = false;
 
 
 /// CollectOptionProperties - Function object for iterating over an
 /// option property list.
-class CollectOptionProperties : public HandlerTable<CollectOptionProperties> {
+class CollectOptionProperties;
+typedef void (CollectOptionProperties::* CollectOptionPropertiesHandler)
+(const DagInit*);
+
+class CollectOptionProperties
+: public HandlerTable<CollectOptionPropertiesHandler>
+{
 private:
 
   /// optDescs_ - OptionDescriptions table. This is where the
@@ -513,7 +535,7 @@ private:
 public:
 
   explicit CollectOptionProperties(OptionDescription& OD)
-    : HandlerTable<CollectOptionProperties>(this), optDesc_(OD)
+    : optDesc_(OD)
   {
     if (!staticMembersInitialized_) {
       AddHandler("extern", &CollectOptionProperties::onExtern);
@@ -524,12 +546,19 @@ public:
       AddHandler("one_or_more", &CollectOptionProperties::onOneOrMore);
       AddHandler("really_hidden", &CollectOptionProperties::onReallyHidden);
       AddHandler("required", &CollectOptionProperties::onRequired);
-      AddHandler("zero_or_one", &CollectOptionProperties::onZeroOrOne);
+      AddHandler("optional", &CollectOptionProperties::onOptional);
+      AddHandler("comma_separated", &CollectOptionProperties::onCommaSeparated);
 
       staticMembersInitialized_ = true;
     }
   }
 
+  /// operator() - Just forwards to the corresponding property
+  /// handler.
+  void operator() (Init* i) {
+    InvokeDagInitHandler(this, i);
+  }
+
 private:
 
   /// Option property handlers --
@@ -555,11 +584,18 @@ private:
     optDesc_.setReallyHidden();
   }
 
+  void onCommaSeparated (const DagInit* d) {
+    checkNumberOfArguments(d, 0);
+    if (!optDesc_.isList())
+      throw "'comma_separated' is valid only on list options!";
+    optDesc_.setCommaSeparated();
+  }
+
   void onRequired (const DagInit* d) {
     checkNumberOfArguments(d, 0);
-    if (optDesc_.isOneOrMore())
-      throw std::string("An option can't have both (required) "
-                        "and (one_or_more) properties!");
+    if (optDesc_.isOneOrMore() || optDesc_.isOptional())
+      throw "Only one of (required), (optional) or "
+        "(one_or_more) properties is allowed!";
     optDesc_.setRequired();
   }
 
@@ -572,42 +608,41 @@ private:
     correct |= (optDesc_.isSwitch() && (str == "true" || str == "false"));
 
     if (!correct)
-      throw std::string("Incorrect usage of the 'init' option property!");
+      throw "Incorrect usage of the 'init' option property!";
 
     optDesc_.InitVal = i;
   }
 
   void onOneOrMore (const DagInit* d) {
     checkNumberOfArguments(d, 0);
-    if (optDesc_.isRequired() || optDesc_.isZeroOrOne())
-      throw std::string("Only one of (required), (zero_or_one) or "
-                        "(one_or_more) properties is allowed!");
+    if (optDesc_.isRequired() || optDesc_.isOptional())
+      throw "Only one of (required), (optional) or "
+        "(one_or_more) properties is allowed!";
     if (!OptionType::IsList(optDesc_.Type))
       llvm::errs() << "Warning: specifying the 'one_or_more' property "
         "on a non-list option will have no effect.\n";
     optDesc_.setOneOrMore();
   }
 
-  void onZeroOrOne (const DagInit* d) {
+  void onOptional (const DagInit* d) {
     checkNumberOfArguments(d, 0);
     if (optDesc_.isRequired() || optDesc_.isOneOrMore())
-      throw std::string("Only one of (required), (zero_or_one) or "
-                        "(one_or_more) properties is allowed!");
+      throw "Only one of (required), (optional) or "
+        "(one_or_more) properties is allowed!";
     if (!OptionType::IsList(optDesc_.Type))
-      llvm::errs() << "Warning: specifying the 'zero_or_one' property"
+      llvm::errs() << "Warning: specifying the 'optional' property"
         "on a non-list option will have no effect.\n";
-    optDesc_.setZeroOrOne();
+    optDesc_.setOptional();
   }
 
   void onMultiVal (const DagInit* d) {
     checkNumberOfArguments(d, 1);
     int val = InitPtrToInt(d->getArg(0));
     if (val < 2)
-      throw std::string("Error in the 'multi_val' property: "
-                        "the value must be greater than 1!");
+      throw "Error in the 'multi_val' property: "
+        "the value must be greater than 1!";
     if (!OptionType::IsList(optDesc_.Type))
-      throw std::string("The multi_val property is valid only "
-                        "on list options!");
+      throw "The multi_val property is valid only on list options!";
     optDesc_.MultiVal = val;
   }
 
@@ -712,7 +747,13 @@ typedef std::vector<IntrusiveRefCntPtr<ToolDescription> > ToolDescriptions;
 
 /// CollectToolProperties - Function object for iterating over a list of
 /// tool property records.
-class CollectToolProperties : public HandlerTable<CollectToolProperties> {
+
+class CollectToolProperties;
+typedef void (CollectToolProperties::* CollectToolPropertiesHandler)
+(const DagInit*);
+
+class CollectToolProperties : public HandlerTable<CollectToolPropertiesHandler>
+{
 private:
 
   /// toolDesc_ - Properties of the current Tool. This is where the
@@ -722,7 +763,7 @@ private:
 public:
 
   explicit CollectToolProperties (ToolDescription& d)
-    : HandlerTable<CollectToolProperties>(this) , toolDesc_(d)
+    : toolDesc_(d)
   {
     if (!staticMembersInitialized_) {
 
@@ -738,6 +779,10 @@ public:
     }
   }
 
+  void operator() (Init* i) {
+    InvokeDagInitHandler(this, i);
+  }
+
 private:
 
   /// Property handlers --
@@ -749,8 +794,7 @@ private:
     Init* Case = d->getArg(0);
     if (typeid(*Case) != typeid(DagInit) ||
         GetOperatorName(static_cast<DagInit*>(Case)) != "case")
-      throw
-        std::string("The argument to (actions) should be a 'case' construct!");
+      throw "The argument to (actions) should be a 'case' construct!";
     toolDesc_.Actions = Case;
   }
 
@@ -851,8 +895,8 @@ int CalculatePriority(RecordVector::const_iterator B,
     priority  = static_cast<int>((*B)->getValueAsInt("priority"));
 
     if (++B != E)
-      throw std::string("More than one 'PluginPriority' instance found: "
-                        "most probably an error!");
+      throw "More than one 'PluginPriority' instance found: "
+        "most probably an error!";
   }
 
   return priority;
@@ -943,7 +987,7 @@ void TypecheckGraph (const RecordVector& EdgeVector,
     }
 
     if (NodeB == "root")
-      throw std::string("Edges back to the root are not allowed!");
+      throw "Edges back to the root are not allowed!";
   }
 }
 
@@ -963,7 +1007,7 @@ void WalkCase(const Init* Case, F1 TestCallback, F2 StatementCallback,
 
   // Error checks.
   if (GetOperatorName(d) != "case")
-    throw std::string("WalkCase should be invoked only on 'case' expressions!");
+    throw "WalkCase should be invoked only on 'case' expressions!";
 
   if (d.getNumArgs() < 2)
     throw "There should be at least one clause in the 'case' expression:\n"
@@ -983,8 +1027,8 @@ void WalkCase(const Init* Case, F1 TestCallback, F2 StatementCallback,
       const DagInit& Test = InitPtrToDag(arg);
 
       if (GetOperatorName(Test) == "default" && (i+1 != numArgs))
-        throw std::string("The 'default' clause should be the last in the"
-                          "'case' construct!");
+        throw "The 'default' clause should be the last in the "
+          "'case' construct!";
       if (i == numArgs)
         throw "Case construct handler: no corresponding action "
           "found for the test " + Test.getAsString() + '!';
@@ -1017,9 +1061,11 @@ class ExtractOptionNames {
     const DagInit& Stmt = InitPtrToDag(Statement);
     const std::string& ActionName = GetOperatorName(Stmt);
     if (ActionName == "forward" || ActionName == "forward_as" ||
-        ActionName == "unpack_values" || ActionName == "switch_on" ||
-        ActionName == "parameter_equals" || ActionName == "element_in_list" ||
-        ActionName == "not_empty" || ActionName == "empty") {
+        ActionName == "forward_value" ||
+        ActionName == "forward_transformed_value" ||
+        ActionName == "switch_on" || ActionName == "parameter_equals" ||
+        ActionName == "element_in_list" || ActionName == "not_empty" ||
+        ActionName == "empty") {
       checkNumberOfArguments(&Stmt, 1);
       const std::string& Name = InitPtrToString(Stmt.getArg(0));
       OptionNames_.insert(Name);
@@ -1406,9 +1452,9 @@ void EmitCaseConstructHandler(const Init* Case, unsigned IndentLevel,
            EmitCaseStatementCallback<F>(Callback, O), IndentLevel);
 }
 
-/// TokenizeCmdline - converts from "$CALL(HookName, 'Arg1', 'Arg2')/path" to
-/// ["$CALL(", "HookName", "Arg1", "Arg2", ")/path"] .
-/// Helper function used by EmitCmdLineVecFill and.
+/// TokenizeCmdline - converts from
+/// "$CALL(HookName, 'Arg1', 'Arg2')/path -arg1 -arg2" to
+/// ["$CALL(", "HookName", "Arg1", "Arg2", ")/path", "-arg1", "-arg2"].
 void TokenizeCmdline(const std::string& CmdLine, StrVector& Out) {
   const char* Delimiters = " \t\n\v\f\r";
   enum TokenizerState
@@ -1489,62 +1535,99 @@ void TokenizeCmdline(const std::string& CmdLine, StrVector& Out) {
   }
 }
 
-/// SubstituteSpecialCommands - Perform string substitution for $CALL
-/// and $ENV. Helper function used by EmitCmdLineVecFill().
-StrVector::const_iterator SubstituteSpecialCommands
-(StrVector::const_iterator Pos, StrVector::const_iterator End, raw_ostream& O)
+/// SubstituteCall - Given "$CALL(HookName, [Arg1 [, Arg2 [...]]])", output
+/// "hooks::HookName([Arg1 [, Arg2 [, ...]]])". Helper function used by
+/// SubstituteSpecialCommands().
+StrVector::const_iterator
+SubstituteCall (StrVector::const_iterator Pos,
+                StrVector::const_iterator End,
+                bool IsJoin, raw_ostream& O)
 {
+  const char* errorMessage = "Syntax error in $CALL invocation!";
+  checkedIncrement(Pos, End, errorMessage);
+  const std::string& CmdName = *Pos;
 
-  const std::string& cmd = *Pos;
-
-  if (cmd == "$CALL") {
-    checkedIncrement(Pos, End, "Syntax error in $CALL invocation!");
-    const std::string& CmdName = *Pos;
+  if (CmdName == ")")
+    throw "$CALL invocation: empty argument list!";
 
-    if (CmdName == ")")
-      throw std::string("$CALL invocation: empty argument list!");
+  O << "hooks::";
+  O << CmdName << "(";
 
-    O << "hooks::";
-    O << CmdName << "(";
 
+  bool firstIteration = true;
+  while (true) {
+    checkedIncrement(Pos, End, errorMessage);
+    const std::string& Arg = *Pos;
+    assert(Arg.size() != 0);
 
-    bool firstIteration = true;
-    while (true) {
-      checkedIncrement(Pos, End, "Syntax error in $CALL invocation!");
-      const std::string& Arg = *Pos;
-      assert(Arg.size() != 0);
+    if (Arg[0] == ')')
+      break;
 
-      if (Arg[0] == ')')
-        break;
+    if (firstIteration)
+      firstIteration = false;
+    else
+      O << ", ";
 
-      if (firstIteration)
-        firstIteration = false;
+    if (Arg == "$INFILE") {
+      if (IsJoin)
+        throw "$CALL(Hook, $INFILE) can't be used with a Join tool!";
       else
-        O << ", ";
-
+        O << "inFile.c_str()";
+    }
+    else {
       O << '"' << Arg << '"';
     }
+  }
 
-    O << ')';
+  O << ')';
 
-  }
-  else if (cmd == "$ENV") {
-    checkedIncrement(Pos, End, "Syntax error in $ENV invocation!");
-    const std::string& EnvName = *Pos;
+  return Pos;
+}
+
+/// SubstituteEnv - Given '$ENV(VAR_NAME)', output 'getenv("VAR_NAME")'. Helper
+/// function used by SubstituteSpecialCommands().
+StrVector::const_iterator
+SubstituteEnv (StrVector::const_iterator Pos,
+               StrVector::const_iterator End, raw_ostream& O)
+{
+  const char* errorMessage = "Syntax error in $ENV invocation!";
+  checkedIncrement(Pos, End, errorMessage);
+  const std::string& EnvName = *Pos;
+
+  if (EnvName == ")")
+    throw "$ENV invocation: empty argument list!";
+
+  O << "checkCString(std::getenv(\"";
+  O << EnvName;
+  O << "\"))";
 
-    if (EnvName == ")")
-      throw "$ENV invocation: empty argument list!";
+  checkedIncrement(Pos, End, errorMessage);
+
+  return Pos;
+}
+
+/// SubstituteSpecialCommands - Given an invocation of $CALL or $ENV, output
+/// handler code. Helper function used by EmitCmdLineVecFill().
+StrVector::const_iterator
+SubstituteSpecialCommands (StrVector::const_iterator Pos,
+                           StrVector::const_iterator End,
+                           bool IsJoin, raw_ostream& O)
+{
 
-    O << "checkCString(std::getenv(\"";
-    O << EnvName;
-    O << "\"))";
+  const std::string& cmd = *Pos;
 
-    checkedIncrement(Pos, End, "Syntax error in $ENV invocation!");
+  // Perform substitution.
+  if (cmd == "$CALL") {
+    Pos = SubstituteCall(Pos, End, IsJoin, O);
+  }
+  else if (cmd == "$ENV") {
+    Pos = SubstituteEnv(Pos, End, O);
   }
   else {
     throw "Unknown special command: " + cmd;
   }
 
+  // Handle '$CMD(ARG)/additional/text'.
   const std::string& Leftover = *Pos;
   assert(Leftover.at(0) == ')');
   if (Leftover.size() != 1)
@@ -1604,7 +1687,7 @@ void EmitCmdLineVecFill(const Init* CmdLine, const std::string& ToolName,
       }
       else {
         O << "vec.push_back(";
-        I = SubstituteSpecialCommands(I, E, O);
+        I = SubstituteSpecialCommands(I, E, IsJoin, O);
         O << ");\n";
       }
     }
@@ -1617,7 +1700,7 @@ void EmitCmdLineVecFill(const Init* CmdLine, const std::string& ToolName,
 
   O.indent(IndentLevel) << "cmd = ";
   if (StrVec[0][0] == '$')
-    SubstituteSpecialCommands(StrVec.begin(), StrVec.end(), O);
+    SubstituteSpecialCommands(StrVec.begin(), StrVec.end(), IsJoin, O);
   else
     O << '"' << StrVec[0] << '"';
   O << ";\n";
@@ -1697,7 +1780,7 @@ void EmitForwardOptionPropertyHandlingCode (const OptionDescription& D,
     break;
   case OptionType::Alias:
   default:
-    throw std::string("Aliases are not allowed in tool option descriptions!");
+    throw "Aliases are not allowed in tool option descriptions!";
   }
 }
 
@@ -1727,90 +1810,153 @@ struct ActionHandlingCallbackBase {
 
 /// EmitActionHandlersCallback - Emit code that handles actions. Used by
 /// EmitGenerateActionMethod() as an argument to EmitCaseConstructHandler().
-class EmitActionHandlersCallback : ActionHandlingCallbackBase {
+class EmitActionHandlersCallback;
+typedef void (EmitActionHandlersCallback::* EmitActionHandlersCallbackHandler)
+(const DagInit&, unsigned, raw_ostream&) const;
+
+class EmitActionHandlersCallback
+: public ActionHandlingCallbackBase,
+  public HandlerTable<EmitActionHandlersCallbackHandler>
+{
   const OptionDescriptions& OptDescs;
+  typedef EmitActionHandlersCallbackHandler Handler;
 
-  void processActionDag(const Init* Statement, unsigned IndentLevel,
-                        raw_ostream& O) const
+  /// EmitHookInvocation - Common code for hook invocation from actions. Used by
+  /// onAppendCmd and onOutputSuffix.
+  void EmitHookInvocation(const std::string& Str,
+                          const char* BlockOpen, const char* BlockClose,
+                          unsigned IndentLevel, raw_ostream& O) const
   {
-    const DagInit& Dag = InitPtrToDag(Statement);
-    const std::string& ActionName = GetOperatorName(Dag);
+    StrVector Out;
+    TokenizeCmdline(Str, Out);
 
-    if (ActionName == "append_cmd") {
-      checkNumberOfArguments(&Dag, 1);
-      const std::string& Cmd = InitPtrToString(Dag.getArg(0));
-      StrVector Out;
-      llvm::SplitString(Cmd, Out);
+    for (StrVector::const_iterator B = Out.begin(), E = Out.end();
+         B != E; ++B) {
+      const std::string& cmd = *B;
 
-      for (StrVector::const_iterator B = Out.begin(), E = Out.end();
-           B != E; ++B)
-        O.indent(IndentLevel) << "vec.push_back(\"" << *B << "\");\n";
-    }
-    else if (ActionName == "error") {
-      this->onErrorDag(Dag, IndentLevel, O);
-    }
-    else if (ActionName == "warning") {
-      this->onWarningDag(Dag, IndentLevel, O);
-    }
-    else if (ActionName == "forward") {
-      checkNumberOfArguments(&Dag, 1);
-      const std::string& Name = InitPtrToString(Dag.getArg(0));
-      EmitForwardOptionPropertyHandlingCode(OptDescs.FindOption(Name),
-                                            IndentLevel, "", O);
-    }
-    else if (ActionName == "forward_as") {
-      checkNumberOfArguments(&Dag, 2);
-      const std::string& Name = InitPtrToString(Dag.getArg(0));
-      const std::string& NewName = InitPtrToString(Dag.getArg(1));
-      EmitForwardOptionPropertyHandlingCode(OptDescs.FindOption(Name),
-                                            IndentLevel, NewName, O);
-    }
-    else if (ActionName == "output_suffix") {
-      checkNumberOfArguments(&Dag, 1);
-      const std::string& OutSuf = InitPtrToString(Dag.getArg(0));
-      O.indent(IndentLevel) << "output_suffix = \"" << OutSuf << "\";\n";
-    }
-    else if (ActionName == "stop_compilation") {
-      O.indent(IndentLevel) << "stop_compilation = true;\n";
+      O.indent(IndentLevel) << BlockOpen;
+
+      if (cmd.at(0) == '$')
+        B = SubstituteSpecialCommands(B, E,  /* IsJoin = */ true, O);
+      else
+        O << '"' << cmd << '"';
+
+      O << BlockClose;
     }
-    else if (ActionName == "unpack_values") {
-      checkNumberOfArguments(&Dag, 1);
-      const std::string& Name = InitPtrToString(Dag.getArg(0));
-      const OptionDescription& D = OptDescs.FindOption(Name);
-
-      if (D.isMultiVal())
-        throw std::string("Can't use unpack_values with multi-valued options!");
-
-      if (D.isList()) {
-        O.indent(IndentLevel)
-          << "for (" << D.GenTypeDeclaration()
-          << "::iterator B = " << D.GenVariableName() << ".begin(),\n";
-        O.indent(IndentLevel)
-          << "E = " << D.GenVariableName() << ".end(); B != E; ++B)\n";
-        O.indent(IndentLevel + Indent1)
-          << "llvm::SplitString(*B, vec, \",\");\n";
-      }
-      else if (D.isParameter()){
-        O.indent(IndentLevel) << "llvm::SplitString("
-                              << D.GenVariableName() << ", vec, \",\");\n";
-      }
-      else {
-        throw "Option '" + D.Name +
-          "': switches can't have the 'unpack_values' property!";
-      }
+  }
+
+  void onAppendCmd (const DagInit& Dag,
+                    unsigned IndentLevel, raw_ostream& O) const
+  {
+    checkNumberOfArguments(&Dag, 1);
+    this->EmitHookInvocation(InitPtrToString(Dag.getArg(0)),
+                             "vec.push_back(", ");\n", IndentLevel, O);
+  }
+
+  void onForward (const DagInit& Dag,
+                  unsigned IndentLevel, raw_ostream& O) const
+  {
+    checkNumberOfArguments(&Dag, 1);
+    const std::string& Name = InitPtrToString(Dag.getArg(0));
+    EmitForwardOptionPropertyHandlingCode(OptDescs.FindOption(Name),
+                                          IndentLevel, "", O);
+  }
+
+  void onForwardAs (const DagInit& Dag,
+                    unsigned IndentLevel, raw_ostream& O) const
+  {
+    checkNumberOfArguments(&Dag, 2);
+    const std::string& Name = InitPtrToString(Dag.getArg(0));
+    const std::string& NewName = InitPtrToString(Dag.getArg(1));
+    EmitForwardOptionPropertyHandlingCode(OptDescs.FindOption(Name),
+                                          IndentLevel, NewName, O);
+  }
+
+  void onForwardValue (const DagInit& Dag,
+                       unsigned IndentLevel, raw_ostream& O) const
+  {
+    checkNumberOfArguments(&Dag, 1);
+    const std::string& Name = InitPtrToString(Dag.getArg(0));
+    const OptionDescription& D = OptDescs.FindListOrParameter(Name);
+
+    if (D.isParameter()) {
+      O.indent(IndentLevel) << "vec.push_back("
+                            << D.GenVariableName() << ");\n";
     }
     else {
-      throw "Unknown action name: " + ActionName + "!";
+      O.indent(IndentLevel) << "std::copy(" << D.GenVariableName()
+                            << ".begin(), " << D.GenVariableName()
+                            << ".end(), std::back_inserter(vec));\n";
     }
   }
+
+  void onForwardTransformedValue (const DagInit& Dag,
+                                  unsigned IndentLevel, raw_ostream& O) const
+  {
+    checkNumberOfArguments(&Dag, 2);
+    const std::string& Name = InitPtrToString(Dag.getArg(0));
+    const std::string& Hook = InitPtrToString(Dag.getArg(1));
+    const OptionDescription& D = OptDescs.FindListOrParameter(Name);
+
+    O.indent(IndentLevel) << "vec.push_back(" << "hooks::"
+                          << Hook << "(" << D.GenVariableName()
+                          << (D.isParameter() ? ".c_str()" : "") << "));\n";
+  }
+
+  void onOutputSuffix (const DagInit& Dag,
+                       unsigned IndentLevel, raw_ostream& O) const
+  {
+    checkNumberOfArguments(&Dag, 1);
+    this->EmitHookInvocation(InitPtrToString(Dag.getArg(0)),
+                             "output_suffix = ", ";\n", IndentLevel, O);
+  }
+
+  void onStopCompilation (const DagInit& Dag,
+                          unsigned IndentLevel, raw_ostream& O) const
+  {
+    O.indent(IndentLevel) << "stop_compilation = true;\n";
+  }
+
+
+  void onUnpackValues (const DagInit& Dag,
+                       unsigned IndentLevel, raw_ostream& O) const
+  {
+    throw "'unpack_values' is deprecated. "
+      "Use 'comma_separated' + 'forward_value' instead!";
+  }
+
  public:
-  EmitActionHandlersCallback(const OptionDescriptions& OD)
-    : OptDescs(OD) {}
+
+  explicit EmitActionHandlersCallback(const OptionDescriptions& OD)
+    : OptDescs(OD)
+  {
+    if (!staticMembersInitialized_) {
+      AddHandler("error", &EmitActionHandlersCallback::onErrorDag);
+      AddHandler("warning", &EmitActionHandlersCallback::onWarningDag);
+      AddHandler("append_cmd", &EmitActionHandlersCallback::onAppendCmd);
+      AddHandler("forward", &EmitActionHandlersCallback::onForward);
+      AddHandler("forward_as", &EmitActionHandlersCallback::onForwardAs);
+      AddHandler("forward_value", &EmitActionHandlersCallback::onForwardValue);
+      AddHandler("forward_transformed_value",
+                 &EmitActionHandlersCallback::onForwardTransformedValue);
+      AddHandler("output_suffix", &EmitActionHandlersCallback::onOutputSuffix);
+      AddHandler("stop_compilation",
+                 &EmitActionHandlersCallback::onStopCompilation);
+      AddHandler("unpack_values",
+                 &EmitActionHandlersCallback::onUnpackValues);
+
+      staticMembersInitialized_ = true;
+    }
+  }
 
   void operator()(const Init* Statement,
                   unsigned IndentLevel, raw_ostream& O) const
   {
-    this->processActionDag(Statement, IndentLevel, O);
+    const DagInit& Dag = InitPtrToDag(Statement);
+    const std::string& ActionName = GetOperatorName(Dag);
+    Handler h = GetHandler(ActionName);
+
+    ((this)->*(h))(Dag, IndentLevel, O);
   }
 };
 
@@ -1866,11 +2012,9 @@ bool IsOutFileIndexCheckRequired (Init* CmdLine) {
     return IsOutFileIndexCheckRequiredCase(CmdLine);
 }
 
-// EmitGenerateActionMethod - Emit either a normal or a "join" version of the
-// Tool::GenerateAction() method.
-void EmitGenerateActionMethod (const ToolDescription& D,
-                               const OptionDescriptions& OptDescs,
-                               bool IsJoin, raw_ostream& O) {
+void EmitGenerateActionMethodHeader(const ToolDescription& D,
+                                    bool IsJoin, raw_ostream& O)
+{
   if (IsJoin)
     O.indent(Indent1) << "Action GenerateAction(const PathVector& inFiles,\n";
   else
@@ -1886,6 +2030,15 @@ void EmitGenerateActionMethod (const ToolDescription& D,
   O.indent(Indent2) << "bool stop_compilation = !HasChildren;\n";
   O.indent(Indent2) << "const char* output_suffix = \""
                     << D.OutputSuffix << "\";\n";
+}
+
+// EmitGenerateActionMethod - Emit either a normal or a "join" version of the
+// Tool::GenerateAction() method.
+void EmitGenerateActionMethod (const ToolDescription& D,
+                               const OptionDescriptions& OptDescs,
+                               bool IsJoin, raw_ostream& O) {
+
+  EmitGenerateActionMethodHeader(D, IsJoin, O);
 
   if (!D.CmdLine)
     throw "Tool " + D.Name + " has no cmd_line property!";
@@ -2016,7 +2169,7 @@ void EmitToolClassDefinition (const ToolDescription& D,
   else
     O << "Tool";
 
-  O << "{\nprivate:\n";
+  O << " {\nprivate:\n";
   O.indent(Indent1) << "static const char* InputLanguages_[];\n\n";
 
   O << "public:\n";
@@ -2075,16 +2228,17 @@ void EmitOptionDefinitions (const OptionDescriptions& descs,
     else if (val.isOneOrMore() && val.isList()) {
         O << ", cl::OneOrMore";
     }
-    else if (val.isZeroOrOne() && val.isList()) {
-        O << ", cl::ZeroOrOne";
+    else if (val.isOptional() && val.isList()) {
+        O << ", cl::Optional";
     }
 
-    if (val.isReallyHidden()) {
+    if (val.isReallyHidden())
       O << ", cl::ReallyHidden";
-    }
-    else if (val.isHidden()) {
+    else if (val.isHidden())
       O << ", cl::Hidden";
-    }
+
+    if (val.isCommaSeparated())
+      O << ", cl::CommaSeparated";
 
     if (val.MultiVal > 1)
       O << ", cl::multi_val(" << val.MultiVal << ')';
@@ -2143,7 +2297,7 @@ class EmitPreprocessOptionsCallback : ActionHandlingCallbackBase {
       O.indent(IndentLevel) << OptDesc.GenVariableName() << ".clear();\n";
     }
     else {
-      throw "Can't apply 'unset_option' to alias option '" + OptName + "'";
+      throw "Can't apply 'unset_option' to alias option '" + OptName + "'!";
     }
   }
 
@@ -2221,7 +2375,7 @@ void EmitPopulateLanguageMap (const RecordKeeper& Records, raw_ostream& O)
 
     ListInit* LangsToSuffixesList = LangMapRecord->getValueAsListInit("map");
     if (!LangsToSuffixesList)
-      throw std::string("Error in the language map definition!");
+      throw "Error in the language map definition!";
 
     for (unsigned i = 0; i < LangsToSuffixesList->size(); ++i) {
       const Record* LangToSuffixes = LangsToSuffixesList->getElementAsRecord(i);
@@ -2349,23 +2503,61 @@ void EmitPopulateCompilationGraph (const RecordVector& EdgeVector,
   O << "}\n\n";
 }
 
+/// HookInfo - Information about the hook type and number of arguments.
+struct HookInfo {
+
+  // A hook can either have a single parameter of type std::vector<std::string>,
+  // or NumArgs parameters of type const char*.
+  enum HookType { ListHook, ArgHook };
+
+  HookType Type;
+  unsigned NumArgs;
+
+  HookInfo() : Type(ArgHook), NumArgs(1)
+  {}
+
+  HookInfo(HookType T) : Type(T), NumArgs(1)
+  {}
+
+  HookInfo(unsigned N) : Type(ArgHook), NumArgs(N)
+  {}
+};
+
+typedef llvm::StringMap<HookInfo> HookInfoMap;
+
 /// ExtractHookNames - Extract the hook names from all instances of
-/// $CALL(HookName) in the provided command line string. Helper
+/// $CALL(HookName) in the provided command line string/action. Helper
 /// function used by FillInHookNames().
 class ExtractHookNames {
-  llvm::StringMap<unsigned>& HookNames_;
+  HookInfoMap& HookNames_;
+  const OptionDescriptions& OptDescs_;
 public:
-  ExtractHookNames(llvm::StringMap<unsigned>& HookNames)
-  : HookNames_(HookNames) {}
+  ExtractHookNames(HookInfoMap& HookNames, const OptionDescriptions& OptDescs)
+    : HookNames_(HookNames), OptDescs_(OptDescs)
+  {}
 
-  void operator()(const Init* CmdLine) {
-    StrVector cmds;
+  void onAction (const DagInit& Dag) {
+    const std::string& Name = GetOperatorName(Dag);
 
-    // Ignore nested 'case' DAG.
-    if (typeid(*CmdLine) == typeid(DagInit))
-      return;
+    if (Name == "forward_transformed_value") {
+      checkNumberOfArguments(Dag, 2);
+      const std::string& OptName = InitPtrToString(Dag.getArg(0));
+      const std::string& HookName = InitPtrToString(Dag.getArg(1));
+      const OptionDescription& D = OptDescs_.FindOption(OptName);
+
+      HookNames_[HookName] = HookInfo(D.isList() ? HookInfo::ListHook
+                                      : HookInfo::ArgHook);
+    }
+    else if (Name == "append_cmd" || Name == "output_suffix") {
+      checkNumberOfArguments(Dag, 1);
+      this->onCmdLine(InitPtrToString(Dag.getArg(0)));
+    }
+  }
+
+  void onCmdLine(const std::string& Cmd) {
+    StrVector cmds;
+    TokenizeCmdline(Cmd, cmds);
 
-    TokenizeCmdline(InitPtrToString(CmdLine), cmds);
     for (StrVector::const_iterator B = cmds.begin(), E = cmds.end();
          B != E; ++B) {
       const std::string& cmd = *B;
@@ -2375,7 +2567,6 @@ public:
         checkedIncrement(B, E, "Syntax error in $CALL invocation!");
         const std::string& HookName = *B;
 
-
         if (HookName.at(0) == ')')
           throw "$CALL invoked with no arguments!";
 
@@ -2383,16 +2574,38 @@ public:
           ++NumArgs;
         }
 
-        StringMap<unsigned>::const_iterator H = HookNames_.find(HookName);
+        HookInfoMap::const_iterator H = HookNames_.find(HookName);
 
-        if (H != HookNames_.end() && H->second != NumArgs)
+        if (H != HookNames_.end() && H->second.NumArgs != NumArgs &&
+            H->second.Type != HookInfo::ArgHook)
           throw "Overloading of hooks is not allowed. Overloaded hook: "
             + HookName;
         else
-          HookNames_[HookName] = NumArgs;
+          HookNames_[HookName] = HookInfo(NumArgs);
+      }
+    }
+  }
 
+  void operator()(const Init* Arg) {
+
+    // We're invoked on an action (either a dag or a dag list).
+    if (typeid(*Arg) == typeid(DagInit)) {
+      const DagInit& Dag = InitPtrToDag(Arg);
+      this->onAction(Dag);
+      return;
+    }
+    else if (typeid(*Arg) == typeid(ListInit)) {
+      const ListInit& List = InitPtrToList(Arg);
+      for (ListInit::const_iterator B = List.begin(), E = List.end(); B != E;
+           ++B) {
+        const DagInit& Dag = InitPtrToDag(*B);
+        this->onAction(Dag);
       }
+      return;
     }
+
+    // We're invoked on a command line.
+    this->onCmdLine(InitPtrToString(Arg));
   }
 
   void operator()(const DagInit* Test, unsigned, bool) {
@@ -2406,40 +2619,56 @@ public:
 /// FillInHookNames - Actually extract the hook names from all command
 /// line strings. Helper function used by EmitHookDeclarations().
 void FillInHookNames(const ToolDescriptions& ToolDescs,
-                     llvm::StringMap<unsigned>& HookNames)
+                     const OptionDescriptions& OptDescs,
+                     HookInfoMap& HookNames)
 {
-  // For all command lines:
+  // For all tool descriptions:
   for (ToolDescriptions::const_iterator B = ToolDescs.begin(),
          E = ToolDescs.end(); B != E; ++B) {
     const ToolDescription& D = *(*B);
+
+    // Look for 'forward_transformed_value' in 'actions'.
+    if (D.Actions)
+      WalkCase(D.Actions, Id(), ExtractHookNames(HookNames, OptDescs));
+
+    // Look for hook invocations in 'cmd_line'.
     if (!D.CmdLine)
       continue;
     if (dynamic_cast<StringInit*>(D.CmdLine))
       // This is a string.
-      ExtractHookNames(HookNames).operator()(D.CmdLine);
+      ExtractHookNames(HookNames, OptDescs).operator()(D.CmdLine);
     else
       // This is a 'case' construct.
-      WalkCase(D.CmdLine, Id(), ExtractHookNames(HookNames));
+      WalkCase(D.CmdLine, Id(), ExtractHookNames(HookNames, OptDescs));
   }
 }
 
 /// EmitHookDeclarations - Parse CmdLine fields of all the tool
 /// property records and emit hook function declaration for each
 /// instance of $CALL(HookName).
-void EmitHookDeclarations(const ToolDescriptions& ToolDescs, raw_ostream& O) {
-  llvm::StringMap<unsigned> HookNames;
+void EmitHookDeclarations(const ToolDescriptions& ToolDescs,
+                          const OptionDescriptions& OptDescs, raw_ostream& O) {
+  HookInfoMap HookNames;
 
-  FillInHookNames(ToolDescs, HookNames);
+  FillInHookNames(ToolDescs, OptDescs, HookNames);
   if (HookNames.empty())
     return;
 
   O << "namespace hooks {\n";
-  for (StringMap<unsigned>::const_iterator B = HookNames.begin(),
+  for (HookInfoMap::const_iterator B = HookNames.begin(),
          E = HookNames.end(); B != E; ++B) {
-    O.indent(Indent1) << "std::string " << B->first() << "(";
+    const char* HookName = B->first();
+    const HookInfo& Info = B->second;
+
+    O.indent(Indent1) << "std::string " << HookName << "(";
 
-    for (unsigned i = 0, j = B->second; i < j; ++i) {
-      O << "const char* Arg" << i << (i+1 == j ? "" : ", ");
+    if (Info.Type == HookInfo::ArgHook) {
+      for (unsigned i = 0, j = Info.NumArgs; i < j; ++i) {
+        O << "const char* Arg" << i << (i+1 == j ? "" : ", ");
+      }
+    }
+    else {
+      O << "const std::vector<std::string>& Arg";
     }
 
     O <<");\n";
@@ -2472,11 +2701,12 @@ void EmitIncludes(raw_ostream& O) {
     << "#include \"llvm/CompilerDriver/Plugin.h\"\n"
     << "#include \"llvm/CompilerDriver/Tool.h\"\n\n"
 
-    << "#include \"llvm/ADT/StringExtras.h\"\n"
     << "#include \"llvm/Support/CommandLine.h\"\n"
     << "#include \"llvm/Support/raw_ostream.h\"\n\n"
 
+    << "#include <algorithm>\n"
     << "#include <cstdlib>\n"
+    << "#include <iterator>\n"
     << "#include <stdexcept>\n\n"
 
     << "using namespace llvm;\n"
@@ -2570,7 +2800,7 @@ void EmitPluginCode(const PluginData& Data, raw_ostream& O) {
   EmitOptionDefinitions(Data.OptDescs, Data.HasSink, Data.HasExterns, O);
 
   // Emit hook declarations.
-  EmitHookDeclarations(Data.ToolDescs, O);
+  EmitHookDeclarations(Data.ToolDescs, Data.OptDescs, O);
 
   O << "namespace {\n\n";
 
diff --git a/utils/TableGen/OptParserEmitter.cpp b/utils/TableGen/OptParserEmitter.cpp
index ce1aef5..3cd5784 100644
--- a/utils/TableGen/OptParserEmitter.cpp
+++ b/utils/TableGen/OptParserEmitter.cpp
@@ -127,7 +127,18 @@ void OptParserEmitter::run(raw_ostream &OS) {
         OS << "INVALID";
 
       // The other option arguments (unused for groups).
-      OS << ", INVALID, 0, 0, 0, 0)\n";
+      OS << ", INVALID, 0, 0";
+
+      // The option help text.
+      if (!dynamic_cast<UnsetInit*>(R.getValueInit("HelpText"))) {
+        OS << ",\n";
+        OS << "       ";
+        write_cstring(OS, R.getValueAsString("HelpText"));
+      } else
+        OS << ", 0";
+
+      // The option meta-variable name (unused).
+      OS << ", 0)\n";
     }
     OS << "\n";
 
diff --git a/utils/TableGen/RegisterInfoEmitter.cpp b/utils/TableGen/RegisterInfoEmitter.cpp
index bf0721e..fcf4123 100644
--- a/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/utils/TableGen/RegisterInfoEmitter.cpp
@@ -162,7 +162,7 @@ private:
 
 public:
   RegisterSorter(std::map<Record*, std::set<Record*>, LessRecord> &RS)
-    : RegisterSubRegs(RS) {};
+    : RegisterSubRegs(RS) {}
 
   bool operator()(Record *RegA, Record *RegB) {
     // B is sub-register of A.
diff --git a/utils/lit/lit.py b/utils/lit/lit.py
index dcdce7d..293976f 100755
--- a/utils/lit/lit.py
+++ b/utils/lit/lit.py
@@ -230,7 +230,7 @@ def getTests(path, litConfig, testSuiteCache, localConfigCache):
     ts,path_in_suite = getTestSuite(path, litConfig, testSuiteCache)
     if ts is None:
         litConfig.warning('unable to find test suite for %r' % path)
-        return ()
+        return (),()
 
     if litConfig.debug:
         litConfig.note('resolved input %r to %r::%r' % (path, ts.name,